From 07d02e345e852b814dbd302d53d7e424c24a027c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Petazzoni?= Date: Tue, 4 Jan 2022 12:50:20 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A0=EF=B8=8F=20=20Add=20script=20to=20?= =?UTF-8?q?find=20unmerged=20changes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- slides/find-unmerged-changes.sh | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100755 slides/find-unmerged-changes.sh diff --git a/slides/find-unmerged-changes.sh b/slides/find-unmerged-changes.sh new file mode 100755 index 00000000..5baf8c1c --- /dev/null +++ b/slides/find-unmerged-changes.sh @@ -0,0 +1,60 @@ +#!/bin/sh + +# The materials for a given training live in their own branch. +# Sometimes, we write custom content (or simply new content) for a training, +# and that content doesn't get merged back to main. This script tries to +# detect that with the following heuristics: +# - list all remote branches +# - for each remote branch, list the changes that weren't merged into main +# (using "diff main...$BRANCH", three dots) +# - ignore a bunch of training-specific files that change all the time anyway +# - for the remaining files, compute the diff between main and the branch +# (using "diff main..$BRANCH", two dots) +# - ignore changes of less than 10 lines +# - also ignore a few red herrings +# - display whatever is left + +# For "git diff" (in the filter function) to work correctly, we must be +# at the root of the repo. 
# Reads candidate file names (one per line) on stdin and prints, for each
# one whose content grew noticeably on $BRANCH compared to main, the
# "git diff" command that shows the difference.
# Globals:   BRANCH (read) - the branch being compared against main
# Inputs:    stdin - file names, one per line, relative to the repo root
# Outputs:   stdout - one "git diff main..$BRANCH -- <path>" line per hit
# Note:      kept POSIX sh compatible, hence no "local" variables.
filter() {
  while IFS= read -r filename; do
    # Start from the default for every file. Setting it only once before
    # the loop would let an override picked for one file stay in effect
    # for every file listed after it.
    threshold=10
    case $filename in
      # Generic training-specific files
      slides/*.html) continue;;
      slides/*.yml) continue;;
      slides/logistics*.md) continue;;
      # Specific content that can be ignored
      #slides/containers/Local_Environment.md) threshold=100;;
      # Content that was moved/refactored enough to confuse us
      slides/containers/Local_Environment.md) threshold=100;;
      slides/exercises.md) continue;;
      # NOTE(review): no extension or glob here, so this only matches the
      # exact path "slides/k8s/batch-jobs" - confirm that "batch-jobs.md"
      # was not intended.
      slides/k8s/batch-jobs) threshold=20;;
      # Renames
      */{*}*) continue;;
    esac
    git diff --find-renames --numstat "main..$BRANCH" -- "$filename" | {
      # Each numstat line is "<added> <deleted> <path>".
      # - If the files are identical there is no line and "read" fails.
      # - For binary files the counters are "-"; ignore those
      #   (FIXME though?).
      if read -r added deleted path && [ "$added" != - ]; then
        # Net growth: lines added minus lines deleted on the branch.
        if [ $((added - deleted)) -gt "$threshold" ]; then
          printf 'git diff main..%s -- %s\n' "$BRANCH" "$path"
        fi
      fi
    }
  done
}

# Walks the selected remote branches and, for each one that still has
# noteworthy changes not merged into main, prints the branch name followed
# by the "git diff" commands produced by filter.
# Globals:   BRANCHES, BRANCH, FILES (written)
# Returns:   non-zero if the repository root cannot be located or entered
main() {
  # The "git diff -- <path>" calls in filter take paths relative to the
  # root of the repo, so everything has to run from there.
  repo_root=$(git rev-parse --show-toplevel) || {
    echo "find-unmerged-changes: cannot locate the repository root" >&2
    return 1
  }
  cd "$repo_root" || return 1

  # Keep the remote branches matching "origin/2", minus the origin/HEAD
  # pointer (presumably the date-named training branches - confirm).
  BRANCHES=$(git branch -r | grep -v origin/HEAD | grep origin/2)

  # $BRANCHES is unquoted on purpose: one branch per whitespace-separated
  # word.
  for BRANCH in $BRANCHES; do
    # Three dots: only the changes made on the branch since it forked
    # from main. "grep ." fails when filter printed nothing, which
    # silences branches that have nothing to report.
    if FILES=$(git diff --find-renames --name-only "main...$BRANCH" | filter | grep .); then
      echo "🌳 $BRANCH:"
      echo "$FILES"
    fi
  done
}

main "$@"