Zum Inhalt springen

Script Videoverzeichnisse vergleichen und Info ausgeben

Aus LHlab Wiki

Bei der Bearbeitung von Video-Files (mkv clean, rename usw.) kann es zu Fehlern kommen. Mit diesem Script lassen sich zwei Ordner vergleichen; es gibt Infos zu den Unterschieden aus, sodass sich Fehler schneller identifizieren lassen.

#!/usr/bin/env bash
# -----------------------------------------------------------------------------
# Script: compare_folder.sh
# Author: Manuel Wendel
# Date:   2025-11-29
# Version: 2025-11-29.8
# Purpose: Compare two folder trees for common video files >10MB
#          Shows count, size (MB, GB), % difference and filetype breakdown
# -----------------------------------------------------------------------------
# Changelog:
# 2025-11-29.1 Initial creation: list folder counts
# 2025-11-29.2 Added size calculation per folder
# 2025-11-29.3 Added name cleansing (remove () and {} )
# 2025-11-29.4 Color output green/red for count differences
# 2025-11-29.5 Added MB/GB columns
# 2025-11-29.6 Added % diff column, rounded to 0.01
# 2025-11-29.7 Added multiple video formats + filetype breakdown
# 2025-11-29.8 Removed quotes from filetypes, fixed header/trenner spacing
# -----------------------------------------------------------------------------

set -euo pipefail

# Base folders to compare. The defaults can be overridden on the command line:
#   compare_folder.sh [BASE1 [BASE2]]
base1="${1:-/mnt/work/completed_cleaned/series/_}"
base2="${2:-/mnt/work/completed_renamed/TV Shows}"

# cleaned folder name -> full path, one map per base folder
declare -A map1=() map2=()

# ANSI colour sequences; the \e escapes are expanded later because these
# values are interpolated into printf format strings.
RED="\e[31m"
GREEN="\e[32m"
RESET="\e[0m"

# Alternation of accepted video file extensions (matched case-insensitively
# via find -iregex).
VIDEO_EXTENSIONS="mp4|mkv|avi|mov|flv|wmv|m4v|mpeg|mpg"

# Normalise a folder name so the same show matches across both trees.
# Arguments: $1 - raw folder name
# Outputs:   the name with every "(...)" and "{...}" group removed, runs of
#            whitespace collapsed to one space, and leading/trailing space
#            trimmed (no trailing newline)
clean_name() {
    local n="${1-}"
    # [^)]* / [^}]* keep each match inside a single bracket pair; a greedy .*
    # would also swallow the text between two separate groups.
    n="$(sed -E 's/\([^)]*\)//g; s/\{[^}]*\}//g; s/[[:space:]]+/ /g; s/^ //; s/ $//' <<< "$n")"
    printf "%s" "$n"
}

# Summarise the video files below a folder.
# Only regular files whose name ends in one of $VIDEO_EXTENSIONS (case-
# insensitive) and that pass find's "-size +10M" test are considered.
# Globals:   VIDEO_EXTENSIONS (read)
# Arguments: $1 - folder to scan; empty means "folder does not exist"
# Outputs:   one line "COUNT TOTAL_BYTES TYPES", where TYPES is a sorted list
#            such as "avi (2), mkv (6)" (empty when nothing matched)
get_count_size_types() {
    local dir="${1-}"
    if [[ -z "$dir" ]]; then
        # Empty third field instead of a literal '' that callers would print.
        echo "0 0 "
        return 0
    fi

    local count=0 total_bytes=0 size fname ext
    local -A ext_counts=()

    # Single tree walk. Records are "SIZE/BASENAME" terminated by NUL: a
    # basename never contains "/", so IFS=/ splits the record unambiguously.
    while IFS=/ read -r -d '' size fname; do
        count=$((count + 1))
        total_bytes=$((total_bytes + size))
        ext="${fname##*.}"
        ext="${ext,,}"
        ext_counts[$ext]=$(( ${ext_counts[$ext]:-0} + 1 ))
    done < <(find "$dir" -type f -regextype posix-extended \
                 -iregex ".*\.($VIDEO_EXTENSIONS)$" -size +10M -printf '%s/%f\0')

    # Filetype breakdown, sorted by extension
    local types=""
    if ((count > 0)); then
        while IFS= read -r ext; do
            types+="${types:+, }$ext (${ext_counts[$ext]})"
        done < <(printf '%s\n' "${!ext_counts[@]}" | sort)
    fi

    echo "$count $total_bytes $types"
}

# Build folder maps: cleaned folder name -> full path of the direct
# subfolder, one map per base folder.
for base in "$base1" "$base2"; do
    # Fail fast: the exit status of find inside a process substitution is
    # discarded, so a missing base would just leave its map empty.
    if [[ ! -d "$base" ]]; then
        printf 'Error: base folder not found: %s\n' "$base" >&2
        exit 1
    fi

    while IFS= read -r d; do
        raw="${d##*/}"
        clean="$(clean_name "$raw")"
        # An empty key is not a valid associative-array subscript.
        if [[ -z "$clean" ]]; then
            printf 'Warning: skipping folder with empty cleaned name: %s\n' "$d" >&2
            continue
        fi
        if [[ "$base" == "$base1" ]]; then
            map1["$clean"]="$d"
        else
            map2["$clean"]="$d"
        fi
    done < <(find "$base" -maxdepth 1 -mindepth 1 -type d -print)
done


# Union of the cleaned folder names of both trees, sorted and de-duplicated
allnames=$(
    printf "%s\n" "${!map1[@]}" "${!map2[@]}" \
        | sort -u
)

# The longest folder name determines the width of the first column
max_len=0
while IFS= read -r name; do
    len=${#name}
    if ((len > max_len)); then
        max_len=$len
    fi
done <<< "$allnames"

# Column widths: count column, MB column, GB column, % diff column
num_width=5 size_mb_width=8 size_gb_width=8 diff_width=8
space=" "

# Print $1 copies of the box-drawing dash without a trailing newline
# (nothing at all for a count of 0).
hline() {
    local pad
    printf -v pad '%*s' "$1" ''
    printf '%s' "${pad// /─}"
}

# Header
echo "Basisordner:"
echo "1: $base1"
echo "2: $base2"
echo

# One format string for the title line and the separator. The three-space gap
# before the last column is the same gap the data rows use, so "Filetypes"
# lines up with the values below it.
header_fmt="%-${max_len}s${space}%${num_width}s %${size_mb_width}s %${size_gb_width}s${space}%${num_width}s %${size_mb_width}s %${size_gb_width}s %${diff_width}s   %s\n"

printf "$header_fmt" \
       "Ordner" "1" "MB" "GB" "2" "MB" "GB" "% diff" "Filetypes"

# Separator made of continuous lines, one segment per column
printf "$header_fmt" \
       "$(hline "$max_len")" \
       "$(hline "$num_width")" \
       "$(hline "$size_mb_width")" \
       "$(hline "$size_gb_width")" \
       "$(hline "$num_width")" \
       "$(hline "$size_mb_width")" \
       "$(hline "$size_gb_width")" \
       "$(hline "$diff_width")" \
       "$(hline 20)"
       
# Print one row per folder name: count / MB / GB for each tree, the size
# difference of tree 2 relative to tree 1 in percent, and the filetype
# breakdown of the folder in tree 2. Rows are green when both counts are
# equal and red otherwise.
while IFS= read -r name; do
    # An empty union yields a single blank line; an empty string is not a
    # valid associative-array subscript, so skip it.
    [[ -n "$name" ]] || continue

    # Empty path when the folder does not exist in that tree
    d1="${map1[$name]-}"
    d2="${map2[$name]-}"

    read -r count1 size1b types1 <<< "$(get_count_size_types "$d1")"
    read -r count2 size2b types2 <<< "$(get_count_size_types "$d2")"

    # MB: integer (truncated)
    size1mb=$((size1b / 1024 / 1024))
    size2mb=$((size2b / 1024 / 1024))

    # GB: 2 decimals, biased upwards by 0.005 before formatting.
    # Values are passed with -v instead of being spliced into the program.
    size1g=$(awk -v b="$size1b" 'BEGIN { printf "%.2f", (b / 1024 / 1024 / 1024 + 0.005) }')
    size2g=$(awk -v b="$size2b" 'BEGIN { printf "%.2f", (b / 1024 / 1024 / 1024 + 0.005) }')

    # A tree without matching files shows "-" in all three of its columns
    if [[ "$count1" -eq 0 ]]; then
        display_count1="-" display_size1mb="-" display_size1g="-"
    else
        display_count1=$count1 display_size1mb=$size1mb display_size1g=$size1g
    fi
    if [[ "$count2" -eq 0 ]]; then
        display_count2="-" display_size2mb="-" display_size2g="-"
    else
        display_count2=$count2 display_size2mb=$size2mb display_size2g=$size2g
    fi

    # % diff: "-" when both sides are empty, "inf" when only tree 1 is empty;
    # non-zero differences smaller than 0.01 are shown as +/-0.01 so they do
    # not disappear as 0.00.
    if [[ "$size1b" -eq 0 && "$size2b" -eq 0 ]]; then
        diff_perc="-"
    elif [[ "$size1b" -eq 0 ]]; then
        diff_perc="inf"
    else
        diff_perc=$(awk -v a="$size1b" -v b="$size2b" 'BEGIN {
            v = (b - a) / a * 100
            if (v > 0 && v < 0.01) v = 0.01
            else if (v < 0 && v > -0.01) v = -0.01
            printf "%.2f", v
        }')
    fi

    if [[ "$count1" == "$count2" ]]; then
        color=$GREEN
    else
        color=$RED
    fi

    printf "${color}%-${max_len}s${space}%${num_width}s %${size_mb_width}s %${size_gb_width}s${space}%${num_width}s %${size_mb_width}s %${size_gb_width}s %${diff_width}s   %s${RESET}\n" \
           "$name" "$display_count1" "$display_size1mb" "$display_size1g" \
           "$display_count2" "$display_size2mb" "$display_size2g" "$diff_perc" "$types2"
done <<< "$allnames"
Basisordner:
1: /mnt/completed_cleaned/series
2: /mnt/completed_renamed/series

Ordner                    1       MB       GB     2       MB       GB   % diff   Filetypes
────────────────────── ───── ──────── ──────── ───── ──────── ──────── ──────── ───────────────────
series 1                   8     4629    4.53       8     4629    4.53    0.01   mkv (6) avi (2)
series 2                  14    13865   13.55      14    13865   13.55    0.01   mkv (14)
series 3                  38    60573   59.16      38    60573   59.16    0.01   mkv (38)
[..]