CD-RipperFlexibleLabeler/rip.sh

548 lines
20 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env bash
# CD ripper — multi-drive parallel ripping with MusicBrainz metadata and audio-hash deduplication
# Layout: MUSIC_DIR/Artist/Album/NN - Title.mp3
#
# Usage:
#   ./rip.sh                         # auto-detect all drives
#   ./rip.sh --drives /dev/sr0 ...   # specify drives explicitly
#   ./rip.sh --index                 # build hash index from existing MP3s
# Strict mode: stop on errors, on unset variables and on failed pipeline stages.
set -euo pipefail

# Root of the music library; may be overridden from the environment.
: "${MUSIC_DIR:=./music}"
# Per-run scratch directory; each drive gets its own sub-directory in it.
TEMP_DIR=$(mktemp -d)
# MusicBrainz web-service base URL and the User-Agent sent with every request.
MB_API="https://musicbrainz.org/ws/2"
MB_UA="CD-Ripper/1.0 (amir@abdelbaki.eu)"
# Path of the audio-hash index; filled in once MUSIC_DIR has been created.
HASH_INDEX=""

# Delete the scratch directory; installed as the EXIT handler below.
cleanup() {
  rm -rf "$TEMP_DIR"
}
trap 'cleanup' EXIT
# Print all arguments as one message on stdout, prefixed with [HH:MM:SS].
log() {
  local stamp
  stamp=$(date '+%H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$*"
}
# Per-drive log line on stdout: [HH:MM:SS][label] message.
# $1 is the drive label (left-aligned, padded to 5 columns); the remaining
# arguments are joined into the message.
dlog() {
  local tag=$1 stamp
  stamp=$(date '+%H:%M:%S')
  printf '[%s][%-5s] %s\n' "$stamp" "$tag" "${*:2}"
}
# Report a fatal error on stderr and terminate the shell with status 1.
die() {
  printf >&2 'ERROR: %s\n' "$*"
  exit 1
}
# Print a non-fatal warning.
# Warnings are diagnostics, so they go to stderr (like die) and never pollute
# stdout that a caller may be capturing or piping.
warn() {
  printf 'WARN: %s\n' "$*" >&2
}
# Short label for a device path: /dev/sr0 -> sr0.
drive_label() {
  local device_path=$1
  basename "$device_path"
}
# Scratch directory used for one drive: $TEMP_DIR/<drive label>.
drive_dir() {
  local label
  label=$(drive_label "$1")
  printf '%s/%s\n' "$TEMP_DIR" "$label"
}
# ---------------------------------------------------------------------------
# Dependencies
# ---------------------------------------------------------------------------
# Verify external tools.
# Required tools abort the script through die when absent; the optional ones
# (a disc-ID reader, id3v2) only produce a warning.
check_deps() {
  local -a absent=()
  local tool
  for tool in cdparanoia ffmpeg lame curl jq flock; do
    if ! command -v "$tool" &>/dev/null; then
      absent+=("$tool")
    fi
  done
  if ((${#absent[@]} != 0)); then
    die "Missing required tools: ${absent[*]}"
  fi

  # Either disc-ID tool is enough for the MusicBrainz lookup.
  if ! command -v discid &>/dev/null && ! command -v cd-discid &>/dev/null; then
    warn "'discid'/'cd-discid' not found — MusicBrainz lookup disabled"
  fi
  if ! command -v id3v2 &>/dev/null; then
    warn "'id3v2' not found — MP3 tags will not be written"
  fi
}
# ---------------------------------------------------------------------------
# Drive detection
# ---------------------------------------------------------------------------
# Find optical drives that currently hold an audio disc.
# Outputs: one device path per line on stdout, and nothing else. Callers read
#          this output as the drive list, so all progress messages go to stderr.
# Returns: 0 (also when no drive was found).
detect_drives() {
  local real_seen=":" # colon-delimited set of real paths already probed
  local -a found=()
  local dev real
  # Prefer /dev/sr* (real devices), then the common symlink names.
  for dev in /dev/sr{0..7} /dev/cdrom /dev/cdrw /dev/dvd /dev/dvdrw; do
    # Resolve symlinks first; a node that is missing or dangling either fails
    # here or does not resolve to a block device.
    real=$(realpath "$dev" 2>/dev/null) || continue
    [[ -b "$real" ]] || continue
    # Deduplicate by real path (cdrom / dvd are usually symlinks to sr0).
    [[ "$real_seen" == *":$real:"* ]] && continue
    real_seen+=":$real:"
    log "Probing $dev..." >&2
    # cdparanoia -Q only succeeds when it can read an audio table of contents;
    # the timeout keeps an empty or stuck drive from blocking the scan.
    if timeout 15 cdparanoia -d "$dev" -Q &>/dev/null; then
      log " Audio disc detected in $dev" >&2
      found+=("$dev")
    else
      log " No audio disc in $dev" >&2
    fi
  done
  if ((${#found[@]} > 0)); then
    printf '%s\n' "${found[@]}"
  fi
}
# ---------------------------------------------------------------------------
# CD helpers (drive-parameterised)
# ---------------------------------------------------------------------------
# Print the number of track rows in the table of contents that
# `cdparanoia -Q` reports for the given drive (0 when there are none).
# A track row is a line of leading whitespace followed by "<number>.".
count_cd_tracks() {
  local device=$1 toc
  # The listing is captured from stdout and stderr together; a failing query
  # simply yields a listing without track rows.
  toc=$(cdparanoia -d "$device" -Q 2>&1) || true
  grep -cE '^\s+[0-9]+\.' <<<"$toc" || true
}
# Print a disc identifier for the given drive, or nothing when no tool is
# installed or the tool fails. Always returns 0.
# `discid` is preferred; otherwise the first field of `cd-discid` is used.
# NOTE(review): the first field of cd-discid looks like a FreeDB-style id, not
# a MusicBrainz disc ID — confirm that the /discid lookup accepts it.
get_disc_id() {
  local device=$1
  if command -v discid &>/dev/null; then
    discid "$device" 2>/dev/null || true
    return 0
  fi
  if command -v cd-discid &>/dev/null; then
    cd-discid "$device" 2>/dev/null | awk '{print $1}' || true
  fi
  return 0
}
# ---------------------------------------------------------------------------
# MusicBrainz
# ---------------------------------------------------------------------------
# Fetch the MusicBrainz disc-ID lookup for $1 and print the raw JSON body.
# Best effort: curl errors are discarded and the function always returns 0, so
# an empty output means "no answer".
query_mb() {
  local disc_id=$1
  local lookup_url="${MB_API}/discid/${disc_id}?fmt=json&inc=artists+recordings+artist-credits"
  curl -sS --max-time 15 -A "$MB_UA" "$lookup_url" 2>/dev/null || true
}
# Choose one release from a MusicBrainz disc-ID response.
# Arguments: $1 - raw JSON response, $2 - drive label used in the prompts
# Outputs:   the selected release id on stdout, and nothing else. The menu is
#            written to stderr because callers capture stdout as the id.
# Returns:   0 with an id; 1 when there is no release, the user picks
#            "None (manual input)", or stdin reaches EOF.
pick_release() {
  local response="$1" label="$2"
  local releases_json
  # Normalise both response shapes (.releases list / single .release) to a list.
  releases_json=$(jq -c '
    if .releases then .releases
    elif .release then [.release]
    else []
    end
  ' <<<"$response" 2>/dev/null) || return 1
  local count
  count=$(jq 'length' <<<"$releases_json" 2>/dev/null) || return 1
  [[ "$count" -gt 0 ]] || return 1
  if [[ "$count" -eq 1 ]]; then
    jq -r '.[0].id' <<<"$releases_json"
    return 0
  fi

  {
    echo ""
    echo "[$label] Multiple releases found — pick one:"
  } >&2
  local shown=0 _id title date artist
  # Fields are separated by the ASCII unit separator, not a tab: tab is IFS
  # whitespace, so an empty field would collapse and shift the columns.
  while IFS=$'\x1f' read -r _id title date artist; do
    shown=$((shown + 1))
    echo " $shown) $artist $title ($date)" >&2
  done < <(jq -r '
    .[]
    | [ (.id // ""),
        (.title // "?"),
        (.date // "?"),
        ((.["artist-credit"] // []) | map(.name) | join(", ")) ]
    | join("\u001f")
  ' <<<"$releases_json")
  local none_choice=$((shown + 1))
  echo " $none_choice) None (manual input)" >&2

  local choice
  while true; do
    # EOF (closed stdin) cancels instead of re-prompting forever.
    read -rp " [$label] Select [1-$none_choice]: " choice || return 1
    if [[ "$choice" =~ ^[0-9]{1,6}$ ]]; then
      choice=$((10#$choice)) # force base 10 so "08" is not read as octal
      if ((choice >= 1 && choice <= none_choice)); then
        break
      fi
    fi
  done
  if ((choice == none_choice)); then
    return 1
  fi
  jq -r ".[$((choice - 1))].id" <<<"$releases_json"
}
# Write the fields of one release into files inside the drive directory.
# Arguments: $1 - raw MusicBrainz JSON response
#            $2 - id of the release to extract
#            $3 - directory receiving the files artist, album, year, tracks
# Returns:   0 on success; 1 when the JSON cannot be parsed, the release id is
#            not part of the response, or a field cannot be written.
parse_mb_release() {
  local response="$1" release_id="$2" ddir="$3"
  local rel
  rel=$(jq -c --arg rid "$release_id" '
    if .releases then .releases[] | select(.id == $rid)
    elif .release then .release
    else empty
    end
  ' <<<"$response" 2>/dev/null) || return 1
  # jq exits 0 even when nothing matched; an empty result is a failure here.
  [[ -n "$rel" ]] || return 1

  jq -r '(.["artist-credit"] // []) | map(.name) | join(", ")' \
    <<<"$rel" > "$ddir/artist" || return 1
  jq -r '.title // ""' <<<"$rel" > "$ddir/album" || return 1
  # A release without a date must yield an empty year, not the string "null".
  jq -r '((.date // "") | split("-")[0]) // ""' <<<"$rel" > "$ddir/year" || return 1
  # Titles come from the first medium only.
  # NOTE(review): on a multi-disc release this is presumably not always the
  # inserted disc — confirm before relying on it.
  jq -r '
    ((.media // [{}])[0].tracks // []) | sort_by(.position) | .[].title
  ' <<<"$rel" > "$ddir/tracks" || return 1
}
# ---------------------------------------------------------------------------
# Audio hashing — raw PCM only, strips all headers/tags
# Normalises to 44100 Hz / stereo / 16-bit so the hash is stable across formats.
# ---------------------------------------------------------------------------
# Print the SHA-256 of the decoded audio of file $1.
# The audio is decoded to raw PCM (44100 Hz, stereo, signed 16-bit) so that
# container headers and tags do not influence the hash.
# Returns: 0 and the hex digest on success; non-zero and NO output when the
#          file cannot be decoded. Without this, the digest of empty input
#          would be reported and every undecodable file would "match".
audio_hash() {
  local digest
  digest=$(
    # pipefail is enabled locally so a decoder failure is not masked by
    # sha256sum succeeding on empty input.
    set -o pipefail
    # ffmpeg reads stdin by default; detach it so a caller's
    # `while read ... done < <(find ...)` loop keeps its input.
    ffmpeg -i "$1" -vn -f s16le -acodec pcm_s16le -ar 44100 -ac 2 - \
      </dev/null 2>/dev/null \
      | sha256sum
  ) || return 1
  # sha256sum prints "<digest>  -"; keep the digest only.
  printf '%s\n' "${digest%% *}"
}
# Look up an audio hash in the index and claim it when it is new.
# Arguments: $1 - audio hash, $2 - path that will own the hash
# Outputs:   the path already recorded for the hash, or nothing when the hash
#            was unknown (in which case "hash|path" is appended to the index).
# Returns:   0 in both cases; callers decide by looking at the output.
# The lookup and the append run under an exclusive flock on "<index>.lock", so
# parallel rip jobs cannot both claim the same hash.
check_and_claim_hash() {
  local hash="$1" path="$2"
  local lock="${HASH_INDEX}.lock"
  (
    flock -x 200
    local hit owner
    # First index line starting with "<hash>|"; empty when there is none or
    # the index cannot be read.
    hit=$(grep -m1 "^${hash}|" "$HASH_INDEX" 2>/dev/null) || hit=""
    owner=${hit#*|} # everything after the first '|' is the recorded path
    if [[ -z "$owner" ]]; then
      printf '%s|%s\n' "$hash" "$path" >> "$HASH_INDEX"
    else
      printf '%s' "$owner"
    fi
  ) 200>"$lock"
}
# ---------------------------------------------------------------------------
# Build hash index from existing MP3s (first-run or repair)
# ---------------------------------------------------------------------------
# Rebuild the audio-hash index from every *.mp3 below MUSIC_DIR.
# Globals:  MUSIC_DIR (read), HASH_INDEX (read; the file is replaced)
# The new index is written to a sibling file and swapped in at the end, so a
# rebuild drops stale entries instead of appending duplicates. The index lock
# is held for the whole scan so parallel rip jobs cannot lose a claim.
# Files that cannot be hashed are reported and skipped.
build_index() {
  log "Scanning existing tracks to build hash index..."
  local lock="${HASH_INDEX}.lock"
  local fresh="${HASH_INDEX}.new.$$"
  local count=0 mp3 h
  exec 200>"$lock"
  flock -x 200 || die "Cannot lock hash index: $lock"
  : > "$fresh" || die "Cannot write temporary index: $fresh"
  while IFS= read -r -d '' mp3; do
    # stdin is detached so the hashing command cannot swallow the file list
    # this loop is reading.
    if h=$(audio_hash "$mp3" </dev/null) && [[ -n "$h" ]]; then
      printf '%s|%s\n' "$h" "$mp3" >> "$fresh"
      # Plain assignment: ((count++)) returns 1 when count is 0, which would
      # abort the script under `set -e`.
      count=$((count + 1))
    else
      warn "Could not hash '$mp3' — not indexed"
    fi
  done < <(find "$MUSIC_DIR" -name "*.mp3" -print0 2>/dev/null)
  mv -f -- "$fresh" "$HASH_INDEX"
  exec 200>&-
  log "Indexed $count track(s)"
}
# ---------------------------------------------------------------------------
# Misc
# ---------------------------------------------------------------------------
# Turn an arbitrary string into a safe single path component.
# - the characters / : * ? " < > | \ become "_"
# - runs of two or more spaces collapse to one; one leading and one trailing
#   space are removed
# - a result of "." or ".." is rewritten with "_" so it can never refer to the
#   current or parent directory
# Output: the cleaned name on stdout, without a trailing newline.
sanitize() {
  local cleaned
  # The interval \{2,\} needs at least two spaces. The previous pattern could
  # match the empty string and so inserted a space between all characters.
  cleaned=$(printf '%s' "$1" \
    | tr '/:*?"<>|\\' '_' \
    | sed 's/ \{2,\}/ /g; s/^ //; s/ $//')
  case "$cleaned" in
    . | ..) cleaned=${cleaned//./_} ;;
  esac
  printf '%s' "$cleaned"
}
# ---------------------------------------------------------------------------
# Phase 1: gather metadata for one drive (interactive, sequential)
# Writes results to drive_dir files; returns 1 if drive should be skipped.
# ---------------------------------------------------------------------------
# Collect the metadata for the disc in one drive.
# Arguments: $1 - device path
# Files written to the drive directory: num_tracks, disc_id, artist, album,
# year, tracks (one title per line); status ("skip") when the disc is unusable.
# MusicBrainz is tried first when a disc ID is available; the user is asked for
# everything when the lookup yields no artist, album or track titles.
# Returns: 0 when metadata is ready, 1 when the drive has no audio tracks.
gather_metadata() {
  local drive="$1"
  local label ddir
  label=$(drive_label "$drive")
  ddir=$(drive_dir "$drive")
  mkdir -p "$ddir"

  local num_tracks
  num_tracks=$(count_cd_tracks "$drive")
  if [[ "${num_tracks:-0}" -eq 0 ]]; then
    warn "[$label] No audio tracks — skipping"
    echo "skip" > "$ddir/status"
    return 1
  fi
  echo "$num_tracks" > "$ddir/num_tracks"

  local disc_id
  disc_id=$(get_disc_id "$drive")
  printf '%s' "$disc_id" > "$ddir/disc_id"

  local use_mb=false
  if [[ -z "$disc_id" ]]; then
    log "[$label] No disc ID — falling back to manual input"
  else
    log "[$label] Disc ID: $disc_id — querying MusicBrainz..."
    local mb_resp release_id
    mb_resp=$(query_mb "$disc_id")
    # Usable answer = non-empty body without an .error member, a release the
    # user accepted, and a release that could be written to the drive files.
    if [[ -n "$mb_resp" ]] \
      && ! jq -e '.error' <<<"$mb_resp" &>/dev/null \
      && release_id=$(pick_release "$mb_resp" "$label") \
      && parse_mb_release "$mb_resp" "$release_id" "$ddir"; then
      local artist album track_count
      artist=$(cat "$ddir/artist" 2>/dev/null || true)
      album=$(cat "$ddir/album" 2>/dev/null || true)
      track_count=$(wc -l < "$ddir/tracks" 2>/dev/null || echo 0)
      if [[ -n "$artist" && -n "$album" && "$track_count" -gt 0 ]]; then
        use_mb=true
        log "[$label] MB match: $artist $album"
      fi
    fi
  fi

  if [[ "$use_mb" == false ]]; then
    echo ""
    echo "=== [$label] Manual metadata ($num_tracks tracks) ==="
    local artist album year
    read -rp " Artist : " artist
    read -rp " Album : " album
    read -rp " Year (blank to skip): " year
    printf '%s' "$artist" > "$ddir/artist"
    printf '%s' "$album" > "$ddir/album"
    printf '%s' "$year" > "$ddir/year"
    # One title per line, in disc order (the prompt itself goes to stderr).
    local idx entered
    for ((idx = 1; idx <= num_tracks; idx++)); do
      read -rp " Track $idx: " entered
      printf '%s\n' "$entered"
    done > "$ddir/tracks"
  fi

  # Warn on track count mismatch between CD and MB
  local mb_count
  mb_count=$(wc -l < "$ddir/tracks" 2>/dev/null || echo 0)
  if [[ "$mb_count" -ne "$num_tracks" ]]; then
    warn "[$label] MB has $mb_count track(s) but CD has $num_tracks — titles may be offset"
  fi
  return 0
}
# ---------------------------------------------------------------------------
# Phase 2: show confirmation table, catch disc mixups
# ---------------------------------------------------------------------------
# Show which disc was identified in which drive and ask for confirmation.
# Arguments: the device paths whose metadata has been gathered
# Reads artist, album, year, num_tracks and disc_id from each drive directory
# and warns when the same disc ID shows up in more than one drive.
# Returns: 1 when the user answers "n" or "no" (any case); 0 otherwise — an
#          empty answer or EOF counts as yes, matching the [Y/n] default.
confirm_drives() {
  local -a drives=("$@")
  echo ""
  echo "┌────────────────────────────────────────────────────────────────────┐"
  echo "│ DISC ASSIGNMENT SUMMARY │"
  echo "└────────────────────────────────────────────────────────────────────┘"
  printf ' %-8s %-36s %-6s %s\n' "Drive" "Artist Album" "Year" "Tracks"
  printf ' %-8s %-36s %-6s %s\n' "--------" "------------------------------------" "------" "------"
  local disc_id_seen=":" # ":id1::id2:" for duplicate disc-ID detection
  local drive label ddir artist album year num_tracks disc_id display
  for drive in "${drives[@]}"; do
    label=$(drive_label "$drive")
    ddir=$(drive_dir "$drive")
    artist=$(cat "$ddir/artist" 2>/dev/null || echo "?")
    album=$(cat "$ddir/album" 2>/dev/null || echo "?")
    year=$(cat "$ddir/year" 2>/dev/null || echo "")
    num_tracks=$(cat "$ddir/num_tracks" 2>/dev/null || echo "?")
    disc_id=$(cat "$ddir/disc_id" 2>/dev/null || echo "")
    display="$artist $album"
    # Keep the table aligned: cut long names to 33 characters plus "...".
    if ((${#display} > 36)); then
      display="${display:0:33}..."
    fi
    printf ' %-8s %-36s %-6s %s\n' \
      "$label" "$display" "${year:---}" "$num_tracks tracks"
    if [[ -n "$disc_id" ]]; then
      if [[ "$disc_id_seen" == *":$disc_id:"* ]]; then
        echo ""
        warn "Disc ID '$disc_id' appears in multiple drives!"
        warn "You may have inserted the same CD twice — please check."
        echo ""
      fi
      disc_id_seen+=":$disc_id:"
    fi
  done
  echo ""
  echo " Physically verify that each drive contains the disc shown above."
  echo " If anything looks wrong, answer N and re-seat the discs."
  echo ""
  local ok=""
  read -rp " All discs confirmed? [Y/n] " ok || true
  # Normalise the answer: lower-case and without whitespace, so that "No",
  # "NO " and "n" all abort. Previously only the exact answer "n" did.
  ok=${ok,,}
  ok=${ok//[[:space:]]/}
  case "$ok" in
    n | no) return 1 ;;
  esac
  return 0
}
# ---------------------------------------------------------------------------
# Phase 3: rip one drive — called in a background subshell
# ---------------------------------------------------------------------------
# Remove a claimed "hash|path" line from the hash index again.
# Runs under the same exclusive lock that is used for claiming. Needed when
# encoding fails after the claim, otherwise the track would later be skipped as
# a duplicate of a file that does not exist.
_release_hash_claim() {
  local hash="$1" path="$2"
  local lock="${HASH_INDEX}.lock"
  (
    flock -x 200 || exit 1
    [[ -f "$HASH_INDEX" ]] || exit 0
    kept="${HASH_INDEX}.tmp.$BASHPID"
    # grep -v exits 1 when no line is left; that is not an error here.
    grep -vxF -- "${hash}|${path}" "$HASH_INDEX" > "$kept" || true
    cat -- "$kept" > "$HASH_INDEX"
    rm -f -- "$kept"
  ) 200>"$lock"
}

# Rip, deduplicate, encode and tag every track of the disc in one drive.
# Arguments: $1 - device path
# Reads artist, album, year, num_tracks and tracks from the drive directory and
# writes MUSIC_DIR/<artist>/<album>/NN - <title>.mp3. The counters
# "ripped skipped failed" are stored in <drive dir>/result.
# Meant to run as a background job: every per-track error is counted and the
# loop continues, so one bad track cannot end the job.
# Returns: 0, or 1 when the output directories cannot be created.
rip_drive() {
  local drive="$1"
  local label ddir
  label=$(drive_label "$drive")
  ddir=$(drive_dir "$drive")

  local artist album year num_tracks
  artist=$(cat "$ddir/artist" 2>/dev/null || true)
  album=$(cat "$ddir/album" 2>/dev/null || true)
  year=$(cat "$ddir/year" 2>/dev/null || true)
  num_tracks=$(cat "$ddir/num_tracks" 2>/dev/null || true)
  # Fall back for files that are missing AND for files that are empty.
  [[ -n "$artist" ]] || artist="Unknown Artist"
  [[ -n "$album" ]] || album="Unknown Album"
  [[ "$num_tracks" =~ ^[0-9]+$ ]] || num_tracks=0

  local -a tracks=()
  if [[ -f "$ddir/tracks" ]]; then
    readarray -t tracks < "$ddir/tracks"
  fi

  # A name that sanitises to nothing must not collapse the path to MUSIC_DIR//.
  local artist_dir album_dir
  artist_dir=$(sanitize "$artist")
  album_dir=$(sanitize "$album")
  [[ -n "$artist_dir" ]] || artist_dir="Unknown Artist"
  [[ -n "$album_dir" ]] || album_dir="Unknown Album"
  local out_dir="$MUSIC_DIR/$artist_dir/$album_dir"
  local wav_dir="$ddir/wav"
  if ! mkdir -p "$out_dir" "$wav_dir"; then
    dlog "$label" "Cannot create output directories — giving up"
    printf '%d %d %d\n' 0 0 "$num_tracks" > "$ddir/result"
    return 1
  fi

  # Counters use plain assignments: ((x++)) returns 1 when x is 0, which ends
  # the whole job under `set -e`.
  local ripped=0 skipped=0 failed=0
  local n nn title wav mp3 hash dup
  for ((n = 1; n <= num_tracks; n++)); do
    printf -v nn '%02d' "$n"
    title=${tracks[n-1]:-}
    [[ -n "$title" ]] || title="Track $nn"
    wav="$wav_dir/track$nn.wav"
    mp3="$out_dir/$nn - $(sanitize "$title").mp3"
    dlog "$label" "[$n/$num_tracks] $title"

    # Rip track
    if ! cdparanoia -d "$drive" "$n" "$wav" 2>/dev/null; then
      dlog "$label" " FAILED to rip track $n"
      failed=$((failed + 1))
      rm -f -- "$wav"
      continue
    fi

    # Deduplicate: atomically claim hash or detect collision
    if ! hash=$(audio_hash "$wav" </dev/null) || [[ -z "$hash" ]]; then
      dlog "$label" " FAILED to hash track $n"
      failed=$((failed + 1))
      rm -f -- "$wav"
      continue
    fi
    if ! dup=$(check_and_claim_hash "$hash" "$mp3"); then
      dlog "$label" " FAILED to update hash index for track $n"
      failed=$((failed + 1))
      rm -f -- "$wav"
      continue
    fi
    if [[ -n "$dup" ]]; then
      dlog "$label" " DUPLICATE of '$dup' — skipping"
      skipped=$((skipped + 1))
      rm -f -- "$wav"
      continue
    fi

    # Encode
    if ! lame -V2 --quiet "$wav" "$mp3" 2>/dev/null; then
      dlog "$label" " FAILED to encode track $n"
      # Give the hash back and drop a partial file so a retry is possible.
      _release_hash_claim "$hash" "$mp3" || true
      failed=$((failed + 1))
      rm -f -- "$wav" "$mp3"
      continue
    fi

    # Tag (best effort)
    if command -v id3v2 &>/dev/null; then
      local -a tag_args=(-a "$artist" -A "$album" -t "$title" -T "$n/$num_tracks")
      if [[ -n "$year" ]]; then
        tag_args+=(-y "$year")
      fi
      id3v2 "${tag_args[@]}" "$mp3" 2>/dev/null || true
    fi

    rm -f -- "$wav"
    ripped=$((ripped + 1))
    dlog "$label" " -> $mp3"
  done

  printf '%d %d %d\n' "$ripped" "$skipped" "$failed" > "$ddir/result"
  dlog "$label" "Finished: $ripped ripped, $skipped skipped, $failed failed"
}
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
# Print the command-line help text on stdout.
usage() {
  # One argument per output line; the single %s\n format is reused for each.
  printf '%s\n' \
    'Usage: rip.sh [OPTIONS]' \
    'Rip audio CDs to MP3, organised as MUSIC_DIR/Artist/Album/NN - Title.mp3.' \
    'MusicBrainz metadata is used where available, with a manual-input fallback.' \
    'Duplicate tracks are detected by audio-content hash (headers/tags ignored).' \
    'Options:' \
    '--drives DEV... Specify one or more CD device paths (e.g. /dev/sr0 /dev/sr1).' \
    'Default: auto-detect all drives that contain an audio disc.' \
    '--index Scan MUSIC_DIR for existing MP3s and build/rebuild the' \
    'audio hash index used for duplicate detection. Run this once' \
    'if you have tracks ripped before using this script.' \
    '--help Show this help and exit.' \
    'Environment:' \
    'MUSIC_DIR Root of the music library (default: ./music)' \
    'Examples:' \
    'rip.sh' \
    'rip.sh --drives /dev/sr0 /dev/sr1' \
    'MUSIC_DIR=/mnt/nas/music rip.sh' \
    'rip.sh --index'
}
# Entry point: parse the command line and run the four phases
# (metadata, confirmation, parallel rip, report).
# Arguments: nothing, --drives DEV..., --index, or --help/-h
# Globals:   MUSIC_DIR (read), HASH_INDEX (set)
main() {
  # Help must work without any dependency installed and without touching the
  # disk; unknown options are rejected instead of silently ripping all drives.
  case "${1:-}" in
    --help | -h)
      usage
      exit 0
      ;;
    "" | --index | --drives) ;;
    *) die "Unknown option: $1 (try --help)" ;;
  esac

  check_deps
  mkdir -p "$MUSIC_DIR"
  HASH_INDEX="$MUSIC_DIR/.audio_hashes"
  touch "$HASH_INDEX"
  if [[ "${1:-}" == "--index" ]]; then
    build_index
    exit 0
  fi

  # Drive selection: explicit or auto-detected
  local -a ALL_DRIVES=()
  local drive d pid label
  if [[ "${1:-}" == "--drives" ]]; then
    ALL_DRIVES=("${@:2}")
    [[ ${#ALL_DRIVES[@]} -gt 0 ]] || die "--drives requires at least one device path"
  else
    log "Scanning for drives with audio discs..."
    while IFS= read -r d; do
      if [[ -n "$d" ]]; then
        ALL_DRIVES+=("$d")
      fi
    done < <(detect_drives)
  fi
  [[ ${#ALL_DRIVES[@]} -gt 0 ]] || die "No drives with audio discs found."
  log "Using ${#ALL_DRIVES[@]} drive(s): ${ALL_DRIVES[*]}"

  # ── Phase 1: metadata (sequential — user interaction required) ────────
  echo ""
  local -a ACTIVE_DRIVES=()
  for drive in "${ALL_DRIVES[@]}"; do
    label=$(drive_label "$drive")
    echo "━━━━━━━━━━━━ $label ━━━━━━━━━━━━"
    if gather_metadata "$drive"; then
      ACTIVE_DRIVES+=("$drive")
    fi
    echo ""
  done
  [[ ${#ACTIVE_DRIVES[@]} -gt 0 ]] || die "No drives with usable metadata."

  # ── Phase 2: confirmation table — disc mixup guard ────────────────────
  if ! confirm_drives "${ACTIVE_DRIVES[@]}"; then
    echo "Aborted. Correct the disc placement and re-run."
    exit 1
  fi

  # ── Phase 3: parallel ripping ─────────────────────────────────────────
  echo ""
  log "Starting parallel rip on ${#ACTIVE_DRIVES[@]} drive(s)..."
  local -a pids=()
  for drive in "${ACTIVE_DRIVES[@]}"; do
    rip_drive "$drive" &
    pids+=("$!")
  done
  for pid in "${pids[@]}"; do
    # A failed job must not stop the report; its result file tells the story.
    wait "$pid" || true
  done

  # ── Phase 4: combined report ──────────────────────────────────────────
  echo ""
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  printf ' %-8s %7s %7s %7s\n' "Drive" "Ripped" "Skipped" "Failed"
  echo " ──────────────────────────────────────────"
  local total_r=0 total_s=0 total_f=0
  local result_file r s f
  for drive in "${ACTIVE_DRIVES[@]}"; do
    label=$(drive_label "$drive")
    result_file="$(drive_dir "$drive")/result"
    r=0 s=0 f=0
    if [[ -r "$result_file" ]]; then
      read -r r s f < "$result_file" || true
    else
      warn "[$label] rip job left no result — it probably ended abnormally"
    fi
    printf ' %-8s %7d %7d %7d\n' "$label" "${r:-0}" "${s:-0}" "${f:-0}"
    # Plain assignments: ((total += x)) returns 1 whenever the sum is 0, which
    # would abort the report under `set -e`.
    total_r=$((total_r + ${r:-0}))
    total_s=$((total_s + ${s:-0}))
    total_f=$((total_f + ${f:-0}))
  done
  echo " ──────────────────────────────────────────"
  printf ' %-8s %7d %7d %7d\n' "TOTAL" "$total_r" "$total_s" "$total_f"
  echo ""
  log "Output: $MUSIC_DIR"
}
# Script entry point: run main with the command-line arguments unchanged.
main "$@"