#!/usr/bin/env bash
# CD ripper — multi-drive parallel ripping with MusicBrainz metadata and
# audio-hash deduplication.
# Layout: MUSIC_DIR/Artist/Album/NN - Title.mp3
#
# Usage:
#   ./rip.sh                         auto-detect all drives
#   ./rip.sh --drives /dev/sr0 ...   specify drives explicitly
#   ./rip.sh --index                 build hash index from existing MP3s
#
# Environment:
#   MUSIC_DIR   root of the music library (default: ./music)

set -euo pipefail

MUSIC_DIR="${MUSIC_DIR:-./music}"
TEMP_DIR="$(mktemp -d)"
MB_API="https://musicbrainz.org/ws/2"
MB_UA="CD-Ripper/1.0 (amir@abdelbaki.eu)"
HASH_INDEX=   # set after MUSIC_DIR is created

# Remove the per-run scratch directory on any exit path of the main shell.
cleanup() { rm -rf -- "${TEMP_DIR:?}"; }
trap cleanup EXIT

# log MSG...         timestamped message on stdout
# dlog LABEL MSG...  timestamped message prefixed with a drive label
# die MSG...         error on stderr, exit 1
# warn MSG...        warning on stderr (kept off stdout so it can never end up
#                    in a captured function result)
log()  { printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"; }
dlog() { printf '[%s][%-5s] %s\n' "$(date '+%H:%M:%S')" "$1" "${*:2}"; }
die()  { printf 'ERROR: %s\n' "$*" >&2; exit 1; }
warn() { printf 'WARN: %s\n' "$*" >&2; }

# drive_label DEV -> basename of the device path (/dev/sr0 -> sr0)
# drive_dir DEV   -> per-drive scratch directory under TEMP_DIR
drive_label() { basename "$1"; }
drive_dir()   { printf '%s\n' "$TEMP_DIR/$(drive_label "$1")"; }

# ---------------------------------------------------------------------------
# Dependencies
# ---------------------------------------------------------------------------

# Abort if a required tool is missing; warn about optional ones.
check_deps() {
  local -a missing=()
  local cmd
  for cmd in cdparanoia ffmpeg lame curl jq flock; do
    command -v "$cmd" &>/dev/null || missing+=("$cmd")
  done
  [[ ${#missing[@]} -eq 0 ]] || die "Missing required tools: ${missing[*]}"

  if ! command -v discid &>/dev/null && ! command -v cd-discid &>/dev/null; then
    warn "'discid'/'cd-discid' not found — MusicBrainz lookup disabled"
  fi
  if ! command -v id3v2 &>/dev/null; then
    warn "'id3v2' not found — MP3 tags will not be written"
  fi
}

# ---------------------------------------------------------------------------
# Drive detection
# ---------------------------------------------------------------------------

# Probe well-known device paths and print, one per line on stdout, those that
# contain an audio disc. All progress messages go to stderr because the caller
# reads stdout as the list of drives.
detect_drives() {
  local real_seen=":"   # colon-delimited set of real paths already seen
  local -a found=()
  local dev real

  # Prefer /dev/sr* (real devices), then common symlink names
  for dev in /dev/sr{0..7} /dev/cdrom /dev/cdrw /dev/dvd /dev/dvdrw; do
    real=$(realpath "$dev" 2>/dev/null) || continue
    [[ -b "$real" ]] || continue

    # Deduplicate by real path (cdrom / dvd are usually symlinks to sr0)
    if [[ "$real_seen" == *":$real:"* ]]; then
      continue
    fi
    real_seen+=":$real:"

    log "Probing $dev..." >&2
    if timeout 15 cdparanoia -d "$dev" -Q &>/dev/null; then
      log " Audio disc detected in $dev" >&2
      found+=("$dev")
    else
      log " No audio disc in $dev" >&2
    fi
  done

  if ((${#found[@]} > 0)); then
    printf '%s\n' "${found[@]}"
  fi
}

# ---------------------------------------------------------------------------
# CD helpers (drive-parameterised)
# ---------------------------------------------------------------------------

# Print the number of track rows in `cdparanoia -Q` output for drive $1
# (prints 0 when the query fails or lists no tracks).
count_cd_tracks() {
  local drive="$1"
  local out
  out=$(cdparanoia -d "$drive" -Q 2>&1) || true
  printf '%s\n' "$out" | grep -cE '^[[:space:]]+[0-9]+\.' || true
}

# Print a disc ID for drive $1 using `discid` or, failing that, the first
# field of `cd-discid`. Prints nothing if neither tool is available or works.
# NOTE(review): cd-discid's first field is presumably a CDDB/freedb ID rather
# than a MusicBrainz disc ID, in which case the lookup in query_mb will find
# nothing and the script falls back to manual input — confirm.
get_disc_id() {
  local drive="$1"
  if command -v discid &>/dev/null; then
    discid "$drive" 2>/dev/null || true
  elif command -v cd-discid &>/dev/null; then
    cd-discid "$drive" 2>/dev/null | awk '{print $1}' || true
  fi
}

# ---------------------------------------------------------------------------
# MusicBrainz
# ---------------------------------------------------------------------------

# Print the raw JSON response of the MusicBrainz discid lookup for $1.
# Network errors are swallowed on purpose: the caller treats an empty response
# as "no match" and falls back to manual input.
query_mb() {
  local disc_id="$1"
  curl -sS --max-time 15 -A "$MB_UA" \
    "${MB_API}/discid/${disc_id}?fmt=json&inc=artists+recordings+artist-credits" \
    2>/dev/null || true
}

# Prints selected release_id; returns 1 if user cancels.
# Arguments: $1 - MusicBrainz JSON response, $2 - drive label for prompts.
# Only the release id is written to stdout; the menu goes to stderr because
# callers capture stdout with $(...).
pick_release() {
  local response="$1" label="$2"

  local releases_json
  releases_json=$(jq -c '
    if .releases then .releases
    elif .release then [.release]
    else [] end
  ' <<< "$response" 2>/dev/null) || return 1

  local count
  count=$(jq 'length' <<< "$releases_json" 2>/dev/null) || return 1
  [[ "$count" -gt 0 ]] || return 1

  if [[ "$count" -eq 1 ]]; then
    jq -r '.[0].id' <<< "$releases_json"
    return 0
  fi

  echo "" >&2
  echo "[$label] Multiple releases found — pick one:" >&2
  local i=0 _id title date artist
  # Empty/null title and date are replaced by "?" so that tab-separated
  # fields never collapse and shift into each other.
  while IFS=$'\t' read -r _id title date artist; do
    i=$((i + 1))
    echo " $i) $artist – $title ($date)" >&2
  done < <(jq -r '
    def nz(d): if . == null or . == "" then d else . end;
    .[] | "\(.id)\t\(.title | nz("?"))\t\(.date | nz("?"))\t\((.["artist-credit"] // []) | map(.name) | join(", "))"
  ' <<< "$releases_json")
  echo " $((i + 1))) None (manual input)" >&2

  local choice
  while true; do
    # EOF on stdin counts as a cancel instead of looping forever.
    read -rp " [$label] Select [1-$((i + 1))]: " choice || return 1
    if [[ "$choice" =~ ^[0-9]+$ ]] \
      && ((10#$choice >= 1 && 10#$choice <= i + 1)); then
      break
    fi
  done
  choice=$((10#$choice))   # force base 10 so "08" is not parsed as octal

  if ((choice == i + 1)); then
    return 1
  fi
  jq -r --argjson idx "$((choice - 1))" '.[$idx].id' <<< "$releases_json"
}

# Write release fields into drive_dir files (artist, album, year, tracks).
# Arguments: $1 - MusicBrainz JSON response
#            $2 - release id to extract
#            $3 - directory to write the files into
#            $4 - (optional) disc id; when a medium lists this disc its track
#                 list is used, otherwise the first medium is used
# Returns 1 if the release cannot be found or a field cannot be extracted.
parse_mb_release() {
  local response="$1" release_id="$2" ddir="$3" disc_id="${4:-}"

  local rel
  rel=$(jq --arg rid "$release_id" '
    if .releases then first(.releases[] | select(.id == $rid))
    elif .release then .release
    else empty end
  ' <<< "$response" 2>/dev/null) || return 1
  [[ -n "$rel" ]] || return 1

  jq -r '(.["artist-credit"] // []) | map(.name) | join(", ")' \
    <<< "$rel" > "$ddir/artist" || return 1
  jq -r '.title // ""' <<< "$rel" > "$ddir/album" || return 1
  # A missing date yields an empty year rather than the string "null".
  jq -r '(.date // "") | split("-")[0] // ""' \
    <<< "$rel" > "$ddir/year" || return 1
  jq -r --arg did "$disc_id" '
    (.media // []) as $media
    | (([$media[] | select(any(.discs[]?; .id == $did))] | first)
       // $media[0] // {})
    | (.tracks // [])
    | sort_by(.position)
    | .[]
    | (.title // "")
  ' <<< "$rel" > "$ddir/tracks" || return 1
}

# ---------------------------------------------------------------------------
# Audio hashing — raw PCM only, strips all headers/tags
# Normalises to 44100 Hz / stereo / 16-bit so the hash is stable across formats.
# ---------------------------------------------------------------------------

# Print the SHA-256 of the decoded audio of file $1.
# Returns non-zero and prints nothing if ffmpeg cannot decode the file.
# -nostdin keeps ffmpeg from consuming the caller's stdin (build_index calls
# this from inside a `while read` loop fed by find).
audio_hash() {
  local digest
  digest=$(
    set -o pipefail
    ffmpeg -nostdin -i "$1" -vn -f s16le -acodec pcm_s16le -ar 44100 -ac 2 - \
      2>/dev/null | sha256sum
  ) || return 1
  printf '%s\n' "${digest%% *}"
}

# Atomically check for a duplicate and, if none, claim the hash entry.
# Prints the existing path if a duplicate exists; prints nothing if the hash was
# new and is now claimed. Returns 0 in both cases (check the output instead).
# Uses flock so concurrent rip jobs don't race on the index.
check_and_claim_hash() {
  local hash="$1" path="$2"
  local lock="${HASH_INDEX}.lock"
  (
    flock -x 200
    local existing
    existing=$(grep -m1 "^${hash}|" "$HASH_INDEX" 2>/dev/null | cut -d'|' -f2- || true)
    if [[ -n "$existing" ]]; then
      printf '%s' "$existing"
    else
      printf '%s|%s\n' "$hash" "$path" >> "$HASH_INDEX"
    fi
  ) 200>"$lock"
}

# Remove the exact "hash|path" entry from the index under the same lock used
# by check_and_claim_hash. Used to undo a claim when encoding fails, so the
# track is not treated as a duplicate on the next run.
release_hash() {
  local hash="$1" path="$2"
  local lock="${HASH_INDEX}.lock"
  (
    flock -x 200
    local tmp="${HASH_INDEX}.tmp" rc=0
    # grep exits 1 when no lines remain (empty index) — that is not an error.
    grep -vxF -- "${hash}|${path}" "$HASH_INDEX" > "$tmp" || rc=$?
    if ((rc <= 1)); then
      mv -f -- "$tmp" "$HASH_INDEX"
    else
      rm -f -- "$tmp"
    fi
  ) 200>"$lock"
}

# ---------------------------------------------------------------------------
# Build hash index from existing MP3s (first-run or repair)
# ---------------------------------------------------------------------------

# Append a "hash|path" line to HASH_INDEX for every *.mp3 under MUSIC_DIR that
# is not already listed, so repeated runs do not duplicate entries. Files that
# cannot be decoded are reported and skipped.
# NOTE(review): hashes computed here come from decoded MP3 audio, whereas
# rip_drive hashes the freshly ripped WAV; lossy encoding presumably makes the
# two differ, so entries added here may never match a later rip — confirm.
build_index() {
  log "Scanning existing tracks to build hash index..."

  # Paths that already have an index entry (text after the first '|').
  local -A known=()
  local entry
  while IFS= read -r entry; do
    if [[ "$entry" == *'|'?* ]]; then
      known["${entry#*|}"]=1
    fi
  done < "$HASH_INDEX"

  local count=0 already=0 mp3 h
  while IFS= read -r -d '' mp3; do
    if [[ -n "${known[$mp3]:-}" ]]; then
      already=$((already + 1))
      continue
    fi
    if h=$(audio_hash "$mp3") && [[ -n "$h" ]]; then
      printf '%s|%s\n' "$h" "$mp3" >> "$HASH_INDEX"
      count=$((count + 1))
    else
      warn "Could not decode '$mp3' — not indexed"
    fi
  done < <(find "$MUSIC_DIR" -type f -name "*.mp3" -print0 2>/dev/null)

  log "Indexed $count track(s)"
  if ((already > 0)); then
    log "Skipped $already track(s) already in the index"
  fi
}

# ---------------------------------------------------------------------------
# Misc
# ---------------------------------------------------------------------------

# Print $1 with filesystem-hostile characters replaced by '_', runs of spaces
# squeezed to one, and one leading/trailing space removed.
sanitize() {
  printf '%s' "$1" | tr '/:*?"<>|\\' '_' | sed -E 's/ +/ /g; s/^ //; s/ $//'
}

# ---------------------------------------------------------------------------
# Phase 1: gather metadata for one drive (interactive, sequential)
# Writes results to drive_dir files; returns 1 if drive should be skipped.
# ---------------------------------------------------------------------------
gather_metadata() {
  local drive="$1"
  local label ddir
  label=$(drive_label "$drive")
  ddir=$(drive_dir "$drive")
  mkdir -p "$ddir"

  local num_tracks
  num_tracks=$(count_cd_tracks "$drive")
  if [[ "${num_tracks:-0}" -eq 0 ]]; then
    warn "[$label] No audio tracks — skipping"
    echo "skip" > "$ddir/status"
    return 1
  fi
  echo "$num_tracks" > "$ddir/num_tracks"

  local disc_id
  disc_id=$(get_disc_id "$drive")
  printf '%s' "$disc_id" > "$ddir/disc_id"

  local use_mb=false
  if [[ -n "$disc_id" ]]; then
    log "[$label] Disc ID: $disc_id — querying MusicBrainz..."
    local mb_resp release_id
    mb_resp=$(query_mb "$disc_id")
    if [[ -n "$mb_resp" ]] && ! jq -e '.error' <<< "$mb_resp" &>/dev/null; then
      if release_id=$(pick_release "$mb_resp" "$label") \
        && parse_mb_release "$mb_resp" "$release_id" "$ddir" "$disc_id"; then
        local artist album track_count
        artist=$(cat "$ddir/artist" 2>/dev/null || true)
        album=$(cat "$ddir/album" 2>/dev/null || true)
        track_count=$(wc -l < "$ddir/tracks" 2>/dev/null || echo 0)
        if [[ -n "$artist" ]] && [[ -n "$album" ]] && [[ "$track_count" -gt 0 ]]; then
          use_mb=true
          log "[$label] MB match: $artist – $album"
        fi
      fi
    fi
  else
    log "[$label] No disc ID — falling back to manual input"
  fi

  if [[ "$use_mb" == false ]]; then
    echo ""
    echo "=== [$label] Manual metadata ($num_tracks tracks) ==="
    local artist album year
    read -rp " Artist : " artist
    read -rp " Album : " album
    read -rp " Year (blank to skip): " year
    printf '%s' "$artist" > "$ddir/artist"
    printf '%s' "$album" > "$ddir/album"
    printf '%s' "$year" > "$ddir/year"
    : > "$ddir/tracks"
    local i t
    for ((i = 1; i <= num_tracks; i++)); do
      read -rp " Track $i: " t
      printf '%s\n' "$t" >> "$ddir/tracks"
    done
  fi

  # Warn on track count mismatch between CD and MB
  local mb_count
  mb_count=$(wc -l < "$ddir/tracks" 2>/dev/null || echo 0)
  if [[ "$mb_count" -ne "$num_tracks" ]]; then
    warn "[$label] MB has $mb_count track(s) but CD has $num_tracks — titles may be offset"
  fi
  return 0
}

# ---------------------------------------------------------------------------
# Phase 2: show confirmation table, catch disc mixups
# ---------------------------------------------------------------------------

# Print one summary row per drive given as arguments, warn when the same disc
# ID shows up in more than one drive, then ask for confirmation.
# Returns 1 if the answer starts with "n"/"N", 0 otherwise.
confirm_drives() {
  local -a drives=("$@")
  echo ""
  echo "┌────────────────────────────────────────────────────────────────────┐"
  echo "│ DISC ASSIGNMENT SUMMARY │"
  echo "└────────────────────────────────────────────────────────────────────┘"
  printf ' %-8s %-36s %-6s %s\n' "Drive" "Artist – Album" "Year" "Tracks"
  printf ' %-8s %-36s %-6s %s\n' \
    "--------" "------------------------------------" "------" "------"

  local disc_id_seen=":"   # ":id1::id2:" for duplicate disc-ID detection
  local drive label ddir artist album year num_tracks disc_id display
  for drive in "${drives[@]}"; do
    label=$(drive_label "$drive")
    ddir=$(drive_dir "$drive")
    artist=$(cat "$ddir/artist" 2>/dev/null || echo "?")
    album=$(cat "$ddir/album" 2>/dev/null || echo "?")
    year=$(cat "$ddir/year" 2>/dev/null || echo "")
    num_tracks=$(cat "$ddir/num_tracks" 2>/dev/null || echo "?")
    disc_id=$(cat "$ddir/disc_id" 2>/dev/null || echo "")

    display="$artist – $album"
    if [[ ${#display} -gt 36 ]]; then
      display="${display:0:33}..."
    fi
    printf ' %-8s %-36s %-6s %s\n' \
      "$label" "$display" "${year:---}" "$num_tracks tracks"

    if [[ -n "$disc_id" ]]; then
      if [[ "$disc_id_seen" == *":$disc_id:"* ]]; then
        echo ""
        warn "Disc ID '$disc_id' appears in multiple drives!"
        warn "You may have inserted the same CD twice — please check."
        echo ""
      fi
      disc_id_seen+=":$disc_id:"
    fi
  done

  echo ""
  echo " Physically verify that each drive contains the disc shown above."
  echo " If anything looks wrong, answer N and re-seat the discs."
  echo ""
  local ok=""
  read -rp " All discs confirmed? [Y/n] " ok || true
  if [[ "${ok,,}" == n* ]]; then
    return 1
  fi
  return 0
}

# ---------------------------------------------------------------------------
# Phase 3: rip one drive — called in a background subshell
# ---------------------------------------------------------------------------

# Rip, deduplicate, encode and tag every track of drive $1 using the metadata
# files written by gather_metadata, then write "ripped skipped failed" counts
# to <drive_dir>/result.
# Counters use x=$((x + 1)) rather than ((x++)): the latter returns status 1
# when the old value is 0, which would terminate this job under `set -e`.
rip_drive() {
  local drive="$1"
  local label ddir
  label=$(drive_label "$drive")
  ddir=$(drive_dir "$drive")

  local artist album year num_tracks
  artist=$(cat "$ddir/artist" 2>/dev/null || echo "Unknown Artist")
  album=$(cat "$ddir/album" 2>/dev/null || echo "Unknown Album")
  year=$(cat "$ddir/year" 2>/dev/null || echo "")
  num_tracks=$(cat "$ddir/num_tracks" 2>/dev/null || echo 0)

  local -a tracks=()
  if [[ -f "$ddir/tracks" ]]; then
    readarray -t tracks < "$ddir/tracks"
  fi

  # Blank names (e.g. empty manual input) would collapse a path component.
  local artist_dir album_dir
  artist_dir=$(sanitize "$artist")
  album_dir=$(sanitize "$album")
  if [[ -z "$artist_dir" ]]; then
    artist="Unknown Artist"
    artist_dir="Unknown Artist"
  fi
  if [[ -z "$album_dir" ]]; then
    album="Unknown Album"
    album_dir="Unknown Album"
  fi

  local out_dir="$MUSIC_DIR/$artist_dir/$album_dir"
  mkdir -p "$out_dir"
  local wav_dir="$ddir/wav"
  mkdir -p "$wav_dir"

  local ripped=0 skipped=0 failed=0
  local n nn title safe_title wav mp3 hash dup
  for ((n = 1; n <= num_tracks; n++)); do
    printf -v nn '%02d' "$n"
    title="${tracks[n-1]:-}"
    if [[ -z "$title" ]]; then
      title="Track $nn"
    fi
    safe_title=$(sanitize "$title")
    if [[ -z "$safe_title" ]]; then
      safe_title="Track $nn"
    fi
    wav="$wav_dir/track$nn.wav"
    mp3="$out_dir/$nn - $safe_title.mp3"

    dlog "$label" "[$n/$num_tracks] $title"

    # Rip track
    if ! cdparanoia -d "$drive" "$n" "$wav" 2>/dev/null; then
      dlog "$label" " FAILED to rip track $n"
      failed=$((failed + 1))
      rm -f -- "$wav"
      continue
    fi

    # Deduplicate: atomically claim hash or detect collision
    if ! hash=$(audio_hash "$wav") || [[ -z "$hash" ]]; then
      dlog "$label" " FAILED to hash track $n"
      failed=$((failed + 1))
      rm -f -- "$wav"
      continue
    fi
    if ! dup=$(check_and_claim_hash "$hash" "$mp3"); then
      dlog "$label" " FAILED to update hash index for track $n"
      failed=$((failed + 1))
      rm -f -- "$wav"
      continue
    fi
    if [[ -n "$dup" ]]; then
      dlog "$label" " DUPLICATE of '$dup' — skipping"
      skipped=$((skipped + 1))
      rm -f -- "$wav"
      continue
    fi

    # Encode
    if ! lame -V2 --quiet "$wav" "$mp3" 2>/dev/null; then
      dlog "$label" " FAILED to encode track $n"
      failed=$((failed + 1))
      # Give the claimed hash back; otherwise the next attempt would be
      # skipped as a duplicate of a file that was never written.
      release_hash "$hash" "$mp3" || true
      rm -f -- "$wav" "$mp3"
      continue
    fi

    # Tag (best effort)
    if command -v id3v2 &>/dev/null; then
      local -a tag_args=(-a "$artist" -A "$album" -t "$title" -T "$n/$num_tracks")
      if [[ -n "$year" ]]; then
        tag_args+=(-y "$year")
      fi
      id3v2 "${tag_args[@]}" "$mp3" 2>/dev/null || true
    fi

    rm -f -- "$wav"
    ripped=$((ripped + 1))
    dlog "$label" " -> $mp3"
  done

  printf '%d %d %d\n' "$ripped" "$skipped" "$failed" > "$ddir/result"
  dlog "$label" "Finished: $ripped ripped, $skipped skipped, $failed failed"
}

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

# Print the help text on stdout.
usage() {
  cat <<'EOF'
Usage: rip.sh [OPTIONS]

Rip audio CDs to MP3, organised as MUSIC_DIR/Artist/Album/NN - Title.mp3.
MusicBrainz metadata is used where available, with a manual-input fallback.
Duplicate tracks are detected by audio-content hash (headers/tags ignored).

Options:
  --drives DEV...   Specify one or more CD device paths (e.g. /dev/sr0 /dev/sr1).
                    Default: auto-detect all drives that contain an audio disc.
  --index           Scan MUSIC_DIR for existing MP3s and build/rebuild the audio
                    hash index used for duplicate detection. Run this once if you
                    have tracks ripped before using this script.
  --help            Show this help and exit.

Environment:
  MUSIC_DIR         Root of the music library (default: ./music)

Examples:
  rip.sh
  rip.sh --drives /dev/sr0 /dev/sr1
  MUSIC_DIR=/mnt/nas/music rip.sh
  rip.sh --index
EOF
}

# Entry point: parse the first argument, then run the four phases
# (metadata, confirmation, parallel rip, report).
main() {
  # Help must work even when tools are missing and must not create MUSIC_DIR.
  if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
    usage
    exit 0
  fi

  check_deps
  mkdir -p "$MUSIC_DIR"
  HASH_INDEX="$MUSIC_DIR/.audio_hashes"
  touch "$HASH_INDEX"

  if [[ "${1:-}" == "--index" ]]; then
    build_index
    exit 0
  fi

  # Drive selection: explicit or auto-detected
  local -a ALL_DRIVES=()
  local d
  if [[ "${1:-}" == "--drives" ]]; then
    ALL_DRIVES=("${@:2}")
    [[ ${#ALL_DRIVES[@]} -gt 0 ]] || die "--drives requires at least one device path"
  else
    log "Scanning for drives with audio discs..."
    while IFS= read -r d; do
      if [[ -n "$d" ]]; then
        ALL_DRIVES+=("$d")
      fi
    done < <(detect_drives)
  fi
  [[ ${#ALL_DRIVES[@]} -gt 0 ]] || die "No drives with audio discs found."
  log "Using ${#ALL_DRIVES[@]} drive(s): ${ALL_DRIVES[*]}"

  # ── Phase 1: metadata (sequential — user interaction required) ────────
  echo ""
  local -a ACTIVE_DRIVES=()
  local drive label
  for drive in "${ALL_DRIVES[@]}"; do
    label=$(drive_label "$drive")
    echo "━━━━━━━━━━━━ $label ━━━━━━━━━━━━"
    if gather_metadata "$drive"; then
      ACTIVE_DRIVES+=("$drive")
    fi
    echo ""
  done
  [[ ${#ACTIVE_DRIVES[@]} -gt 0 ]] || die "No drives with usable metadata."

  # ── Phase 2: confirmation table — disc mixup guard ────────────────────
  if ! confirm_drives "${ACTIVE_DRIVES[@]}"; then
    echo "Aborted. Correct the disc placement and re-run."
    exit 1
  fi

  # ── Phase 3: parallel ripping ─────────────────────────────────────────
  echo ""
  log "Starting parallel rip on ${#ACTIVE_DRIVES[@]} drive(s)..."
  local -a pids=()
  for drive in "${ACTIVE_DRIVES[@]}"; do
    rip_drive "$drive" &
    pids+=("$!")
  done
  # pids[k] belongs to ACTIVE_DRIVES[k]; a failed job is reported, not fatal.
  local k
  for k in "${!pids[@]}"; do
    if ! wait "${pids[k]}"; then
      warn "[$(drive_label "${ACTIVE_DRIVES[k]}")] rip job exited abnormally"
    fi
  done

  # ── Phase 4: combined report ──────────────────────────────────────────
  echo ""
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  printf ' %-8s %7s %7s %7s\n' "Drive" "Ripped" "Skipped" "Failed"
  echo " ──────────────────────────────────────────"
  local total_r=0 total_s=0 total_f=0
  local result r s f
  for drive in "${ACTIVE_DRIVES[@]}"; do
    r=0 s=0 f=0
    label=$(drive_label "$drive")
    result=$(cat "$(drive_dir "$drive")/result" 2>/dev/null || echo "0 0 0")
    read -r r s f <<< "$result" || true
    r=${r:-0} s=${s:-0} f=${f:-0}
    printf ' %-8s %7d %7d %7d\n' "$label" "$r" "$s" "$f"
    # Plain assignments: ((total += 0)) would return 1 and trip `set -e`.
    total_r=$((total_r + r))
    total_s=$((total_s + s))
    total_f=$((total_f + f))
  done
  echo " ──────────────────────────────────────────"
  printf ' %-8s %7d %7d %7d\n' "TOTAL" "$total_r" "$total_s" "$total_f"
  echo ""
  log "Output: $MUSIC_DIR"
}

main "$@"