Files
voice.sori.studio/scripts/prepare_reference.sh
zenn 7101fdcd65 Initial commit: Korean voice-cloning TTS prototype
FastAPI backend, web UI, CosyVoice3/F5-TTS setup scripts, and handoff docs for GPU PC continuation.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-04 13:36:37 +09:00

39 lines
841 B
Bash
Executable File

#!/usr/bin/env bash
# Normalize a reference WAV to mono, 24 kHz, 16-bit PCM.
#
# Usage: prepare_reference.sh input.wav [output.wav]
#   input.wav   audio file to read
#   output.wav  destination path; defaults to the input path with a trailing
#               ".wav" (if any) stripped and "_24k_mono.wav" appended
#
# Runs the conversion with the interpreter at <repo>/.venvs/api/bin/python
# when that file is executable, otherwise with python3 from PATH.
# Requires the soundfile package, plus librosa when the input sample rate
# is not already 24 kHz.
set -euo pipefail

if [[ $# -lt 1 ]]; then
  printf 'Usage: %s input.wav [output.wav]\n' "$0" >&2
  exit 1
fi

IN="$1"
OUT="${2:-${IN%.wav}_24k_mono.wav}"

# Repository root is the parent of the directory containing this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PY="${ROOT}/.venvs/api/bin/python"
if [[ ! -x "$PY" ]]; then
  PY=python3
fi

# The heredoc delimiter is quoted so the shell performs no expansion inside
# the Python source; the paths travel as argv instead of being spliced into
# string literals, so quotes, backslashes, or "$" in a filename cannot break
# or alter the program.
"$PY" - "$IN" "$OUT" <<'PY'
import sys

try:
    import soundfile as sf
except ImportError:
    print("soundfile 필요: pip install soundfile", file=sys.stderr)
    sys.exit(1)

in_path, out_path = sys.argv[1], sys.argv[2]
target_sr = 24000

data, sr = sf.read(in_path, always_2d=False)

# Multi-channel input: average the channels down to a single one.
if data.ndim > 1:
    data = data.mean(axis=1)

if sr != target_sr:
    # librosa is only needed when resampling, so import it lazily.
    try:
        import librosa
    except ImportError:
        print("librosa 필요: pip install librosa", file=sys.stderr)
        sys.exit(1)
    data = librosa.resample(data.astype(float), orig_sr=sr, target_sr=target_sr)
    sr = target_sr

sf.write(out_path, data, sr, subtype="PCM_16")
print(f"Saved: {out_path} ({sr} Hz mono)")
PY