Initial commit: Korean voice-cloning TTS prototype
FastAPI backend, web UI, CosyVoice3/F5-TTS setup scripts, and handoff docs for GPU PC continuation. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
38
scripts/prepare_reference.sh
Executable file
38
scripts/prepare_reference.sh
Executable file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash
# Normalize a reference WAV to mono, 24 kHz, 16-bit PCM.
#
# Usage: prepare_reference.sh input.wav [output.wav]
#   input.wav   source audio file (read with the Python "soundfile" package)
#   output.wav  destination path; when omitted, a trailing ".wav" is stripped
#               from the input path and "_24k_mono.wav" is appended
#
# Interpreter: <repo root>/.venvs/api/bin/python when it is executable,
# otherwise whatever "python3" resolves to on PATH.
# Exit status: 1 on missing arguments or when soundfile cannot be imported;
# any other Python failure propagates because of "set -e".
set -euo pipefail

if [[ $# -lt 1 ]]; then
  # Usage goes to stderr because this is an error path (non-zero exit).
  echo "Usage: $0 input.wav [output.wav]" >&2
  exit 1
fi

IN="$1"
OUT="${2:-${IN%.wav}_24k_mono.wav}"

# Repo root is the parent of the directory containing this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PY="${ROOT}/.venvs/api/bin/python"
if [[ ! -x "$PY" ]]; then
  PY=python3
fi

# The heredoc delimiter is quoted ('PY') so the shell performs NO expansion
# inside the Python source. The paths are handed over as argv instead of being
# spliced into string literals, so quotes, backslashes, "$" or backticks in a
# file name can neither break the Python syntax nor inject code.
"$PY" - "$IN" "$OUT" <<'PY'
import sys

try:
    import soundfile as sf
except ImportError:
    print("soundfile 필요: pip install soundfile", file=sys.stderr)
    sys.exit(1)

# With "python - a b", sys.argv is ["-", a, b].
src_path, dst_path = sys.argv[1], sys.argv[2]
TARGET_SR = 24000

data, sr = sf.read(src_path, always_2d=False)

# Multi-channel input comes back as a 2-D array; average the channels
# (axis 1) to get a single mono track.
if data.ndim > 1:
    data = data.mean(axis=1)

if sr != TARGET_SR:
    # Imported lazily: librosa is only needed when resampling is required.
    import librosa

    data = librosa.resample(data.astype(float), orig_sr=sr, target_sr=TARGET_SR)
    sr = TARGET_SR

sf.write(dst_path, data, sr, subtype="PCM_16")
print(f"Saved: {dst_path} ({sr} Hz mono)")
PY
|
||||
Reference in New Issue
Block a user