FastAPI backend, web UI, CosyVoice3/F5-TTS setup scripts, and handoff docs for GPU PC continuation. Co-authored-by: Cursor <cursoragent@cursor.com>
39 lines
841 B
Bash
Executable File
39 lines
841 B
Bash
Executable File
#!/usr/bin/env bash
# Normalize a reference WAV to mono 24 kHz and write it as 16-bit PCM.
#
# Usage: <script> input.wav [output.wav]
#   input.wav   source audio file (required)
#   output.wav  destination; defaults to the input path with a trailing
#               ".wav" replaced by "_24k_mono.wav"
#
# Exits 1 when no input is given or when the Python audio dependencies
# cannot be imported.
set -euo pipefail

if [[ $# -lt 1 ]]; then
  # Usage errors go to stderr so they never pollute captured stdout.
  echo "Usage: $0 input.wav [output.wav]" >&2
  exit 1
fi

IN="$1"
OUT="${2:-${IN%.wav}_24k_mono.wav}"

# Prefer the project's API virtualenv interpreter (one directory above this
# script); fall back to whatever python3 is on PATH.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PY="${ROOT}/.venvs/api/bin/python"
if [[ ! -x "$PY" ]]; then
  PY=python3
fi

# The heredoc delimiter is quoted so the shell performs NO expansion inside
# the Python source. The paths are handed over as argv instead of being
# spliced into string literals, so quotes, backslashes, or '$' in a file name
# can neither break nor inject Python code.
"$PY" - "$IN" "$OUT" <<'PY'
import sys

try:
    import soundfile as sf
    # np is not referenced below; the import stays inside this dependency
    # check so a missing numpy is reported here rather than mid-conversion.
    import numpy as np
except ImportError:
    print("soundfile 필요: pip install soundfile")
    sys.exit(1)

# With "python - a b", sys.argv is ["-", "a", "b"].
in_path, out_path = sys.argv[1], sys.argv[2]

data, sr = sf.read(in_path, always_2d=False)

# Multi-channel audio: average across the channel axis to get mono.
if data.ndim > 1:
    data = data.mean(axis=1)

target_sr = 24000
if sr != target_sr:
    # librosa is imported lazily: it is only needed when resampling.
    import librosa

    data = librosa.resample(data.astype(float), orig_sr=sr, target_sr=target_sr)
    sr = target_sr

sf.write(out_path, data, sr, subtype="PCM_16")
print(f"Saved: {out_path} ({sr} Hz mono)")
PY
|