Initial commit: Korean voice-cloning TTS prototype
FastAPI backend, web UI, CosyVoice3/F5-TTS setup scripts, and handoff docs for GPU PC continuation. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
149
scripts/run_ab_compare.py
Normal file
149
scripts/run_ab_compare.py
Normal file
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
F5-TTS vs CosyVoice3 A/B 비교.
|
||||
각 모델 전용 venv의 worker를 subprocess로 호출합니다.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Repository root: this script lives in <root>/scripts/, so go up two levels.
ROOT = Path(__file__).resolve().parents[1]
# Directory holding the test-sentence configuration.
CONFIG = ROOT / "config"


def _venv_python(venv_name: str) -> Path:
    """Return the interpreter path inside ``.venvs/<venv_name>`` for this OS."""
    venv_dir = ROOT / ".venvs" / venv_name
    if sys.platform == "win32":
        # Windows virtual environments keep the interpreter in Scripts\python.exe.
        return venv_dir / "Scripts" / "python.exe"
    return venv_dir / "bin" / "python"


# Interpreters of the per-model virtual environments.
F5_PY = _venv_python("f5tts")
COSY_PY = _venv_python("cosyvoice")
# Worker scripts executed with the interpreters above.
F5_WORKER = ROOT / "scripts" / "workers" / "f5_infer.py"
COSY_WORKER = ROOT / "scripts" / "workers" / "cosy_infer.py"
|
||||
|
||||
|
||||
def load_sentences() -> list[dict]:
    """Return the list stored under the "cases" key of test_sentences.json.

    The file is read as UTF-8 from the CONFIG directory.
    """
    sentences_file = CONFIG / "test_sentences.json"
    payload = json.loads(sentences_file.read_text(encoding="utf-8"))
    return payload["cases"]
|
||||
|
||||
|
||||
def _bundled_f5_reference() -> Path | None:
    """Return the example reference WAV shipped with f5_tts, or None if unusable."""
    try:
        from importlib.resources import files

        wav = files("f5_tts").joinpath("infer/examples/basic/basic_ref_en.wav")
    except Exception:
        # Deliberate best-effort probe: any failure to locate the f5_tts
        # package simply means we fall back to the local samples.
        return None
    candidate = Path(str(wav))
    # Only trust the bundled example when it really exists on disk.
    return candidate if candidate.is_file() else None


def resolve_ref_audio(ref_arg: str) -> tuple[Path, str]:
    """Resolve the reference recording and its transcript.

    Args:
        ref_arg: Path to a WAV file, or ``"auto"`` to pick one automatically
            (the f5_tts bundled example first, then the first file of
            ``samples/*.wav`` in sorted order).

    Returns:
        ``(wav_path, ref_text)``. The transcript is read from a ``.txt`` file
        next to the WAV or from ``samples/my_voice_ref.txt``. With an explicit
        path and no transcript a placeholder text is returned; in auto mode
        the transcript stays empty.

    Raises:
        SystemExit: If no reference audio can be found.
    """
    if ref_arg == "auto":
        bundled = _bundled_f5_reference()
        if bundled is not None:
            return bundled, "some call me nature, others call me mother nature."
        # Sorted so the chosen sample does not depend on filesystem order.
        samples = sorted((ROOT / "samples").glob("*.wav"))
        if not samples:
            raise SystemExit(
                "reference 없음: samples/*.wav 녹음하거나 f5-tts venv 설치 후 --ref-audio auto"
            )
        ref_path = samples[0]
    else:
        ref_path = Path(ref_arg)
        if not ref_path.is_file():
            raise SystemExit(f"ref audio not found: {ref_path}")

    ref_text = ""
    txt_candidates = (
        ref_path.with_suffix(".txt"),
        ROOT / "samples" / "my_voice_ref.txt",
    )
    for candidate in txt_candidates:
        if candidate.is_file():
            # The first existing transcript wins, even if it is empty.
            ref_text = candidate.read_text(encoding="utf-8").strip()
            break
    if not ref_text and ref_arg != "auto":
        ref_text = "참조 음성의 대본을 여기에 입력하세요."
    return ref_path, ref_text
|
||||
|
||||
|
||||
def run_worker(python: Path, worker: Path, cmd: list[str]) -> bool:
    """Run one worker script with a model-specific interpreter.

    Args:
        python: Interpreter of the model's virtual environment.
        worker: Worker script to execute.
        cmd: Command-line arguments forwarded to the worker.

    Returns:
        True if the worker exited with status 0. False if the interpreter is
        missing, could not be launched, or the worker exited non-zero.
    """
    if not python.is_file():
        print(f"SKIP: venv missing ({python.parent.parent.name})", file=sys.stderr)
        return False
    try:
        completed = subprocess.run([str(python), str(worker), *cmd], cwd=str(ROOT))
    except OSError as exc:
        # An interpreter that exists but cannot be executed must not abort
        # the remaining cases; report it and count it as a failure.
        print(f"FAIL: could not launch {worker.name}: {exc}", file=sys.stderr)
        return False
    return completed.returncode == 0
|
||||
|
||||
|
||||
def main() -> int:
    """Synthesize every test sentence with the selected models.

    Command-line options:
        --ref-audio: WAV path or ``auto`` (default ``auto``).
        --models: ``both``, ``f5_tts`` or ``cosyvoice`` (default ``both``).
        --out-dir: Output folder (default ``<ROOT>/outputs/ab_compare``).

    Each model writes ``<out-dir>/<model>/<case id>.wav``. A ``manifest.json``
    describing the reference audio, its text, the cases and the output folder
    is written to ``<out-dir>``.

    Returns:
        0 if every worker run succeeded, otherwise 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ref-audio", default="auto", help="WAV path or 'auto'")
    parser.add_argument("--models", default="both", choices=("both", "f5_tts", "cosyvoice"))
    parser.add_argument("--out-dir", default=str(ROOT / "outputs" / "ab_compare"))
    args = parser.parse_args()

    ref_path, ref_text = resolve_ref_audio(args.ref_audio)
    out_base = Path(args.out_dir)
    out_base.mkdir(parents=True, exist_ok=True)

    # The model selection does not change between cases; evaluate it once.
    use_f5 = args.models in ("both", "f5_tts")
    use_cosy = args.models in ("both", "cosyvoice")
    f5_dir = out_base / "f5_tts"
    cosy_dir = out_base / "cosyvoice"
    # Create the per-model folders up front so the target directory of each
    # WAV exists before a worker is asked to write into it.
    for wanted, model_dir in ((use_f5, f5_dir), (use_cosy, cosy_dir)):
        if wanted:
            model_dir.mkdir(parents=True, exist_ok=True)

    cases = load_sentences()
    print(f"Reference: {ref_path}")
    print(f"Cases: {len(cases)}")
    print(f"Output: {out_base}\n")

    ok = 0
    fail = 0
    for case in cases:
        cid = case["id"]
        text = case["text"]
        print(f"=== {cid}: {case['label']} ===")

        outcomes: list[bool] = []
        if use_f5:
            out_f5 = f5_dir / f"{cid}.wav"
            outcomes.append(
                run_worker(
                    F5_PY,
                    F5_WORKER,
                    [
                        "--ref-audio",
                        str(ref_path),
                        "--ref-text",
                        ref_text,
                        "--gen-text",
                        text,
                        "--out",
                        str(out_f5),
                    ],
                )
            )
        if use_cosy:
            out_cosy = cosy_dir / f"{cid}.wav"
            outcomes.append(
                run_worker(
                    COSY_PY,
                    COSY_WORKER,
                    [
                        "--ref-audio",
                        str(ref_path),
                        "--gen-text",
                        text,
                        "--prompt-text",
                        ref_text,
                        "--out",
                        str(out_cosy),
                    ],
                )
            )
        ok += outcomes.count(True)
        fail += outcomes.count(False)

    manifest = {
        "ref_audio": str(ref_path),
        "ref_text": ref_text,
        "cases": cases,
        "output_dir": str(out_base),
    }
    manifest_path = out_base / "manifest.json"
    manifest_path.write_text(
        json.dumps(manifest, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    print(f"\n완료: success={ok} fail={fail}")
    print(f"manifest: {out_base / 'manifest.json'}")
    return 0 if fail == 0 else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user