"""TTS engines that delegate inference to worker scripts run in per-engine venvs."""

from __future__ import annotations

import subprocess
import sys
from pathlib import Path

from backend.app.config import project_root
from backend.app.tts.base import TTSEngine

ROOT = project_root()


class SubprocessEngine(TTSEngine):
    """Base class for engines that run a worker script in a dedicated venv.

    The interpreter is resolved as ``ROOT/.venvs/<venv_name>/bin/python`` and
    the worker as ``ROOT/scripts/workers/<worker_name>``.  Subclasses are
    expected to define a ``name`` attribute; it is used in failure messages.
    """

    def __init__(self, venv_name: str, worker_name: str) -> None:
        """Record the interpreter and worker script paths for this engine.

        Args:
            venv_name: Directory name of the virtual environment under
                ``ROOT/.venvs``.
            worker_name: File name of the worker script under
                ``ROOT/scripts/workers``.
        """
        self._python = ROOT / ".venvs" / venv_name / "bin" / "python"
        self._worker = ROOT / "scripts" / "workers" / worker_name

    def _run(self, args: list[str]) -> None:
        """Run the worker script with ``args`` and raise if it fails.

        Args:
            args: Command-line arguments appended after the worker script path.

        Raises:
            RuntimeError: If the venv interpreter is missing, or if the worker
                exits with a non-zero status (the message carries the worker's
                stderr, or its stdout when stderr is empty).
        """
        if not self._python.is_file():
            # The interpreter lives at <venv>/bin/python, so two levels up is
            # the venv directory, whose name also identifies the setup script.
            venv_name = self._python.parent.parent.name
            raise RuntimeError(
                f"{venv_name} venv 없음. "
                f"scripts/setup_{venv_name}.sh 실행"
            )

        cmd = [str(self._python), str(self._worker), *args]
        # errors="replace": captured output is only used for diagnostics, so an
        # undecodable byte from the worker must not raise UnicodeDecodeError
        # (which would fail an otherwise successful run or hide the real error).
        proc = subprocess.run(
            cmd,
            cwd=str(ROOT),
            capture_output=True,
            text=True,
            errors="replace",
        )
        if proc.returncode != 0:
            raise RuntimeError(
                f"{self.name} inference failed:\n{proc.stderr or proc.stdout}"
            )


class F5TTSEngine(SubprocessEngine):
    """Engine that runs ``f5_infer.py`` inside the ``f5tts`` venv."""

    name = "f5_tts"

    def __init__(self) -> None:
        """Bind the engine to the ``f5tts`` venv and ``f5_infer.py`` worker."""
        super().__init__("f5tts", "f5_infer.py")

    def synthesize(
        self,
        text: str,
        ref_audio: Path,
        ref_text: str,
        out_path: Path,
    ) -> Path:
        """Invoke the worker to synthesize ``text`` and return ``out_path``.

        Args:
            text: Text passed to the worker as ``--gen-text``.
            ref_audio: Path passed to the worker as ``--ref-audio``.
            ref_text: Passed as ``--ref-text``; when empty, the placeholder
                ``"reference audio transcript"`` is sent instead.
            out_path: Path passed as ``--out``; its parent directory is
                created if missing.

        Returns:
            ``out_path``, unchanged.

        Raises:
            RuntimeError: Propagated from ``_run`` when the venv is missing or
                the worker fails.
        """
        out_path.parent.mkdir(parents=True, exist_ok=True)
        self._run(
            [
                "--ref-audio",
                str(ref_audio),
                "--ref-text",
                ref_text or "reference audio transcript",
                "--gen-text",
                text,
                "--out",
                str(out_path),
            ]
        )
        return out_path


class CosyVoiceEngine(SubprocessEngine):
    """Engine that runs ``cosy_infer.py`` inside the ``cosyvoice`` venv."""

    name = "cosyvoice"

    def __init__(self, model_dir: Path, prompt_prefix: str) -> None:
        """Bind the engine to its venv/worker and remember worker options.

        Args:
            model_dir: Forwarded to the worker as ``--model-dir``.
            prompt_prefix: Forwarded to the worker as ``--prompt-prefix``.
        """
        super().__init__("cosyvoice", "cosy_infer.py")
        self._model_dir = model_dir
        self._prompt_prefix = prompt_prefix

    def synthesize(
        self,
        text: str,
        ref_audio: Path,
        ref_text: str,
        out_path: Path,
    ) -> Path:
        """Invoke the worker to synthesize ``text`` and return ``out_path``.

        Args:
            text: Text passed to the worker as ``--gen-text``.
            ref_audio: Path passed to the worker as ``--ref-audio``.
            ref_text: Passed as ``--prompt-text``; an empty string is sent
                when it is falsy.
            out_path: Path passed as ``--out``; its parent directory is
                created if missing.

        Returns:
            ``out_path``, unchanged.

        Raises:
            RuntimeError: Propagated from ``_run`` when the venv is missing or
                the worker fails.
        """
        out_path.parent.mkdir(parents=True, exist_ok=True)
        self._run(
            [
                "--ref-audio",
                str(ref_audio),
                "--gen-text",
                text,
                "--prompt-text",
                ref_text or "",
                "--out",
                str(out_path),
                "--model-dir",
                str(self._model_dir),
                "--prompt-prefix",
                self._prompt_prefix,
            ]
        )
        return out_path


def create_engine(model: str, model_dir: Path, prompt_prefix: str) -> TTSEngine:
    """Build the engine identified by ``model``.

    Args:
        model: Either ``"f5_tts"`` or ``"cosyvoice"``.
        model_dir: Used only when ``model`` is ``"cosyvoice"``.
        prompt_prefix: Used only when ``model`` is ``"cosyvoice"``.

    Returns:
        An ``F5TTSEngine`` or a ``CosyVoiceEngine``.

    Raises:
        ValueError: If ``model`` is not one of the supported identifiers.
    """
    if model == "f5_tts":
        return F5TTSEngine()
    if model == "cosyvoice":
        return CosyVoiceEngine(model_dir, prompt_prefix)
    raise ValueError(f"Unknown model: {model}. Use cosyvoice or f5_tts.")