Initial commit: Korean voice-cloning TTS prototype

FastAPI backend, web UI, CosyVoice3/F5-TTS setup scripts, and handoff docs for GPU PC continuation.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-04 13:36:37 +09:00
commit 7101fdcd65
36 changed files with 1937 additions and 0 deletions
--- a/config/model_choice.json
+++ b/config/model_choice.json
@@ -0,0 +1,10 @@
+{
+  "selected_model": "cosyvoice",
+  "selection_criteria": [
+    "korean_naturalness",
+    "prosody",
+    "speaker_similarity",
+    "long_sentence_stability"
+  ],
+  "notes": "품질 우선 기준으로 CosyVoice3를 기본 엔진으로 사용합니다. F5-TTS는 scripts/run_ab_compare.py로 동일 조건 비교 후 변경 가능합니다."
+}
--- a/config/settings.yaml
+++ b/config/settings.yaml
@@ -0,0 +1,26 @@
+# TTS prototype settings (Korean output quality takes priority)
+default_model: cosyvoice  # allowed values: cosyvoice | f5_tts
+
+paths:
+  samples_dir: samples
+  outputs_dir: outputs
+  models_dir: models
+  uploads_dir: backend/data/uploads
+
+cosyvoice:
+  repo_dir: external/CosyVoice
+  model_dir: models/Fun-CosyVoice3-0.5B
+  # Prompt text that accompanies the reference WAV (CosyVoice3 zero-shot format)
+  prompt_prefix: "You are a helpful assistant.<|endofprompt|>"
+
+f5_tts:
+  model: F5TTS_v1_Base
+
+generation:
+  chunk_max_chars: 120  # presumably the max characters per text chunk — TODO confirm
+  cross_fade_duration: 0.15  # presumably seconds — TODO confirm against the consumer
+  speed: 1.0  # presumably 1.0 means unmodified speaking rate — TODO confirm
+
+server:
+  host: 0.0.0.0  # NOTE(review): if used as the bind address, this listens on all interfaces — confirm intended
+  port: 8000
--- a/config/test_sentences.json
+++ b/config/test_sentences.json
@@ -0,0 +1,29 @@
+{
+  "cases": [
+    {
+      "id": "short",
+      "label": "짧은 문장",
+      "text": "안녕하세요. 오늘 날씨가 정말 좋네요."
+    },
+    {
+      "id": "long",
+      "label": "긴 문장",
+      "text": "인공지능 음성 합성 기술은 짧은 문장뿐 아니라 긴 설명문에서도 자연스러운 억양과 호흡을 유지해야 하며, 특히 한국어에서는 조사와 어미 변화가 발음 품질에 큰 영향을 줍니다."
+    },
+    {
+      "id": "numbers",
+      "label": "숫자/단위",
+      "text": "회의는 3월 15일 오후 2시 30분에 시작하며, 예산은 약 1,250,000원입니다."
+    },
+    {
+      "id": "mixed",
+      "label": "영어/기호 혼합",
+      "text": "GitHub에서 API 키를 발급받은 뒤, README.md 파일을 확인해 주세요."
+    },
+    {
+      "id": "emotion",
+      "label": "감정/강조",
+      "text": "정말 기뻐요! 드디어 프로젝트가 완성됐어요. 고생 많으셨습니다."
+    }
+  ]
+}