Initial commit: Korean voice-cloning TTS prototype

FastAPI backend, web UI, CosyVoice3/F5-TTS setup scripts, and handoff docs for GPU PC continuation.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-06-04 13:36:37 +09:00
commit 7101fdcd65
36 changed files with 1937 additions and 0 deletions

26
config/settings.yaml Normal file
View File

@@ -0,0 +1,26 @@
# TTS prototype settings (Korean output quality takes priority).
default_model: cosyvoice  # cosyvoice | f5_tts

# Directory locations. Every value is a relative path; the base directory
# they resolve against is not defined in this file -- presumably the
# repository root (TODO confirm against the loader).
paths:
  samples_dir: samples
  outputs_dir: outputs
  models_dir: models
  uploads_dir: backend/data/uploads

# Settings for the `cosyvoice` model choice.
cosyvoice:
  repo_dir: external/CosyVoice
  model_dir: models/Fun-CosyVoice3-0.5B
  # Prompt text that corresponds to the reference WAV
  # (CosyVoice3 zero-shot format).
  prompt_prefix: "You are a helpful assistant.<|endofprompt|>"

# Settings for the `f5_tts` model choice.
f5_tts:
  model: F5TTS_v1_Base

# NOTE(review): units are not stated here -- cross_fade_duration is
# presumably seconds and speed a playback-rate multiplier; confirm in the
# consuming code.
generation:
  chunk_max_chars: 120
  cross_fade_duration: 0.15
  speed: 1.0

server:
  # NOTE(review): 0.0.0.0 is the all-interfaces wildcard address; if the
  # server binds to it, the API is reachable from other machines on the
  # network. Confirm this is intended outside local development.
  host: 0.0.0.0
  port: 8000