Initial commit: Korean voice-cloning TTS prototype

FastAPI backend, web UI, CosyVoice3/F5-TTS setup scripts, and handoff docs for GPU PC continuation. Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-04 13:36:37 +09:00
commit 7101fdcd65
36 changed files with 1937 additions and 0 deletions
--- a/scripts/setup_cosyvoice.sh
+++ b/scripts/setup_cosyvoice.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+# CosyVoice3 전용 venv + 레포 클론 + 모델 다운로드
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+VENV="$ROOT/.venvs/cosyvoice"
+REPO="$ROOT/external/CosyVoice"
+MODEL_DIR="$ROOT/models/Fun-CosyVoice3-0.5B"
+
+mkdir -p "$ROOT/external" "$ROOT/models"
+
+if [[ ! -d "$REPO/.git" ]]; then
+  echo "CosyVoice 레포 클론..."
+  git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git "$REPO"
+  cd "$REPO"
+  git submodule update --init --recursive
+else
+  echo "CosyVoice 레포 이미 존재: $REPO"
+fi
+
+python3 -m venv "$VENV"
+"$VENV/bin/pip" install -U pip wheel
+"$VENV/bin/pip" install torch torchaudio --index-url https://download.pytorch.org/whl/cu124
+"$VENV/bin/pip" install -r "$REPO/requirements.txt"
+"$VENV/bin/pip" install huggingface_hub modelscope
+
+echo "CosyVoice3 모델 다운로드 (Hugging Face)..."
+"$VENV/bin/python" - <<PY
+from huggingface_hub import snapshot_download
+snapshot_download(
+    'FunAudioLLM/Fun-CosyVoice3-0.5B-2512',
+    local_dir='$MODEL_DIR',
+)
+print('Model saved to $MODEL_DIR')
+PY
+
+echo "CosyVoice venv 준비 완료: $VENV"
+echo "테스트: $VENV/bin/python $ROOT/scripts/workers/cosy_infer.py --help"