Initial commit: Korean voice-cloning TTS prototype

FastAPI backend, web UI, CosyVoice3/F5-TTS setup scripts, and handoff docs for continuing the work on a GPU PC.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-04 13:36:37 +09:00
commit 7101fdcd65
36 changed files with 1937 additions and 0 deletions
--- a/backend/app/text_preprocess.py
+++ b/backend/app/text_preprocess.py
@@ -0,0 +1,95 @@
+"""한국어 TTS용 간단한 텍스트 정규화."""
+from __future__ import annotations
+
+import re
+
+
+# Matches any run of whitespace (spaces, tabs, newlines) as a single unit.
+_RE_MULTI_SPACE = re.compile(r"\s+")
+# Loose e-mail matcher: a local part, "@", then a domain containing a dot.
+_RE_EMAIL = re.compile(r"[\w.+-]+@[\w.-]+\.\w+")
+# Matches an http:// or https:// URL up to the next whitespace character.
+_RE_URL = re.compile(r"https?://\S+")
+
+
+def _digits_to_korean(num_str: str) -> str:
+    """Convert a non-negative integer string to its Sino-Korean reading.
+
+    Thousands-separator commas are ignored ("1,000" reads as "천"). The
+    input is returned unchanged when it is not a plain decimal integer or
+    when it is too large for the supported units (10**16 and above), so no
+    digits are ever silently dropped.
+
+    Args:
+        num_str: Digits, optionally containing commas.
+
+    Returns:
+        The Korean reading (e.g. "21" -> "이십일"), "영" for zero, or
+        ``num_str`` itself when it cannot be converted.
+    """
+    digits = num_str.replace(",", "")
+    # isdecimal() (unlike isdigit()) rejects characters such as "²" that
+    # int() cannot parse, and also rejects the empty string.
+    if not digits.isdecimal():
+        return num_str
+    n = int(digits)
+    if n == 0:
+        return "영"
+    units = ["", "만", "억", "조"]
+    small = ["", "일", "이", "삼", "사", "오", "육", "칠", "팔", "구"]
+    ten = ["", "십", "백", "천"]
+    # Beyond the largest unit the high-order groups would be lost.
+    if n >= 10000 ** len(units):
+        return num_str
+
+    def chunk_to_korean(x: int) -> str:
+        """Read one four-digit group (1..9999) without its 만/억/조 unit."""
+        parts: list[str] = []
+        for i, d in enumerate(f"{x:04d}"):
+            di = int(d)
+            if di == 0:
+                continue
+            place = ten[3 - i]
+            # "일" is omitted before 십/백/천 ("십", not "일십") but must be
+            # spoken in the ones place, where there is no place word.
+            if di == 1 and place:
+                parts.append(place)
+            else:
+                parts.append(small[di] + place)
+        return "".join(parts)
+
+    result: list[str] = []
+    u = 0
+    while n > 0:
+        n, part = divmod(n, 10000)
+        if part:
+            if part == 1 and u == 1 and n == 0:
+                # A leading 10000 is conventionally read "만", not "일만".
+                result.append(units[u])
+            else:
+                result.append(chunk_to_korean(part) + units[u])
+        u += 1
+    return "".join(reversed(result))
+
+
+def _replace_numbers(text: str) -> str:
+    """Replace every integer in *text* with its Korean reading.
+
+    A comma is treated as part of a number only when it is a well-formed
+    thousands separator (groups of exactly three digits, e.g. "1,234,567").
+    Any other comma is ordinary punctuation and is left in place, so
+    "3, 4" keeps its comma and "1,2,3" is read as three separate numbers.
+
+    Args:
+        text: Input text that may contain digit runs.
+
+    Returns:
+        The text with each matched number passed through
+        ``_digits_to_korean``.
+    """
+
+    def repl(m: re.Match[str]) -> str:
+        # Strip separators here so the helper always receives bare digits.
+        return _digits_to_korean(m.group(0).replace(",", ""))
+
+    # First alternative: comma-grouped number not followed by another digit.
+    # Second alternative: a plain run of digits.
+    return re.sub(r"\d{1,3}(?:,\d{3})+(?!\d)|\d+", repl, text)
+
+
+def preprocess_korean(text: str) -> str:
+    """Normalize raw text into a speakable form.
+
+    URLs and e-mail addresses are replaced by placeholder words, "&" and
+    "%" are spelled out, numbers are converted via ``_replace_numbers``,
+    and whitespace runs are collapsed to single spaces. The result is
+    stripped of leading and trailing whitespace.
+    """
+    cleaned = _RE_EMAIL.sub(" 이메일 ", _RE_URL.sub(" 링크 ", text.strip()))
+    for symbol, spoken in (("&", " 앤드 "), ("%", " 퍼센트 ")):
+        cleaned = cleaned.replace(symbol, spoken)
+    with_numbers = _replace_numbers(cleaned)
+    return _RE_MULTI_SPACE.sub(" ", with_numbers).strip()
+
+
+def split_sentences(text: str, max_chars: int = 120) -> list[str]:
+    """Split text into chunks of at most *max_chars* characters.
+
+    The text is first normalized with ``preprocess_korean`` and split after
+    sentence-ending punctuation (or at newlines). Consecutive sentences are
+    packed into one chunk, joined by a space, while they fit; a single
+    sentence longer than *max_chars* is cut into fixed-size slices. When
+    nothing remains after normalization, the original *text* is returned
+    as the only element.
+    """
+    normalized = preprocess_korean(text)
+    pieces = (s.strip() for s in re.split(r"(?<=[.!?…])\s+|\n+", normalized))
+    out: list[str] = []
+    pending = ""
+    for sentence in filter(None, pieces):
+        # The +1 accounts for the joining space.
+        if len(pending) + len(sentence) + 1 <= max_chars:
+            pending = f"{pending} {sentence}".strip() if pending else sentence
+            continue
+        if pending:
+            out.append(pending)
+        if len(sentence) > max_chars:
+            # Too long for any chunk: hard-slice it and start afresh.
+            out.extend(
+                sentence[start : start + max_chars]
+                for start in range(0, len(sentence), max_chars)
+            )
+            pending = ""
+        else:
+            pending = sentence
+    if pending:
+        out.append(pending)
+    return out if out else [text]