Initial commit: Korean voice-cloning TTS prototype

FastAPI backend, web UI, CosyVoice3/F5-TTS setup scripts, and handoff docs for GPU PC continuation.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-06-04 13:36:37 +09:00
commit 7101fdcd65
36 changed files with 1937 additions and 0 deletions

101
web/app.js Normal file
View File

@@ -0,0 +1,101 @@
/**
 * Shorthand DOM lookup used throughout this script.
 *
 * @param {string} elementId - Value of the `id` attribute to look up.
 * @returns {HTMLElement|null} Whatever `document.getElementById` returns.
 */
const $ = (elementId) => document.getElementById(elementId);
/**
 * Queries `/api/health` and writes the active model name and sample count
 * into the `#healthInfo` element.
 *
 * A network failure, a non-2xx status, or a body that cannot be parsed as
 * JSON all result in the "cannot reach the API server" message instead.
 *
 * @returns {Promise<void>} Settles after `#healthInfo` has been updated.
 */
async function fetchHealth() {
  const info = $("healthInfo");
  try {
    const res = await fetch("/api/health");
    // fetch() resolves for HTTP error statuses too; without this check an
    // error payload would be rendered as "모델: undefined · 샘플 undefined".
    if (!res.ok) {
      throw new Error(`health check failed: HTTP ${res.status}`);
    }
    const data = await res.json();
    info.textContent = `모델: ${data.model} · 샘플 ${data.samples_count}`;
  } catch {
    info.textContent = "API 서버에 연결할 수 없습니다.";
  }
}
/**
 * Fetches the stored reference voices from `/api/voice-samples` and appends
 * one `<option>` per sample to the `#sampleSelect` dropdown.
 *
 * Each option's value is the sample path; its label is `label/id`, with a
 * "(대본 없음)" suffix when the sample has no transcript. Any failure is
 * logged with `console.warn` and the dropdown keeps whatever it already had.
 *
 * @returns {Promise<void>} Settles once the options have been appended or
 *   the failure has been logged.
 */
async function loadSamples() {
  const select = $("sampleSelect");
  try {
    const res = await fetch("/api/voice-samples");
    // fetch() resolves for HTTP error statuses; report those explicitly
    // rather than as a "not iterable" TypeError from the loop below.
    if (!res.ok) {
      throw new Error(`voice sample list request failed: HTTP ${res.status}`);
    }
    const { samples } = await res.json();
    for (const sample of samples) {
      const opt = document.createElement("option");
      opt.value = sample.path;
      const transcriptNote = sample.has_transcript ? "" : " (대본 없음)";
      opt.textContent = `${sample.label}/${sample.id}${transcriptNote}`;
      select.appendChild(opt);
    }
  } catch (e) {
    console.warn("samples load failed", e);
  }
}
/**
 * Uploads the file chosen in `#fileUpload`, if any, to `/api/voice-sample`.
 *
 * The trimmed `#refText` value is sent along as `ref_text` when non-empty.
 *
 * @returns {Promise<string|null>} The `path` field of the server response,
 *   or `null` when no file is selected (no request is made in that case).
 * @throws {Error} When the server answers with a non-2xx status. The message
 *   is the response's `detail` (JSON-encoded when it is not a string), or
 *   "업로드 실패" when no detail is available.
 */
async function uploadIfNeeded() {
  const fileInput = $("fileUpload");
  if (!fileInput.files?.length) return null;

  const form = new FormData();
  form.append("file", fileInput.files[0]);
  const refText = $("refText").value.trim();
  if (refText) form.append("ref_text", refText);

  const res = await fetch("/api/voice-sample", { method: "POST", body: form });
  if (!res.ok) {
    // The error body may not be JSON at all (e.g. a proxy error page).
    const err = await res.json().catch(() => null);
    const detail = err?.detail;
    let message = "업로드 실패";
    if (typeof detail === "string") {
      if (detail) message = detail;
    } else if (detail) {
      // Structured details (e.g. a list of validation errors) would otherwise
      // be coerced to "[object Object]" by the Error constructor.
      message = JSON.stringify(detail);
    }
    throw new Error(message);
  }

  const data = await res.json();
  return data.path;
}
// Click handler for the "generate" button.
//
// Flow: validate the text, optionally upload a new reference recording, POST
// the job to /api/tts, then point the audio player and download link at the
// result. The button is disabled while the request is in flight and is always
// re-enabled afterwards; every failure is reported in #status.
$("generateBtn").addEventListener("click", async () => {
  const status = $("status");
  const text = $("text").value.trim();
  if (!text) {
    status.textContent = "텍스트를 입력하세요.";
    return;
  }

  const btn = $("generateBtn");
  btn.disabled = true;
  status.textContent = "음성 생성 중… (GPU 추론은 수십 초 걸릴 수 있습니다)";
  $("resultSection").hidden = true;

  try {
    // A freshly uploaded file takes precedence over the dropdown selection.
    let refAudio = $("sampleSelect").value || null;
    const uploaded = await uploadIfNeeded();
    if (uploaded) refAudio = uploaded;

    const body = {
      text,
      preprocess: true,
      ref_text: $("refText").value.trim() || null,
      ref_audio: refAudio,
    };
    const res = await fetch("/api/tts", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body),
    });

    if (!res.ok) {
      // A non-JSON error body yields null here, so the fallbacks below are
      // actually reachable (stringifying an empty object gave the truthy "{}").
      const err = await res.json().catch(() => null);
      const detail = err?.detail ?? err;
      let message = "";
      if (typeof detail === "string") {
        message = detail;
      } else if (detail != null) {
        message = JSON.stringify(detail);
      }
      // statusText can be empty, so end with the numeric status.
      throw new Error(message || res.statusText || `HTTP ${res.status}`);
    }

    const data = await res.json();
    // Cache-buster so the player does not reuse a previously loaded file;
    // use "&" when the URL already carries a query string.
    const separator = data.audio_url.includes("?") ? "&" : "?";
    const url = `${data.audio_url}${separator}t=${Date.now()}`;
    $("player").src = url;
    $("downloadLink").href = url;
    $("downloadLink").download = `${data.job_id}.wav`;
    $("resultSection").hidden = false;
    status.textContent = `완료 (모델: ${data.model}, job: ${data.job_id})`;
  } catch (e) {
    status.textContent = `오류: ${e.message}`;
  } finally {
    btn.disabled = false;
  }
});
// Start both startup requests immediately. Each function wraps its fetch in
// its own try/catch, so the returned promises are deliberately not awaited.
void fetchHealth();
void loadSamples();

64
web/index.html Normal file
View File

@@ -0,0 +1,64 @@
<!DOCTYPE html>
<html lang="ko">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>한국어 보이스 클로닝 TTS</title>
    <link rel="stylesheet" href="/style.css" />
  </head>
  <body>
    <main class="container">
      <header>
        <h1>한국어 보이스 클로닝 TTS</h1>
        <p class="subtitle">텍스트를 입력하면 reference 음성을 바탕으로 음성을 생성합니다.</p>
      </header>

      <!-- Input form: the element ids below are looked up by /app.js. -->
      <section class="card">
        <label for="text">읽을 텍스트</label>
        <textarea id="text" rows="5" placeholder="안녕하세요. 오늘 날씨가 정말 좋네요."></textarea>

        <div class="row">
          <div class="field">
            <label for="sampleSelect">Reference 음성</label>
            <!-- Further options are appended by /app.js at load time. -->
            <select id="sampleSelect">
              <option value="">기본 샘플 사용</option>
            </select>
          </div>
          <div class="field">
            <label for="refText">Reference 대본 (선택)</label>
            <input id="refText" type="text" placeholder="녹음한 내용과 동일한 텍스트" />
          </div>
        </div>

        <div class="field">
          <label for="fileUpload">새 음성 업로드 (WAV)</label>
          <input id="fileUpload" type="file" accept=".wav,audio/wav" />
        </div>

        <button id="generateBtn" type="button">음성 생성</button>
        <p id="status" class="status" aria-live="polite"></p>
      </section>

      <!-- Result card: hidden until /app.js reveals it after a successful run. -->
      <section class="card" id="resultSection" hidden>
        <h2>결과</h2>
        <audio id="player" controls></audio>
        <p>
          <a id="downloadLink" href="#" download>WAV 다운로드</a>
        </p>
      </section>

      <footer>
        <span id="healthInfo">서버 확인 중…</span>
      </footer>
    </main>
    <script src="/app.js"></script>
  </body>
</html>

133
web/style.css Normal file
View File

@@ -0,0 +1,133 @@
/* ---------- Theme tokens ---------- */
:root {
  --bg: #0f1419;
  --card: #1a2332;
  --text: #e7ecf3;
  --muted: #8b9bb4;
  --accent: #3d8bfd;
  --accent-hover: #5ca0ff;
  --border: #2a3a52;
}

/* ---------- Base ---------- */
* {
  box-sizing: border-box;
}

body {
  margin: 0;
  font-family: "Pretendard", "Apple SD Gothic Neo", system-ui, sans-serif;
  background: var(--bg);
  color: var(--text);
  line-height: 1.5;
}

/* ---------- Page layout ---------- */
.container {
  max-width: 720px;
  margin: 0 auto;
  padding: 2rem 1.25rem 3rem;
}

header h1 {
  margin: 0 0 0.25rem;
  font-size: 1.75rem;
}

.subtitle {
  color: var(--muted);
  margin: 0 0 1.5rem;
}

.card {
  background: var(--card);
  border: 1px solid var(--border);
  border-radius: 12px;
  padding: 1.25rem;
  margin-bottom: 1rem;
}

/* ---------- Form controls ---------- */
label {
  display: block;
  font-size: 0.875rem;
  color: var(--muted);
  margin-bottom: 0.35rem;
}

textarea,
input,
select {
  width: 100%;
  padding: 0.65rem 0.75rem;
  border-radius: 8px;
  border: 1px solid var(--border);
  background: #0d1218;
  color: var(--text);
  font-size: 1rem;
}

textarea {
  resize: vertical;
  min-height: 120px;
}

/* Two-column field row that collapses to one column on narrow screens. */
.row {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 1rem;
  margin-top: 1rem;
}

@media (max-width: 600px) {
  .row {
    grid-template-columns: 1fr;
  }
}

.field {
  margin-bottom: 1rem;
}

/* ---------- Button ---------- */
button {
  width: 100%;
  padding: 0.85rem;
  border: none;
  border-radius: 8px;
  background: var(--accent);
  color: #fff;
  font-size: 1rem;
  font-weight: 600;
  cursor: pointer;
}

button:hover:not(:disabled) {
  background: var(--accent-hover);
}

button:disabled {
  opacity: 0.6;
  cursor: not-allowed;
}

/* ---------- Status line and footer ---------- */
.status {
  margin-top: 0.75rem;
  font-size: 0.9rem;
  color: var(--muted);
  /* Reserve the line's height even while the status text is empty. */
  min-height: 1.25rem;
}

footer {
  font-size: 0.8rem;
  color: var(--muted);
}

/* ---------- Result card ---------- */
#resultSection h2 {
  margin-top: 0;
  font-size: 1.1rem;
}

audio {
  width: 100%;
  margin-bottom: 0.5rem;
}

a {
  color: var(--accent);
}