Initial commit: Korean voice-cloning TTS prototype
FastAPI backend, web UI, CosyVoice3/F5-TTS setup scripts, and handoff docs for GPU PC continuation.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
101
web/app.js
Normal file
101
web/app.js
Normal file
@@ -0,0 +1,101 @@
|
||||
/**
 * Shorthand for looking up a DOM element by its id.
 *
 * @param {string} elementId - Value of the element's `id` attribute.
 * @returns {HTMLElement | null} Whatever `document.getElementById` returns.
 */
const $ = (elementId) => {
  return document.getElementById(elementId);
};
|
||||
|
||||
/**
 * Queries `/api/health` and writes a one-line summary into `#healthInfo`.
 *
 * `fetch` only rejects on network failures, so an HTTP error status is
 * checked explicitly; otherwise an error payload would be rendered as
 * "모델: undefined · 샘플 undefined개".
 *
 * @returns {Promise<void>} Resolves once the footer text has been updated.
 */
async function fetchHealth() {
  const info = $("healthInfo");
  try {
    const res = await fetch("/api/health");
    if (!res.ok) {
      // The server answered, but not with a usable health payload.
      info.textContent = `서버 상태를 확인할 수 없습니다. (HTTP ${res.status})`;
      return;
    }
    const data = await res.json();
    info.textContent = `모델: ${data.model} · 샘플 ${data.samples_count}개`;
  } catch (err) {
    // Network failure or an unparsable body: keep the UI message generic,
    // but leave a trace in the console instead of swallowing the error.
    console.warn("health check failed", err);
    info.textContent = "API 서버에 연결할 수 없습니다.";
  }
}
|
||||
|
||||
/**
 * Fetches the reference voice samples from `/api/voice-samples` and appends
 * one `<option>` per sample to `#sampleSelect`.
 *
 * Best effort: any failure is logged with `console.warn` and the select is
 * left with whatever options it already had.
 *
 * @returns {Promise<void>} Resolves after the options are appended or the
 *   failure has been logged.
 */
async function loadSamples() {
  const select = $("sampleSelect");
  try {
    const res = await fetch("/api/voice-samples");
    // fetch() does not reject on HTTP error statuses, so check explicitly.
    if (!res.ok) throw new Error(`HTTP ${res.status}`);

    const data = await res.json();
    const samples = data?.samples;
    if (!Array.isArray(samples)) {
      throw new Error("unexpected /api/voice-samples response shape");
    }

    for (const sample of samples) {
      const opt = document.createElement("option");
      opt.value = sample.path;
      // Flag samples that have no transcript.
      const suffix = sample.has_transcript ? "" : " (대본 없음)";
      opt.textContent = `${sample.label}/${sample.id}${suffix}`;
      select.appendChild(opt);
    }
  } catch (e) {
    console.warn("samples load failed", e);
  }
}
|
||||
|
||||
/**
 * Uploads the file chosen in `#fileUpload`, if any, to `/api/voice-sample`.
 *
 * The trimmed `#refText` value is sent along as `ref_text` when non-empty.
 *
 * @returns {Promise<string | null>} The `path` field of the server response,
 *   or `null` when no file is selected.
 * @throws {Error} When the server answers with a non-OK status. The message
 *   is the response's `detail` (JSON-stringified when it is not a string,
 *   e.g. a list of validation errors) or "업로드 실패" when none is available.
 */
async function uploadIfNeeded() {
  const fileInput = $("fileUpload");
  const file = fileInput.files?.[0];
  if (!file) return null;

  const form = new FormData();
  form.append("file", file);
  const refText = $("refText").value.trim();
  if (refText) form.append("ref_text", refText);

  const res = await fetch("/api/voice-sample", { method: "POST", body: form });
  if (!res.ok) {
    // The error body may be missing or not JSON; fall back to an empty object.
    const err = await res.json().catch(() => ({}));
    const detail = err?.detail;
    let message = "업로드 실패";
    if (typeof detail === "string") {
      if (detail) message = detail;
    } else if (detail != null) {
      // Structured details would otherwise render as "[object Object]".
      message = JSON.stringify(detail);
    }
    throw new Error(message);
  }

  const data = await res.json();
  return data.path;
}
|
||||
|
||||
/**
 * Turns the JSON body of a failed `/api/tts` response into a display string.
 *
 * @param {*} payload - Parsed error body (may be `{}` when it was not JSON).
 * @param {string} fallback - Text to use when the payload carries nothing.
 * @returns {string} `payload.detail` (stringified if structured), else the
 *   stringified payload if it has any keys, else `fallback`.
 */
function ttsErrorMessage(payload, fallback) {
  const detail = payload?.detail;
  if (typeof detail === "string") {
    if (detail) return detail;
  } else if (detail != null) {
    return JSON.stringify(detail);
  }
  // An empty object stringifies to the truthy "{}", which would mask the
  // fallback; only stringify payloads that actually contain something.
  if (payload && typeof payload === "object" && Object.keys(payload).length > 0) {
    return JSON.stringify(payload);
  }
  return fallback;
}

/**
 * Appends a `t=<timestamp>` query parameter, so each generated clip gets a
 * distinct URL.
 *
 * @param {string} url - URL that may or may not already have a query string.
 * @returns {string} The URL with the extra parameter.
 */
function withCacheBuster(url) {
  const separator = url.includes("?") ? "&" : "?";
  return `${url}${separator}t=${Date.now()}`;
}

// Generate button: validate the text, upload a new reference clip if one was
// chosen, POST the request to /api/tts, then wire the result into the player
// and download link. The button stays disabled while the request is running.
$("generateBtn").addEventListener("click", async () => {
  const status = $("status");
  const text = $("text").value.trim();
  if (!text) {
    status.textContent = "텍스트를 입력하세요.";
    return;
  }

  const btn = $("generateBtn");
  btn.disabled = true;
  status.textContent = "음성 생성 중… (GPU 추론은 수십 초 걸릴 수 있습니다)";
  $("resultSection").hidden = true;

  try {
    // A freshly uploaded file takes precedence over the selected sample.
    let refAudio = $("sampleSelect").value || null;
    const uploaded = await uploadIfNeeded();
    if (uploaded) refAudio = uploaded;

    const body = {
      text,
      preprocess: true,
      ref_text: $("refText").value.trim() || null,
      ref_audio: refAudio,
    };

    const res = await fetch("/api/tts", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body),
    });

    if (!res.ok) {
      const err = await res.json().catch(() => ({}));
      // statusText can be empty, so keep a numeric last resort.
      const fallback = res.statusText || `HTTP ${res.status}`;
      throw new Error(ttsErrorMessage(err, fallback));
    }

    const data = await res.json();
    const url = withCacheBuster(data.audio_url);
    $("player").src = url;
    const link = $("downloadLink");
    link.href = url;
    link.download = `${data.job_id}.wav`;
    $("resultSection").hidden = false;
    status.textContent = `완료 (모델: ${data.model}, job: ${data.job_id})`;
  } catch (e) {
    status.textContent = `오류: ${e.message}`;
  } finally {
    btn.disabled = false;
  }
});
|
||||
|
||||
// Start both initial page loads; neither promise is awaited here.
void fetchHealth();
void loadSamples();
|
||||
64
web/index.html
Normal file
64
web/index.html
Normal file
@@ -0,0 +1,64 @@
|
||||
<!DOCTYPE html>
<html lang="ko">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>한국어 보이스 클로닝 TTS</title>
    <link rel="stylesheet" href="/style.css" />
  </head>
  <body>
    <main class="container">
      <!-- Page title and short description -->
      <header>
        <h1>한국어 보이스 클로닝 TTS</h1>
        <p class="subtitle">텍스트를 입력하면 reference 음성을 바탕으로 음성을 생성합니다.</p>
      </header>

      <!-- Input card: text to speak, reference voice selection, optional upload -->
      <section class="card">
        <label for="text">읽을 텍스트</label>
        <textarea id="text" rows="5" placeholder="안녕하세요. 오늘 날씨가 정말 좋네요."></textarea>

        <div class="row">
          <div class="field">
            <label for="sampleSelect">Reference 음성</label>
            <!-- Only the default entry is static markup -->
            <select id="sampleSelect">
              <option value="">기본 샘플 사용</option>
            </select>
          </div>
          <div class="field">
            <label for="refText">Reference 대본 (선택)</label>
            <input id="refText" type="text" placeholder="녹음한 내용과 동일한 텍스트" />
          </div>
        </div>

        <div class="field">
          <label for="fileUpload">새 음성 업로드 (WAV)</label>
          <input id="fileUpload" type="file" accept=".wav,audio/wav" />
        </div>

        <button id="generateBtn" type="button">음성 생성</button>
        <!-- Status line, marked as a polite live region -->
        <p id="status" class="status" aria-live="polite"></p>
      </section>

      <!-- Result card: starts hidden -->
      <section class="card" id="resultSection" hidden>
        <h2>결과</h2>
        <audio id="player" controls></audio>
        <p>
          <a id="downloadLink" href="#" download>WAV 다운로드</a>
        </p>
      </section>

      <!-- Footer: server status placeholder -->
      <footer>
        <span id="healthInfo">서버 확인 중…</span>
      </footer>
    </main>
    <script src="/app.js"></script>
  </body>
</html>
|
||||
133
web/style.css
Normal file
133
web/style.css
Normal file
@@ -0,0 +1,133 @@
|
||||
/* Dark theme palette shared by the rules below. */
:root {
  /* Ask the browser to render native controls and scrollbars in dark mode. */
  color-scheme: dark;
  --bg: #0f1419;
  --card: #1a2332;
  --input-bg: #0d1218;
  --text: #e7ecf3;
  --muted: #8b9bb4;
  --accent: #3d8bfd;
  --accent-hover: #5ca0ff;
  --border: #2a3a52;
}

* {
  box-sizing: border-box;
}

body {
  margin: 0;
  font-family: "Pretendard", "Apple SD Gothic Neo", system-ui, sans-serif;
  background: var(--bg);
  color: var(--text);
  line-height: 1.5;
}

/* Centered single-column layout. */
.container {
  max-width: 720px;
  margin: 0 auto;
  padding: 2rem 1.25rem 3rem;
}

header h1 {
  margin: 0 0 0.25rem;
  font-size: 1.75rem;
}

.subtitle {
  color: var(--muted);
  margin: 0 0 1.5rem;
}

.card {
  background: var(--card);
  border: 1px solid var(--border);
  border-radius: 12px;
  padding: 1.25rem;
  margin-bottom: 1rem;
}

label {
  display: block;
  font-size: 0.875rem;
  color: var(--muted);
  margin-bottom: 0.35rem;
}

/* Form controls do not inherit font-family by default, so opt in here;
   otherwise the textarea ignores the font stack set on body. */
textarea,
input,
select {
  width: 100%;
  padding: 0.65rem 0.75rem;
  border-radius: 8px;
  border: 1px solid var(--border);
  background: var(--input-bg);
  color: var(--text);
  font-family: inherit;
  font-size: 1rem;
}

textarea {
  resize: vertical;
  min-height: 120px;
}

/* Two-column field row that collapses to one column on narrow screens. */
.row {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 1rem;
  margin-top: 1rem;
}

@media (max-width: 600px) {
  .row {
    grid-template-columns: 1fr;
  }
}

.field {
  margin-bottom: 1rem;
}

button {
  width: 100%;
  padding: 0.85rem;
  border: none;
  border-radius: 8px;
  background: var(--accent);
  color: #fff;
  font-family: inherit;
  font-size: 1rem;
  font-weight: 600;
  cursor: pointer;
}

button:hover:not(:disabled) {
  background: var(--accent-hover);
}

button:disabled {
  opacity: 0.6;
  cursor: not-allowed;
}

/* min-height reserves space for the status line even while it is empty. */
.status {
  margin-top: 0.75rem;
  font-size: 0.9rem;
  color: var(--muted);
  min-height: 1.25rem;
}

footer {
  font-size: 0.8rem;
  color: var(--muted);
}

#resultSection h2 {
  margin-top: 0;
  font-size: 1.1rem;
}

audio {
  width: 100%;
  margin-bottom: 0.5rem;
}

a {
  color: var(--accent);
}
|
||||
Reference in New Issue
Block a user