sori.studio/server/repositories/post-import-repository.js

import { mkdir, stat, writeFile } from 'node:fs/promises'
import { basename, extname, join } from 'node:path'
import { readZipBufferEntries } from '../utils/zip-reader'
import { upsertMediaMetadataCategory } from '../utils/media-library'
import { getPostgresClient } from './postgres-client'
import { createAdminPost } from './content-repository'

// Public URL prefix used when building the URL of a saved import asset.
const UPLOAD_BASE_URL = '/uploads'
// Maximum number of Markdown posts accepted from a single archive.
const MAX_IMPORT_POSTS = 1000
// Matches a path that ends in ".md", ignoring case.
const MARKDOWN_EXTENSION_PATTERN = /\.md$/i

/**
 * Reduces a string to a segment that is safe to use inside a file name.
 *
 * Reserved filesystem characters and whitespace runs become a single hyphen,
 * the result is capped at 80 UTF-16 code units, and leading/trailing hyphens
 * are removed.
 * @param {string} value - Raw text (nullish values are treated as empty)
 * @returns {string} Sanitized segment, or 'asset' when nothing usable remains
 */
const sanitizeFilenameSegment = (value) => String(value || '')
  .trim()
  .replace(/[\\/:*?"<>|]+/g, '-')
  .replace(/\s+/g, '-')
  .replace(/-+/g, '-')
  .replace(/^-/, '')
  // Truncate first, then trim the tail, so the cut cannot leave a dangling hyphen.
  .slice(0, 80)
  .replace(/-$/, '') || 'asset'

/**
 * Turns arbitrary text into a URL slug.
 *
 * The text is lower-cased and NFC-normalized; every run of characters other
 * than a-z, 0-9 and Hangul syllables becomes one hyphen, and hyphens at either
 * end are dropped.
 * @param {string} value - Source slug or title
 * @returns {string} Cleaned slug, or 'imported-post' when nothing remains
 */
const sanitizeSlug = (value) => {
  const lowered = String(value || '').trim().toLowerCase().normalize('NFC')
  const hyphenated = lowered.replace(/[^a-z0-9가-힣]+/g, '-')
  const collapsed = hyphenated.replace(/-+/g, '-')
  const stripped = collapsed.replace(/^-|-$/g, '')

  return stripped || 'imported-post'
}

/**
 * Strips the surrounding quotes from a YAML scalar and undoes its escapes.
 *
 * Double-quoted values have `\"` and `\\` unescaped. Single-quoted values have
 * a doubled apostrophe (`''`) collapsed to one, as YAML defines for that style.
 * Unquoted values are returned trimmed.
 * @param {string} value - Raw YAML scalar text
 * @returns {string} The unquoted string ('' for empty or nullish input)
 */
const unquoteYamlString = (value) => {
  const trimmed = String(value || '').trim()

  if (!trimmed) {
    return ''
  }

  if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
    return trimmed
      .slice(1, -1)
      .replace(/\\"/g, '"')
      .replace(/\\\\/g, '\\')
  }

  // A lone apostrophe is not a quoted scalar, hence the length check.
  if (trimmed.length >= 2 && trimmed.startsWith("'") && trimmed.endsWith("'")) {
    return trimmed
      .slice(1, -1)
      .replace(/''/g, "'")
  }

  return trimmed
}

/**
 * Parses a YAML flow sequence such as `[a, "b, c", d]` into strings.
 *
 * Commas split items unless they sit inside double quotes or directly follow
 * a backslash. Each item is unquoted, trimmed, and dropped when empty.
 * @param {string} value - YAML flow-sequence text
 * @returns {Array<string>} Parsed items; [] when the text is not a bracketed list
 */
const parseYamlArray = (value) => {
  const text = String(value || '').trim()
  const isBracketed = text.startsWith('[') && text.endsWith(']')

  if (!text || text === '[]' || !isBracketed) {
    return []
  }

  const body = text.slice(1, -1).trim()
  if (!body) {
    return []
  }

  const items = []
  let buffer = ''
  let insideQuotes = false
  let afterBackslash = false

  for (const ch of body) {
    // Only an unquoted, unescaped comma ends the current item.
    if (ch === ',' && !insideQuotes && !afterBackslash) {
      items.push(unquoteYamlString(buffer))
      buffer = ''
      continue
    }

    buffer += ch

    if (afterBackslash) {
      afterBackslash = false
    } else if (ch === '\\') {
      afterBackslash = true
    } else if (ch === '"') {
      insideQuotes = !insideQuotes
    }
  }

  if (buffer.trim()) {
    items.push(unquoteYamlString(buffer))
  }

  return items
    .map((item) => item.trim())
    .filter(Boolean)
}

/**
 * Parses one YAML scalar or flow sequence.
 *
 * Recognizes the literals `null`, `true` and `false`, treats text starting
 * with `[` as a flow sequence, and returns anything else as an unquoted string.
 * @param {string} value - Raw YAML value text
 * @returns {unknown} null, a boolean, an array of strings, or a string
 */
const parseYamlValue = (value) => {
  const text = String(value || '').trim()

  switch (text) {
    case 'null':
      return null
    case 'true':
      return true
    case 'false':
      return false
    default:
      return text.startsWith('[')
        ? parseYamlArray(text)
        : unquoteYamlString(text)
  }
}

/**
 * Splits a Markdown document into its front matter and body.
 *
 * A leading BOM is removed. Front matter is recognized when the document
 * starts with a `---` line and a later line consists only of `---`; both LF
 * and CRLF line endings are accepted, and the block may be empty. Each
 * `key: value` line is parsed with parseYamlValue; lines without a colon are
 * ignored. When no front matter is found the whole text is returned as content.
 * @param {string} markdown - Markdown document text
 * @returns {{ frontmatter: Object, content: string }} Parsed front matter and the body after it
 */
const parseMarkdownDocument = (markdown) => {
  const normalized = String(markdown || '').replace(/^\uFEFF/, '')
  const opening = /^---[ \t]*\r?\n/.exec(normalized)

  if (!opening) {
    return {
      frontmatter: {},
      content: normalized
    }
  }

  // Search for the closing delimiter from the line right after the opening
  // one, so an empty front matter block (`---` directly followed by `---`)
  // is still detected.
  const closingPattern = /^---[ \t]*(?:\r?\n|$)/gm
  closingPattern.lastIndex = opening[0].length
  const closing = closingPattern.exec(normalized)

  if (!closing) {
    return {
      frontmatter: {},
      content: normalized
    }
  }

  const frontmatterText = normalized.slice(opening[0].length, closing.index)
  const content = normalized.slice(closing.index + closing[0].length)
  const frontmatter = {}

  for (const line of frontmatterText.split(/\r?\n/)) {
    const separatorIndex = line.indexOf(':')
    if (separatorIndex < 0) {
      continue
    }

    const key = line.slice(0, separatorIndex).trim()
    const value = line.slice(separatorIndex + 1)

    // Assigning to `__proto__` would swap the object's prototype instead of
    // storing a field, so that key is skipped.
    if (key && key !== '__proto__') {
      frontmatter[key] = parseYamlValue(value)
    }
  }

  return {
    frontmatter,
    content
  }
}

/**
 * Indexes ZIP entries by their path inside the archive.
 *
 * When two entries share a path, the later one wins.
 * @param {Array<{ path: string, data: Buffer }>} entries - ZIP entries
 * @returns {Map<string, Buffer>} Entry data keyed by entry path
 */
const createZipEntryMap = (entries) => {
  const byPath = new Map()

  for (const { path, data } of entries) {
    byPath.set(path, data)
  }

  return byPath
}

/**
 * Returns the folder part of a path inside the ZIP.
 *
 * Empty path segments are ignored, so leading or doubled slashes do not
 * appear in the result.
 * @param {string} path - Path of a Markdown file inside the ZIP
 * @returns {string} Parent folder path, or '' for a top-level file
 */
const getPostFolder = (path) => String(path || '')
  .split('/')
  .filter((segment) => segment.length > 0)
  .slice(0, -1)
  .join('/')

/**
 * Maps an asset reference found in Markdown to a ZIP entry path.
 *
 * The query string and fragment are cut off, a leading `./` or `/` is
 * removed, and the remainder is joined onto the post folder. Empty, `.` and
 * `..` segments are dropped rather than resolved.
 * @param {string} postFolder - Folder of the post inside the ZIP
 * @param {string} assetPath - Asset path as written in the Markdown
 * @returns {string} ZIP entry path, or '' when the reference is empty
 */
const resolveAssetEntryPath = (postFolder, assetPath) => {
  const [withoutSuffix] = String(assetPath || '').split(/[?#]/)
  const relative = withoutSuffix
    .replace(/^\.\/+/, '')
    .replace(/^\/+/, '')

  if (relative === '') {
    return ''
  }

  const combined = postFolder ? `${postFolder}/${relative}` : relative
  const kept = []

  for (const segment of combined.split('/')) {
    if (segment === '' || segment === '.' || segment === '..') {
      continue
    }

    kept.push(segment)
  }

  return kept.join('/')
}

/**
 * Picks a file name that does not yet exist in the target directory.
 *
 * Tries `<stem><ext>` first, then `<stem>-2<ext>`, `<stem>-3<ext>` and so on.
 * The extension falls back to '.bin' when the original name has none.
 * @param {string} directoryPath - Directory the file will be written to
 * @param {string} originalName - Original file name
 * @returns {Promise<{ fileName: string, filePath: string }>} Chosen name and its full path
 * @throws {Error} 'IMPORT_ASSET_FILENAME_FAILED' when no free name is found
 *   below suffix 10000; any `stat` error other than ENOENT is rethrown
 */
const pickUniqueDiskFileName = async (directoryPath, originalName) => {
  const extension = extname(originalName || '') || '.bin'
  const stem = sanitizeFilenameSegment(String(originalName || '').replace(/\.[^.]+$/g, '')) || 'asset'

  for (let suffix = 1; suffix < 10000; suffix += 1) {
    const fileName = suffix === 1 ? `${stem}${extension}` : `${stem}-${suffix}${extension}`
    const filePath = join(directoryPath, fileName)

    try {
      // stat succeeding means the name is taken; move on to the next suffix.
      await stat(filePath)
    } catch (error) {
      // Only "no such file" means the name is free. Other failures (for
      // example a permission error) must surface instead of being taken as
      // availability.
      if (error && error.code === 'ENOENT') {
        return {
          fileName,
          filePath
        }
      }

      throw error
    }
  }

  throw new Error('IMPORT_ASSET_FILENAME_FAILED')
}

/**
 * Writes the assets referenced by a post into the uploads folder.
 *
 * Files go to `public/uploads/posts/<year>/<month>` under the current working
 * directory, using the local date at call time. References with no matching
 * ZIP entry are skipped. Each ZIP entry is written and registered once, even
 * when the post refers to it through several spellings (with or without `./`,
 * or with a query string).
 * @param {Object} input - Save input
 * @param {Map<string, Buffer>} input.entryMap - ZIP entry data keyed by path
 * @param {string} input.postFolder - Folder of the post inside the ZIP
 * @param {Set<string>} input.assetPaths - Asset references found in the post
 * @returns {Promise<Map<string, string>>} New public URL keyed by original reference
 */
const saveImportAssets = async ({ entryMap, postFolder, assetPaths }) => {
  const now = new Date()
  const year = String(now.getFullYear())
  const month = String(now.getMonth() + 1).padStart(2, '0')
  const directoryPath = join(process.cwd(), 'public', 'uploads', 'posts', year, month)
  const replacements = new Map()
  // Remembers which ZIP entries were already written so that two references
  // to the same entry share one file instead of producing a duplicate.
  const savedUrlByEntryPath = new Map()

  await mkdir(directoryPath, { recursive: true })

  for (const assetPath of assetPaths) {
    const entryPath = resolveAssetEntryPath(postFolder, assetPath)
    const data = entryMap.get(entryPath)

    if (!data) {
      continue
    }

    let publicUrl = savedUrlByEntryPath.get(entryPath)

    if (!publicUrl) {
      const { fileName, filePath } = await pickUniqueDiskFileName(directoryPath, basename(entryPath))
      await writeFile(filePath, data)

      publicUrl = `${UPLOAD_BASE_URL}/posts/${year}/${month}/${fileName}`
      await upsertMediaMetadataCategory(publicUrl, '미분류')
      savedUrlByEntryPath.set(entryPath, publicUrl)
    }

    // Register the reference as written plus its bare and `./`-prefixed forms.
    const barePath = assetPath.replace(/^\.\//, '')
    replacements.set(assetPath, publicUrl)
    replacements.set(barePath, publicUrl)
    replacements.set(`./${barePath}`, publicUrl)
  }

  return replacements
}

/**
 * Rewrites local asset references in Markdown to their uploaded URLs.
 *
 * Longer references are substituted first so that a shorter reference that is
 * a substring of a longer one cannot corrupt it.
 * @param {string} content - Original Markdown body
 * @param {Map<string, string>} replacements - Uploaded URL keyed by original reference
 * @returns {string} Body with every listed reference replaced
 */
const replaceAssetPaths = (content, replacements) => {
  const longestFirst = Array.from(replacements)
    .sort(([left], [right]) => right.length - left.length)

  return longestFirst.reduce(
    (text, [from, to]) => text.split(from).join(to),
    String(content || '')
  )
}

/**
 * Gathers the local asset references used by a post.
 *
 * Scans the body for paths under `images/` or `files/` (optionally prefixed
 * with `./`) and adds the `featured_image` and `og_image` front matter values
 * when they point into those folders.
 * @param {Object} input - Collection input
 * @param {Object} input.frontmatter - Parsed front matter
 * @param {string} input.content - Markdown body
 * @returns {Set<string>} Distinct asset references, body matches first
 */
const collectImportAssetPaths = ({ frontmatter, content }) => {
  const bodyMatches = String(content || '').match(/(?:\.\/)?(?:images|files)\/[^\s"'<>)]*/g) || []
  const frontmatterMatches = ['featured_image', 'og_image']
    .map((field) => frontmatter[field])
    .filter((candidate) => typeof candidate === 'string' && /^(?:\.\/)?(?:images|files)\//.test(candidate))

  return new Set([...bodyMatches, ...frontmatterMatches])
}

/**
 * Produces a post slug that collides neither with slugs reserved during this
 * import nor, when a Postgres client is available, with rows in `posts`.
 *
 * Candidates are the sanitized base slug, then `<base>-2`, `<base>-3`, …
 * The chosen slug is added to `reservedSlugs` before it is returned.
 * @param {string} baseSlug - Preferred slug (sanitized before use)
 * @param {Set<string>} reservedSlugs - Slugs already taken in this import run
 * @returns {Promise<string>} Unique slug
 * @throws {Error} 'IMPORT_SLUG_FAILED' when the suffix counter reaches 10000
 */
const createUniquePostSlug = async (baseSlug, reservedSlugs) => {
  const db = getPostgresClient()
  const root = sanitizeSlug(baseSlug)
  let candidate = root
  let counter = 2

  const advance = () => {
    candidate = `${root}-${counter}`
    counter += 1
  }

  const reserve = () => {
    reservedSlugs.add(candidate)
    return candidate
  }

  const existsInDatabase = async (slug) => {
    const rows = await db`
      SELECT 1
      FROM posts
      WHERE slug = ${slug}
      LIMIT 1
    `

    return rows.length > 0
  }

  // Skip everything already claimed earlier in this import run.
  while (reservedSlugs.has(candidate)) {
    advance()
  }

  // Without a database client only the in-memory reservations can be checked.
  if (!db) {
    return reserve()
  }

  for (; counter < 10000; advance()) {
    const taken = await existsInDatabase(candidate)

    if (!taken && !reservedSlugs.has(candidate)) {
      return reserve()
    }
  }

  throw new Error('IMPORT_SLUG_FAILED')
}

/**
 * Coerces a front matter status into one the importer accepts.
 *
 * The value is stringified and trimmed; matching is case-sensitive.
 * @param {unknown} value - Status value from front matter
 * @returns {'published'|'draft'|'members'|'private'} The status, or 'draft' when unrecognized
 */
const normalizePostStatus = (value) => {
  const candidate = String(value || '').trim()

  switch (candidate) {
    case 'published':
    case 'draft':
    case 'members':
    case 'private':
      return candidate
    default:
      return 'draft'
  }
}

/**
 * Resolves a front matter image value to the URL stored on the post.
 *
 * A value with an entry in `replacements` becomes its uploaded URL. A local
 * `images/` or `files/` path without an entry yields null. Any other
 * non-empty string is kept as it is.
 * @param {unknown} value - Image value from front matter
 * @param {Map<string, string>} replacements - Uploaded URL keyed by original reference
 * @returns {string|null} URL to store, or null when there is none
 */
const resolveImportedImageUrl = (value, replacements) => {
  const candidate = typeof value === 'string' ? value.trim() : ''

  if (candidate === '') {
    return null
  }

  const importedUrl = replacements.get(candidate)
  if (importedUrl) {
    return importedUrl
  }

  const isLocalAsset = /^(?:\.\/)?(?:images|files)\//.test(candidate)

  return isLocalAsset ? null : candidate
}

/**
 * Converts a date-like value to an ISO 8601 string.
 *
 * The value is stringified and parsed by the built-in Date parser.
 * @param {unknown} value - Date value from front matter
 * @returns {string|null} ISO string, or null for falsy or unparseable input
 */
const normalizeIsoDate = (value) => {
  if (!value) {
    return null
  }

  const timestamp = Date.parse(String(value))

  return Number.isNaN(timestamp) ? null : new Date(timestamp).toISOString()
}

/**
 * Builds the list of Markdown documents contained in the ZIP.
 *
 * Only entries whose path ends in `.md` (case-insensitive) are kept; each one
 * is decoded as UTF-8 and split into front matter and body.
 * @param {Array<{ path: string, data: Buffer }>} entries - ZIP entries
 * @returns {Array<Object>} One record per Markdown file: path, postFolder, frontmatter, content
 */
const collectMarkdownPosts = (entries) => {
  const documents = []

  for (const entry of entries) {
    if (!MARKDOWN_EXTENSION_PATTERN.test(entry.path)) {
      continue
    }

    const { path } = entry

    documents.push({
      path,
      postFolder: getPostFolder(path),
      ...parseMarkdownDocument(entry.data.toString('utf8'))
    })
  }

  return documents
}

/**
 * Imports every Markdown post found in an export ZIP.
 *
 * For each post, in archive order: referenced assets are saved, a unique slug
 * is chosen, asset references are rewritten to the uploaded URLs, and the post
 * is created through createAdminPost. Posts with status 'published' or
 * 'members' get the current time as publish date when front matter has none.
 * @param {{ zipBuffer: Buffer, authorId: string }} input - Import input
 * @returns {Promise<{ importedCount: number, assetCount: number, posts: Array<Object> }>} Import summary
 * @throws {Error} 'IMPORT_MARKDOWN_NOT_FOUND' when the ZIP has no Markdown file,
 *   'IMPORT_POST_LIMIT_EXCEEDED' when it has more than MAX_IMPORT_POSTS
 */
export const importPostsFromExportZip = async ({ zipBuffer, authorId }) => {
  const zipEntries = readZipBufferEntries(zipBuffer)
  const entryMap = createZipEntryMap(zipEntries)
  const documents = collectMarkdownPosts(zipEntries)

  if (documents.length === 0) {
    throw new Error('IMPORT_MARKDOWN_NOT_FOUND')
  }

  if (documents.length > MAX_IMPORT_POSTS) {
    throw new Error('IMPORT_POST_LIMIT_EXCEEDED')
  }

  const reservedSlugs = new Set()
  const createdPosts = []
  let assetCount = 0

  for (const { path, postFolder, frontmatter, content } of documents) {
    const replacements = await saveImportAssets({
      entryMap,
      postFolder,
      assetPaths: collectImportAssetPaths({ frontmatter, content })
    })

    // Several reference spellings map to one URL, so count distinct URLs.
    assetCount += new Set(replacements.values()).size

    const fileStem = basename(path).replace(MARKDOWN_EXTENSION_PATTERN, '')
    const title = String(frontmatter.title || fileStem || 'Imported Post').trim()
    const slug = await createUniquePostSlug(frontmatter.slug || title, reservedSlugs)
    const featuredImage = resolveImportedImageUrl(frontmatter.featured_image, replacements)
    const ogImage = resolveImportedImageUrl(frontmatter.og_image, replacements)
    const status = normalizePostStatus(frontmatter.status)

    // Publicly visible statuses always need a publish date.
    let publishedAt = normalizeIsoDate(frontmatter.published_at)
    if (!publishedAt && (status === 'published' || status === 'members')) {
      publishedAt = new Date().toISOString()
    }

    const post = await createAdminPost({
      title,
      slug,
      content: replaceAssetPaths(content, replacements),
      excerpt: String(frontmatter.excerpt || ''),
      featuredImage,
      isFeatured: false,
      seoTitle: String(frontmatter.seo_title || ''),
      seoDescription: String(frontmatter.seo_description || ''),
      canonicalUrl: String(frontmatter.canonical_url || ''),
      noindex: Boolean(frontmatter.noindex),
      ogImage,
      status,
      publishedAt,
      tags: Array.isArray(frontmatter.tags) ? frontmatter.tags : []
    }, authorId)

    createdPosts.push(post)
  }

  const posts = createdPosts.map(({ id, title, slug }) => ({ id, title, slug }))

  return {
    importedCount: createdPosts.length,
    assetCount,
    posts
  }
}