# TH1/Tools/apply_translations_full.py

# -*- coding: utf-8 -*-
"""Apply the translation fill: read Multilingual.xlsx and write back the Traditional Chinese / English / Japanese / Korean columns."""
import sys, io, re, shutil, os
# Re-wrap the raw stdout byte stream so everything printed below is encoded as
# UTF-8 (the status messages contain Chinese text).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

import openpyxl
import opencc
from translate_data import (
    KEEP_RAW_IDS, ZH_TW_EN_ONLY_IDS, TRANSLATIONS, VERSION_LOG_EN,
)

# Workbook that main() loads and then saves back in place.
EXCEL = 'Multilingual.xlsx'
# Copy of EXCEL that main() creates when no file exists at this path yet.
BACKUP = 'Multilingual.backup_before_trans.xlsx'

# Placeholder regex: **<any content>** -> **<>**
# Matches a literal `**<`, any run of characters other than `>` (possibly
# empty), then a literal `>**`.
PLACEHOLDER = re.compile(r'\*\*<[^>]*>\*\*')

def count_placeholders(text):
    """Return the number of ``**<...>**`` placeholder tokens in *text*.

    Args:
        text: The string to scan. Falsy values (``None``, ``''``) are accepted.

    Returns:
        The count of non-overlapping placeholder matches; 0 for falsy input.
    """
    if not text:
        return 0
    # Use the precompiled module-level pattern so the regex is defined once.
    return len(PLACEHOLDER.findall(text))

def _print_report(stats):
    """Print the run summary, the rows lacking a translation, and placeholder warnings."""
    print(f'\n[完成]')
    print(f'  原样填充 (Staff/异常): {stats["raw"]}')
    print(f'  仅繁中+英文 (版本日志): {stats["log"]}')
    print(f'  四语言翻译: {stats["translated"]}')
    print(f'  缺翻译: {len(stats["missing"])}')
    for row, entry_id, preview in stats['missing']:
        print(f'    Row {row} ID={entry_id}: {preview}')
    if stats['placeholder_warn']:
        print(f'\n[占位符警告]')
        for warning in stats['placeholder_warn']:
            print(f'  {warning}')
    else:
        print(f'  占位符数全部对齐 OK')

def main():
    """Fill the translation columns of EXCEL, save it in place and print a summary.

    Steps:
      1. Copy EXCEL to BACKUP unless a file already exists at BACKUP.
      2. For every row from 2 to ``ws.max_row`` of sheet ``'Sheet'`` whose ID
         cell is truthy, write the zh-TW / EN / JA / KO columns:
           * ID in KEEP_RAW_IDS: the source cell value is copied to all four.
           * ID in ZH_TW_EN_ONLY_IDS: zh-TW is the OpenCC conversion of the
             source, EN is VERSION_LOG_EN, JA and KO are cleared.
           * otherwise: zh-TW is the OpenCC conversion; EN/JA/KO come from
             TRANSLATIONS[id]. IDs absent from TRANSLATIONS are recorded as
             missing and their EN/JA/KO cells are left untouched.
      3. Save the workbook back to EXCEL and print the statistics.
    """
    # An existing backup is never overwritten.
    if not os.path.exists(BACKUP):
        shutil.copy(EXCEL, BACKUP)
        print(f'[备份] {EXCEL} -> {BACKUP}')

    cc_s2tw = opencc.OpenCC('s2tw')

    wb = openpyxl.load_workbook(EXCEL)
    ws = wb['Sheet']

    # 1-based worksheet column indexes.
    COL_ID = 1
    COL_ZH = 3
    COL_ZHTW = 4
    COL_EN = 5
    COL_JA = 6
    COL_KO = 7
    target_cols = (COL_ZHTW, COL_EN, COL_JA, COL_KO)

    stats = {'raw': 0, 'log': 0, 'translated': 0, 'missing': [], 'placeholder_warn': []}

    for row_idx in range(2, ws.max_row + 1):
        id_cell = ws.cell(row=row_idx, column=COL_ID).value
        if not id_cell:
            continue
        # Remove a leading BOM (U+FEFF) explicitly: str.strip() does not treat
        # it as whitespace, and lstrip('') with an empty argument strips nothing.
        id_str = str(id_cell).lstrip('\ufeff').strip()
        zh = ws.cell(row=row_idx, column=COL_ZH).value
        if zh is None:
            zh = ''

        # 1) Abnormal data / staff names: copy the source value unchanged into
        #    all four language columns.
        if id_str in KEEP_RAW_IDS:
            for col in target_cols:
                ws.cell(row=row_idx, column=col).value = zh
            stats['raw'] += 1
            continue

        # A cell may hold a non-string value (e.g. a number); the conversion,
        # slicing and regex steps below need text, so coerce once here.
        zh_text = zh if isinstance(zh, str) else str(zh)

        # Both remaining cases write the converted Traditional Chinese text.
        zh_tw = cc_s2tw.convert(zh_text)
        ws.cell(row=row_idx, column=COL_ZHTW).value = zh_tw

        # 2) Version log: Traditional Chinese + English only.
        if id_str in ZH_TW_EN_ONLY_IDS:
            ws.cell(row=row_idx, column=COL_EN).value = VERSION_LOG_EN
            ws.cell(row=row_idx, column=COL_JA).value = None
            ws.cell(row=row_idx, column=COL_KO).value = None
            stats['log'] += 1
            continue

        # 3) Regular entries.
        if id_str not in TRANSLATIONS:
            stats['missing'].append((row_idx, id_str, zh_text[:50]))
            continue

        en, ja, ko = TRANSLATIONS[id_str]
        ws.cell(row=row_idx, column=COL_EN).value = en
        ws.cell(row=row_idx, column=COL_JA).value = ja
        ws.cell(row=row_idx, column=COL_KO).value = ko

        # Every language must carry the same number of placeholders as the source.
        zh_n = count_placeholders(zh_text)
        for lang, text in [('TW', zh_tw), ('EN', en), ('JA', ja), ('KO', ko)]:
            found = count_placeholders(text)
            if found != zh_n:
                stats['placeholder_warn'].append(
                    f'ID={id_str} {lang} 占位符数={found} 应={zh_n}'
                )
        stats['translated'] += 1

    wb.save(EXCEL)

    _print_report(stats)

# Run the fill only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()