1569 lines
51 KiB
Python
1569 lines
51 KiB
Python
#!/usr/bin/env python3
|
||
from __future__ import annotations
|
||
|
||
import importlib.util
|
||
import json
|
||
import os
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
|
||
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# NOTE(review): presumably the first ID reserved for geography text entries —
# confirm against the code that consumes it.
GEO_START_ID = 3771
# Optional side-loaded install location for pypinyin (see ensure_pypinyin):
# "<TEMP>/codex_py_pkgs_pypinyin", or "./codex_py_pkgs_pypinyin" when the TEMP
# environment variable is not set.
PY_TMP_PACKAGES = Path(os.environ.get("TEMP", ".")) / "codex_py_pkgs_pypinyin"
|
||
|
||
|
||
def load_module(name: str, path: Path):
    """Execute the Python source file at *path* and return it as a module.

    The module is created from an import spec and executed; it is not
    registered in ``sys.modules``.

    Args:
        name: Module name to assign to the loaded module.
        path: Location of the source file.

    Returns:
        The freshly executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for *path*
            (for example, the file suffix is not a recognised Python one).
    """
    spec = importlib.util.spec_from_file_location(name, path)
    # The check has to happen before module_from_spec (which dereferences the
    # spec) and must not be an ``assert`` so it still runs under ``python -O``.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module {name!r} from {path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
||
|
||
|
||
def yaml_quote(value: str) -> str:
    """Render *value* as a double-quoted, ASCII-only scalar.

    An empty string is rendered as nothing at all (no quotes); everything else
    is encoded with ``json.dumps`` so non-ASCII characters become ``\\u`` escapes.
    """
    return json.dumps(value, ensure_ascii=True) if value != "" else ""
|
||
|
||
|
||
def decode_yaml_escapes(value: str) -> str:
    """Decode the backslash escapes inside a double-quoted scalar body.

    Supported escapes are the single-character ones listed in
    ``simple_escapes`` plus the numeric forms ``\\xHH``, ``\\uHHHH`` and
    ``\\UHHHHHHHH``.  A ``\\uHHHH`` high surrogate immediately followed by a
    ``\\uHHHH`` low surrogate (the JSON encoding of a non-BMP character) is
    combined into the single character it represents.

    Anything that is not a valid escape is kept verbatim: an unknown escape
    letter, a numeric escape with too few or non-hex digits, a ``\\U`` value
    above U+10FFFF, and a lone trailing backslash.

    Args:
        value: Scalar text without its surrounding quotes.

    Returns:
        The decoded text.
    """
    simple_escapes = {
        "0": "\0",
        "a": "\a",
        "b": "\b",
        "t": "\t",
        "\t": "\t",
        "n": "\n",
        "v": "\v",
        "f": "\f",
        "r": "\r",
        "e": "\x1b",
        " ": " ",
        '"': '"',
        "/": "/",
        "\\": "\\",
        "N": "\x85",
        "_": "\xa0",
        "L": "\u2028",
        "P": "\u2029",
    }
    # Number of hex digits that follow each numeric escape letter.
    width_by_escape = {"x": 2, "u": 4, "U": 8}

    def read_hex(start: int, width: int):
        """Return the integer for exactly *width* hex digits at *start*, else None."""
        chunk = value[start : start + width]
        if len(chunk) == width and re.fullmatch(r"[0-9a-fA-F]+", chunk):
            return int(chunk, 16)
        return None

    result: list[str] = []
    index = 0
    length = len(value)
    while index < length:
        char = value[index]
        if char != "\\" or index + 1 >= length:
            result.append(char)
            index += 1
            continue

        escape = value[index + 1]
        if escape in simple_escapes:
            result.append(simple_escapes[escape])
            index += 2
            continue

        width = width_by_escape.get(escape)
        if width is not None:
            end = index + 2 + width
            code_point = read_hex(index + 2, width)
            if (
                code_point is not None
                and escape == "u"
                and 0xD800 <= code_point <= 0xDBFF
                and value.startswith("\\u", end)
            ):
                # JSON-style surrogate pair: merge both halves into one character.
                low = read_hex(end + 2, 4)
                if low is not None and 0xDC00 <= low <= 0xDFFF:
                    code_point = 0x10000 + ((code_point - 0xD800) << 10) + (low - 0xDC00)
                    end += 6
            # chr() rejects values above U+10FFFF; treat those as invalid escapes.
            if code_point is not None and code_point <= 0x10FFFF:
                result.append(chr(code_point))
                index = end
                continue

        # Not a recognised escape: keep the backslash and the letter as-is.
        result.append("\\" + escape)
        index += 2
    return "".join(result)
|
||
|
||
|
||
def decode_yaml_value(lines: list[str]) -> str:
    """Turn the raw line(s) of one field value into its decoded string.

    All physical lines (including newlines embedded in an element) are
    stripped and folded together with single spaces.  A value wrapped in
    double quotes is parsed as JSON, falling back to ``decode_yaml_escapes``
    when JSON rejects it; a value with only an opening quote has that quote
    removed and its escapes decoded; anything else is returned as-is.
    """
    if not lines:
        return ""

    pieces = [piece.strip() for line in lines for piece in line.split("\n")]
    value = " ".join(pieces).strip()

    opens_quoted = value.startswith('"')
    if not opens_quoted:
        return value

    if value.endswith('"'):
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            return decode_yaml_escapes(value[1:-1])

    # Only the opening quote is present (the text cannot end with a quote
    # here), so just drop it and decode whatever follows.
    return decode_yaml_escapes(value[1:])
|
||
|
||
|
||
# NOTE(review): the leading whitespace inside these two patterns is exactly as
# it appears in this copy of the file — confirm it matches the indentation
# actually used by the asset files being parsed.

# A field line: an alphanumeric key followed by ":"; group 1 is the key and
# group 2 the rest of the line (the start of the value).
FIELD_START_RE = re.compile(r"^ ([A-Za-z][A-Za-z0-9]*):\s*(.*)$")
# An item header line ("- ID: <digits>"); group 1 is the numeric ID.  Multiline
# mode lets it match at the start of any line of the whole asset text.
ITEM_START_RE = re.compile(r"^ - ID: (\d+)$", re.M)
# Any single character in the U+3400..U+9FFF range.
CJK_RE = re.compile(r"[\u3400-\u9fff]")
# A run of two or more ASCII letters.
LATIN_RE = re.compile(r"[A-Za-z]{2,}")
|
||
|
||
|
||
def parse_multilingual_asset(text: str) -> tuple[dict[int, tuple[int, int, str]], dict[int, dict[str, str]]]:
    """Split the asset text into per-item blocks and parse each one.

    Every ``ITEM_START_RE`` match opens a block that runs up to the next match
    (or the end of the text).

    Returns:
        A pair ``(blocks, items)`` keyed by item ID: ``blocks`` holds
        ``(start_offset, end_offset, block_text)`` and ``items`` holds the
        parsed field dictionary of that block.  A repeated ID keeps the data of
        its last occurrence.
    """
    headers = [(found.start(), int(found.group(1))) for found in ITEM_START_RE.finditer(text)]
    # Each block ends where the following one begins; the last ends with the text.
    block_ends = [offset for offset, _ in headers[1:]] + [len(text)]

    blocks: dict[int, tuple[int, int, str]] = {}
    items: dict[int, dict[str, str]] = {}
    for (begin, item_id), finish in zip(headers, block_ends):
        chunk = text[begin:finish]
        blocks[item_id] = (begin, finish, chunk)
        items[item_id] = parse_multilingual_block(chunk)
    return blocks, items
|
||
|
||
|
||
def parse_multilingual_block(block: str) -> dict[str, str]:
    """Parse one item block into a ``{field name: decoded value}`` mapping.

    The first line of the block (the item header) is ignored.  A field starts
    at a line matching ``FIELD_START_RE`` and owns every following line until
    the next field start or a line beginning with the item-header prefix; the
    collected lines are decoded with ``decode_yaml_value``.
    """
    fields: dict[str, str] = {}
    current_key = None
    collected: list[str] = []

    def store_current() -> None:
        """Decode and record the field that is currently being collected."""
        if current_key is not None:
            fields[current_key] = decode_yaml_value(collected)

    for line in block.split("\n")[1:]:
        started = FIELD_START_RE.match(line)
        if started:
            store_current()
            current_key = started.group(1)
            collected = [started.group(2)]
        elif line.startswith(" - ID:"):
            # An item header ends the current field; nothing is collected
            # again until the next field line.
            store_current()
            current_key = None
        elif current_key is not None:
            collected.append(line)
    store_current()
    return fields
|
||
|
||
|
||
def render_multilingual_block(block: str, updates: dict[str, str]) -> str:
    """Return *block* with the fields named in *updates* rewritten.

    A field whose key is in *updates* is replaced by a single
    ``key: <yaml_quote(value)>`` line and its old continuation lines are
    discarded; every other line is copied through unchanged.
    """
    rendered: list[str] = []
    dropping_old_value = False
    for line in block.split("\n"):
        field = FIELD_START_RE.match(line)
        if dropping_old_value and field is None and not line.startswith(" - ID:"):
            # Continuation of a value that has just been replaced.
            # NOTE(review): this also drops a trailing empty line when the
            # replaced field is the last one in the block — confirm the caller
            # does not rely on the block keeping its final newline.
            continue
        dropping_old_value = False
        if field is not None and field.group(1) in updates:
            key = field.group(1)
            rendered.append(f" {key}: {yaml_quote(updates[key])}")
            dropping_old_value = True
        else:
            rendered.append(line)
    return "\n".join(rendered)
|
||
|
||
|
||
def parse_geo_export_refs(geo_text: str) -> dict[int, tuple[int, int]]:
    """Extract ``{row Id: (GeoName, GeoDesc)}`` from the geography export text.

    A row is recognised by the field sequence Id, GeoBigClass, GeoSmallClass,
    CivEnum, NearbyCity, GeoName, GeoDesc; the NearbyCity value may span
    several lines.  All three captured values are returned as integers.
    """
    row_pattern = re.compile(
        "".join(
            (
                r"^ - Id: (\d+)\n",
                r" GeoBigClass: \d+\n",
                r" GeoSmallClass: \d+\n",
                r" CivEnum: \d+\n",
                r" NearbyCity:.*?\n",
                r" GeoName: (\d+)\n",
                r" GeoDesc: (\d+)",
            )
        ),
        re.M | re.S,
    )
    refs: dict[int, tuple[int, int]] = {}
    for row in row_pattern.finditer(geo_text):
        row_id, name_id, desc_id = (int(number) for number in row.groups())
        refs[row_id] = (name_id, desc_id)
    return refs
|
||
|
||
|
||
def build_known_term_maps(items: dict[int, dict[str, str]], gen, excluded_ids: set[int]) -> dict[str, dict[str, str]]:
    """Collect already-translated names, keyed by their Chinese text.

    Items whose ID is in *excluded_ids*, or whose ``ZH`` text is empty or
    longer than 18 characters, are ignored.  For the rest, a translation is
    recorded per language when it is non-empty and differs from the Chinese
    text; EN and KR translations must additionally contain no CJK character.
    Finally every entry of ``gen.CIVS`` contributes its ``"cn"`` name mapped to
    its key, for all three languages, unless that name is already known.

    Returns:
        ``{"EN": {...}, "JP": {...}, "KR": {...}}``.
    """
    known: dict[str, dict[str, str]] = {"EN": {}, "JP": {}, "KR": {}}

    for item_id, item in items.items():
        if item_id in excluded_ids:
            continue
        zh = item.get("ZH", "")
        if not zh or len(zh) > 18:
            continue

        english = item.get("EN", "")
        if english and not CJK_RE.search(english) and english != zh:
            known["EN"][zh] = english

        japanese = item.get("JP", "")
        if japanese and japanese != zh:
            known["JP"][zh] = japanese

        korean = item.get("KR", "")
        if korean and korean != zh and not CJK_RE.search(korean):
            known["KR"][zh] = korean

    for civ, data in gen.CIVS.items():
        civ_zh = data["cn"]
        for per_language in (known["EN"], known["JP"], known["KR"]):
            per_language.setdefault(civ_zh, civ)
    return known
|
||
|
||
|
||
def make_term_maps(app) -> tuple[dict[str, str], dict[str, str], dict[str, str], dict[str, dict[str, str]]]:
    """Build the terminology tables used to translate Chinese place names.

    Args:
        app: Object exposing ``TERM_EN``, ``TERM_JP`` and ``TERM_KR`` mappings
            (Chinese term -> translation); they are copied, not modified.

    Returns:
        ``(term_en, term_jp, term_kr, common)`` where the first three are the
        copied ``app`` tables extended with a few extra terms, and ``common``
        is ``{"EN": ..., "JP": ..., "KR": ...}`` holding the larger per-language
        tables assembled below.
    """
    term_en = dict(app.TERM_EN)
    term_en.update({"冻土": "permafrost", "峡湾": "fjord", "沼泽": "marsh", "岛屿": "island", "半岛": "peninsula"})
    term_jp = dict(app.TERM_JP)
    term_jp.update({"冻土": "永久凍土", "峡湾": "フィヨルド", "沼泽": "沼地", "岛屿": "島", "半岛": "半島"})
    term_kr = dict(app.TERM_KR)
    term_kr.update({"冻土": "영구동토", "峡湾": "피오르", "沼泽": "늪지", "岛屿": "섬", "半岛": "반도"})

    # Base English table.  Unlike JP/KR below it does not start from term_en;
    # term_en entries are only used afterwards to fill gaps.
    common_en = {
        "低洼田": "Lowland Fields",
        "农耕低地": "Farming Lowlands",
        "冲积平原": "Alluvial Plains",
        "冲积区": "Alluvial Zone",
        "冲积地": "Alluvial Land",
        "冲积土": "Alluvial Soil",
        "冲积台地": "Alluvial Terrace",
        "洪泛平原": "Floodplain",
        "洪泛地": "Floodplain",
        "湿润平原": "Wet Plain",
        "平原": "Plains",
        "低地": "Lowlands",
        "沃野": "Fertile Fields",
        "田野": "Fields",
        "农田": "Fields",
        "田": "Fields",
        "水田": "Paddies",
        "稻作区": "Rice Fields",
        "稻田": "Rice Fields",
        "牧场": "Pasture",
        "草场": "Grassland",
        "草甸": "Meadow",
        "草地": "Grassland",
        "草原": "Grassland",
        "牧草地": "Pasture",
        "旱地": "Drylands",
        "荒原": "Wilderness",
        "荒漠": "Desert",
        "沙漠": "Desert",
        "沙地": "Sands",
        "沙海": "Sand Sea",
        "沙路": "Desert Route",
        "沙丘": "Dunes",
        "碎石漠": "Gravel Desert",
        "盐漠": "Salt Desert",
        "盐路": "Salt Road",
        "盐田": "Salt Fields",
        "盐井": "Salt Wells",
        "盐矿": "Salt Mine",
        "盐水": "Saltwater",
        "山脉": "Mountains",
        "山地": "Highlands",
        "山口": "Mountain Pass",
        "山路": "Mountain Road",
        "山麓": "Foothills",
        "山前": "Foothills",
        "前丘": "Foothills",
        "余脉": "Foothills",
        "边山": "Border Mountains",
        "山": "Mountain",
        "峰": "Peak",
        "火山": "Volcano",
        "圣峰": "Sacred Peak",
        "丘陵": "Hills",
        "丘地": "Hills",
        "丘": "Hill",
        "高地": "Highlands",
        "台地": "Plateau",
        "土丘": "Mound",
        "城丘": "City Mound",
        "圣丘": "Sacred Mound",
        "王陵高地": "Royal Tomb Heights",
        "王冢": "Royal Tomb",
        "断崖": "Escarpment",
        "悬崖": "Cliffs",
        "崖丘": "Cliff Hills",
        "峡谷": "Canyon",
        "峡口": "Gorge Pass",
        "谷地": "Valley",
        "谷": "Valley",
        "河谷": "River Valley",
        "盆地": "Basin",
        "湖盆": "Lake Basin",
        "高原": "Plateau",
        "源头山": "Headwater Mountains",
        "河口平原": "River Mouth Plain",
        "河口": "River Mouth",
        "河段": "River Reach",
        "河岸平原": "Riverbank Plains",
        "河岸": "Riverbank",
        "河滩": "Riverbank",
        "河畔": "Riverside",
        "河湾": "River Bend",
        "河阶地": "River Terrace",
        "河网": "River Network",
        "河道": "River Channel",
        "支流": "Tributary",
        "汇流处": "Confluence",
        "溪流": "Stream",
        "溪谷": "Stream Valley",
        "水道": "Waterway",
        "水路": "Waterway",
        "水网": "Water Network",
        "水脉": "Water Vein",
        "水渠": "Canal",
        "圣渠": "Sacred Canal",
        "灌渠": "Irrigation Canal",
        "运河": "Canal",
        "渠": "Canal",
        "渠网": "Canal Network",
        "渠塘": "Canal Ponds",
        "排水渠": "Drainage Canal",
        "排水泽": "Drainage Marsh",
        "沟渠": "Ditches",
        "边境沟渠": "Border Ditches",
        "水坝水道": "Dam Waterway",
        "水闸": "Sluice",
        "水井": "Well",
        "井泉": "Springs",
        "泉渠": "Spring Canals",
        "泉眼": "Spring",
        "泉地": "Springland",
        "泉田": "Spring Fields",
        "井田": "Well Fields",
        "水塘": "Pond",
        "蓄水池": "Reservoir",
        "水池": "Pool",
        "圣池": "Sacred Pool",
        "圣井": "Sacred Well",
        "天然井": "Cenote",
        "水洞": "Water Cave",
        "护城河": "Moat",
        "湖群": "Lakes",
        "湖区": "Lake District",
        "湖滨": "Lakeside",
        "湖岸": "Lakeshore",
        "湖畔": "Lakeside",
        "湖港": "Lake Port",
        "湖上": "Lake",
        "湖沼": "Lakes and Marshes",
        "湖": "Lake",
        "沼湖": "Marsh Lake",
        "湿湖": "Wet Lake",
        "泻湖": "Lagoon",
        "潟湖": "Lagoon",
        "咸湖": "Salt Lake",
        "古湖": "Old Lake",
        "池湖": "Pool Lake",
        "河湾湖": "River Bend Lake",
        "低地湖": "Lowland Lake",
        "季节湖": "Seasonal Lake",
        "沼泽边地": "Marsh Borderlands",
        "沼泽": "Marsh",
        "湿地": "Wetland",
        "湿原": "Wetland",
        "芦苇荡": "Reed Marsh",
        "芦苇泽": "Reed Marsh",
        "芦苇带": "Reed Belt",
        "苇田": "Reed Fields",
        "水鸟湿地": "Waterbird Wetland",
        "季节湿地": "Seasonal Wetland",
        "湖滨湿地": "Lakeside Wetland",
        "河岸湿地": "Riverbank Wetland",
        "河湾湿地": "River Bend Wetland",
        "潮汐林": "Tidal Forest",
        "潮水林": "Tidal Forest",
        "红树林": "Mangrove",
        "海岸红树林": "Coastal Mangrove",
        "河口红树林": "River-Mouth Mangrove",
        "湿草甸": "Wet Meadow",
        "洪泛湖": "Flood Lake",
        "橡树林": "Oak Forest",
        "阔叶林": "Broadleaf Forest",
        "落叶林": "Deciduous Forest",
        "旱林": "Dry Forest",
        "季雨林": "Monsoon Forest",
        "雨林": "Rainforest",
        "热带林": "Tropical Forest",
        "密林": "Dense Forest",
        "丛林": "Jungle",
        "林地": "Woodland",
        "山林": "Mountain Forest",
        "河岸林": "Riparian Forest",
        "谷地林": "Valley Forest",
        "山麓林": "Foothill Forest",
        "果木林": "Orchard Forest",
        "柳林": "Willow Grove",
        "杨树林": "Poplar Grove",
        "松林": "Pine Forest",
        "柏树林": "Cypress Forest",
        "圣林": "Sacred Grove",
        "圣树园": "Sacred Grove",
        "神庙林": "Temple Grove",
        "神庙果园": "Temple Orchard",
        "园林": "Gardens",
        "园圃": "Gardens",
        "果园": "Orchard",
        "枣园": "Date Grove",
        "枣椰园": "Date Palm Grove",
        "棕榈园": "Palm Grove",
        "棕榈绿洲": "Palm Oasis",
        "棕榈林": "Palm Grove",
        "绿洲": "Oasis",
        "井泉绿洲": "Spring Oasis",
        "灌木绿洲": "Shrub Oasis",
        "水渠果园": "Canal Orchard",
        "水苑": "Water Garden",
        "悬园水苑": "Hanging Garden Water Park",
        "王家林": "Royal Grove",
        "王家林苑": "Royal Grove",
        "古林": "Ancient Forest",
        "常绿林": "Evergreen Forest",
        "常绿": "Evergreen",
        "高冠林": "High-Canopy Forest",
        "云雾林": "Cloud Forest",
        "林": "Forest",
        "外海": "Offshore Waters",
        "远海": "Open Sea",
        "海域": "Sea",
        "海岸": "Coast",
        "北岸": "North Coast",
        "南岸": "South Coast",
        "东岸": "East Bank",
        "西岸": "West Bank",
        "海湾": "Bay",
        "湾": "Bay",
        "潮滩": "Tidal Flats",
        "泥滩": "Mudflats",
        "海口": "Sea Mouth",
        "海峡": "Strait",
        "航道": "Sea Route",
        "商船道": "Merchant Sea Route",
        "航线": "Sea Route",
        "商路": "Trade Route",
        "贸易海岸": "Trade Coast",
        "外贸海路": "Foreign Trade Sea Route",
        "远洋商路": "Overseas Trade Route",
        "远航海": "Voyage Sea",
        "季风海": "Monsoon Sea",
        "黄金海": "Gold Sea",
        "海门": "Sea Gate",
        "港": "Port",
        "古港": "Old Port",
        "泊地": "Anchorage",
        "码头": "Dock",
        "港口": "Port",
        "港湾": "Harbor",
        "港区": "Harbor District",
        "上游": "Upstream",
        "中游": "Midstream",
        "下游": "Downstream",
        "边地": "Borderlands",
        "外缘": "Outer Edge",
        "东段": "East Section",
        "西段": "West Section",
        "北缘": "Northern Edge",
        "南缘": "Southern Edge",
        "北部": "North",
        "南部": "South",
        "东部": "East",
        "西部": "West",
        "北": "North",
        "南": "South",
        "东": "East",
        "西": "West",
        "旧": "Old",
        "古代": "Ancient",
        "古": "Ancient",
        "王家": "Royal",
        "圣": "Sacred",
        "神庙": "Temple",
        "太阳神": "Sun God",
        "边界": "Boundary",
        "争界": "Disputed Boundary",
        "前线": "Frontier",
        "前哨": "Outpost",
        "城郊": "Outskirts",
        "周边": "Surroundings",
        "贸易": "Trade",
        "商旅": "Caravan",
        "驿站": "Relay Station",
        "驿路": "Relay Route",
        "驼队": "Caravan",
        "补给": "Supply",
        "外贸": "Foreign Trade",
        "贡品": "Tribute",
        "水城": "Water City",
        "水上": "Waterborne",
        "水利": "Waterworks",
        "水库": "Reservoir",
        "梯田": "Terraces",
        "水车": "Watermill",
        "磨坊": "Mill",
        "粮仓": "Granary",
        "晒场": "Drying Yard",
        "圩田": "Polder Fields",
        "围栏": "Enclosure",
        "农庄": "Farmstead",
        "田庄": "Estate",
        "菜园": "Vegetable Garden",
        "市场": "Market",
        "大集市": "Grand Bazaar",
        "市集": "Market",
        "巴扎": "Bazaar",
        "商埠": "Trading Port",
        "货栈": "Warehouse",
        "交易所": "Exchange",
        "工坊": "Workshop",
        "街区": "Quarter",
        "商街": "Market Street",
        "陶器": "Pottery",
        "织品": "Textiles",
        "铜器": "Bronze Ware",
        "书吏": "Scribe",
        "手工业": "Craft",
        "珠宝": "Jewelry",
        "香料": "Spice",
        "谷物": "Grain",
        "牲畜": "Livestock",
        "盐金": "Salt and Gold",
        "石材": "Stone",
        "军港": "Naval Base",
        "军营": "Garrison",
        "堡垒": "Fortress",
        "卫戍": "Garrison",
        "边境": "Frontier",
        "城墙": "City Wall",
        "兵站": "Depot",
        "卫队营": "Guard Camp",
        "要塞": "Fortress",
        "营垒": "Camp",
        "哨堡": "Watch Fort",
        "禁卫军区": "Guard District",
        "战车校场": "Chariot Grounds",
        "弓兵营地": "Archer Camp",
        "石墙卫城": "Stone-Walled Citadel",
        "兵营": "Barracks",
        "营寨": "Camp",
        "关堡": "Pass Fort",
        "兵港": "Military Harbor",
        "防区": "Defense Zone",
        "海防码头": "Coastal Defense Dock",
        "水师": "Navy",
        "军船": "Warship",
        "修造所": "Repair Yard",
        "灯塔": "Lighthouse",
        "卫港": "Guard Harbor",
        "船厂": "Shipyard",
        "船坞": "Dockyard",
        "锚地": "Anchorage",
        "船队": "Fleet",
        "舰队": "Fleet",
        "河舰": "River Fleet",
        "护航": "Escort",
        "水军": "Navy",
        "海防": "Coastal Defense",
        "内河": "Inland River",
        "商船": "Merchant Ships",
        "护卫港": "Guard Harbor",
        "矿场": "Mine",
        "采石场": "Quarry",
        "铜矿坑": "Copper Pit",
        "金矿井": "Gold Mine",
        "银矿山": "Silver Mine",
        "铁矿营地": "Iron Mine Camp",
        "锡矿井": "Tin Mine",
        "玉石矿场": "Jade Mine",
        "黑曜石矿": "Obsidian Mine",
        "绿松石矿": "Turquoise Mine",
        "玄武岩": "Basalt",
        "砂岩": "Sandstone",
        "大理石": "Marble",
        "花岗岩": "Granite",
        "硝石场": "Saltpeter Works",
        "煤矿井": "Coal Mine",
        "宝石矿": "Gem Mine",
        "磨盘石场": "Millstone Quarry",
        "黏土坑": "Clay Pit",
        "赭石矿场": "Ochre Mine",
        "石灰岩": "Limestone",
        "铸币厂": "Mint",
        "青铜工坊": "Bronze Workshop",
        "兵器铸造所": "Weapon Foundry",
        "金银细作坊": "Gold and Silver Workshop",
        "铜器锻坊": "Bronze Forge",
        "铁匠街区": "Blacksmith Quarter",
        "战车轮毂坊": "Chariot Wheel Workshop",
        "钟鼎铸坊": "Bell and Vessel Foundry",
        "甲胄工坊": "Armor Workshop",
        "金工作坊": "Goldsmith Workshop",
        "船钉锻坊": "Ship-Nail Forge",
        "铸炮前坊": "Early Cannon Foundry",
        "贵金属熔炉": "Precious-Metal Furnace",
        "刀剑作坊": "Sword Workshop",
        "马具锻造场": "Tack Forge",
        "陶范铸造坊": "Mold-Casting Workshop",
        "匠人炉区": "Artisan Furnace Quarter",
        "火盆工坊": "Brazier Workshop",
        "祭器铸坊": "Ritual Vessel Foundry",
        "官营冶坊": "State Smeltery",
        "铸造工坊": "Forge",
        "石桥": "Stone Bridge",
        "木桥": "Wooden Bridge",
        "堤道": "Causeway",
        "运河桥": "Canal Bridge",
        "河湾栈桥": "River Bend Trestle",
        "山谷吊桥": "Valley Suspension Bridge",
        "渡槽桥": "Aqueduct Bridge",
        "浮桥": "Pontoon Bridge",
        "关隘桥": "Pass Bridge",
        "城门桥": "Gate Bridge",
        "湖上堤桥": "Lake Causeway Bridge",
        "商路石桥": "Trade Road Stone Bridge",
        "峡口桥": "Gorge Bridge",
        "河口长堤": "River-Mouth Causeway",
        "古道桥": "Old Road Bridge",
        "灌渠桥": "Irrigation Canal Bridge",
        "港区栈桥": "Harbor Trestle",
        "军道桥": "Military Road Bridge",
        "圣路桥": "Sacred Road Bridge",
        "驿道桥": "Relay Road Bridge",
        "桥": "Bridge",
    }
    # Additional English terms.  A key that already exists above is overridden
    # by the value given here (e.g. "山前" becomes "Foothill" and "圣丘" becomes
    # "Sacred Hill").  Some keys are also repeated inside this literal itself
    # ("边防", "山丘"), in which case the last occurrence is the one that counts.
    common_en.update(
        {
            "营地": "Camp",
            "营寨": "Camp",
            "军寨": "Military Fort",
            "军坞": "Naval Dockyard",
            "船坞": "Dockyard",
            "哨港": "Outpost Harbor",
            "兵港": "Military Harbor",
            "防区": "Defense Zone",
            "修造所": "Repair Yard",
            "前哨": "Outpost",
            "卫戍": "Garrison",
            "驻地": "Outpost",
            "塔寨": "Watchtower Fort",
            "军站": "Military Station",
            "护卫营": "Guard Camp",
            "军械": "Armory",
            "王权": "Royal",
            "渠网": "Canal Network",
            "晒场": "Drying Yard",
            "堤坝": "Levee",
            "圩田": "Polder Fields",
            "围栏": "Enclosure",
            "种子": "Seed",
            "仓库": "Storehouse",
            "农场": "Farm",
            "农庄": "Farmstead",
            "祭田": "Ritual Field",
            "菜园": "Vegetable Garden",
            "农渠": "Farm Canal",
            "枢纽": "Hub",
            "市场仓": "Market Storehouse",
            "集市": "Market",
            "驿站": "Relay Station",
            "坊市": "Market",
            "商铺": "Merchant Shop",
            "手工业街": "Craft Street",
            "木场": "Lumberyard",
            "林苑": "Grove",
            "木材场": "Timber Yard",
            "木料栈": "Timber Yard",
            "木工坊": "Wood Workshop",
            "储备林": "Reserve Forest",
            "伐木场": "Logging Camp",
            "木匠坊": "Carpenter Workshop",
            "猎场": "Hunting Ground",
            "烧制场": "Kiln",
            "桨木": "Oarwood",
            "梁木": "Beam Timber",
            "修船所": "Ship Repair Yard",
            "庭园": "Garden",
            "船板": "Plank",
            "保护林": "Preserve",
            "木作坊": "Wood Workshop",
            "转运场": "Depot",
            "林场": "Lumberyard",
            "关口": "Checkpoint",
            "内河港": "Inland River Port",
            "盐船": "Salt Ship",
            "渔船": "Fishing Boat",
            "远航": "Long-Voyage",
            "帆船": "Sailboat",
            "集货": "Cargo",
            "朝圣": "Pilgrim",
            "边境港": "Frontier Port",
            "关税": "Customs",
            "坑": "Pit",
            "采石": "Quarry",
            "前坊": "Early Foundry",
            "细作坊": "Fine Workshop",
            "锻坊": "Forge",
            "轮毂": "Wheel Hub",
            "钟鼎": "Bell and Vessel",
            "甲胄": "Armor",
            "金工": "Goldsmith",
            "船钉": "Ship-Nail",
            "熔炉": "Furnace",
            "马具": "Tack",
            "陶范": "Mold-Casting",
            "炉区": "Furnace Quarter",
            "火盆": "Brazier",
            "祭器": "Ritual Vessel",
            "冶坊": "Smeltery",
            "堤桥": "Causeway Bridge",
            "吊桥": "Suspension Bridge",
            "渡槽": "Aqueduct",
            "浮桥": "Pontoon Bridge",
            "关隘": "Pass",
            "城门": "City Gate",
            "长堤": "Causeway",
            "军道": "Military Road",
            "圣路": "Sacred Road",
            "驿道": "Relay Road",
            "旧道": "Old Channel",
            "三角洲": "Delta",
            "阶地": "Terrace",
            "河": "River",
            "岸": "Bank",
            "源": "Source",
            "道": "Road",
            "游": "Reach",
            "渡": "Ferry",
            "防": "Defense",
            "边防": "Frontier Defense",
            "树": "Tree",
            "作坊": "Workshop",
            "海": "Sea",
            "池": "Pool",
            "区": "Zone",
            "坡": "Slope",
            "间": "Interior",
            "圃": "Garden",
            "室": "House",
            "场": "Yard",
            "段": "Reach",
            "带": "Belt",
            "泽": "Marsh",
            "堤": "Dike",
            "城": "City",
            "大": "Great",
            "石": "Stone",
            "盐": "Salt",
            "井": "Well",
            "口": "Mouth",
            "门": "Gate",
            "湾": "Bay",
            "角": "Cape",
            "洲": "Islet",
            "商港": "Merchant Port",
            "渡口": "Ferry Crossing",
            "河防": "River Defense",
            "边防": "Frontier Defense",
            "沙洲": "Sandbar",
            "稻作": "Rice Fields",
            "牧草": "Pasture",
            "祭典": "Festival",
            "丰收": "Harvest",
            "王室": "Royal",
            "丝绸": "Silk",
            "造船": "Shipbuilding",
            "杉木": "Cedar",
            "木炭": "Charcoal",
            "薪柴": "Firewood",
            "橡木": "Oak",
            "木材": "Timber",
            "木料": "Timber",
            "木作": "Woodwork",
            "栈桥": "Trestle",
            "渡船": "Ferry Boat",
            "粮船": "Grain Ship",
            "大理石矿": "Marble Mine",
            "大理石": "Marble",
            "珍珠海": "Pearl Sea",
            "珍珠": "Pearl",
            "咸水": "Saltwater",
            "南海": "Southern Sea",
            "外贸海": "Foreign Trade Sea",
            "蓄水": "Water Storage",
            "洪水": "Flood",
            "季节": "Seasonal",
            "红海": "Red Sea",
            "地峡": "Isthmus",
            "橄榄": "Olive",
            "月桂": "Laurel",
            "护城": "Moat",
            "稀树": "Savanna",
            "航路": "Sea Route",
            "海路": "Sea Route",
            "外洋": "Open Sea",
            "边缘": "Fringe",
            "草坡": "Grassy Slope",
            "旱": "Dry",
            "灌木": "Shrub",
            "棉": "Cotton",
            "荒丘": "Wasteland Hills",
            "盐丘": "Salt Hills",
            "堤田": "Dike Fields",
            "浮田": "Floating Fields",
            "湖城": "Lake City",
            "皇冢": "Royal Tomb",
            "北门": "North Gate",
            "南门": "South Gate",
            "古道": "Old Road",
            "谷仓": "Granary",
            "河港": "River Port",
            "林间": "Woodland",
            "古树": "Ancient Tree",
            "古渡口": "Old Ferry Crossing",
            "檀木": "Sandalwood",
            "源山": "Headwater Mountain",
            "河源山": "River Source Mountain",
            "洼地": "Depression",
            "低洼泽": "Low Marsh",
            "泉园": "Spring Garden",
            "圣泉园": "Sacred Spring Garden",
            "海滨": "Coastal",
            "潮汐": "Tidal",
            "北汊": "North Branch",
            "河渠": "River Canal",
            "山丘": "Hill",
            "山脚": "Foothill",
            "旱生": "Dryland",
            "开阔地": "Open Ground",
            "林缘": "Forest Edge",
            "双塔林": "Twin-Tower Grove",
            "天文林": "Astronomer Grove",
            "地下": "Underground",
            "雨季": "Rainy Season",
            "水站": "Water Station",
            "棕榈地": "Palm Grove",
            "灌区": "Irrigation Zone",
            "内陆": "Inland",
            "沙原": "Sand Plain",
            "补给漠": "Supply Desert",
            "湖湿地": "Lake Wetland",
            "波斯湾": "Persian Gulf",
            "阿拉伯湾": "Arabian Gulf",
            "加勒比海": "Caribbean Sea",
            "墨西哥湾": "Gulf of Mexico",
            "爱琴海": "Aegean Sea",
            "黑海": "Black Sea",
            "地中海": "Mediterranean",
            "幼发拉底河": "Euphrates River",
            "幼发拉底": "Euphrates",
            "底格里斯河": "Tigris River",
            "底格里斯": "Tigris",
            "迪亚拉河": "Diyala River",
            "迪亚拉": "Diyala",
            "卡尔赫河": "Karkheh River",
            "卡尔赫": "Karkheh",
            "加拉夫河": "Gharraf River",
            "加拉夫": "Gharraf",
            "沙特阿拉伯河": "Shatt al-Arab",
            "扎布河": "Zab River",
            "王陵高地": "Royal Tomb Heights",
            "王陵丘": "Royal Tomb Hill",
            "王陵": "Royal Tomb",
            "王冢": "Royal Tomb",
            "圣山坡": "Sacred Mountain Slope",
            "圣山": "Sacred Mountain",
            "圣丘": "Sacred Hill",
            "圣坡": "Sacred Slope",
            "山毛榉林": "Beech Forest",
            "山毛榉": "Beech",
            "山溪": "Mountain Stream",
            "山间草甸": "Mountain Meadow",
            "山间": "Mountain",
            "山坡": "Mountain Slope",
            "山前": "Foothill",
            "山前丘": "Foothill Hill",
            "山脊": "Ridge",
            "山林": "Mountain Forest",
            "山麓林": "Foothill Forest",
            "山麓": "Foothills",
            "山丘": "Hill",
            "潟湖": "Lagoon",
            "阿联酋": "UAE",
            "库丘马塔内斯": "Cuchumatanes",
        }
    )
    # Fill any term still missing from the English table with the title-cased
    # term_en translation; entries already present are left untouched.
    for zh, en in term_en.items():
        common_en.setdefault(zh, en.title())

    # Japanese table: starts from term_jp, then the entries below take priority.
    common_jp = dict(term_jp)
    common_jp.update(
        {
            "低洼田": "低地田",
            "上游": "上流",
            "中游": "中流",
            "下游": "下流",
            "冲积平原": "沖積平野",
            "平原": "平原",
            "河": "川",
            "山": "山",
            "湖": "湖",
            "海": "海",
            "港": "港",
            "桥": "橋",
            "市场": "市場",
            "工坊": "工房",
            "矿": "鉱山",
            "绿洲": "オアシス",
            "湿地": "湿地",
            "沼泽": "沼地",
            "森林": "森林",
            "林": "林",
            "谷": "谷",
            "湾": "湾",
            "水渠": "水路",
            "运河": "運河",
            "古": "古",
            "圣": "聖",
            "王家": "王家",
            "神庙": "神殿",
            "军港": "軍港",
            "军营": "兵営",
            "港口": "港",
            "矿场": "鉱山",
            "铸造工坊": "鋳造工房",
            "桥梁": "橋",
            "王陵高地": "王陵高地",
            "王陵丘": "王陵の丘",
            "王陵": "王陵",
            "王冢": "王塚",
            "圣山坡": "聖山斜面",
            "圣山": "聖山",
            "圣丘": "聖丘",
            "圣坡": "聖なる斜面",
            "山毛榉林": "ブナ林",
            "山毛榉": "ブナ",
            "山溪": "山溪",
            "山间草甸": "山間草地",
            "山间": "山間",
            "山坡": "山腹",
            "山前": "山麓",
            "山前丘": "山麓丘",
            "山脊": "山稜",
            "山林": "山林",
            "山麓林": "山麓林",
            "山麓": "山麓",
            "山丘": "丘",
            "潟湖": "ラグーン",
            "阿联酋": "アラブ首長国連邦",
            "库丘马塔内斯": "クチュマタネス",
        }
    )
    # Korean table: starts from term_kr, then the entries below take priority.
    common_kr = dict(term_kr)
    common_kr.update(
        {
            "低洼田": "저지대 밭",
            "上游": "상류",
            "中游": "중류",
            "下游": "하류",
            "冲积平原": "충적 평야",
            "平原": "평원",
            "河": "강",
            "山": "산",
            "湖": "호수",
            "海": "바다",
            "港": "항구",
            "桥": "다리",
            "市场": "시장",
            "工坊": "공방",
            "矿": "광산",
            "绿洲": "오아시스",
            "湿地": "습지",
            "沼泽": "늪지",
            "森林": "숲",
            "林": "숲",
            "谷": "계곡",
            "湾": "만",
            "水渠": "수로",
            "运河": "운하",
            "古": "고대",
            "圣": "성스러운",
            "王家": "왕실",
            "神庙": "신전",
            "军港": "군항",
            "军营": "병영",
            "港口": "항구",
            "矿场": "광산",
            "铸造工坊": "주조 공방",
            "桥梁": "다리",
            "王陵高地": "왕릉 고지",
            "王陵丘": "왕릉 언덕",
            "王陵": "왕릉",
            "王冢": "왕릉",
            "圣山坡": "성산 비탈",
            "圣山": "성산",
            "圣丘": "성스러운 언덕",
            "圣坡": "성스러운 비탈",
            "山毛榉林": "너도밤나무 숲",
            "山毛榉": "너도밤나무",
            "山溪": "산간 계류",
            "山间草甸": "산간 초지",
            "山间": "산간",
            "山坡": "산비탈",
            "山前": "산기슭",
            "山前丘": "산기슭 언덕",
            "山脊": "능선",
            "山林": "산림",
            "山麓林": "산기슭 숲",
            "山麓": "산기슭",
            "山丘": "언덕",
            "潟湖": "석호",
            "阿联酋": "아랍에미리트",
            "库丘马塔内斯": "쿠추마타네스",
        }
    )
    return term_en, term_jp, term_kr, {"EN": common_en, "JP": common_jp, "KR": common_kr}
|
||
|
||
|
||
def ensure_pypinyin():
    """Return pypinyin's ``lazy_pinyin`` function, importing it on demand.

    The regular import is tried first.  If that fails and the
    ``PY_TMP_PACKAGES`` directory exists, the directory is put at the front of
    ``sys.path`` and the import is retried (a second failure propagates as
    ``ImportError``).

    Raises:
        RuntimeError: If pypinyin is not importable and ``PY_TMP_PACKAGES``
            does not exist; the message contains an install command.
    """
    try:
        from pypinyin import lazy_pinyin
    except ImportError:
        if not PY_TMP_PACKAGES.exists():
            raise RuntimeError(
                "pypinyin is required for fallback romanization. "
                "Install with: python -m pip install --target %TEMP%/codex_py_pkgs_pypinyin pypinyin"
            )
        # Retry with the side-loaded package directory taking precedence.
        sys.path.insert(0, str(PY_TMP_PACKAGES))
        from pypinyin import lazy_pinyin

    return lazy_pinyin
|
||
|
||
|
||
def fallback_roman(text: str, lazy_pinyin) -> str:
    """Romanize *text* into space-separated, capitalized words.

    Each run of characters in the U+3400..U+9FFF range is passed to
    *lazy_pinyin* and contributes one word per returned syllable; every other
    non-whitespace character becomes a word of its own.  Whitespace is dropped.
    """
    words: list[str] = []
    # Tokens are either a whole CJK run or one single non-CJK character.
    for token in re.findall(r"[\u3400-\u9fff]+|[^\u3400-\u9fff]", text):
        if "\u3400" <= token[0] <= "\u9fff":
            words.extend(lazy_pinyin(token))
        elif token.strip():
            words.append(token)
    return " ".join(word.capitalize() for word in words if word)
|
||
|
||
|
||
def normalize_pinyin_syllable(syllable: str) -> str:
    """Lower-case *syllable* and write the vowel ``ü`` (or ``u:``) as ``v``."""
    return syllable.lower().replace("u:", "v").replace("ü", "v")


def split_pinyin_syllable(syllable: str) -> tuple[str, str]:
    """Split a pinyin syllable into ``(initial, final)``.

    The final is returned in the form used after an initial consonant, which
    is the form the finals tables are keyed by:

    * ``y``/``w`` spellings are undone: ``yi -> i``, ``yin -> in``,
      ``ya -> ia``, ``wu -> u``, ``wa -> ua``; ``yu...`` becomes ``v...``.
    * ``you``, ``wei`` and ``wen`` yield the contracted finals ``iu``, ``ui``
      and ``un``.
    * ``u`` after ``j``, ``q`` or ``x`` stands for ``ü`` and is returned as ``v``.

    Syllables written with ``y`` or ``w`` have an empty initial, as do
    syllables that start with a vowel.
    """
    # Full finals that are spelled in contracted form after an initial.
    contracted_finals = {"iou": "iu", "uei": "ui", "uen": "un"}

    syllable = normalize_pinyin_syllable(syllable)
    if syllable.startswith(("zh", "ch", "sh")):
        return syllable[:2], syllable[2:]

    initial, final = syllable[:1], syllable[1:]
    if not initial or initial not in "bpmfdtnlgkhjqxrzcsyw":
        return "", syllable

    if initial == "y":
        if final.startswith("u"):
            return "", "v" + final[1:]
        # "yi", "yin", "ying" already carry the i; other finals gain one.
        if not final.startswith("i"):
            final = "i" + final
        return "", contracted_finals.get(final, final)

    if initial == "w":
        # "wu" already carries the u; other finals gain one.
        if not final.startswith("u"):
            final = "u" + final
        return "", contracted_finals.get(final, final)

    if initial in "jqx" and final.startswith("u"):
        final = "v" + final[1:]
    return initial, final
|
||
|
||
|
||
# Katakana emitted for each pinyin initial (keys match the initials produced
# by split_pinyin_syllable; "" means the syllable has no initial).
JP_INITIALS = {
    "": "",
    "b": "ブ",
    "p": "プ",
    "m": "ム",
    "f": "フ",
    "d": "ド",
    "t": "ト",
    "n": "ヌ",
    "l": "ル",
    "g": "グ",
    "k": "ク",
    "h": "ホ",
    "j": "ジ",
    "q": "チ",
    "x": "シ",
    "zh": "ジ",
    "ch": "チ",
    "sh": "シ",
    "r": "ル",
    "z": "ズ",
    "c": "ツ",
    "s": "ス",
}

# Katakana emitted for each pinyin final; "v" stands for the vowel ü.
# pinyin_to_katakana appends this directly after the initial's katakana.
JP_FINALS = {
    "": "",
    "a": "ア",
    "ai": "アイ",
    "an": "アン",
    "ang": "アン",
    "ao": "アオ",
    "e": "エ",
    "ei": "エイ",
    "en": "エン",
    "eng": "ン",
    "er": "アル",
    "i": "イ",
    "ia": "ヤ",
    "ian": "エン",
    "iang": "ヤン",
    "iao": "ャオ",
    "ie": "エ",
    "in": "イン",
    "ing": "ン",
    "iong": "ョン",
    "iu": "ュウ",
    "o": "オ",
    "ong": "オン",
    "ou": "オウ",
    "u": "ウ",
    "ua": "ワ",
    "uai": "ワイ",
    "uan": "ワン",
    "uang": "ワン",
    "ue": "ュエ",
    "ui": "ェイ",
    "un": "ウン",
    "uo": "オ",
    "v": "ュ",
    "ve": "ュエ",
    "van": "ュアン",
    "vn": "ュン",
}

# Whole syllables with a fixed katakana rendering; pinyin_to_katakana checks
# this table before splitting the syllable into initial and final.
JP_SPECIAL_SYLLABLES = {
    "zhi": "ジ",
    "chi": "チ",
    "shi": "シ",
    "ri": "リ",
    "zi": "ズ",
    "ci": "ツ",
    "si": "ス",
    "wu": "ウ",
    "yi": "イ",
    "yu": "ユ",
    "yue": "ユエ",
    "yuan": "ユアン",
    "yun": "ユン",
}
|
||
|
||
|
||
def pinyin_to_katakana(syllable: str) -> str:
    """Convert one pinyin syllable to katakana.

    Whole-syllable entries in ``JP_SPECIAL_SYLLABLES`` win; otherwise the
    result is the katakana of the initial (empty if unknown) followed by the
    katakana of the final (``ン`` if unknown).
    """
    normalized = normalize_pinyin_syllable(syllable)
    fixed = JP_SPECIAL_SYLLABLES.get(normalized)
    if fixed is not None:
        return fixed
    onset, rhyme = split_pinyin_syllable(normalized)
    onset_kana = JP_INITIALS.get(onset, "")
    rhyme_kana = JP_FINALS.get(rhyme, "ン")
    return onset_kana + rhyme_kana
|
||
|
||
|
||
# Hangul initial-consonant index for each pinyin initial; the value is passed
# as ``initial`` to compose_hangul.  Index 11 is the silent consonant ㅇ and is
# also what pinyin_to_hangul uses for an unknown initial.
KR_INITIALS = {
    "": 11,  # ㅇ
    "b": 7,
    "p": 17,
    "m": 6,
    "f": 17,
    "d": 3,
    "t": 16,
    "n": 2,
    "l": 5,
    "g": 0,
    "k": 15,
    "h": 18,
    "j": 12,
    "q": 14,
    "x": 10,
    "zh": 12,
    "ch": 14,
    "sh": 10,
    "r": 5,
    "z": 12,
    "c": 14,
    "s": 10,
}

# For each pinyin final ("v" stands for ü): a one-element list holding
# ``(vowel index, final-consonant index, tail)``.  The two indices are passed
# to compose_hangul together with the initial; ``tail`` is a ready-made Hangul
# string appended after the composed syllable.  The "" entry is the fallback
# pinyin_to_hangul uses for a final that is not listed.
KR_FINALS = {
    "": [(18, 0, "")],
    "a": [(0, 0, "")],
    "ai": [(0, 0, "이")],
    "an": [(0, 4, "")],
    "ang": [(0, 21, "")],
    "ao": [(0, 0, "오")],
    "e": [(4, 0, "")],
    "ei": [(5, 0, "이")],
    "en": [(4, 4, "")],
    "eng": [(4, 21, "")],
    "er": [(4, 8, "")],
    "i": [(20, 0, "")],
    "ia": [(2, 0, "")],
    "ian": [(6, 4, "")],
    "iang": [(2, 21, "")],
    "iao": [(2, 0, "오")],
    "ie": [(6, 0, "")],
    "in": [(20, 4, "")],
    "ing": [(20, 21, "")],
    "iong": [(13, 21, "")],
    "iu": [(17, 0, "")],
    "o": [(8, 0, "")],
    "ong": [(8, 21, "")],
    "ou": [(8, 0, "우")],
    "u": [(13, 0, "")],
    "ua": [(9, 0, "")],
    "uai": [(9, 0, "이")],
    "uan": [(9, 4, "")],
    "uang": [(9, 21, "")],
    "ue": [(15, 0, "")],
    "ui": [(14, 0, "이")],
    "un": [(13, 4, "")],
    "uo": [(14, 0, "")],
    "v": [(16, 0, "")],
    "ve": [(15, 0, "")],
    "van": [(9, 4, "")],
    "vn": [(16, 4, "")],
}
|
||
|
||
# Overrides for the "i" final after the retroflex and sibilant initials
# (zhi, chi, shi, ri, zi, ci, si), keyed by ``(initial, final)``.  Entries use
# the same ``(vowel index, final-consonant index, tail)`` layout as KR_FINALS:
# vowel 18 is ㅡ and final-consonant 0 means "no final consonant", so these
# syllables compose to an open syllable such as 즈 or 쓰.  (A final-consonant
# index of 18 would add the cluster ㅄ to every one of them.)
KR_SPECIAL_FINALS = {
    ("zh", "i"): [(18, 0, "")],
    ("ch", "i"): [(18, 0, "")],
    ("sh", "i"): [(18, 0, "")],
    ("r", "i"): [(18, 0, "")],
    ("z", "i"): [(18, 0, "")],
    ("c", "i"): [(18, 0, "")],
    ("s", "i"): [(18, 0, "")],
}
|
||
|
||
|
||
def compose_hangul(initial: int, vowel: int, final: int = 0) -> str:
    """Build one precomposed Hangul syllable from its three jamo indices.

    The syllable's offset from U+AC00 is ``(initial * 21 + vowel) * 28 + final``.
    No range checking is done on the indices.
    """
    syllable_offset = (initial * 21 + vowel) * 28 + final
    return chr(0xAC00 + syllable_offset)
|
||
|
||
|
||
def pinyin_to_hangul(syllable: str) -> str:
    """Convert one pinyin syllable to Hangul.

    The syllable is split into initial and final.  The final's recipe comes
    from ``KR_SPECIAL_FINALS`` (keyed by the initial/final pair) when present,
    otherwise from ``KR_FINALS``, otherwise from the ``""`` fallback entry.  An
    unknown initial is treated as index 11.  Only the first tuple of the
    recipe is used: its indices are composed into a syllable and its tail
    string is appended.
    """
    onset, rhyme = split_pinyin_syllable(syllable)

    recipe = KR_SPECIAL_FINALS.get((onset, rhyme))
    if recipe is None:
        recipe = KR_FINALS.get(rhyme)
    if recipe is None:
        recipe = KR_FINALS[""]

    vowel_index, coda_index, trailing = recipe[0]
    onset_index = KR_INITIALS.get(onset, 11)
    return compose_hangul(onset_index, vowel_index, coda_index) + trailing
|
||
|
||
|
||
def fallback_localized(text: str, lang: str, lazy_pinyin) -> str:
    """Transliterate *text* for *lang* when no dictionary translation exists.

    Each run of characters in the U+3400..U+9FFF range is converted via
    *lazy_pinyin*: to katakana for ``"JP"``, to Hangul for ``"KR"``, and to
    space-separated capitalized syllables for any other language.  Other
    non-whitespace characters are kept one by one; whitespace is dropped.
    The pieces are joined with a space for ``"EN"`` and with nothing otherwise.
    """

    def convert_run(run: str) -> str:
        """Transliterate one uninterrupted run of CJK characters."""
        syllables = lazy_pinyin(run)
        if lang == "JP":
            return "".join(pinyin_to_katakana(syllable) for syllable in syllables)
        if lang == "KR":
            return "".join(pinyin_to_hangul(syllable) for syllable in syllables)
        return " ".join(part.capitalize() for part in syllables if part)

    pieces: list[str] = []
    # Tokens are either a whole CJK run or one single non-CJK character.
    for token in re.findall(r"[\u3400-\u9fff]+|[^\u3400-\u9fff]", text):
        if "\u3400" <= token[0] <= "\u9fff":
            pieces.append(convert_run(token))
        elif token.strip():
            pieces.append(token)

    separator = " " if lang == "EN" else ""
    return separator.join(piece for piece in pieces if piece)
|
||
|
||
|
||
def clean_en(text: str) -> str:
    """Normalise spacing and connective casing in an English name.

    Whitespace runs collapse to a single space, the ends are trimmed, and the
    standalone words " Of " and " And " are lower-cased.
    """
    cleaned = re.sub(r"\s+", " ", text).strip()
    for capitalised, lowered in ((" Of ", " of "), (" And ", " and ")):
        cleaned = cleaned.replace(capitalised, lowered)
    return cleaned
|
||
|
||
|
||
def make_name_translator(
    known: dict[str, dict[str, str]],
    term_maps: dict[str, dict[str, str]],
    lazy_pinyin,
    segment_known_terms: set[str],
):
    """Build a ``translate_name(name, lang)`` closure.

    Args:
        known: per-language mapping of complete source names to translations.
        term_maps: per-language mapping of name fragments to translations;
            on a key collision these override ``known`` during segmentation.
        lazy_pinyin: callable handed to ``fallback_localized`` for stretches
            that no term covers.
        segment_known_terms: ``known`` keys that may be used as segments
            regardless of length; other ``known`` keys qualify only when they
            are 2 to 12 characters long.

    Returns:
        A function that returns the exact ``known`` translation when there is
        one, and otherwise segments the name greedily (longest term first) and
        joins the translated pieces: space-joined and passed through
        ``clean_en`` for "EN", space-joined with whitespace collapsed for
        "KR", and concatenated for any other language.

    The per-language segmentation table is built on first use and then
    reused, so ``known`` and ``term_maps`` should not be modified after the
    translator has been used.  Exact-name lookups always read ``known`` live.
    """
    # Text that may directly follow a single-character term for it to count
    # as a match (see can_match_term).
    single_term_followups = (
        "上游",
        "中游",
        "下游",
        "北部",
        "南部",
        "东部",
        "西部",
        "北缘",
        "南缘",
        "东段",
        "西段",
        "边地",
        "地",
        "区",
        "带",
        "畔",
        "岸",
        "道",
        "港",
        "桥",
        "林",
        "树",
        "泉",
        "口",
        "群",
        "上",
    )

    # lang -> (term -> translation, terms longest first, multi-character terms)
    tables: dict[str, tuple[dict[str, str], list[str], tuple[str, ...]]] = {}

    def segmentation_table(lang: str) -> tuple[dict[str, str], list[str], tuple[str, ...]]:
        """Return the cached term table for ``lang``, building it if needed."""
        table = tables.get(lang)
        if table is None:
            known_for_segmentation = {
                key: value
                for key, value in known[lang].items()
                if key in segment_known_terms or 2 <= len(key) <= 12
            }
            all_terms = {**known_for_segmentation, **term_maps[lang]}
            # An empty key would match everywhere without consuming any input
            # and stall the scan, so it is never offered as a candidate.
            terms = [term for term in sorted(all_terms, key=len, reverse=True) if term]
            multi_terms = tuple(term for term in terms if len(term) > 1)
            table = tables[lang] = (all_terms, terms, multi_terms)
        return table

    def can_match_term(name: str, index: int, term: str, multi_terms: tuple[str, ...]) -> bool:
        """Decide whether ``term`` found at ``index`` may be used as a segment.

        Multi-character terms always qualify.  A single-character term
        qualifies only when it ends the name or is directly followed by one of
        ``single_term_followups`` or by a multi-character term.
        """
        if len(term) > 1:
            return True
        next_index = index + len(term)
        return (
            next_index == len(name)
            or name.startswith(single_term_followups, next_index)
            or name.startswith(multi_terms, next_index)
        )

    def find_term(name: str, index: int, terms: list[str], multi_terms: tuple[str, ...]):
        """Return the first (longest) usable term at ``index``, or None."""
        for term in terms:
            if name.startswith(term, index) and can_match_term(name, index, term, multi_terms):
                return term
        return None

    def translate_name(name: str, lang: str) -> str:
        exact = known[lang]
        if name in exact:
            return exact[name]

        all_terms, terms, multi_terms = segmentation_table(lang)
        pieces: list[str] = []
        index = 0
        while index < len(name):
            match = find_term(name, index, terms, multi_terms)
            if match is not None:
                pieces.append(all_terms[match])
                index += len(match)
                continue
            # No term starts here: take at least one character and extend the
            # uncovered stretch until a position where a term can match.
            next_index = index + 1
            while next_index < len(name) and find_term(name, next_index, terms, multi_terms) is None:
                next_index += 1
            pieces.append(fallback_localized(name[index:next_index], lang, lazy_pinyin))
            index = next_index

        if lang == "EN":
            return clean_en(" ".join(piece for piece in pieces if piece))
        if lang == "KR":
            return re.sub(r"\s+", " ", " ".join(piece for piece in pieces if piece)).strip()
        return "".join(piece for piece in pieces if piece)

    return translate_name
|
||
|
||
|
||
def english_article(word: str) -> str:
    """Return "an" if ``word`` starts with a, e, i, o or u (any case), else "a".

    The choice is purely orthographic.  An empty ``word`` yields "a": the
    empty string is a substring of every string, so it has to be excluded
    explicitly before the membership test.
    """
    first = word[:1].lower()
    return "an" if first and first in "aeiou" else "a"
|
||
|
||
|
||
def make_desc_translator(app, gen, term_en, term_jp, term_kr, translate_name):
    """Build a ``translate_desc(row, lang)`` closure for geo descriptions.

    ``app`` supplies the PROFILE_* and PHRASE_* tables, ``gen`` supplies
    TYPE_CN, the ``term_*`` mappings translate the Chinese kind label, and
    ``translate_name`` localises place names.  The returned function
    recognises two Chinese sentence shapes (a "located around ..." sentence
    and an "X is the ... of ..." sentence) and otherwise emits a generic
    sentence built from the row's CivEnum.
    """
    kind_terms = {"EN": term_en, "JP": term_jp, "KR": term_kr}
    civ_profiles = {"EN": app.PROFILE_EN, "JP": app.PROFILE_JP, "KR": app.PROFILE_KR}
    phrase_tables = {"EN": app.PHRASE_EN, "JP": app.PHRASE_JP, "KR": app.PHRASE_KR}
    default_phrases = {"EN": "reflecting its regional role", "JP": "地域で重要な役割を持つ", "KR": "지역적 역할을 보여 준다"}
    building_pattern = re.compile(r"^位于(.+?)周边的(.+?),(.+)。$")
    natural_pattern = re.compile(r"^(.+?)是(.+?)(?:中的|的)(.+?),(.+)。$")

    def translate_phrase(text: str, lang: str) -> str:
        # Phrases missing from the table get the language's generic stand-in.
        table = phrase_tables[lang]
        return table.get(text, default_phrases[lang])

    def translate_desc(row: dict[str, str], lang: str) -> str:
        desc = row["GeoDescStr"]
        kind_cn = gen.TYPE_CN[row["SmallClass"]]
        kind = kind_terms[lang][kind_cn]

        # Shape 1: a building located around a named place, followed by one
        # or more comma-separated clauses.
        found = building_pattern.match(desc)
        if found is not None:
            place = translate_name(found.group(1), lang)
            rendered = [translate_phrase(clause, lang) for clause in found.group(3).split(",")]
            if lang == "EN":
                joined = " and ".join(rendered)
                return f"{kind.capitalize()} around {place}, {joined}."
            if lang == "JP":
                joined = "、".join(rendered)
                return f"{place}周辺の{kind}で、{joined}。"
            joined = ", ".join(rendered)
            return f"{place} 주변의 {kind}이며, {joined}."

        # Shape 2: a natural feature; only the trailing clause of the
        # sentence is translated, the rest comes from row fields.
        found = natural_pattern.match(desc)
        if found is not None:
            name = translate_name(row["GeoName"], lang)
            impact_cn = found.group(4)
            if lang == "EN":
                article = english_article(kind)
                profile = civ_profiles[lang][row["CivEnum"]]
                impact = translate_phrase(impact_cn, lang)
                return f"{name} is {article} {kind} in {profile}, {impact}."
            profile = civ_profiles[lang][row["CivEnum"]]
            impact = translate_phrase(impact_cn, lang)
            if lang == "JP":
                return f"{name}は{profile}の{kind}で、{impact}。"
            return f"{name}은(는) {profile}의 {kind}이며, {impact}."

        # Neither shape matched: generic sentence using the raw CivEnum value.
        name = translate_name(row["GeoName"], lang)
        if lang == "EN":
            article = english_article(kind)
            civ_enum = row["CivEnum"]
            return f"{name} is {article} {kind} of the {civ_enum} civilization, reflecting its regional geography."
        civ_enum = row["CivEnum"]
        if lang == "JP":
            return f"{name}は{civ_enum}文明圏の{kind}で、地域地理を示す。"
        return f"{name}은(는) {civ_enum} 문명의 {kind}이며, 지역 지리를 보여 준다."

    return translate_desc
|
||
|
||
|
||
def build_updates(items: dict[int, dict[str, str]], gen, app) -> dict[int, dict[str, str]]:
    """Generate EN/JP/KR translations for every generated geo row.

    Args:
        items: multilingual entries keyed by item id; the "ZH" field of each
            referenced entry is compared against the generated row.
        gen: generator module providing ``generate``, ``CIVS`` and ``TYPE_CN``.
        app: module providing the profile/phrase tables used for descriptions.

    Returns:
        A mapping of multilingual item id to ``{"EN", "JP", "KR"}`` strings
        for both the name and the description of each row.

    Raises:
        RuntimeError: if a generated row has no export entry or its Chinese
            text differs from the multilingual asset, or if a generated
            translation fails the script check (CJK_RE matching an EN or KR
            value, LATIN_RE matching a JP or KR value).  Up to 20 entries are
            included in the message.
    """
    lazy_pinyin = ensure_pypinyin()
    rows = gen.generate(GEO_START_ID)
    geo_refs = parse_geo_export_refs((ROOT / "Unity/Assets/BundleResources/Export/GeoDataAssets.asset").read_text(encoding="utf-8"))
    # Multilingual ids owned by generated geo rows (geo id >= GEO_START_ID).
    generated_ids = {item_id for geo_id, pair in geo_refs.items() if GEO_START_ID <= geo_id for item_id in pair}
    known = build_known_term_maps(items, gen, generated_ids)
    term_en, term_jp, term_kr, term_maps = make_term_maps(app)
    # Civilisation and city names may be used as segments whatever their length.
    segment_known_terms = {data["cn"] for data in gen.CIVS.values()}
    for data in gen.CIVS.values():
        segment_known_terms.update(data.get("cities", []))
    translate_name = make_name_translator(known, term_maps, lazy_pinyin, segment_known_terms)
    translate_desc = make_desc_translator(app, gen, term_en, term_jp, term_kr, translate_name)

    updates: dict[int, dict[str, str]] = {}
    bad_rows: list[tuple[int, str]] = []
    mismatches: list[str] = []
    for row in rows:
        geo_id = int(row["Id"])
        if geo_id not in geo_refs:
            mismatches.append(f"missing GeoDataAssets export row for {geo_id}")
            continue
        name_id, desc_id = geo_refs[geo_id]
        if items.get(name_id, {}).get("ZH") != row["GeoName"]:
            mismatches.append(f"GeoName mismatch geo={geo_id} multilingual={name_id}")
        if items.get(desc_id, {}).get("ZH") != row["GeoDescStr"]:
            mismatches.append(f"GeoDesc mismatch geo={geo_id} multilingual={desc_id}")
        translated = {
            name_id: {
                "EN": translate_name(row["GeoName"], "EN"),
                "JP": translate_name(row["GeoName"], "JP"),
                "KR": translate_name(row["GeoName"], "KR"),
            },
            desc_id: {
                "EN": translate_desc(row, "EN"),
                "JP": translate_desc(row, "JP"),
                "KR": translate_desc(row, "KR"),
            },
        }
        row_failed = False
        for item_id, field_updates in translated.items():
            has_cjk = CJK_RE.search(field_updates["EN"]) or CJK_RE.search(field_updates["KR"])
            has_latin = LATIN_RE.search(field_updates["JP"]) or LATIN_RE.search(field_updates["KR"])
            if has_cjk or has_latin:
                row_failed = True
            updates[item_id] = field_updates
        # Report each failing row once so the 20-entry preview in the error
        # message is not filled with repeats of the same row.
        if row_failed:
            bad_rows.append((geo_id, row["GeoName"]))
    if mismatches:
        raise RuntimeError("Geo export/multilingual mismatch:\n" + "\n".join(mismatches[:20]))
    if bad_rows:
        raise RuntimeError("Generated translations contain CJK in EN/KR or Latin in JP/KR: " + repr(bad_rows[:20]))
    return updates
|
||
|
||
|
||
def apply_asset_updates(asset_path: Path, updates: dict[int, dict[str, str]]) -> int:
    """Rewrite the multilingual asset with the given translations.

    The file is read as UTF-8 with CRLF normalised to LF, each parsed block
    whose id appears in ``updates`` is re-rendered, and the file is always
    written back with LF newlines.  Returns the number of blocks whose text
    actually changed.
    """
    source = asset_path.read_text(encoding="utf-8").replace("\r\n", "\n")
    blocks, _ = parse_multilingual_asset(source)
    # Walk the blocks in file order so the untouched text between them can be
    # copied through verbatim.
    in_file_order = sorted(blocks.items(), key=lambda entry: entry[1][0])

    output: list[str] = []
    changed = 0
    position = 0
    for item_id, (start, end, block) in in_file_order:
        output.append(source[position:start])
        rendered = block
        if item_id in updates:
            rendered = render_multilingual_block(block, updates[item_id])
            if rendered != block:
                changed += 1
        output.append(rendered)
        position = end
    output.append(source[position:])

    asset_path.write_text("".join(output), encoding="utf-8", newline="\n")
    return changed
|
||
|
||
|
||
# Delimiters of the multilingual text export: apply_txt_updates splits the
# file into rows on TXT_ROW and each row into columns on TXT_COL.
TXT_COL = "%$#@!"  # column separator
TXT_ROW = "!@#$%"  # row separator
|
||
|
||
|
||
def apply_txt_updates(txt_path: Path, updates: dict[int, dict[str, str]]) -> int:
    """Write translations into the delimiter-separated multilingual text file.

    Returns 0 without touching anything when the file does not exist.
    Otherwise rows are split on TXT_ROW and columns on TXT_COL; rows whose
    first column is an id present in ``updates`` get columns 4, 5 and 6 set
    to the EN, JP and KR values (padding the row with empty columns if it is
    shorter).  The file is always written back with LF newlines.  Returns the
    number of rows whose three values changed.
    """
    if not txt_path.exists():
        return 0

    content = txt_path.read_text(encoding="utf-8").replace("\r\n", "\n")
    records = content.split(TXT_ROW)
    changed = 0
    for position, record in enumerate(records):
        if not record:
            continue
        fields = record.split(TXT_COL)
        if not fields or not fields[0].isdigit():
            continue
        item_id = int(fields[0])
        if item_id not in updates:
            continue

        # Make sure columns 4..6 exist before reading or assigning them.
        fields.extend([""] * (7 - len(fields)))
        before = (fields[4], fields[5], fields[6])
        translation = updates[item_id]
        fields[4] = translation["EN"]
        fields[5] = translation["JP"]
        fields[6] = translation["KR"]
        if (fields[4], fields[5], fields[6]) != before:
            changed += 1
        records[position] = TXT_COL.join(fields)

    txt_path.write_text(TXT_ROW.join(records), encoding="utf-8", newline="\n")
    return changed
|
||
|
||
|
||
def audit_new_geo(items: dict[int, dict[str, str]], updates: dict[int, dict[str, str]]) -> dict[str, int]:
    """Count generated values that fail the per-language script checks.

    Only ``updates`` (the values about to be written) is inspected, so the
    result does not depend on the current file state; ``items`` is accepted
    but not read.  The result holds the number of updates plus, per check,
    how many updates matched CJK_RE in EN or KR and LATIN_RE in JP or KR.
    """
    checks = (
        ("en_cjk", CJK_RE, "EN"),
        ("jp_latin", LATIN_RE, "JP"),
        ("kr_cjk", CJK_RE, "KR"),
        ("kr_latin", LATIN_RE, "KR"),
    )
    report = {"updates": len(updates)}
    for label, _pattern, _field in checks:
        report[label] = 0
    for update in updates.values():
        for label, pattern, field in checks:
            if pattern.search(update[field]):
                report[label] += 1
    return report
|
||
|
||
|
||
def main() -> int:
    """Generate geo translations, print an audit, and apply them to disk.

    With ``--dry-run`` on the command line only the audit line is printed.
    Otherwise the multilingual asset and the text export are updated and a
    second JSON line reports how many rows changed in each.  Returns 0.
    """

    def emit(payload: dict[str, int]) -> None:
        print(json.dumps(payload, ensure_ascii=True, sort_keys=True))

    dry_run = "--dry-run" in sys.argv
    gen = load_module("geo917_generate", ROOT / ".codex/skills/th1-geo-copywriting/scripts/generate_geo_9_17_draft.py")
    app = load_module("geo917_apply", ROOT / ".codex/skills/th1-geo-copywriting/scripts/apply_geo_9_17_to_config.py")
    asset_path = ROOT / "Unity/Assets/BundleResources/Export/Multilingual.asset"
    txt_path = ROOT / "Tools/MultilingualTxt.txt"

    asset_text = asset_path.read_text(encoding="utf-8").replace("\r\n", "\n")
    _, items = parse_multilingual_asset(asset_text)
    updates = build_updates(items, gen, app)
    emit(audit_new_geo(items, updates))

    if not dry_run:
        changed_asset = apply_asset_updates(asset_path, updates)
        changed_txt = apply_txt_updates(txt_path, updates)
        emit({"changed_asset_rows": changed_asset, "changed_txt_rows": changed_txt})
    return 0
|
||
|
||
|
||
# Run as a script: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|