#!/usr/bin/env python3
"""Parse source markdown to generate complete n01_phylums.json with ALL orders and families."""
import json
import re

def species_to_rank(species):
    """Map a species count to its letter grade.

    Grades run from 'S' (20000 species or more) down through 'A', 'B', 'C',
    'D' and 'E'; a count below 2 is graded 'F'.
    """
    # (minimum species count, grade), checked from the highest floor down.
    tiers = (
        (20000, 'S'),
        (5000, 'A'),
        (1000, 'B'),
        (100, 'C'),
        (10, 'D'),
        (2, 'E'),
    )
    for floor, grade in tiers:
        if species >= floor:
            return grade
    return 'F'

# Read the overview markdown in one go, then cut it at every '## ' marker so
# each later step can find its section by the text that follows the marker.
source_md = '01_Active_Projects/N01_植物帝国企划/owm植物学AVGIV总览.md'
with open(source_md, mode='r', encoding='utf-8') as f:
    content = f.read()

section_marker = '## '
sections = content.split(section_marker)

# ─── 1. Parse 目视角 for all angiosperm order data ───
def _cell(cols, idx, default=''):
    """Return ``cols[idx]``, or *default* when the row is too short to have it."""
    return cols[idx] if idx < len(cols) else default


def _to_int(text, signed=False):
    """Return *text* as an int, or 0 when it is not a plain decimal integer.

    With ``signed=True`` a single leading '-' is allowed. The whole string is
    matched before converting, so ``int()`` is only called on text it can
    convert: inputs such as '1-2', '--5' or '²' give 0 instead of raising
    ValueError.
    """
    pattern = r'-?\d+' if signed else r'\d+'
    return int(text) if re.fullmatch(pattern, text) else 0


# Order records keyed by the name found in cell 10 of each table row.
orders_data = {}
for s in sections:
    if not s.startswith('目视角'):
        continue
    for line in s.strip().split('\n'):
        if not line.startswith('|'):
            continue
        cols = [c.strip() for c in line.split('|')]
        # A data row has more than 14 cells and an integer order number in
        # cell 8; anything else (header, separator, short row) is ignored.
        if len(cols) <= 14 or not re.fullmatch(r'\d+', cols[8]):
            continue
        cn_name = cols[10]
        mode_genus = cols[12]
        orders_data[cn_name] = {
            'num': int(cols[8]),
            'rank': cols[9],
            'name': cn_name,
            'latin': cols[11],
            'modeGenus': mode_genus if mode_genus != 'nan' else '',
            'families': _to_int(cols[13]),
            'genera': _to_int(cols[14]),
            # Cells 15 and 16 may be absent on a short row: the guard above
            # only guarantees indexes up to 14, so read them defensively.
            'species': _to_int(_cell(cols, 15)),
            'error': _to_int(_cell(cols, 16, '0'), signed=True),
            'family_list': []
        }
    # Only the first section that starts with '目视角' is parsed.
    break

print(f'Angiosperm orders from 目视角: {len(orders_data)}')

# Add gymnosperm orders manually (from 设定详细 and Sheet4)
# One row per order: (num, rank, name, latin, families, genera, species, error).
gymno_rows = (
    (65, 'C', '苏铁目', 'Cycadales', 2, 10, 318, 6),
    (66, 'F', '银杏目', 'Ginkgoales', 1, 1, 1, 0),
    (67, 'C', '南洋杉目', 'Araucariales', 2, 6, 71, 5),
    (68, 'C', '柏目', 'Cupressales', 4, 32, 170, 10),
    (69, 'C', '松目', 'Pinales', 1, 11, 255, 5),
    (70, 'E', '麻黄目', 'Ephedrales', 1, 1, 65, 3),
    (71, 'F', '百岁兰目', 'Welwitschiales', 1, 1, 1, 0),
    (72, 'D', '买麻藤目', 'Gnetales', 1, 1, 40, 5),
)

# Same record layout as the parsed orders; these rows have no mode genus and
# start with an empty family list.
gymno_orders = {
    name: {
        'num': num,
        'rank': rank,
        'name': name,
        'latin': latin,
        'families': families,
        'genera': genera,
        'species': species,
        'error': error,
        'family_list': [],
        'modeGenus': '',
    }
    for num, rank, name, latin, families, genera, species, error in gymno_rows
}

# An order name already present in orders_data is replaced by the row above.
orders_data.update(gymno_orders)

print(f'Total orders (incl. gymnosperms): {len(orders_data)}')

# ─── 2. Parse 科视角 for ALL family data (use order name as key) ───
# Order names seen in the family table that have no entry in orders_data,
# mapped to the number of family rows dropped for each. Reported below so
# that a naming mismatch between the two tables does not go unnoticed.
skipped_orders = {}

for s in sections:
    if not s.startswith('科视角'):
        continue
    for line in s.strip().split('\n'):
        # Only table rows are of interest; rows containing '---' are skipped.
        if not line.startswith('|') or '---' in line:
            continue
        cols = [c.strip() for c in line.split('|')]
        if len(cols) <= 5:
            continue
        order_name = cols[2]
        fam_raw = cols[4]
        # Skip the header row ('目名') and rows whose order or family cell is
        # empty, 'nan', or (for the order) does not contain '目'.
        if (not order_name or order_name == 'nan' or '目' not in order_name
                or order_name == '目名' or not fam_raw or fam_raw == 'nan'):
            continue

        # Parse family: "无油樟科 Amborellaceae" or "无油樟科 Amborellaceae Pers. (1807)"
        # Split on whitespace (including full-width space); the first token
        # is the Chinese name and the second, when present, the Latin name.
        parts = re.split(r'[\s ]+', fam_raw)
        fam_cn = parts[0]
        fam_latin = parts[1] if len(parts) > 1 else ''

        mode_genus = cols[5]
        # Description: the non-empty, non-'nan' cells 6..11 joined by spaces.
        desc = ' '.join(val for val in cols[6:12] if val and val != 'nan')

        if order_name not in orders_data:
            skipped_orders[order_name] = skipped_orders.get(order_name, 0) + 1
            continue
        orders_data[order_name]['family_list'].append({
            'name': fam_cn,
            'latin': fam_latin,
            'modeGenus': mode_genus if mode_genus != 'nan' else '',
            'description': desc
        })
    # Only the first section that starts with '科视角' is parsed.
    break

total_fam = sum(len(o['family_list']) for o in orders_data.values())
print(f'Total families parsed: {total_fam}')
if skipped_orders:
    print(f'Warning: {sum(skipped_orders.values())} family rows skipped '
          f'(order not in orders_data): {skipped_orders}')

# ─── 3. Load existing JSON for game-specific data ───
with open('Dashboard/data/n01_phylums.json', 'r', encoding='utf-8') as f:
    existing = json.load(f)

# Index every order of the previous output by its 'id'. Clades without an
# 'orders' key contribute nothing; a repeated id keeps the last entry seen.
existing_orders = {
    prev_order['id']: prev_order
    for prev_clade in existing['data']
    for prev_order in prev_clade.get('orders', [])
}

# ─── 4. Clade mapping ───
num_to_name = {info['num']: name for name, info in orders_data.items()}

# (first order num, last order num, (clade id, clade name, clade rank)).
# Both ends of each span are inclusive.
clade_spans = (
    (1, 3, ('ANITA', '基部被子植物', '演化支')),
    (4, 8, ('Magnoliids', '木兰类植物', '类群')),
    (9, 19, ('Monocots', '单子叶植物', '类群')),
    (20, 20, ('Ceratophyllales_clade', '金鱼藻类', '演化支')),
    (21, 64, ('Eudicots', '真双子叶植物', '类群')),
    (65, 72, ('Gymnosperms', '裸子植物', '类群')),
)

# Order name -> clade tuple, for every order number that was actually found.
clade_map = {}
for first_num, last_num, clade_info in clade_spans:
    for i in range(first_num, last_num + 1):
        if i in num_to_name:
            clade_map[num_to_name[i]] = clade_info

# ─── 5. Build output ───
# Clades appear in the output in this order.
clade_order = ['ANITA', 'Magnoliids', 'Monocots', 'Ceratophyllales_clade', 'Eudicots', 'Gymnosperms']

# Description text of each clade.
desc_map = {
    'ANITA': '最原始的被子植物分支,包含无油樟、睡莲和木兰藤',
    'Magnoliids': '原始的被子植物类群,包含木兰、樟、胡椒等',
    'Monocots': '单子叶植物,包含禾本、兰花、棕榈等',
    'Ceratophyllales_clade': '金鱼藻类,水生植物,系统位置特殊',
    'Eudicots': '最大的被子植物类群,占被子植物75%以上',
    'Gymnosperms': '古老的种子植物,包含松、杉、银杏、苏铁'
}

# Display name of each clade. Stated once here instead of being derived from
# the description and then overwritten entry by entry.
name_map = {
    'ANITA': '基部被子植物',
    'Magnoliids': '木兰类植物',
    'Monocots': '单子叶植物',
    'Ceratophyllales_clade': '金鱼藻类',
    'Eudicots': '真双子叶植物',
    'Gymnosperms': '裸子植物',
}

# Clades ranked '演化支'; every other clade is ranked '类群'.
evolutionary_clades = ('ANITA', 'Ceratophyllales_clade')

# One record per clade, each starting with its own empty 'orders' list that
# the order-assignment step fills in.
clades_output = {
    cid: {
        'id': cid,
        'name': name_map[cid],
        'rank': '演化支' if cid in evolutionary_clades else '类群',
        'description': desc_map.get(cid, ''),
        'orders': []
    }
    for cid in clade_order
}

# Sort orders by num and assign to clades
def _family_child(fam):
    """Turn one parsed family record into a '科' child node.

    'id' is the Latin name, falling back to the Chinese name when the Latin
    name is empty. 'latin', 'modeGenus' and 'description' are added only
    when they are non-empty.
    """
    child = {
        'id': fam['latin'] or fam['name'],
        'name': fam['name'],
        'rank': '科',
    }
    for key in ('latin', 'modeGenus', 'description'):
        if fam[key]:
            child[key] = fam[key]
    return child


for order_name, order in sorted(orders_data.items(), key=lambda item: item[1]['num']):
    clade_id = clade_map[order_name][0] if order_name in clade_map else 'Unknown'
    # Orders without a clade mapping are gathered in a catch-all clade that
    # is created the first time it is needed.
    clades_output.setdefault(clade_id, {
        'id': clade_id, 'name': '未分类', 'rank': '类群',
        'description': '', 'orders': []
    })

    # Entry for this order from the previous output, looked up by Latin name.
    existing_order = existing_orders.get(order['latin'], {})

    order_entry = {
        'id': order['latin'],
        'name': order['name'],
        'rank': '目',
        'latin': order['latin'],
        'chineseName': order['name'],
        'population': order['species'],
        'rankLevel': species_to_rank(order['species']),
        'stats': {
            'families': order['families'],
            'genera': order['genera'],
            'species': order['species'],
            'error': order['error']
        },
        'children': [_family_child(fam) for fam in order['family_list']]
    }

    # Carry over game data from the previous output when it has a value.
    for key in ('leader', 'territory', 'features'):
        if existing_order.get(key):
            order_entry[key] = existing_order[key]

    clades_output[clade_id]['orders'].append(order_entry)

# ─── 6. Output ───
output = {
    'project': 'N01_植物帝国企划',
    'description': '基于APG IV植物分类系统的AVG游戏阵营设定',
    'version': '2.0.0',
    'lastUpdated': '2026-04-19',
    'hierarchy': ['演化支', '类群', '目', '科'],
    'data': list(clades_output.values())
}

# Summary figures are computed from the assembled output itself.
all_orders = [o for c in output['data'] for o in c['orders']]
total_orders = len(all_orders)
total_families = sum(len(o['children']) for o in all_orders)
total_species = sum(o['stats']['species'] for o in all_orders)

print(f'\n=== Final Output ===')
print(f'Clades: {len(output["data"])}')
print(f'Orders: {total_orders}')
print(f'Families: {total_families}')
print(f'Species: {total_species:,}')

# Overwrites the same file that the existing game data was loaded from.
with open('Dashboard/data/n01_phylums.json', 'w', encoding='utf-8') as f:
    json.dump(output, f, ensure_ascii=False, indent=2)

print('\nSaved to Dashboard/data/n01_phylums.json')