2026-05-30 23:30:55 +08:00

718 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations

from hashlib import sha1
from html import escape
import logging
import threading
import time
from typing import Any
from urllib.parse import quote

from fastapi import FastAPI, Form, Query
from fastapi.responses import HTMLResponse, RedirectResponse

from .config import Settings, get_settings
from .db import decode_json, init_db, session
from .models import RawItem
from .openrouter import OpenRouterClient
from .sync import analyze_pending, run_sync, save_analysis, upsert_raw_item
# ASGI application object; the route handlers in this module register on it.
app = FastAPI(title="TOHOTOPIA Steam Monitor")
# Held while a sync run is in progress (taken by POST /sync and by _sync_loop).
sync_lock = threading.Lock()
# Held while a background analysis batch is running (taken by POST /analyze-pending).
analysis_lock = threading.Lock()
# Set by the shutdown hook; _sync_loop waits on it between runs and exits once set.
stop_event = threading.Event()
def current_settings() -> Settings:
    """Return the settings object produced by ``get_settings()``."""
    settings = get_settings()
    return settings
def _fmt_ts(value: int | None) -> str:
    """Format a Unix timestamp as local-time ``YYYY-MM-DD HH:MM`` text.

    A falsy value (``None`` or ``0``) yields an empty string.
    """
    if value:
        local_time = time.localtime(int(value))
        return time.strftime("%Y-%m-%d %H:%M", local_time)
    return ""
def _badge(text: str, cls: str = "") -> str:
    """Render a badge as an HTML ``<span>``.

    Args:
        text: Badge label; HTML-escaped before insertion.
        cls: Extra CSS class text placed after ``badge`` in the ``class``
            attribute. It is escaped too, because ``_render_item`` passes a
            database value (the stored sentiment) here and an unescaped quote
            would break out of the attribute.

    Returns:
        The ``<span class="badge ...">`` markup.
    """
    return f'<span class="badge {escape(cls)}">{escape(text)}</span>'
def _manual_item_id(source_url: str, source_name: str, title: str, author_name: str, content: str) -> str:
    """Derive a SHA-1 hex identifier for a manually entered item.

    The stripped ``source_url`` is the hash seed when it is non-empty;
    otherwise the stripped source name, title, author name and content are
    joined with newlines and hashed instead.
    """
    seed = source_url.strip()
    if not seed:
        fields = (source_name, title, author_name, content)
        seed = "\n".join(field.strip() for field in fields)
    digest = sha1(seed.encode("utf-8", errors="ignore"))
    return digest.hexdigest()
def _looks_chinese(text: str) -> bool:
    """Report whether at least 20% of the alphabetic characters are CJK.

    Only characters in U+4E00..U+9FFF count as CJK. Text containing no
    alphabetic characters at all is treated as Chinese (returns ``True``).
    """
    total_letters = 0
    cjk_letters = 0
    for ch in text:
        if not ch.isalpha():
            continue
        total_letters += 1
        if "\u4e00" <= ch <= "\u9fff":
            cjk_letters += 1
    if total_letters == 0:
        return True
    return cjk_letters / total_letters >= 0.2
def _query(filters: dict[str, str]) -> tuple[str, list[Any]]:
    """Translate dashboard filters into a SQL ``WHERE`` clause and its parameters.

    Returns:
        ``(clause, params)`` where ``clause`` is either an empty string or a
        string starting with ``WHERE``, and ``params`` holds the values for
        the ``?`` placeholders in order.
    """
    conditions: list[str] = []
    params: list[Any] = []

    # Equality filters: one condition and one bound value each.
    equality_columns = (
        ("content_type", "r.content_type"),
        ("sentiment", "a.sentiment"),
        ("status", "w.status"),
    )
    for key, column in equality_columns:
        chosen = filters.get(key)
        if chosen:
            conditions.append(f"{column} = ?")
            params.append(chosen)

    # Checkbox-style flags: active only when the value is exactly "1".
    flag_conditions = (
        ("reply", "a.reply_recommended = 1"),
        ("actionable", "a.has_actionable_feedback = 1"),
    )
    for key, condition in flag_conditions:
        if filters.get(key) == "1":
            conditions.append(condition)

    # Free-text search across content, title and summary.
    needle = filters.get("q")
    if needle:
        conditions.append("(r.content LIKE ? OR r.title LIKE ? OR a.summary LIKE ?)")
        params.extend([f"%{needle}%"] * 3)

    if not conditions:
        return "", params
    return "WHERE " + " AND ".join(conditions), params
@app.on_event("startup")
def startup() -> None:
    """Initialise the database and, when auto sync is enabled, start the sync thread."""
    cfg = current_settings()
    with session(cfg.database_path) as conn:
        init_db(conn)
    if not cfg.auto_sync_enabled:
        return
    # Runs _sync_loop in a daemon thread named "steam-sync-loop".
    threading.Thread(target=_sync_loop, name="steam-sync-loop", daemon=True).start()
@app.on_event("shutdown")
def shutdown() -> None:
    """Set ``stop_event`` so the wait in ``_sync_loop`` returns and the loop ends."""
    stop_event.set()
def _sync_loop() -> None:
    """Run an incremental sync periodically until ``stop_event`` is set.

    The interval is ``settings.sync_interval_minutes`` (at least one minute),
    read once when the loop starts. A cycle is skipped when ``sync_lock`` is
    already held. A failing cycle never ends the loop.
    """
    settings = current_settings()
    interval_seconds = max(settings.sync_interval_minutes, 1) * 60
    # Event.wait returns False on timeout (time to sync) and True once the
    # event is set (time to exit).
    while not stop_event.wait(interval_seconds):
        if not sync_lock.acquire(blocking=False):
            continue
        try:
            with session(settings.database_path) as conn:
                run_sync(conn, settings, full=False)
        except Exception:  # noqa: BLE001 - the loop must survive any sync failure
            # Sync failures are recorded in sync_runs by run_sync when possible;
            # log as well so failures that could not be recorded stay visible.
            logging.getLogger(__name__).exception("Periodic sync failed")
        finally:
            sync_lock.release()
@app.get("/", response_class=HTMLResponse)
def index(
    content_type: str = Query(""),
    sentiment: str = Query(""),
    status: str = Query(""),
    reply: str = Query(""),
    actionable: str = Query(""),
    q: str = Query(""),
    manual: str = Query(""),
    notice: str = Query(""),
) -> str:
    """Render the dashboard page as a single HTML document.

    ``content_type``, ``sentiment``, ``status``, ``reply``, ``actionable`` and
    ``q`` are turned into a SQL filter by ``_query``. ``manual == "1"`` also
    shows the manual-entry form, and a non-empty ``notice`` is shown
    (HTML-escaped) as a banner.

    Returns:
        The full HTML page text.
    """
    settings = current_settings()
    filters = {
        "content_type": content_type,
        "sentiment": sentiment,
        "status": status,
        "reply": reply,
        "actionable": actionable,
        "q": q,
    }
    with session(settings.database_path) as conn:
        clause, params = _query(filters)
        # Item list: at most 200 rows; reply-recommended items sort first,
        # then newest by published/collected time.
        rows = conn.execute(
            f"""
SELECT r.*, a.sentiment, a.is_positive, a.is_negative,
a.has_actionable_feedback, a.feedback_types, a.reply_recommended,
a.reply_priority, a.reply_suggestion, a.summary, a.priority,
a.confidence, a.reason, w.status, w.owner, w.notes
FROM raw_items r
LEFT JOIN analysis_results a ON a.raw_item_id = r.id
LEFT JOIN work_items w ON w.raw_item_id = r.id
{clause}
ORDER BY
COALESCE(a.reply_recommended, 0) DESC,
COALESCE(r.published_at, r.collected_at) DESC,
r.collected_at DESC,
r.id DESC
LIMIT 200
""",
            params,
        ).fetchall()
        # Dashboard counters: this query applies no filter clause, so the
        # numbers cover all rows regardless of the active filters.
        metrics = conn.execute(
            """
SELECT
COUNT(*) AS total,
SUM(CASE WHEN w.status = 'new' THEN 1 ELSE 0 END) AS new_count,
SUM(CASE WHEN a.is_negative = 1 THEN 1 ELSE 0 END) AS negative_count,
SUM(CASE WHEN a.has_actionable_feedback = 1 THEN 1 ELSE 0 END) AS actionable_count,
SUM(CASE WHEN a.reply_recommended = 1 THEN 1 ELSE 0 END) AS reply_count,
SUM(CASE WHEN a.priority = 'high' THEN 1 ELSE 0 END) AS high_count,
SUM(CASE WHEN r.analysis_status = 'done' THEN 1 ELSE 0 END) AS analyzed_count,
SUM(CASE WHEN r.analysis_status = 'pending' THEN 1 ELSE 0 END) AS pending_count,
SUM(CASE WHEN r.analysis_status = 'error' THEN 1 ELSE 0 END) AS error_count
FROM raw_items r
LEFT JOIN analysis_results a ON a.raw_item_id = r.id
LEFT JOIN work_items w ON w.raw_item_id = r.id
"""
        ).fetchone()
        # Five most recently started sync runs, for the sidebar list.
        last_runs = conn.execute(
            "SELECT * FROM sync_runs ORDER BY started_at DESC LIMIT 5"
        ).fetchall()
        # Finish time of the latest successful run; preferred source for the
        # "last updated" text.
        last_success = conn.execute(
            """
SELECT finished_at FROM sync_runs
WHERE status = 'success' AND finished_at IS NOT NULL
ORDER BY finished_at DESC
LIMIT 1
"""
        ).fetchone()
        # Fallback for the "last updated" text when no successful run exists.
        latest_collected = conn.execute(
            "SELECT MAX(collected_at) AS collected_at FROM raw_items"
        ).fetchone()
    items_html = "\n".join(_render_item(row) for row in rows)
    runs_html = "\n".join(
        f"<li>{_fmt_ts(run['started_at'])} {escape(run['mode'])} "
        f"{escape(run['status'])} {escape(run['stats_json'] or '')} {escape(run['message'] or '')}</li>"
        for run in last_runs
    )
    return f"""
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{escape(settings.product_name)} 社区监控</title>
<style>{CSS}</style>
</head>
<body>
<header>
<div>
<h1>{escape(settings.product_name)} 社区监控</h1>
<p>Steam 与社区平台内容,每 {settings.sync_interval_minutes} 分钟刷新</p>
<p>最近更新时间:{_last_update_text(last_success, latest_collected)}</p>
</div>
<div class="actions">
<form method="post" action="/sync"><button>增量同步</button></form>
<form method="post" action="/sync?full=1"><button class="secondary">全量同步</button></form>
<form method="post" action="/analyze-pending"><button class="secondary">补跑分析</button></form>
<a class="button secondary" href="/?manual=1">手动添加</a>
</div>
</header>
<section class="metrics">
{_metric("总内容", metrics["total"])}
{_metric("未处理", metrics["new_count"])}
{_metric("差评/负面", metrics["negative_count"])}
{_metric("具体反馈", metrics["actionable_count"])}
{_metric("建议回复", metrics["reply_count"])}
{_metric("高优先级", metrics["high_count"])}
{_metric("已分析", metrics["analyzed_count"])}
{_metric("待补跑", (metrics["pending_count"] or 0) + (metrics["error_count"] or 0))}
</section>
{f'<div class="notice">{escape(notice)}</div>' if notice else ''}
{_render_manual_form() if manual == '1' else ''}
<form class="filters" method="get">
{_select("content_type", content_type, {"": "全部类型", "review": "Steam 评测", "discussion_topic": "Steam 帖子", "discussion_reply": "Steam 回复", "twitter_post": "Twitter 帖子", "twitter_reply": "Twitter 回复", "manual_note": "手动添加"})}
{_select("sentiment", sentiment, {"": "全部情绪", "positive": "正面", "negative": "负面", "mixed": "混合", "neutral": "中性"})}
{_select("status", status, {"": "全部状态", "new": "未处理", "read": "已读", "needs_reply": "待回复", "replied": "已回复", "needs_fix": "待修复", "archived": "已归档"})}
<label><input type="checkbox" name="reply" value="1" {'checked' if reply == '1' else ''}> 建议回复</label>
<label><input type="checkbox" name="actionable" value="1" {'checked' if actionable == '1' else ''}> 具体反馈</label>
<input name="q" placeholder="搜索正文/摘要" value="{escape(q)}">
<button>筛选</button>
</form>
<main>{items_html or '<div class="empty">暂无数据。先运行同步。</div>'}</main>
<aside>
<h2>最近同步</h2>
<ul>{runs_html or '<li>暂无同步记录</li>'}</ul>
</aside>
</body>
</html>
"""
@app.post("/sync")
def sync(full: int = Query(0)) -> RedirectResponse:
    """Start a background sync unless one is already running, then redirect home.

    A truthy ``full`` query value requests a full sync.
    """
    if not sync_lock.acquire(blocking=False):
        return RedirectResponse("/?notice=已有同步任务正在运行", status_code=303)
    # The lock stays held here; _run_sync_background releases it when done.
    worker = threading.Thread(
        target=_run_sync_background,
        kwargs={"full": bool(full)},
        daemon=True,
    )
    worker.start()
    return RedirectResponse("/?notice=同步已在后台开始,稍后刷新查看结果", status_code=303)
@app.post("/analyze-pending")
def analyze() -> RedirectResponse:
    """Start a background analysis batch (limit 20) unless one is already running."""
    if not analysis_lock.acquire(blocking=False):
        return RedirectResponse("/?notice=已有补跑分析正在运行", status_code=303)
    # The lock stays held here; _run_analysis_background releases it when done.
    worker = threading.Thread(target=_run_analysis_background, args=(20,), daemon=True)
    worker.start()
    return RedirectResponse("/?notice=补跑分析已在后台开始,每批最多 20 条,稍后刷新查看结果", status_code=303)
@app.post("/manual-items")
def create_manual_item(
    source_name: str = Form(...),
    source_url: str = Form(""),
    title: str = Form(""),
    author_name: str = Form(""),
    published_at_text: str = Form(""),
    content: str = Form(...),
    status: str = Form("new"),
    owner: str = Form(""),
    notes: str = Form(""),
) -> RedirectResponse:
    """Store a manually entered community item, then translate and analyse it.

    Text fields are stripped first. An unknown ``status`` falls back to
    ``"new"``. If ``source_name`` or ``content`` is empty, nothing is stored
    and the user is sent back to the form. Content that ``_looks_chinese``
    rejects is passed to the analyzer for translation. Translation or
    analysis failures do not abort the request: the item is still stored and
    the failure is reported in the notice.

    Returns:
        A 303 redirect to the dashboard carrying a URL-encoded ``notice``.
    """
    source_name = source_name.strip()
    source_url = source_url.strip()
    title = title.strip()
    author_name = author_name.strip()
    published_at_text = published_at_text.strip()
    content = content.strip()
    status = status if status in _work_status_options() else "new"
    if not source_name or not content:
        return RedirectResponse("/?manual=1&notice=来源社群和正文不能为空", status_code=303)
    original_content = content
    translated = False
    analysis_error = ""
    settings = current_settings()
    analyzer = OpenRouterClient(settings)
    # A single try/finally covers every use of the analyzer so it is closed on
    # all paths, including a failure while building or storing the item.
    try:
        try:
            if not _looks_chinese(content):
                content = analyzer.translate_to_chinese(content)
                translated = content != original_content
        except Exception as exc:  # noqa: BLE001 - keep manual entry even if translation fails
            analysis_error = f"翻译失败,已保留原文并标记待补跑:{exc}"
        item = RawItem(
            source="manual",
            source_item_id=_manual_item_id(source_url, source_name, title, author_name, content),
            source_url=source_url,
            content_type="manual_note",
            author_id=None,
            author_name=author_name or source_name,
            title=title or f"{source_name} 手动信息",
            published_at=None,
            published_at_text=published_at_text,
            updated_at_source=None,
            content=content,
            raw={
                "source_name": source_name,
                "source_url": source_url,
                "title": title,
                "author_name": author_name,
                "published_at_text": published_at_text,
                "original_content": original_content,
                "translated_to_zh": translated,
                "manual": True,
            },
        )
        now = int(time.time())
        with session(settings.database_path) as conn:
            raw_item_id, inserted = upsert_raw_item(conn, item)
            # last_handled_at only moves when the status is not 'new'.
            conn.execute(
                """
UPDATE work_items
SET status = ?, owner = ?, notes = ?, updated_at = ?,
last_handled_at = CASE WHEN ? != 'new' THEN ? ELSE last_handled_at END
WHERE raw_item_id = ?
""",
                (status, owner.strip(), notes.strip(), now, status, now, raw_item_id),
            )
            # Analysis is skipped when translation already failed.
            if not analysis_error:
                try:
                    analysis = analyzer.analyze(item)
                    save_analysis(conn, raw_item_id, settings.openrouter_model, analysis)
                except Exception as exc:  # noqa: BLE001 - keep pending/error for analyze-pending
                    analysis_error = f"分析失败,已标记待补跑:{exc}"
                    conn.execute(
                        "UPDATE raw_items SET analysis_status = 'error' WHERE id = ?",
                        (raw_item_id,),
                    )
    finally:
        analyzer.close()
    parts = ["已添加手动信息" if inserted else "已更新同来源手动信息"]
    if translated:
        parts.append("已翻译成中文")
    if analysis_error:
        parts.append(analysis_error)
    else:
        parts.append("已生成是否回复和回复建议")
    # Separate the fragments so they do not run together in the banner.
    notice = ";".join(parts)
    # The notice can contain arbitrary exception text; percent-encode it so
    # characters such as '&' or '#' cannot truncate or alter the query string.
    return RedirectResponse(f"/?notice={quote(notice, safe='')}", status_code=303)
@app.post("/items/{raw_item_id}/work")
def update_work(
    raw_item_id: int,
    status: str = Form(...),
    owner: str = Form(""),
    notes: str = Form(""),
) -> RedirectResponse:
    """Update the work status, owner and notes of one item.

    ``status`` must be one of the keys of ``_work_status_options()``; any
    other value is rejected without touching the database. ``owner`` and
    ``notes`` are stripped, as ``create_manual_item`` does.

    Returns:
        A 303 redirect to the dashboard (with a notice when rejected).
    """
    if status not in _work_status_options():
        # Form input is untrusted: never store a status the UI cannot show.
        return RedirectResponse("/?notice=无效的处理状态", status_code=303)
    settings = current_settings()
    now = int(time.time())
    with session(settings.database_path) as conn:
        # last_handled_at only moves when the status is not 'new'.
        conn.execute(
            """
UPDATE work_items
SET status = ?, owner = ?, notes = ?, updated_at = ?,
last_handled_at = CASE WHEN ? != 'new' THEN ? ELSE last_handled_at END
WHERE raw_item_id = ?
""",
            (status, owner.strip(), notes.strip(), now, status, now, raw_item_id),
        )
    return RedirectResponse("/", status_code=303)
def _run_sync_background(full: bool) -> None:
    """Run one sync in a worker thread and release ``sync_lock`` afterwards.

    The caller must already hold ``sync_lock`` (``sync`` acquires it before
    starting the thread).

    Args:
        full: Passed through to ``run_sync`` as its ``full`` argument.
    """
    try:
        # Loading settings happens inside the try: if it raised outside, the
        # lock would never be released and every later sync would be refused.
        settings = current_settings()
        with session(settings.database_path) as conn:
            run_sync(conn, settings, full=full)
    finally:
        sync_lock.release()
def _run_analysis_background(limit: int) -> None:
    """Run one analysis batch in a worker thread and release ``analysis_lock``.

    The caller must already hold ``analysis_lock`` (``analyze`` acquires it
    before starting the thread).

    Args:
        limit: Passed through to ``analyze_pending`` as its ``limit`` argument.
    """
    try:
        # Loading settings happens inside the try: if it raised outside, the
        # lock would never be released and every later batch would be refused.
        settings = current_settings()
        with session(settings.database_path) as conn:
            analyze_pending(conn, settings, limit=limit)
    finally:
        analysis_lock.release()
def _notice_text(stats: dict[str, Any]) -> str:
    """Format a stats mapping as ``key=value`` pairs for a notice.

    Args:
        stats: Mapping of counter name to value; may be empty or ``None``.

    Returns:
        ``"无待处理项目"`` when ``stats`` is empty, otherwise the pairs joined
        with ``", "`` so adjacent entries stay readable.
    """
    if not stats:
        return "无待处理项目"
    return ", ".join(f"{key}={value}" for key, value in stats.items())
def _last_update_text(last_success: Any, latest_collected: Any) -> str:
    """Pick the text shown as the dashboard's last update time.

    The ``finished_at`` of ``last_success`` is preferred, then the
    ``collected_at`` of ``latest_collected``. If neither is available the
    placeholder ``"暂无"`` is returned.
    """
    candidates = (
        (last_success, "finished_at"),
        (latest_collected, "collected_at"),
    )
    for row, column in candidates:
        if row and row[column]:
            return _fmt_ts(row[column])
    return "暂无"
def _metric(label: str, value: Any) -> str:
    """Render one metric tile: an escaped label plus an integer count.

    A falsy ``value`` (for example ``None``) is shown as ``0``.
    """
    count = int(value) if value else 0
    label_html = escape(label)
    return f'<div class="metric"><span>{label_html}</span><strong>{count}</strong></div>'
def _select(name: str, current: str, options: dict[str, str]) -> str:
    """Render a ``<select>`` element with ``current`` pre-selected.

    Args:
        name: Value of the element's ``name`` attribute (escaped).
        current: Option value to mark as ``selected``.
        options: Mapping of option value to visible label, in display order.
    """
    rendered = []
    for value, label in options.items():
        marker = "selected" if value == current else ""
        rendered.append(f'<option value="{escape(value)}" {marker}>{escape(label)}</option>')
    return f'<select name="{escape(name)}">' + "".join(rendered) + "</select>"
def _work_status_options() -> dict[str, str]:
    """Return a new mapping of work-status values to their display labels."""
    pairs = (
        ("new", "未处理"),
        ("read", "已读"),
        ("needs_reply", "待回复"),
        ("replied", "已回复"),
        ("needs_fix", "待修复"),
        ("archived", "已归档"),
    )
    return dict(pairs)
def _render_manual_form() -> str:
    """Return the HTML panel holding the manual-entry form.

    The form posts to ``/manual-items``; its status dropdown is built from
    ``_work_status_options()`` with ``new`` pre-selected.
    """
    return f"""
<section class="manual-panel">
<h2>手动添加社区信息</h2>
<form class="manual-form" method="post" action="/manual-items">
<input name="source_name" placeholder="来源社群/平台,例如 Discord、小红书、QQ群" required>
<input name="source_url" placeholder="原始链接,可留空">
<input name="title" placeholder="标题,可留空">
<input name="author_name" placeholder="作者/昵称,可留空">
<input name="published_at_text" placeholder="发布时间文本,可留空">
<textarea name="content" placeholder="正文/摘要" required></textarea>
{_select("status", "new", _work_status_options())}
<input name="owner" placeholder="制作人/处理人">
<input name="notes" placeholder="备注">
<button>添加</button>
</form>
</section>
"""
def _render_item(row: Any) -> str:
    """Render one item row as an ``<article>`` card.

    Args:
        row: Mapping-style row carrying the columns selected by ``index``
            (raw item fields plus analysis and work-item fields, any of which
            may be ``None``).

    Returns:
        HTML for the card, including the form that posts to
        ``/items/<id>/work``.
    """
    feedback_types = ", ".join(decode_json(row["feedback_types"], [])) if row["feedback_types"] else ""
    # Items recommended for reply or marked high priority get the urgent style.
    cls = "item urgent" if row["reply_recommended"] or row["priority"] == "high" else "item"
    badges = [
        _badge(row["content_type"] or "", "type"),
        _badge(row["sentiment"] or "pending", row["sentiment"] or ""),
        _badge(row["priority"] or "low", "priority"),
    ]
    if row["has_actionable_feedback"]:
        badges.append(_badge("具体反馈", "action"))
    if row["reply_recommended"]:
        badges.append(_badge("建议回复", "reply"))
    # Truncate the raw text first and escape afterwards: slicing text that is
    # already escaped could cut an entity such as "&amp;" in half.
    raw_content = row["content"] or ""
    if len(raw_content) > 900:
        raw_content = raw_content[:900] + "..."
    content = escape(raw_content)
    return f"""
<article class="{cls}">
<div class="item-head">
<div>
<h2>{escape(row['summary'] or row['title'] or '未分析')}</h2>
<div class="meta">{' '.join(badges)} <span>{escape(row['author_name'] or '')}</span> <span>{_fmt_ts(row['published_at']) or escape(row['published_at_text'] or '')}</span></div>
</div>
{_source_link(row['source_url'])}
</div>
<p class="content">{content}</p>
<p class="reason">{escape(row['reason'] or '')}</p>
<p class="reply-suggestion">{escape(row['reply_suggestion'] or '')}</p>
<p class="types">{escape(feedback_types)}</p>
<form class="work" method="post" action="/items/{row['id']}/work">
{_select("status", row["status"] or "new", _work_status_options())}
<input name="owner" placeholder="制作人/处理人" value="{escape(row['owner'] or '')}">
<input name="notes" placeholder="备注" value="{escape(row['notes'] or '')}">
<button>保存</button>
</form>
</article>
"""
def _source_link(source_url: str | None) -> str:
    """Render the "original link" element for an item.

    A missing URL gives a muted placeholder. A value that does not start with
    ``http://`` or ``https://`` is shown as escaped muted text, not a link.
    Otherwise an anchor opening in a new tab is returned.
    """
    if not source_url:
        return '<span class="source muted">无原始链接</span>'
    safe_url = escape(source_url)
    is_web_link = source_url.startswith("http://") or source_url.startswith("https://")
    if is_web_link:
        return f'<a class="source" href="{safe_url}" target="_blank" rel="noreferrer">原始链接</a>'
    return f'<span class="source muted">{safe_url}</span>'
# Stylesheet text that index() inlines into the page's <style> element.
CSS = """
:root {
color-scheme: light;
font-family: Inter, "Segoe UI", "Microsoft YaHei", sans-serif;
background: #f6f7f9;
color: #1f2933;
}
body {
margin: 0;
}
header {
display: flex;
justify-content: space-between;
gap: 24px;
align-items: center;
padding: 24px 32px;
background: #ffffff;
border-bottom: 1px solid #d9dee7;
}
h1 {
margin: 0 0 4px;
font-size: 24px;
}
p {
line-height: 1.5;
}
header p {
margin: 0;
color: #64748b;
}
.actions {
display: flex;
gap: 8px;
flex-wrap: wrap;
}
button, .button, select, input, textarea {
min-height: 36px;
border: 1px solid #cbd5e1;
border-radius: 6px;
padding: 0 12px;
background: #fff;
font: inherit;
}
button, .button {
display: inline-flex;
align-items: center;
background: #166534;
color: white;
border-color: #166534;
cursor: pointer;
text-decoration: none;
}
button.secondary, .button.secondary {
background: #334155;
border-color: #334155;
}
.metrics {
display: grid;
grid-template-columns: repeat(6, minmax(120px, 1fr));
gap: 12px;
padding: 18px 32px;
}
.metric {
background: #fff;
border: 1px solid #d9dee7;
border-radius: 8px;
padding: 14px;
}
.metric span {
display: block;
color: #64748b;
font-size: 13px;
}
.metric strong {
display: block;
font-size: 26px;
margin-top: 6px;
}
.filters {
display: flex;
gap: 10px;
flex-wrap: wrap;
align-items: center;
padding: 0 32px 18px;
}
.manual-panel {
margin: 0 32px 18px;
padding: 18px;
border: 1px solid #d9dee7;
border-radius: 8px;
background: #fff;
}
.manual-panel h2 {
margin: 0 0 12px;
font-size: 17px;
}
.manual-form {
display: grid;
grid-template-columns: repeat(3, minmax(160px, 1fr));
gap: 10px;
}
.manual-form textarea {
grid-column: 1 / -1;
min-height: 120px;
padding: 10px 12px;
resize: vertical;
}
.notice {
margin: 0 32px 18px;
padding: 12px 14px;
border: 1px solid #86efac;
border-radius: 8px;
background: #f0fdf4;
color: #166534;
}
main {
display: grid;
gap: 14px;
padding: 0 32px 24px;
}
.item {
background: #fff;
border: 1px solid #d9dee7;
border-radius: 8px;
padding: 18px;
}
.item.urgent {
border-color: #dc2626;
box-shadow: inset 4px 0 0 #dc2626;
}
.item-head {
display: flex;
justify-content: space-between;
gap: 16px;
align-items: flex-start;
}
.item h2 {
margin: 0 0 8px;
font-size: 17px;
}
.meta {
display: flex;
gap: 8px;
align-items: center;
flex-wrap: wrap;
color: #64748b;
font-size: 13px;
}
.badge {
display: inline-flex;
align-items: center;
min-height: 24px;
padding: 0 8px;
border-radius: 999px;
background: #e2e8f0;
color: #334155;
}
.badge.negative, .badge.reply {
background: #fee2e2;
color: #991b1b;
}
.badge.positive {
background: #dcfce7;
color: #166534;
}
.badge.action {
background: #fef3c7;
color: #92400e;
}
.source {
color: #166534;
white-space: nowrap;
}
.source.muted {
color: #64748b;
}
.content {
white-space: pre-wrap;
}
.reason, .reply-suggestion, .types {
color: #475569;
margin: 8px 0;
}
.reply-suggestion {
font-weight: 600;
}
.work {
display: grid;
grid-template-columns: 150px minmax(140px, 220px) 1fr 80px;
gap: 8px;
margin-top: 12px;
}
aside {
padding: 0 32px 32px;
color: #475569;
}
.empty {
background: #fff;
border: 1px solid #d9dee7;
border-radius: 8px;
padding: 32px;
}
@media (max-width: 900px) {
header, .item-head {
flex-direction: column;
}
.metrics {
grid-template-columns: repeat(2, minmax(120px, 1fr));
}
.work {
grid-template-columns: 1fr;
}
.manual-form {
grid-template-columns: 1fr;
}
}
"""