Add Dashboard Codex voice input

2026-06-29 16:28:06 +08:00 · 2026-06-29 16:28:06 +08:00 · 2f29d78b5a
commit 2f29d78b5a
parent fb3834240d
4 changed files with 296 additions and 2 deletions
--- a/Tools/Dashboard/css/style.css
+++ b/Tools/Dashboard/css/style.css
@ -4087,6 +4087,19 @@ body::after {
    flex-wrap: wrap;
 }

+/* Voice-input toggle while the "recording" class is set: solid red with white text. */
+.codex-voice-btn.recording {
+    border-color: #dc2626;
+    background: #dc2626;
+    color: #fff;
+}
+
+/* Inline status text for voice input; min-height presumably keeps the row from collapsing while the text is empty. */
+.codex-voice-status {
+    color: var(--text-secondary);
+    font-size: 12px;
+    font-weight: 700;
+    min-height: 18px;
+}
+
 .codex-job-status {
    color: var(--text-secondary);
    font-size: 12px;
--- a/Tools/Dashboard/index.html
+++ b/Tools/Dashboard/index.html
@ -837,15 +837,17 @@
                    <div class="codex-detail-head">
                        <div>
                            <div class="codex-detail-title" id="codex-detail-title">选择一个 TH1 会话</div>
-                            <div class="codex-detail-meta" id="codex-detail-meta">本面板只显示工作目录为 C:\TH1\TH1 的本机 Codex 会话。</div>
+                            <div class="codex-detail-meta" id="codex-detail-meta">本面板读取 ~/.codex/sessions 的本机 CLI 会话，只显示工作目录为 C:\TH1\TH1 的记录；这些记录不一定出现在桌面版侧栏线程里。</div>
                        </div>
                    </div>

                    <div class="codex-runner">
                        <textarea id="codex-prompt" class="codex-prompt" rows="5" placeholder="输入要交给本机 Codex 执行的任务..."></textarea>
                        <div class="codex-runner-actions">
+                            <button class="bug-btn codex-voice-btn" id="codex-voice-toggle" type="button">语音输入</button>
                            <button class="bug-btn" id="codex-run-new" type="button">新会话执行</button>
                            <button class="bug-btn bug-btn-primary" id="codex-run-resume" type="button" disabled>续接所选会话执行</button>
+                            <span id="codex-voice-status" class="codex-voice-status"></span>
                            <span id="codex-job-status" class="codex-job-status"></span>
                        </div>
                        <pre id="codex-job-output" class="codex-job-output"></pre>
--- a/Tools/Dashboard/js/codex_threads.js
+++ b/Tools/Dashboard/js/codex_threads.js
@ -6,6 +6,9 @@ let codexSessions = [];
 let codexLoaded = false;
 let codexSelectedId = '';
 let codexPollTimer = null;
+// Active MediaRecorder for voice input, or null when no recording is in progress.
+let codexRecorder = null;
+// Audio chunks collected from the recorder's "dataavailable" events.
+let codexVoiceChunks = [];
+// Microphone MediaStream in use, kept so its tracks can be stopped during cleanup.
+let codexVoiceStream = null;

 function codexEsc(value) {
    const div = document.createElement('div');
@ -24,6 +27,145 @@ function codexShortId(id) {
    return id ? id.slice(0, 8) : '';
 }

+function codexPreferredAudioMimeType() {
+    const candidates = [
+        'audio/webm;codecs=opus',
+        'audio/webm',
+        'audio/mp4',
+        'audio/wav',
+    ];
+    if (!window.MediaRecorder || !MediaRecorder.isTypeSupported) return '';
+    return candidates.find(type => MediaRecorder.isTypeSupported(type)) || '';
+}
+
+function codexAudioExtension(mimeType) {
+    const lower = (mimeType || '').toLowerCase();
+    if (lower.includes('mp4')) return 'm4a';
+    if (lower.includes('wav')) return 'wav';
+    if (lower.includes('mpeg') || lower.includes('mp3')) return 'mp3';
+    if (lower.includes('ogg')) return 'ogg';
+    return 'webm';
+}
+
+function codexSetVoiceStatus(text) {
+    const el = document.getElementById('codex-voice-status');
+    if (el) el.textContent = text || '';
+}
+
+function codexSetVoiceRecording(recording) {
+    const btn = document.getElementById('codex-voice-toggle');
+    if (!btn) return;
+    btn.classList.toggle('recording', recording);
+    btn.textContent = recording ? '停止录音' : '语音输入';
+}
+
+function codexCleanupVoiceStream() {
+    if (!codexVoiceStream) return;
+    codexVoiceStream.getTracks().forEach(track => track.stop());
+    codexVoiceStream = null;
+}
+
+function codexBlobToBase64(blob) {
+    return new Promise((resolve, reject) => {
+        const reader = new FileReader();
+        reader.onload = () => {
+            const value = String(reader.result || '');
+            resolve(value.includes(',') ? value.split(',', 2)[1] : value);
+        };
+        reader.onerror = () => reject(reader.error || new Error('读取录音失败'));
+        reader.readAsDataURL(blob);
+    });
+}
+
+function codexAppendPromptText(text) {
+    const promptEl = document.getElementById('codex-prompt');
+    if (!promptEl || !text) return;
+    const current = promptEl.value || '';
+    const separator = current.trim() ? (current.endsWith('\n') ? '' : '\n') : '';
+    promptEl.value = current + separator + text.trim();
+    promptEl.focus();
+    promptEl.selectionStart = promptEl.selectionEnd = promptEl.value.length;
+}
+
+async function codexStartVoiceInput() {
+    if (!navigator.mediaDevices?.getUserMedia || !window.MediaRecorder) {
+        codexSetVoiceStatus('当前浏览器不支持录音');
+        return;
+    }
+    try {
+        codexVoiceChunks = [];
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        codexVoiceStream = stream;
+        const mimeType = codexPreferredAudioMimeType();
+        codexRecorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined);
+        codexRecorder.addEventListener('dataavailable', event => {
+            if (event.data && event.data.size > 0) codexVoiceChunks.push(event.data);
+        });
+        codexRecorder.addEventListener('stop', () => {
+            const blob = new Blob(codexVoiceChunks, { type: codexRecorder?.mimeType || mimeType || 'audio/webm' });
+            codexRecorder = null;
+            codexCleanupVoiceStream();
+            codexSetVoiceRecording(false);
+            codexFinishVoiceInput(blob);
+        });
+        codexRecorder.addEventListener('error', event => {
+            codexSetVoiceStatus(event.error?.message || '录音失败');
+            codexRecorder = null;
+            codexCleanupVoiceStream();
+            codexSetVoiceRecording(false);
+        });
+        codexRecorder.start();
+        codexSetVoiceRecording(true);
+        codexSetVoiceStatus('录音中，再点一次停止');
+    } catch (err) {
+        codexRecorder = null;
+        codexCleanupVoiceStream();
+        codexSetVoiceRecording(false);
+        codexSetVoiceStatus(err?.message || '无法启动麦克风');
+    }
+}
+
+function codexStopVoiceInput() {
+    if (codexRecorder && codexRecorder.state !== 'inactive') {
+        codexRecorder.stop();
+    }
+}
+
+async function codexToggleVoiceInput() {
+    if (codexRecorder && codexRecorder.state === 'recording') {
+        codexStopVoiceInput();
+    } else {
+        await codexStartVoiceInput();
+    }
+}
+
+async function codexFinishVoiceInput(blob) {
+    if (!blob || blob.size === 0) {
+        codexSetVoiceStatus('没有录到声音');
+        return;
+    }
+    codexSetVoiceStatus('正在转写...');
+    try {
+        const audioBase64 = await codexBlobToBase64(blob);
+        const resp = await fetch('/api/codex/transcribe', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({
+                audioBase64,
+                mimeType: blob.type || 'audio/webm',
+                filename: `codex-voice-${Date.now()}.${codexAudioExtension(blob.type)}`,
+                language: 'zh',
+            }),
+        });
+        const data = await resp.json();
+        if (!resp.ok || !data.success) throw new Error(data.error || `HTTP ${resp.status}`);
+        codexAppendPromptText(data.text || '');
+        codexSetVoiceStatus(data.model ? `已转成文字 · ${data.model}` : '已转成文字');
+    } catch (err) {
+        codexSetVoiceStatus(err?.message || '转写失败');
+    }
+}
+
 async function codexLoadSessions(force = false) {
    if (codexLoaded && !force) return;
    const list = document.getElementById('codex-session-list');
@ -113,7 +255,7 @@ async function codexSelectSession(id) {
        const session = data.session || {};
        if (title) title.textContent = session.title || session.id || 'Codex 会话';
        if (meta) {
-            meta.textContent = `${codexFormatDate(session.updated_at || session.created_at)} · ${session.id || ''} · ${session.archived ? '归档' : '当前'}`;
+            meta.textContent = `${codexFormatDate(session.updated_at || session.created_at)} · ${session.id || ''} · ${session.archived ? '归档' : '当前'} · 本机 CLI 会话`;
        }
        codexRenderMessages(data.messages || []);
    } catch (err) {
@ -237,6 +379,11 @@ function codexBind() {
        refresh.dataset.bound = '1';
        refresh.addEventListener('click', () => codexLoadSessions(true));
    }
+    const voice = document.getElementById('codex-voice-toggle');
+    if (voice && !voice.dataset.bound) {
+        voice.dataset.bound = '1';
+        voice.addEventListener('click', codexToggleVoiceInput);
+    }
    const runNew = document.getElementById('codex-run-new');
    if (runNew && !runNew.dataset.bound) {
        runNew.dataset.bound = '1';
@ -261,6 +408,7 @@ function codexBind() {
    if (panel) {
        observer.observe(panel, { attributes: true, attributeFilter: ['class'] });
    }
+    window.addEventListener('beforeunload', codexCleanupVoiceStream);
    document.addEventListener('DOMContentLoaded', () => {
        codexBind();
        const panelNow = document.getElementById('panel-codex');
--- a/Tools/Dashboard/serve.py
+++ b/Tools/Dashboard/serve.py
@ -54,6 +54,8 @@ Usage:
 """

 import http.server
+import base64
+import binascii
 import csv
 import json
 import os
@ -103,6 +105,10 @@ CODEX_HOME = os.path.join(os.path.expanduser('~'), '.codex')
 CODEX_SESSIONS_DIR = os.path.join(CODEX_HOME, 'sessions')
 CODEX_ARCHIVED_SESSIONS_DIR = os.path.join(CODEX_HOME, 'archived_sessions')
 CODEX_SESSION_INDEX = os.path.join(CODEX_HOME, 'session_index.jsonl')
+CODEX_TRANSCRIBE_MAX_BYTES = 25 * 1024 * 1024
+OPENROUTER_ENV_FILE = os.path.join(SCRIPT_DIR, 'private', 'community_monitor.env')
+OPENROUTER_TRANSCRIBE_ENDPOINT = 'https://openrouter.ai/api/v1/audio/transcriptions'
+OPENROUTER_TRANSCRIBE_DEFAULT_MODEL = 'openai/whisper-large-v3'
 CODEX_JOBS = {}
 CODEX_JOBS_LOCK = threading.Lock()
 COMMUNITY_MONITOR_API = None
@ -418,6 +424,30 @@ def _read_jsonl_records(path, limit=None):
    return records


+def _load_dashboard_env_file(path):
+    if not os.path.exists(path):
+        return
+    try:
+        with open(path, 'r', encoding='utf-8-sig', errors='replace') as f:
+            for raw_line in f:
+                line = raw_line.strip()
+                if not line or line.startswith('#') or '=' not in line:
+                    continue
+                key, value = line.split('=', 1)
+                key = key.strip()
+                value = value.strip().strip('"').strip("'")
+                if key and key not in os.environ:
+                    os.environ[key] = value
+    except Exception:
+        pass
+
+
+def _openrouter_setting(name, default=''):
+    if name not in os.environ:
+        _load_dashboard_env_file(OPENROUTER_ENV_FILE)
+    return os.environ.get(name, default)
+
+
 def _message_text(content):
    if isinstance(content, str):
        return content
@ -432,6 +462,90 @@ def _message_text(content):
    return '\n'.join(part for part in parts if part)


+def _decode_codex_audio_base64(value):
+    if not isinstance(value, str) or not value.strip():
+        raise ValueError('audioBase64 required')
+    cleaned = value.strip()
+    if ',' in cleaned and cleaned[:80].lower().startswith('data:'):
+        cleaned = cleaned.split(',', 1)[1]
+    try:
+        data = base64.b64decode(cleaned, validate=True)
+    except (binascii.Error, ValueError):
+        raise ValueError('invalid audioBase64')
+    if not data:
+        raise ValueError('audio is empty')
+    if len(data) > CODEX_TRANSCRIBE_MAX_BYTES:
+        raise ValueError('audio too large')
+    return data
+
+
+def _codex_audio_format(filename='', mime_type=''):
+    source = f'{mime_type or ""} {filename or ""}'.lower()
+    for fmt in ('webm', 'wav', 'mp3', 'flac', 'm4a', 'ogg', 'aac'):
+        if fmt in source:
+            return fmt
+    return 'webm'
+
+
+def _codex_transcribe_audio(audio_base64, filename='', mime_type='', language=''):
+    api_key = _openrouter_setting('OPENROUTER_API_KEY', '').strip()
+    if not api_key:
+        raise RuntimeError('OPENROUTER_API_KEY is not configured')
+
+    audio_bytes = _decode_codex_audio_base64(audio_base64)
+    model = _openrouter_setting('OPENROUTER_TRANSCRIBE_MODEL', OPENROUTER_TRANSCRIBE_DEFAULT_MODEL).strip()
+    if not model:
+        model = OPENROUTER_TRANSCRIBE_DEFAULT_MODEL
+
+    payload = {
+        'model': model,
+        'input_audio': {
+            'data': base64.b64encode(audio_bytes).decode('ascii'),
+            'format': _codex_audio_format(filename, mime_type),
+        },
+    }
+    if language:
+        payload['language'] = str(language)[:32]
+
+    body = json.dumps(payload, ensure_ascii=False).encode('utf-8')
+    request = urllib.request.Request(
+        OPENROUTER_TRANSCRIBE_ENDPOINT,
+        data=body,
+        headers={
+            'Authorization': f'Bearer {api_key}',
+            'Content-Type': 'application/json',
+            'HTTP-Referer': _openrouter_setting('OPENROUTER_REFERER', 'http://localhost:8080'),
+            'X-Title': _openrouter_setting('OPENROUTER_TITLE', 'TH1 Dashboard Codex Voice Input'),
+        },
+        method='POST',
+    )
+    try:
+        with urllib.request.urlopen(request, timeout=120) as resp:
+            raw = resp.read().decode('utf-8', errors='replace')
+    except urllib.error.HTTPError as e:
+        detail = e.read().decode('utf-8', errors='replace')[:1200]
+        raise RuntimeError(f'OpenRouter transcription failed: HTTP {e.code} {detail}')
+    except urllib.error.URLError as e:
+        raise RuntimeError(f'OpenRouter transcription failed: {e.reason}')
+
+    try:
+        data = json.loads(raw)
+    except Exception:
+        raise RuntimeError('OpenRouter transcription returned invalid JSON')
+    text = str(data.get('text') or '').strip()
+    if not text and isinstance(data.get('choices'), list) and data['choices']:
+        choice = data['choices'][0] or {}
+        message = choice.get('message') if isinstance(choice.get('message'), dict) else {}
+        text = str(message.get('content') or choice.get('text') or '').strip()
+    if not text:
+        raise RuntimeError('OpenRouter transcription returned empty text')
+    return {
+        'text': text,
+        'model': data.get('model') or model,
+        'duration': data.get('duration'),
+    }
+
+
 def _is_codex_context_message(role, text):
    if role != 'user':
        return False
@ -1112,6 +1226,8 @@ class DashboardHandler(http.server.SimpleHTTPRequestHandler):
            self._handle_dashboard_preferences_save()
        elif self.path == '/api/codex/run':
            self._handle_codex_run()
+        elif self.path == '/api/codex/transcribe':
+            self._handle_codex_transcribe()
        elif self.path.startswith('/api/community-monitor/'):
            self._handle_community_monitor_post()
        # SNS APIs
@ -1516,6 +1632,21 @@ class DashboardHandler(http.server.SimpleHTTPRequestHandler):
        except Exception as e:
            self._send_json({'success': False, 'error': str(e)}, 500)

+    def _handle_codex_transcribe(self):
+        try:
+            payload = self._read_json_body()
+            result = _codex_transcribe_audio(
+                payload.get('audioBase64'),
+                filename=str(payload.get('filename') or ''),
+                mime_type=str(payload.get('mimeType') or ''),
+                language=str(payload.get('language') or 'zh'),
+            )
+            self._send_json({'success': True, **result})
+        except ValueError as e:
+            self._send_json({'success': False, 'error': str(e)}, 400)
+        except Exception as e:
+            self._send_json({'success': False, 'error': str(e)}, 500)
+
    def _handle_codex_job_get(self):
        try:
            parsed = urlparse(self.path)