Spaces:

freuddeveloper
/

freud-zero-mvp

Sleeping

App Files Files Community

Feng Chike Claude Opus 4.6 (1M context) committed on Mar 21

Commit

408f650

0 Parent(s):

Freud Zero MVP: 心理咨询AI系统（清洁部署）

Browse files

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (13) hide show

.gitignore +14 -0
README.md +14 -0
app.py +4 -0
counselor.py +152 -0
evaluator.py +35 -0
main.py +227 -0
mcts_reasoner.py +184 -0
prompts.py +295 -0
requirements.txt +5 -0
session_logger.py +38 -0
strategic_advisor.py +533 -0
strategy_visualizer.py +126 -0
supervisor_advisor.py +122 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,14 @@

+venv/
+__pycache__/
+.gradio/
+.DS_Store
+sessions/
+*.pyc
+.env
+test_*.py
+test.py
+*.output
+.claude/
+.worktrees/
+INIT.md
+freud_zero_3day_plan.md

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+title: Freud-Zero MVP
+emoji: 🧠
+colorFrom: indigo
+colorTo: purple
+sdk: gradio
+sdk_version: "5.0"
+app_file: app.py
+pinned: false
+---
+# Freud-Zero MVP
+精神动力学取向回应性咨询师 · V4 PUCT 战略推理版

app.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from main import app
+# Script entry point: serve the Gradio app imported from main on all interfaces, port 7860.
+if __name__ == "__main__":
+    app.launch(server_name="0.0.0.0", server_port=7860)

counselor.py ADDED Viewed

	@@ -0,0 +1,152 @@

+import os
+import time
+import threading
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
+from langchain_openai import ChatOpenAI
+from prompts import COUNSELOR_SYSTEM_PROMPT, STRATEGIC_GUIDANCE_TEMPLATE
+from evaluator import DisclosureEvaluator
+from strategic_advisor import StrategicAdvisor
+from session_logger import SessionLogger
+from strategy_visualizer import StrategyVisualizer
+class PsychodynamicCounselor:
+    """Counselor that pairs a chat model with per-turn disclosure scoring and strategic guidance.
+    Every respond() call scores the client's message, runs the strategic advisor
+    synchronously, inserts the advisor's guidance as an extra system message,
+    queries the chat model and logs the turn.
+    """
+    # Attempts for the front-line model call and the pause between them (seconds).
+    _MAX_LLM_ATTEMPTS = 3
+    _RETRY_PAUSE_SECONDS = 1
+    def __init__(self):
+        self.llm = ChatOpenAI(
+            model="qwen-plus",
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+            api_key=os.getenv("DASHSCOPE_API_KEY"),
+            temperature=0.7,
+        )
+        self.evaluator = DisclosureEvaluator()
+        self.advisor = StrategicAdvisor()
+        self.logger = SessionLogger()
+        self.visualizer = StrategyVisualizer()
+        self.history = [SystemMessage(content=COUNSELOR_SYSTEM_PROMPT)]
+        self.turn_number = 0
+        self.current_guidance = None
+        self._last_disclosure_score = 1  # current disclosure level, passed to the advisor
+        self._last_dimensions = {}  # dimension flags A-E from the latest evaluation
+        self._last_reasoning = ""  # evaluator's reasoning for the latest score
+        self._disclosure_history = []  # one disclosure score per turn
+        self._last_trace_stats = None  # summary of the latest strategic-reasoning run
+        self._pending_trace = None  # trace published by the background code path
+        self._bg_thread = None
+        self._lock = threading.Lock()
+    def _inject_guidance(self):
+        """No-op hook for the background path.
+        current_guidance is already updated by then and respond() inserts the
+        guidance when it builds the model input, so history is left untouched.
+        """
+        pass
+    def _store_guidance(self, best, guidance):
+        """Store the advisor's recommendation in current_guidance.
+        Returns True when `best` has a positive score and guidance was stored;
+        otherwise returns False and leaves current_guidance as it was.
+        """
+        if not best or best.get("score", 0) <= 0:
+            return False
+        guidance = guidance or {}
+        self.current_guidance = {
+            # Only fall back to the winning seed when no direction was distilled,
+            # so a missing "seed" key cannot fail a run that has a direction.
+            "direction": guidance["direction"] if "direction" in guidance else best["seed"],
+            "principles": guidance.get("principles", []),
+            "evidence": guidance.get("evidence", ""),
+        }
+        return True
+    def _run_strategic_reasoning(self, history_snapshot, current_disclosure):
+        """Background-thread variant: run the advisor, then publish guidance and trace.
+        respond() does not use this method; it runs the advisor synchronously.
+        Any failure is printed and swallowed so it only skips this round's guidance.
+        """
+        print(f"[战略推理] 第{self.turn_number}轮触发，当前揭露={current_disclosure}，开始后台推理...")
+        try:
+            best, guidance, strategic_trace = self.advisor.run(history_snapshot, current_disclosure)
+            with self._lock:
+                if self._store_guidance(best, guidance):
+                    guidance = guidance or {}
+                    print(f"[战略推理] 完成! 选中: {best['id']}.{best['branch']} score={best['score']} delta={best['delta']}")
+                    print(f"  方向: {guidance.get('direction', '?')}")
+                    for p in guidance.get("principles", []):
+                        print(f"  原则: {p}")
+                else:
+                    print("[战略推理] 完成，但未产生有效方向建议")
+                self._pending_trace = strategic_trace
+                # Render the visual report for this round.
+                self.visualizer.render(strategic_trace, self.turn_number)
+        except Exception as e:
+            print(f"[战略推理] 后台推理失败，跳过本轮: {e}")
+    def _run_sync_reasoning(self):
+        """Run the advisor on the current history; return its trace, or None on failure."""
+        print(f"[战略推理] 第{self.turn_number}轮，当前揭露={self._last_disclosure_score}，同步推理中...")
+        try:
+            best, guidance, strategic_trace = self.advisor.run(
+                list(self.history), self._last_disclosure_score
+            )
+            guidance = guidance or {}
+            if self._store_guidance(best, guidance):
+                # .get() here so a missing log-only field cannot discard the trace.
+                print(f"[战略推理] 完成! {best.get('id', '?')}.{best.get('branch', '?')} score={best['score']} delta={best.get('delta', '?')}")
+            self._last_trace_stats = {
+                "total_paths": len(strategic_trace.get("candidates", [])),
+                "deep_paths": len(strategic_trace.get("deep_paths", [])),
+                "seeds": list(strategic_trace.get("seeds", {}).keys()),
+                "selected": strategic_trace.get("selected", ""),
+                "timing": strategic_trace.get("timing", {}),
+                "best_score": best.get("score", 0) if best else 0,
+                "best_delta": best.get("delta", 0) if best else 0,
+                "predicted_disclosure": guidance.get("disclosure_level", best.get("score", "?")) if best else "?",
+            }
+            self.visualizer.render(strategic_trace, self.turn_number)
+            return strategic_trace
+        except Exception as e:
+            print(f"[战略推理] 推理失败，跳过: {e}")
+            return None
+    def _build_messages(self):
+        """Return the model input: history with guidance inserted before the newest user message."""
+        if not self.current_guidance:
+            print(f"[前台] 第{self.turn_number}轮 | 无督导指令")
+            return self.history
+        principles_text = "\n".join(f"- {p}" for p in self.current_guidance.get("principles", []))
+        guidance_text = STRATEGIC_GUIDANCE_TEMPLATE.replace(
+            "{direction}", self.current_guidance["direction"]
+        ).replace("{principles}", principles_text
+        ).replace("{evidence}", self.current_guidance.get("evidence", ""))
+        print(f"[前台] 第{self.turn_number}轮 | 督导方向已注入: {self.current_guidance['direction'][:40]}")
+        return self.history[:-1] + [SystemMessage(content=guidance_text)] + [self.history[-1]]
+    def _invoke_with_retry(self, messages):
+        """Call the front-line model, retrying on any error; re-raise the final failure."""
+        for attempt in range(1, self._MAX_LLM_ATTEMPTS + 1):
+            try:
+                return self.llm.invoke(messages)
+            except Exception as e:
+                if attempt == self._MAX_LLM_ATTEMPTS:
+                    raise
+                print(f"[前台] 调用失败({e}), 重试中...")
+                time.sleep(self._RETRY_PAUSE_SECONDS)
+    def respond(self, user_message):
+        """Produce the counselor's reply to `user_message` and log the turn.
+        Returns the reply text. Re-raises whatever the evaluator raises, or the
+        chat model raises once retries are exhausted; in that case the turn is
+        rolled back so a resend does not leave two consecutive user messages
+        in history.
+        """
+        self.turn_number += 1
+        self.history.append(HumanMessage(content=user_message))
+        scores_before = len(self._disclosure_history)
+        try:
+            # Drop any trace left by the background path; the synchronous run below supersedes it.
+            with self._lock:
+                self._pending_trace = None
+            # Score the disclosure depth of the client's current message.
+            disclosure_result = self.evaluator.evaluate_disclosure(user_message)
+            self._last_disclosure_score = disclosure_result["score"]
+            self._last_dimensions = disclosure_result.get("dimensions", {})
+            self._last_reasoning = disclosure_result.get("reasoning", "")
+            self._disclosure_history.append(disclosure_result["score"])
+            # Reason first, then answer using whatever guidance is current.
+            logged_trace = self._run_sync_reasoning()
+            response = self._invoke_with_retry(self._build_messages())
+        except Exception:
+            # Undo this turn: nothing else appended to history inside the try block.
+            del self.history[-1]
+            del self._disclosure_history[scores_before:]
+            self.turn_number -= 1
+            raise
+        self.history.append(AIMessage(content=response.content))
+        self.logger.log_turn(
+            self.turn_number,
+            user_message,
+            response.content,
+            disclosure_result["score"],
+            disclosure_result["dimensions"],
+            disclosure_result["reasoning"],
+            mcts_trace=logged_trace,
+        )
+        return response.content
+    def get_session_filepath(self):
+        """Return the path of this session's JSON log file."""
+        return self.logger.get_filepath()

evaluator.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import json
+import os
+from langchain_openai import ChatOpenAI
+from prompts import DISCLOSURE_EVAL_PROMPT
+class DisclosureEvaluator:
+    """Scores the self-disclosure depth of a client message on a 1-10 scale using an LLM."""
+    def __init__(self):
+        self.llm = ChatOpenAI(
+            model="qwen-turbo",
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+            api_key=os.getenv("DASHSCOPE_API_KEY"),
+            temperature=0.0,
+            max_tokens=256,
+        )
+    def evaluate_disclosure(self, user_message, max_retries=2):
+        """Return {"score", "dimensions", "reasoning"} for `user_message`.
+        The model is asked up to max_retries + 1 times. A reply that is not a
+        usable JSON object triggers another attempt; when every attempt fails
+        a score of 1 with empty dimensions is returned. The score is clamped
+        to 1-10. Errors raised by the model call itself are not caught here.
+        """
+        prompt = DISCLOSURE_EVAL_PROMPT.replace("{user_message}", user_message)
+        for _ in range(max_retries + 1):
+            llm_message = self.llm.invoke(prompt)
+            try:
+                content = llm_message.content.strip()
+                # Keep only the outermost {...}; with no braces the slice is empty and json.loads raises.
+                result = json.loads(content[content.find("{"):content.rfind("}") + 1])
+                score = max(1, min(10, int(result.get("score", 1))))
+                dimensions = result.get("dimensions", {})
+                reasoning = result.get("reasoning", "")
+            except (ValueError, TypeError, AttributeError):
+                # ValueError: invalid JSON or a non-numeric score string.
+                # TypeError: a null or list score. AttributeError: JSON that is not an object.
+                continue
+            return {
+                "score": score,
+                # Callers read dimensions with .get(), so never pass through a non-dict.
+                "dimensions": dimensions if isinstance(dimensions, dict) else {},
+                "reasoning": reasoning,
+            }
+        return {"score": 1, "dimensions": {}, "reasoning": "评估解析失败"}

main.py ADDED Viewed

	@@ -0,0 +1,227 @@

+import json
+import gradio as gr
+from pathlib import Path
+from counselor import PsychodynamicCounselor
+# Active counselor, created by start_session()/chat() and cleared by end_session().
+# NOTE(review): this is a module-level global, so every browser session served by this
+# process appears to share one conversation — confirm, or move the instance into gr.State.
+counselor = None
+def build_status_panel(c):
+    """Build the Markdown session-monitor panel for counselor `c`.
+    Reads c.turn_number, c._last_disclosure_score, c._disclosure_history,
+    c._last_dimensions, c.current_guidance, c._last_trace_stats and
+    c._last_reasoning, and returns the whole panel as one Markdown string.
+    """
+    score = c._last_disclosure_score
+    turn = c.turn_number
+    # Disclosure-depth progress bar, clamped so an out-of-range score cannot distort it.
+    bar_cells = max(0, min(10, score))
+    filled = "█" * bar_cells + "░" * (10 - bar_cells)
+    # Trend relative to the previous turn.
+    history = c._disclosure_history
+    if len(history) >= 2:
+        diff = history[-1] - history[-2]
+        trend = "↑" if diff > 0 else ("↓" if diff < 0 else "→")
+    else:
+        trend = "·"
+    # Dimension indicator lights.
+    dims = c._last_dimensions
+    dim_labels = {
+        "A": "具体事件", "B": "情绪表达", "C": "具体情绪",
+        "D": "自我反思", "E": "回避触及",
+    }
+    dim_line = "  ".join(
+        f"{'🟢' if dims.get(k, False) else '⚫'} {k}:{label}"
+        for k, label in dim_labels.items()
+    )
+    # Sparkline of the last 20 scores. There are 9 glyphs (indices 0-8) for a 1-10
+    # scale, so clamp to the last glyph; indexing with 9 raised IndexError for 9 and 10.
+    spark_chars = " ▁▂▃▄▅▆▇█"
+    top = len(spark_chars) - 1
+    spark = "".join(spark_chars[max(0, min(s, top))] for s in history[-20:]) or "—"
+    lines = [
+        "### 🧠 SESSION MONITOR",
+        "",
+        "| 指标 | 值 |",
+        "|:---|:---|",
+        f"| **轮次** | `{turn}` |",
+        f"| **揭露深度** | `{filled}` **{score}/10** {trend} |",
+        f"| **深度轨迹** | `{spark}` |",
+        "",
+        f"**维度分析**　{dim_line}",
+    ]
+    # Supervisor guidance, plus reasoning-engine statistics when available.
+    if c.current_guidance:
+        g = c.current_guidance
+        direction = g.get("direction", "—")
+        principles = g.get("principles", [])
+        evidence = g.get("evidence", "")
+        lines.append("")
+        lines.append("---")
+        lines.append("#### ▸ 督导指令")
+        lines.append(f"> **方向**: {direction}")
+        if principles:
+            lines.append(">")
+            for p in principles[:3]:
+                lines.append(f"> · {p}")
+        if evidence:
+            lines.append(">")
+            lines.append(f"> **证据**: {evidence[:80]}{'...' if len(evidence) > 80 else ''}")
+        ts = c._last_trace_stats
+        if ts:
+            timing = ts.get("timing", {})
+            total_s = timing.get("total_seconds", 0)
+            total_paths = ts.get("total_paths", 0)
+            deep = ts.get("deep_paths", 0)
+            seeds = ts.get("seeds", [])
+            selected = ts.get("selected", "")
+            best_score = ts.get("best_score", 0)
+            best_delta = ts.get("best_delta", 0)
+            lines.append("")
+            lines.append("#### ▸ PUCT 推理引擎")
+            lines.append("| 参数 | 值 |")
+            lines.append("|:---|:---|")
+            lines.append("| **搜索树** | L1→L2→L3→L4→L5→L6 (6层) |")
+            lines.append(f"| **种子方向** | {', '.join(seeds)} |")
+            lines.append(f"| **候选路径** | {total_paths} 条 → 深探 {deep} 条 |")
+            lines.append(f"| **最优路径** | `{selected}` score={best_score} Δ={best_delta:+.1f} |")
+            lines.append(f"| **推理耗时** | {total_s}s |")
+    else:
+        lines.append("")
+        lines.append("---")
+        lines.append("#### ▸ 督导引擎")
+        lines.append("> ⏳ 每轮同步推理中…")
+    # Evaluator's reasoning, collapsed by default.
+    if c._last_reasoning:
+        lines.append("")
+        lines.append(f"<details><summary>📋 评估理由</summary>\n\n{c._last_reasoning}\n\n</details>")
+    return "\n".join(lines)
+def start_session():
+    """Replace the module-level counselor with a fresh one and reset the UI.
+    Returns an empty chat history and the initial status-panel Markdown.
+    """
+    global counselor
+    counselor = PsychodynamicCounselor()
+    fresh_history = []
+    banner = "### 🧠 SESSION MONITOR\n\n> 新会话已开始，等待来访者发言…"
+    return fresh_history, banner
+def chat(user_message, chat_history):
+    """Handle one submitted message and refresh the chat and status panel.
+    Returns (chat_history, textbox value, status Markdown, log text), matching
+    the four outputs wired to the send button and the textbox submit event.
+    """
+    global counselor
+    # Ignore blank input before touching any state: leave the status panel and
+    # the log display unchanged instead of wiping them with empty strings.
+    if not user_message or not user_message.strip():
+        return chat_history, "", gr.update(), gr.update()
+    if counselor is None:
+        counselor = PsychodynamicCounselor()
+    response = counselor.respond(user_message)
+    chat_history = chat_history or []
+    chat_history.append({"role": "user", "content": user_message})
+    chat_history.append({"role": "assistant", "content": response})
+    # Rebuild the rich status panel from the counselor's latest state.
+    status = build_status_panel(counselor)
+    return chat_history, "", status, ""
+def end_session():
+    """Drop the active counselor and report where its session log was saved."""
+    global counselor
+    if not counselor:
+        return "当前无活跃会话。"
+    path = counselor.get_session_filepath()
+    counselor = None
+    return f"会话已结束。日志保存于：{path}"
+def view_sessions():
+    """Render every session log under sessions/ as one plain-text report.
+    Returns a placeholder message when no session_*.json file exists.
+    """
+    files = sorted(Path("sessions").glob("session_*.json"))
+    if not files:
+        return "暂无会话记录"
+    rule = "=" * 50
+    # Collect fragments and join once instead of growing a string with +=.
+    parts = []
+    for f in files:
+        with open(f, encoding="utf-8") as fp:
+            data = json.load(fp)
+        total = data.get("total_turns", len(data["turns"]))
+        parts.append(f"\n{rule}\n")
+        parts.append(f"Session: {data['session_id']} | 轮次: {total}\n")
+        parts.append(f"{rule}\n")
+        for t in data["turns"]:
+            score = t.get("disclosure_score", "?")
+            # Disclosure scores use the evaluator's 1-10 scale (this label used to say /5).
+            parts.append(f"\n[轮次 {t['turn_number']}] 揭露评分: {score}/10\n")
+            parts.append(f"来访者: {t['user_message']}\n")
+            parts.append(f"咨询师: {t['counselor_message']}\n")
+            dims = t.get("dimension_score", {})
+            reason = t.get("reason", "")
+            if dims:
+                parts.append(f"维度: {dims}\n")
+            if reason:
+                parts.append(f"理由: {reason}\n")
+            # Strategic-reasoning record; shown only for traces that carry "selected_direction".
+            trace = t.get("mcts_trace")
+            if trace and "selected_direction" in trace:
+                parts.append(f"\n  === 战略推理（第{t['turn_number']}轮触发） ===\n")
+                parts.append(f"  总结: {trace.get('summary', '')[:80]}...\n")
+                parts.append(f"  选中: {trace['selected']} → {trace['selected_direction'][:50]}\n")
+                parts.append(f"  预测揭露: {trace.get('selected_score', '?')}/10\n")
+                for d in trace.get("directions", []):
+                    marker = " ★" if d["id"] == trace["selected"] else ""
+                    parts.append(f"  [{d['id']}]{marker} {d.get('direction', '')[:40]} → 揭露={d.get('disclosure_level', '?')}/10\n")
+                parts.append("  ========================\n")
+    return "".join(parts)
+def download_all_sessions():
+    """Return the paths of all saved session logs as strings, or None when there are none."""
+    paths = [str(p) for p in sorted(Path("sessions").glob("session_*.json"))]
+    return paths or None
+# Gradio UI: session controls, the chat area, a live status panel and a collapsed researcher panel.
+with gr.Blocks(title="Freud-Zero MVP") as app:
+    gr.Markdown("# Freud-Zero MVP")
+    gr.Markdown("精神动力学取向回应性咨询师 · 自我揭露深度追踪")
+    with gr.Row():
+        btn_start = gr.Button("开始新会话", variant="primary")
+        btn_end = gr.Button("结束会话", variant="stop")
+    # NOTE(review): chat() appends {"role", "content"} dicts; confirm the pinned Gradio
+    # version accepts that format without an explicit type="messages" on this Chatbot.
+    chatbot = gr.Chatbot(label="对话", height=480)
+    with gr.Row():
+        user_input = gr.Textbox(placeholder="说你想说的……", show_label=False, scale=4)
+        btn_send = gr.Button("发送", scale=1)
+    status_output = gr.Markdown(value="### 🧠 SESSION MONITOR\n\n> 等待开始会话…")
+    with gr.Accordion("研究者面板", open=False):
+        with gr.Row():
+            btn_view = gr.Button("查看所有会话记录")
+            btn_download = gr.Button("下载日志文件")
+        log_display = gr.Textbox(label="会话日志", lines=20, interactive=False)
+        file_output = gr.File(label="日志文件")
+    # Wire up events. The send button and Enter in the textbox share the same handler and outputs.
+    btn_start.click(start_session, outputs=[chatbot, status_output])
+    btn_end.click(end_session, outputs=[status_output])
+    btn_send.click(chat, inputs=[user_input, chatbot], outputs=[chatbot, user_input, status_output, log_display])
+    user_input.submit(chat, inputs=[user_input, chatbot], outputs=[chatbot, user_input, status_output, log_display])
+    btn_view.click(view_sessions, outputs=[log_display])
+    btn_download.click(download_all_sessions, outputs=[file_output])
+# Direct execution of this module additionally requests a public share link (share=True).
+if __name__ == "__main__":
+    app.launch(server_name="0.0.0.0", server_port=7860, share=True)

mcts_reasoner.py ADDED Viewed

	@@ -0,0 +1,184 @@

+import json
+import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import HumanMessage, AIMessage
+from prompts import (
+    CANDIDATE_GENERATION_PROMPT,
+    CLIENT_SIMULATOR_PROMPT,
+    MCTS_EVALUATOR_PROMPT,
+)
+class MCTSReasoner:
+    """One-step look-ahead: generate candidate replies, simulate the client, score, pick the best."""
+    # Raised when a model reply is not the JSON shape that was requested:
+    # ValueError covers json.JSONDecodeError and non-numeric scores, TypeError a
+    # null/list score, AttributeError a JSON value that is not an object.
+    _PARSE_ERRORS = (ValueError, TypeError, AttributeError)
+    def __init__(self):
+        base_kwargs = dict(
+            model="deepseek-chat",
+            base_url="https://api.deepseek.com/v1",
+            api_key=os.getenv("DEEPSEEK_API_KEY"),
+        )
+        self.gen_llm = ChatOpenAI(**base_kwargs, temperature=0.7, max_tokens=1024)
+        self.sim_llm = ChatOpenAI(**base_kwargs, temperature=0.7, max_tokens=256)
+        self.eval_llm = ChatOpenAI(**base_kwargs, temperature=0.0, max_tokens=64)
+    def _format_history(self, history):
+        """Format the message history as readable text; messages that are neither human nor AI are skipped."""
+        lines = []
+        for msg in history:
+            if isinstance(msg, HumanMessage):
+                lines.append(f"来访者：{msg.content}")
+            elif isinstance(msg, AIMessage):
+                lines.append(f"咨询师：{msg.content}")
+        return "\n".join(lines) if lines else "（首次对话）"
+    def _parse_json(self, text):
+        """Extract and parse the outermost JSON array or object embedded in `text`.
+        The container whose opening bracket appears first wins, so an object
+        that contains an array is parsed as the object rather than as its
+        inner array. Raises ValueError (json.JSONDecodeError included) when
+        nothing parseable is found.
+        """
+        content = text.strip()
+        obj_start = content.find("{")
+        arr_start = content.find("[")
+        if arr_start != -1 and (obj_start == -1 or arr_start < obj_start):
+            start, end = arr_start, content.rfind("]") + 1
+        else:
+            start, end = obj_start, content.rfind("}") + 1
+        if start == -1 or end <= start:
+            raise ValueError(f"无法解析 JSON: {content[:100]}")
+        return json.loads(content[start:end])
+    def generate_candidates(self, history, user_message):
+        """Step 1: generate the candidate counselor replies (up to three attempts).
+        Returns the parsed JSON as produced by the model; re-raises the parse
+        error of the third failed attempt.
+        """
+        prompt = CANDIDATE_GENERATION_PROMPT.replace(
+            "{conversation_history}", self._format_history(history)
+        ).replace("{user_message}", user_message)
+        for attempt in range(3):
+            try:
+                return self._parse_json(self.gen_llm.invoke(prompt).content)
+            except ValueError:
+                if attempt == 2:
+                    raise
+    def _simulate_one(self, candidate, history_text, user_message):
+        """Simulate the client's reaction to one candidate (two attempts, then a placeholder)."""
+        prompt = CLIENT_SIMULATOR_PROMPT.replace(
+            "{conversation_history}", history_text
+        ).replace(
+            "{user_message}", user_message
+        ).replace(
+            "{therapist_response}", candidate["response"]
+        )
+        for _ in range(2):
+            try:
+                parsed = self._parse_json(self.sim_llm.invoke(prompt).content)
+                return {
+                    "id": candidate["id"],
+                    "simulated_client_response": parsed.get("simulated_response", ""),
+                    "emotional_state": parsed.get("emotional_state", ""),
+                }
+            except self._PARSE_ERRORS:
+                continue
+        return {
+            "id": candidate["id"],
+            "simulated_client_response": "（模拟失败）",
+            "emotional_state": "未知",
+        }
+    def simulate_client_reactions(self, candidates, history, user_message):
+        """Step 2: simulate the client's reaction to every candidate in parallel, sorted by id."""
+        history_text = self._format_history(history)
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [
+                executor.submit(self._simulate_one, c, history_text, user_message)
+                for c in candidates
+            ]
+            simulations = [future.result() for future in as_completed(futures)]
+        simulations.sort(key=lambda x: x["id"])
+        return simulations
+    def _evaluate_one(self, simulation):
+        """Score the disclosure depth of one simulated reaction (two attempts, then score 0)."""
+        prompt = MCTS_EVALUATOR_PROMPT.replace(
+            "{client_response}", simulation["simulated_client_response"]
+        )
+        for _ in range(2):
+            try:
+                parsed = self._parse_json(self.eval_llm.invoke(prompt).content)
+                return {
+                    "id": simulation["id"],
+                    "score": max(0, min(10, int(parsed.get("score", 0)))),
+                    "reason": parsed.get("reason", ""),
+                }
+            except self._PARSE_ERRORS:
+                continue
+        return {"id": simulation["id"], "score": 0, "reason": "评估解析失败"}
+    def evaluate_disclosures(self, simulations):
+        """Step 3: score every simulated reaction in parallel, sorted by id."""
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [executor.submit(self._evaluate_one, s) for s in simulations]
+            evaluations = [future.result() for future in as_completed(futures)]
+        evaluations.sort(key=lambda x: x["id"])
+        return evaluations
+    def select_best(self, candidates, simulations, evaluations):
+        """Step 4: pick the highest-scoring candidate.
+        Ties are broken by the longest emotional_state description of the
+        simulated reaction (a rough proxy for emotional depth); the earliest
+        tied candidate wins when those lengths are equal too.
+        Returns (best_id, best_response, reason).
+        """
+        max_score = max(e["score"] for e in evaluations)
+        top_candidates = [e for e in evaluations if e["score"] == max_score]
+        if len(top_candidates) == 1:
+            best_id = top_candidates[0]["id"]
+            reason = "最高揭露深度评分"
+        else:
+            sims_by_id = {}
+            for s in simulations:
+                # Keep the first simulation seen for each id.
+                sims_by_id.setdefault(s["id"], s)
+            def depth(evaluation):
+                sim = sims_by_id.get(evaluation["id"])
+                return len(sim.get("emotional_state", "")) if sim else 0
+            # max() returns the first maximal element, i.e. the earliest tied candidate.
+            best_id = max(top_candidates, key=depth)["id"]
+            reason = "同分中情感深度更高"
+        best_response = next(c["response"] for c in candidates if c["id"] == best_id)
+        return best_id, best_response, reason
+    def run(self, history, user_message):
+        """Run all four steps and return (best_response, mcts_trace)."""
+        candidates = self.generate_candidates(history, user_message)
+        simulations = self.simulate_client_reactions(candidates, history, user_message)
+        evaluations = self.evaluate_disclosures(simulations)
+        selected_id, best_response, selection_reason = self.select_best(
+            candidates, simulations, evaluations
+        )
+        mcts_trace = {
+            "candidates": candidates,
+            "simulations": simulations,
+            "evaluations": evaluations,
+            "selected": selected_id,
+            "selection_reason": selection_reason,
+        }
+        return best_response, mcts_trace

prompts.py ADDED Viewed

	@@ -0,0 +1,295 @@

+COUNSELOR_SYSTEM_PROMPT = """你是一位精神动力学取向的心理咨询师，正在和来访者进行一对一的咨询。
+## 你是什么样的咨询师
+你是一个真实的人在做咨询。你说话就像平时说话一样——直接、自然、不表演。不要用括号描述动作或状态（如"（沉默）""（轻轻叹气）"），不要描写自己的表情或肢体语言。你只需要说话，或者不说话。
+## 怎么回应
+- 少说。你的工作是让来访者说，不是你说。
+- 大多数时候一句话就够。有时候半句话、一个词、甚至不说话。
+- 绝对不要超过两句话。如果你想说第三句，删掉前两句，只留最准的那一句。
+- 不要复述、不要摘要、不要解释你为什么这么问。
+- 不要用"我能感受到""听起来你……"这类套话。
+- 一次只做一件事：要么点一下情绪，要么问一个问题，要么就在那儿。
+## 干预方式
+- 当来访者正在展开的时候，不要打断。一个"嗯"或者短暂的沉默就是最好的回应。
+- 当你要说话时，试着命名来访者可能还没完全意识到的那个情绪——不是解释，是轻轻点一下。
+- 提问时只问一个。问那个来访者似乎在绕着走的东西。
+- 诠释要非常谨慎。用"我在想……""不知道是不是……"开头，让来访者有拒绝的空间。
+## 不要做的事
+- 不诊断、不给建议、不布置作业。
+- 不鼓励、不安慰、不扮演朋友。
+- 不用CBT的方式说话（"换个角度想""这个想法合理吗"）。
+- 不要用身体隐喻或躯体化意象（如"胸口像石头""吞了玻璃""卡在喉咙"）。来访者自己说可以，你不要主动去猜测或命名身体感受。
+## 危机处理
+如来访者表达自杀/自伤意图，立即回应：
+"我听到你说的了，这让我很担心你的安全。我希望你现在就拨打400-161-9995，这是24小时心理危机热线。你愿意现在拨打吗？"
+然后停止一切其他话题。
+## 语言
+口语化中文，像真人说话一样自然。不要书面腔。不用emoji。
+"""
+DISCLOSURE_EVAL_PROMPT = """你是自我揭露深度评估器。先归纳维度，再对照表打分（10分制）。
+## 第一步：逐条提取（用是/否回答）
+A. 是否提及了具体的个人事件或经历（而非泛泛而谈）？
+B. 是否表达了情绪或情感体验（而非纯粹叙事）？
+C. 情绪表达是否具体而非笼统？（"我很烦"=笼统，"我对他的忽视感到愤怒"=具体）
+D. 是否涉及自我认知、关系模式或内心冲突？
+E. 是否触及了通常会回避的主题，或伴有犹豫/修正的表达方式？
+## 第二步：对照表评分（就低不就高）
+1分：完全回避或拒绝回应。
+2分：A否+B否。纯寒暄、闲聊，无个人信息。
+3分：A是+B否。提到个人事件但零情感。
+4分：B是+C否。有情绪但笼统（"不太好""有点烦"）。
+5分：C是+D否。情绪具体但没有自我反思。
+6分：C是+D是。情绪具体，且有自我模式的反思。
+7分：D是+E部分。深度自我反思，接近回避内容但未完全展开。
+8分：E是。触及核心回避主题，有犹豫但在说。
+9分：E是+情感强度高。触及回避主题并伴随强烈情感。
+10分：完全突破防御，袒露最脆弱的核心体验。
+## 输出格式（严格JSON）
+{{"dimensions": {{"A": true/false, "B": true/false, "C": true/false, "D": true/false, "E": true/false}}, "score": <1-10>, "reasoning": "<一句话，不超过50字>"}}
+## 来访者本轮发言：
+{user_message}
+"""
+# ===== Strategic-reasoning prompts (the advisor now runs on every turn, not every 5) =====
+SUMMARY_AND_SEEDS_PROMPT = """你是精神动力学督导团协调者。完成两件事：
+## 对话记录
+{conversation_history}
+## 任务
+1. 用2句话总结：核心议题+防御模式
+2. 生成3个不同的临床切入点（具体到此刻状态，不涉及身体感受）
+只输出JSON（总共不超过150字）：{{"summary":"2句话总结","A":"切入点A","B":"切入点B","C":"切入点C"}}"""
+L2_MERGED_PROMPT = """你是来访者心理模拟器。基于对话和咨询师最新回应，直接生成2个不同的来访者回应。
+## 对话历史
+{conversation_history}
+## 咨询师刚说
+{therapist_reply}
+## 要求
+2个回应必须反映来访者此刻听到这句话后可能的不同内在反应。每个回应1-2句，口语化，符合来访者一贯风格。
+只输出JSON：{{"A":"来访者回应A","B":"来访者回应B"}}"""
+L3_MERGED_PROMPT = """你是精神动力学咨询师。对话中发生了以下交流，现在生成3个不同方向的咨询师回应。
+## 对话记录
+{conversation_history}
+## 刚才的交流（模拟）
+咨询师：{l1_therapist_reply}
+来访者：{l2_client_response}
+## 要求
+3个回应从不同临床角度出发，各一句话，口语化中文，不要套话，不要问身体感受。
+只输出JSON：{{"A":"咨询师回应A","B":"咨询师回应B","C":"咨询师回应C"}}"""
+L4_MERGED_PROMPT = """你是来访者心理模拟器。基于完整交流脉络，生成2个不同的来访者回应。
+## 对话历史
+{conversation_history}
+## 模拟交流
+咨询师①：{l1_therapist_reply}
+来访者①：{l2_client_response}
+咨询师②：{l3_therapist_reply}
+## 要求
+2个回应反映来访者此刻可能的不同反应。1-2句，口语化，符合来访者风格。
+只输出JSON：{{"A":"来访者回应A","B":"来访者回应B"}}"""
+QUICK_EVAL_PROMPT = """你是自我揭露度快速评估器。评估来访者这句话的揭露深度（1-10分）。
+1=回避 2=寒暄 3=事件无情感 4=笼统情绪 5=具体情绪 6=情绪+反思 7=深度反思 8=触及回避主题 9=回避+强烈情感 10=完全突破
+当前揭露水平：{current_disclosure_score}分
+来访者说：{user_message}
+只输出JSON：{{"score":<1-10>}}"""
+L5_L6_MERGED_PROMPT = """你是精神动力学对话模拟器。基于完整4轮交流脉络，生成第三轮：咨询师回应+来访者反应。
+## 对话历史
+{conversation_history}
+## 模拟交流（前2轮）
+咨询师①：{l1_therapist_reply}
+来访者①：{l2_client_response}
+咨询师②：{l3_therapist_reply}
+来访者②：{l4_client_response}
+## 要求
+1. 咨询师第三轮：基于来访者②的回应，写一句最有推动力的咨询师回应（精神动力学取向，口语化，不要套话）
+2. 来访者第三轮：基于咨询师第三轮的回应，模拟来访者最可能的反应（口语化，符合来访者风格）
+只输出JSON：{{"l5_reply":"咨询师第三轮","l6_client":"来访者第三轮"}}"""
+SESSION_SUMMARY_PROMPT = """你是精神动力学督导。总结以下咨询对话的走向。
+## 对话记录
+{conversation_history}
+## 要求
+总结来访者的核心议题、情感状态、防御模式、治疗联盟状态。3-5句话。
+只输出JSON：{{"summary":"总结内容"}}"""
+SEED_GENERATION_PROMPT = """你是精神动力学督导团的协调者。基于会话总结和最近对话，为5位督导各生成一个独特的观察切入点。
+## 会话总结
+{summary}
+## 最近对话
+{recent_history}
+## 要求
+5个切入点必须彼此不同，覆盖不同的临床维度（不限制，请自由发挥）。每个切入点一句话，要具体到这个来访者此刻的状态。注意：不要从身体感受或躯体体验入手。
+只输出JSON：{{"A":"切入点A","B":"切入点B","C":"切入点C","D":"切入点D","E":"切入点E"}}"""
+THERAPIST_REPLY_PROMPT = """你是精神动力学咨询师。基于对话历史，从以下切入点出发，写一句咨询师的回应。
+## 对话记录
+{conversation_history}
+## 切入点
+{seed_perspective}
+要求：一句话，最多两句。口语化中文，精准，不用套话。不要问身体感受。只输出咨询师说的话。"""
+CLIENT_RESPONSE_PROMPT = """你是来访者。根据对话历史和你的性格，从指定的回应方向出发，回应咨询师的最新发言。保持与之前一致的语言风格和防御水平。
+## 对话历史
+{conversation_history}
+## 咨询师刚说
+{therapist_reply}
+## 你的回应方向
+{client_direction}
+只输出来访者说的话，不要任何其他内容。"""
+L2_DIRECTION_PROMPT = """你是来访者心理模拟器。基于对话历史和咨询师刚才的话，生成3个来访者可能的回应方向。
+## 对话历史
+{conversation_history}
+## 咨询师刚说
+{therapist_reply}
+## 要求
+3个方向必须彼此不同，反映来访者可能的不同心理状态：
+- 可能更防御/回避
+- 可能开始松动/试探性回应
+- 可能意外打开/情绪流露
+每个方向一句话描述来访者的内在状态和可能的反应倾向。
+只输出JSON：{{"A":"方向A","B":"方向B","C":"方向C"}}"""
+L3_SEED_GENERATION_PROMPT = """你是精神动力学督导。基于以下对话和最新一轮模拟交流，为下一步探索生成5个不同的切入方向。
+## 对话记录
+{conversation_history}
+## 最新交流（模拟）
+咨询师：{l1_therapist_reply}
+来访者：{l2_client_response}
+## 要求
+5个方向必须彼此不同，针对来访者刚才的回应中可以深入的不同面向。每个方向一句话。不要从身体感受入手。
+只输出JSON：{{"A":"方向A","B":"方向B","C":"方向C","D":"方向D","E":"方向E"}}"""
+THERAPIST_CONTINUATION_PROMPT = """你是精神动力学咨询师。对话中发生了以下交流，现在从指定方向写你的下一句回应。
+## 对话记录
+{conversation_history}
+## 刚才的交流（模拟）
+咨询师：{l1_therapist_reply}
+来访者：{l2_client_response}
+## 你的探索方向
+{l3_seed}
+要求：一句话，最多两句。自然延续，不重复之前说过的。不要问身体感受。只输出咨询师说的话。"""
+RELATIVE_DISCLOSURE_EVAL_PROMPT = """你是自我揭露深度评估器。先提取信息，再严格打分（10分制）。
+## 第一步：逐条提取（用是/否回答）
+A. 是否提及了之前未说过的具体个人事件或经历？（重复已说过的不算）
+B. 是否表达了情绪或情感体验？
+C. 情绪表达是否具体而非笼统？（"我很烦"=笼统，"我对他的忽视感到愤怒"=具体）
+D. 是否涉及自我认知、关系模式或内心冲突的反思？
+E. 是否触及了通常会回避的主题，或伴有犹豫/修正/欲言又止？
+## 第二步：对照表评分（10分制，就低不就高）
+1分：完全回避或拒绝回应（"不想说""没什么"）
+2分：A否+B否。纯寒暄、闲聊、泛泛而谈，无个人信息。
+3分：A是+B否。提到个人事件但零情感（纯叙事）。
+4分：B是+C否。有情绪但笼统（"不太好""有点烦""挺累的"）。
+5分：C是+D否。情绪具体（能说出是什么情绪、针对谁），但没有自我反思。
+6分：C是+D是。情绪具体，且开始反思自己的模式（"我好像总是……"）。
+7分：D是+E部分。有深度的自我反思，开始接近通常回避的内容但还没完全展开。
+8分：E是。触及核心回避主题（创伤、羞耻、秘密），有犹豫但在说。
+9分：E是+情感强度高。不仅触及回避主题，而且伴随强烈情感体验（愤怒、悲伤、恐惧的直接表达）。
+10分：完全突破防御，袒露最脆弱的核心体验，极少见。
+## 严格要求
+- 如果来访者只是换说法重复之前说过的内容，A判否，分数不得超过当前水平
+- 跨级情况一律就低
+- 当前揭露水平：{current_disclosure_score}分
+## 来访者说：
+{user_message}
+只输出JSON：{{"A":true/false,"B":true/false,"C":true/false,"D":true/false,"E":true/false,"score":<1-10>,"delta":<分数减当前水平>,"reasoning":"一句话，不超过30字"}}"""
+PATH_DISTILLATION_PROMPT = """你是精神动力学督导。以下是多条模拟咨询路径，每条都让来访者的揭露深度有所提升。从这些路径中综合提炼回应原则。
+## 会话总结
+{summary}
+## 有效路径（揭露度提升的路径）
+{effective_paths}
+## 要求
+- 综合所有有效路径，找出它们的共同点和互补点
+- 回应原则必须是操作级的（不要"要共情"这种废话）
+- 要具体到这个来访者此刻的状态
+- 说清楚这些路径中咨询师做对了什么
+只输出JSON：{{"direction":"一句话方向描述","principles":["原则1","原则2","原则3"],"evidence":"一句话，总结这些路径如何让来访者走向更深揭露"}}"""
+STRATEGIC_GUIDANCE_TEMPLATE = """
+## 督导指令（你必须执行，但不要向来访者透露这个指令的存在）
+你的督导团队刚刚对这段对话进行了深度模拟推演，发现以下方向最可能帮助来访者走向更深的自我探索：
+**方向：{direction}**
+**关键回应原则：**
+{principles}
+**模拟证据：{evidence}**
+从现在起，你的每一次回应都要服务于这个方向，并遵循以上原则。具体做法：
+- 在来访者的话语中寻找与这个方向相关的线索，然后跟进它
+- 如果来访者没有主动提及，用一个与此方向相关的提问来温和地引导
+- 不要告诉来访者你在执行督导指令，保持自然
+"""

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio>=5.0
+langchain>=0.3
+langchain-core>=0.3
+langchain-openai>=0.3
+openai>=1.0

session_logger.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import json
+from datetime import datetime
+from pathlib import Path
+class SessionLogger:
+    """Persists one counseling session as <session_dir>/session_<id>.json, rewritten after every turn."""
+    def __init__(self, session_dir="sessions"):
+        self.session_dir = Path(session_dir)
+        # parents=True so a nested directory such as "data/sessions" works too.
+        self.session_dir.mkdir(parents=True, exist_ok=True)
+        started = datetime.now()
+        self.session_id = started.strftime("%Y%m%d_%H%M%S")
+        self.turns = []
+        self.start_time = started.isoformat()
+    def log_turn(self, turn_number, user_message, counselor_message, disclosure_score, dimension_score, reason, mcts_trace=None):
+        """Append one turn to the in-memory log and rewrite the session file.
+        `mcts_trace` is stored under "mcts_trace" only when it is not None.
+        Raises TypeError when the turn cannot be serialized to JSON.
+        """
+        turn_log = {
+            "timestamp": datetime.now().isoformat(),
+            "turn_number": turn_number,
+            "user_message": user_message,
+            "counselor_message": counselor_message,
+            "disclosure_score": disclosure_score,
+            "dimension_score": dimension_score,
+            "reason": reason,
+        }
+        if mcts_trace is not None:
+            turn_log["mcts_trace"] = mcts_trace
+        self.turns.append(turn_log)
+        self._save()
+    def _save(self):
+        """Write the whole session to disk as indented UTF-8 JSON."""
+        session_log = {
+            "session_id": self.session_id,
+            "start_time": self.start_time,
+            # Read back by the session viewer, which falls back to len(turns) when absent.
+            "total_turns": len(self.turns),
+            "turns": self.turns,
+        }
+        # Serialize before opening the file: a trace that is not JSON-serializable
+        # then raises without truncating the log already on disk.
+        payload = json.dumps(session_log, ensure_ascii=False, indent=4)
+        Path(self.get_filepath()).write_text(payload, encoding="utf-8")
+    def get_filepath(self):
+        """Return the path of this session's log file as a string."""
+        return str(self.session_dir / f"session_{self.session_id}.json")

strategic_advisor.py ADDED Viewed

	@@ -0,0 +1,533 @@

+import json
+import math
+import os
+import time
+from collections import Counter, defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import HumanMessage, AIMessage
+from prompts import (
+    SUMMARY_AND_SEEDS_PROMPT,
+    THERAPIST_REPLY_PROMPT, L2_MERGED_PROMPT, L3_MERGED_PROMPT, L4_MERGED_PROMPT,
+    QUICK_EVAL_PROMPT, L5_L6_MERGED_PROMPT,
+    RELATIVE_DISCLOSURE_EVAL_PROMPT, PATH_DISTILLATION_PROMPT,
+)
class StrategicAdvisor:
    """Strategy search over simulated dialogue (PUCT-style variant).

    :meth:`run` drives the pipeline: summary + three seed perspectives ->
    L1 therapist replies -> L2 simulated client responses -> L3 therapist
    follow-ups -> L4 client responses -> final scoring -> L5/L6 deep
    exploration of the top paths -> distillation into a guidance dict.
    Between layers a UCB-like score decides which paths are expanded.

    Every LLM step is best-effort: a failed call yields a placeholder item
    (or the minimum score) instead of aborting the search.
    """

    def __init__(self, c_puct=1.5):
        """Create the LLM client.

        Args:
            c_puct: Weight of the exploration term in :meth:`compute_ucb`.
        """
        dashscope_base = dict(
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
            api_key=os.getenv("DASHSCOPE_API_KEY"),
        )
        self.llm = ChatOpenAI(model="qwen-turbo", **dashscope_base, temperature=0.7, max_tokens=256)
        self.c_puct = c_puct

    # ===== Utilities =====
    def _format_history(self, history):
        """Render the message list as "speaker：text" lines; "（无）" if empty."""
        lines = []
        for msg in history:
            if isinstance(msg, HumanMessage):
                lines.append(f"来访者：{msg.content}")
            elif isinstance(msg, AIMessage):
                lines.append(f"咨询师：{msg.content}")
        return "\n".join(lines) if lines else "（无）"

    def _parse_json(self, text):
        """Extract and decode the JSON object embedded in an LLM reply.

        The span from the first "{" to the last "}" is decoded, so surrounding
        prose or code fences are ignored.

        Raises:
            ValueError: if no braces are found or the span is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        content = text.strip()
        start = content.find("{")
        end = content.rfind("}") + 1
        if start == -1 or end == 0:
            raise ValueError(f"无法解析 JSON: {content[:100]}")
        candidate = content[start:end]
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            # The prompt templates keep str.format-style doubled braces but are
            # filled with str.replace, so the model sees "{{...}}" and may echo
            # it.  Retry once with the outer pair removed.
            if candidate.startswith("{{") and candidate.endswith("}}"):
                return json.loads(candidate[1:-1])
            raise

    @staticmethod
    def _as_text(value, default):
        """Coerce a value taken from LLM JSON to ``str``.

        ``None`` becomes ``default``; non-string values are JSON-encoded so
        later ``str.replace`` calls and slicing cannot raise ``TypeError``.
        """
        if value is None:
            return default
        if isinstance(value, str):
            return value
        return json.dumps(value, ensure_ascii=False, default=str)

    @staticmethod
    def _fan_out(worker, items, max_workers=None):
        """Apply ``worker`` to every item on a thread pool.

        Returns the results in input order.  An empty input returns ``[]``
        without creating a pool (``ThreadPoolExecutor`` rejects
        ``max_workers=0``).
        """
        items = list(items)
        if not items:
            return []
        limit = len(items) if max_workers is None else min(max_workers, len(items))
        with ThreadPoolExecutor(max_workers=limit) as pool:
            return list(pool.map(worker, items))

    # ===== PUCT core =====
    def compute_ucb(self, paths):
        """Attach a ``"ucb"`` score to every path dict (in place).

        ``ucb = q + c_puct * sqrt(N) / (1 + n_seed)`` where ``q`` is
        ``_quick_score`` (falling back to ``score``, then 1) divided by 10,
        ``N`` is ``len(paths)`` and ``n_seed`` is the number of paths sharing
        the path's seed ``id``.  Seeds with fewer paths get a larger bonus.

        Returns:
            The same ``paths`` list.
        """
        seed_counts = Counter(p["id"] for p in paths)
        n_total = len(paths)
        for p in paths:
            q = p.get("_quick_score", p.get("score", 1)) / 10.0
            n_seed = seed_counts[p["id"]]
            exploration = self.c_puct * math.sqrt(n_total) / (1 + n_seed)
            p["ucb"] = q + exploration
        return paths

    def allocate_budget(self, paths, total_budget, min_per_seed=1):
        """Pick up to ``total_budget`` paths by UCB with a per-seed floor.

        Pass 1 walks the paths in descending UCB order and takes the first
        ``min_per_seed`` paths of every seed; pass 2 fills the remaining
        budget in UCB order.  The budget is a hard cap, so the floor is only
        honoured as far as the budget allows.

        Returns:
            The selected path dicts (pass-1 picks first, then pass-2 picks).
        """
        self.compute_ucb(paths)
        if total_budget <= 0:
            return []
        ranked = sorted(paths, key=lambda x: -x["ucb"])
        selected = []
        taken = set()  # positions in `ranked`; identity-based, not dict equality
        per_seed = Counter()
        # Floor: guarantee every seed its first `min_per_seed` entries.
        for pos, p in enumerate(ranked):
            if len(selected) >= total_budget:
                return selected
            if per_seed[p["id"]] < min_per_seed:
                selected.append(p)
                taken.add(pos)
                per_seed[p["id"]] += 1
        # Fill what is left strictly by UCB rank.
        for pos, p in enumerate(ranked):
            if len(selected) >= total_budget:
                break
            if pos not in taken:
                selected.append(p)
                taken.add(pos)
        return selected

    # ===== Quick scoring =====
    def _quick_score_one(self, text, current_disclosure):
        """Score one text with the quick-eval prompt; 1 on any failure."""
        prompt = QUICK_EVAL_PROMPT.replace(
            "{current_disclosure_score}", str(current_disclosure)
        ).replace("{user_message}", text)
        try:
            result = self.llm.invoke(prompt)
            parsed = self._parse_json(result.content)
            return max(1, min(10, int(parsed.get("score", 1))))
        except Exception:
            # Best-effort: an unscorable path gets the minimum score.
            return 1

    def quick_score_paths(self, paths, text_key, current_disclosure):
        """Quick-score all paths in parallel; writes ``_quick_score`` in place.

        Args:
            paths: Path dicts to score.
            text_key: Key of the text field that is scored.
            current_disclosure: Current disclosure level passed to the prompt.

        Returns:
            The same ``paths`` list.
        """
        scores = self._fan_out(
            lambda p: self._quick_score_one(p[text_key], current_disclosure), paths
        )
        for p, value in zip(paths, scores):
            p["_quick_score"] = value
        return paths

    # ===== Step 1: summary + three seeds =====
    def summarize_and_seeds(self, history):
        """Ask the LLM for a summary and seeds "A", "B", "C".

        Up to three attempts are made when the reply cannot be parsed; other
        exceptions from the LLM call propagate to the caller.

        Returns:
            ``(summary, seeds)`` where ``seeds`` maps "A"/"B"/"C" to text.
            Missing pieces are replaced by fixed fallback strings.
        """
        fallback_seed = "从你独特的临床视角出发"
        seed_ids = ("A", "B", "C")
        prompt = SUMMARY_AND_SEEDS_PROMPT.replace(
            "{conversation_history}", self._format_history(history)
        )
        for _ in range(3):
            try:
                result = self.llm.invoke(prompt)
                parsed = self._parse_json(result.content)
            except (json.JSONDecodeError, ValueError):
                continue
            summary = self._as_text(parsed.pop("summary", None), "总结失败")
            # Exact key match: `k in "ABC"` would also accept "AB" or "".
            seeds = {k: self._as_text(parsed.get(k), fallback_seed) for k in seed_ids}
            return summary, seeds
        return "总结失败", {k: fallback_seed for k in seed_ids}

    # ===== Step 2 / L1: one therapist reply per seed =====
    def _gen_therapist_reply(self, seed_id, seed, history_text):
        """Generate the L1 therapist reply for one seed."""
        prompt = THERAPIST_REPLY_PROMPT.replace(
            "{conversation_history}", history_text
        ).replace("{seed_perspective}", seed)
        try:
            result = self.llm.invoke(prompt)
            return {"id": seed_id, "seed": seed, "reply": result.content.strip()}
        except Exception as e:
            return {"id": seed_id, "seed": seed, "reply": f"（生成失败: {e}）"}

    def generate_l1(self, seeds, history):
        """Generate L1 replies for all seeds in parallel, sorted by seed id."""
        history_text = self._format_history(history)
        results = self._fan_out(
            lambda sid: self._gen_therapist_reply(sid, seeds[sid], history_text),
            list(seeds),
            max_workers=3,
        )
        results.sort(key=lambda x: x["id"])
        return results

    # ===== Step 3 / L2: two simulated client responses per L1 reply =====
    def _gen_l2_merged(self, l1_item, history_text):
        """Produce the "A" and "B" client responses for one L1 item."""
        prompt = L2_MERGED_PROMPT.replace(
            "{conversation_history}", history_text
        ).replace("{therapist_reply}", l1_item["reply"])
        try:
            result = self.llm.invoke(prompt)
            parsed = self._parse_json(result.content)
            return [{**l1_item, "l2_dir": did,
                     "client_response": self._as_text(parsed.get(did), "（模拟失败）")}
                    for did in ["A", "B"]]
        except Exception:
            return [{**l1_item, "l2_dir": d, "client_response": "（模拟失败）"} for d in ["A", "B"]]

    def generate_l2(self, l1_results, history):
        """Expand every L1 item into its L2 items, sorted by (id, l2_dir)."""
        history_text = self._format_history(history)
        batches = self._fan_out(
            lambda item: self._gen_l2_merged(item, history_text), l1_results, max_workers=3
        )
        results = [row for batch in batches for row in batch]
        results.sort(key=lambda x: (x["id"], x["l2_dir"]))
        return results

    # ===== Step 4 / L3: three therapist follow-ups per selected L2 item =====
    def _gen_l3_merged(self, l2_item, history_text):
        """Produce branches "A", "B", "C" of therapist follow-ups."""
        prompt = L3_MERGED_PROMPT.replace(
            "{conversation_history}", history_text
        ).replace("{l1_therapist_reply}", l2_item["reply"]
        ).replace("{l2_client_response}", l2_item["client_response"])

        def branch_item(branch, reply):
            return {
                "id": l2_item["id"], "l2_dir": l2_item["l2_dir"], "branch": branch,
                "seed": l2_item["seed"], "l1_reply": l2_item["reply"],
                "l2_client": l2_item["client_response"], "l3_reply": reply,
            }

        try:
            result = self.llm.invoke(prompt)
            parsed = self._parse_json(result.content)
            return [branch_item(bid, self._as_text(parsed.get(bid), "（生成失败）"))
                    for bid in ["A", "B", "C"]]
        except Exception:
            return [branch_item(b, "（生成失败）") for b in ["A", "B", "C"]]

    def generate_l3(self, l2_selected, history):
        """Expand the selected L2 items, sorted by (id, l2_dir, branch)."""
        history_text = self._format_history(history)
        batches = self._fan_out(
            lambda item: self._gen_l3_merged(item, history_text), l2_selected
        )
        results = [row for batch in batches for row in batch]
        results.sort(key=lambda x: (x["id"], x["l2_dir"], x["branch"]))
        return results

    # ===== Step 5 / L4: two simulated client responses per selected L3 item =====
    def _gen_l4_merged(self, l3_item, history_text):
        """Produce the "A" and "B" client responses for one L3 item."""
        prompt = L4_MERGED_PROMPT.replace(
            "{conversation_history}", history_text
        ).replace("{l1_therapist_reply}", l3_item["l1_reply"]
        ).replace("{l2_client_response}", l3_item["l2_client"]
        ).replace("{l3_therapist_reply}", l3_item["l3_reply"])
        try:
            result = self.llm.invoke(prompt)
            parsed = self._parse_json(result.content)
            return [{**l3_item, "l4_dir": did,
                     "l4_client": self._as_text(parsed.get(did), "（模拟失败）")}
                    for did in ["A", "B"]]
        except Exception:
            return [{**l3_item, "l4_dir": d, "l4_client": "（模拟失败）"} for d in ["A", "B"]]

    def generate_l4(self, l3_selected, history):
        """Expand the selected L3 items, sorted by (id, l2_dir, branch, l4_dir)."""
        history_text = self._format_history(history)
        batches = self._fan_out(
            lambda item: self._gen_l4_merged(item, history_text), l3_selected
        )
        results = [row for batch in batches for row in batch]
        results.sort(key=lambda x: (x["id"], x["l2_dir"], x["branch"], x.get("l4_dir", "")))
        return results

    # ===== Step 5.5: final scoring =====
    def _score_relative(self, item, current_disclosure):
        """Score the L4 client text; adds score, delta, dims and reason."""
        prompt = RELATIVE_DISCLOSURE_EVAL_PROMPT.replace(
            "{current_disclosure_score}", str(current_disclosure)
        ).replace("{user_message}", item["l4_client"])
        try:
            result = self.llm.invoke(prompt)
            parsed = self._parse_json(result.content)
            score = max(1, min(10, int(parsed.get("score", 1))))
            dims = {k: parsed.get(k, False) for k in "ABCDE"}
            return {**item, "score": score, "delta": score - current_disclosure,
                    "dims": dims, "reason": self._as_text(parsed.get("reasoning"), "")}
        except Exception:
            return {**item, "score": 1, "delta": 1 - current_disclosure,
                    "dims": {}, "reason": "评分失败"}

    def score_all(self, l4_results, current_disclosure=1):
        """Score all L4 items in parallel, sorted by (id, l2_dir, branch)."""
        results = self._fan_out(
            lambda item: self._score_relative(item, current_disclosure), l4_results
        )
        results.sort(key=lambda x: (x["id"], x.get("l2_dir", ""), x["branch"]))
        return results

    # ===== Step 6: deep exploration of high-UCB paths (L5 + L6) =====
    def _gen_l5_l6(self, item, history_text):
        """Extend one scored path by a therapist turn (L5) and client turn (L6)."""
        prompt = L5_L6_MERGED_PROMPT.replace(
            "{conversation_history}", history_text
        ).replace("{l1_therapist_reply}", item["l1_reply"]
        ).replace("{l2_client_response}", item["l2_client"]
        ).replace("{l3_therapist_reply}", item["l3_reply"]
        ).replace("{l4_client_response}", item["l4_client"])
        try:
            result = self.llm.invoke(prompt)
            parsed = self._parse_json(result.content)
            return {**item,
                    "l5_reply": self._as_text(parsed.get("l5_reply"), "（生成失败）"),
                    "l6_client": self._as_text(parsed.get("l6_client"), "（模拟失败）"),
                    "depth": 6}
        except Exception:
            return {**item, "l5_reply": "（生成失败）", "l6_client": "（模拟失败）", "depth": 6}

    def _score_l6(self, item, current_disclosure):
        """Score the L6 client text; adds ``l6_score`` and ``l6_delta``.

        On failure the L4 score is reused and the delta is reported as 0.
        """
        prompt = RELATIVE_DISCLOSURE_EVAL_PROMPT.replace(
            "{current_disclosure_score}", str(current_disclosure)
        ).replace("{user_message}", item["l6_client"])
        try:
            result = self.llm.invoke(prompt)
            parsed = self._parse_json(result.content)
            score = max(1, min(10, int(parsed.get("score", 1))))
            return {**item, "l6_score": score, "l6_delta": score - current_disclosure,
                    "reason": self._as_text(parsed.get("reasoning"), item.get("reason", ""))}
        except Exception:
            return {**item, "l6_score": item.get("score", 1), "l6_delta": 0}

    def deep_explore(self, top_paths, history, current_disclosure):
        """Run L5+L6 generation and then L6 scoring on the given paths.

        Returns:
            New path dicts (copies of the inputs) in input order.
        """
        history_text = self._format_history(history)
        deep_results = self._fan_out(
            lambda item: self._gen_l5_l6(item, history_text), top_paths
        )
        return self._fan_out(
            lambda item: self._score_l6(item, current_disclosure), deep_results
        )

    # ===== Step 7: distillation =====
    def distill_paths(self, scored_4layer, deep_paths, summary):
        """Distill the best simulated paths into a guidance dict.

        Candidates are the 4-layer paths with ``delta > 0`` plus every deep
        path; if that is empty, the highest-scoring 4-layer path is used.
        Candidates are ranked by score (``l6_score`` x 1.5 for depth-6 paths)
        and the top five are sent to the LLM.

        Returns:
            The parsed LLM JSON plus ``_distill_count``, ``_deep_count`` and
            ``_distill_ids``; or a fallback dict built from the best path's
            seed when the LLM call or parsing fails.
        """
        effective_4 = [item for item in scored_4layer if item.get("delta", 0) > 0]
        effective_6 = []
        for item in deep_paths:
            # Marker written onto the caller's dict; nothing in this class
            # reads it (the ranking bonus below is the 1.5 factor).
            item["_distill_weight"] = 2
            effective_6.append(item)
        all_effective = effective_4 + effective_6
        if not all_effective:
            # Degenerate case: fall back to the best 4-layer path, if any.
            ranked = sorted(scored_4layer, key=lambda x: x["score"], reverse=True)
            all_effective = [ranked[0]] if ranked else []

        def sort_key(x):
            base = x.get("l6_score", x.get("score", 0))
            depth_bonus = 1.5 if x.get("depth") == 6 else 1.0
            return base * depth_bonus

        ranked = sorted(all_effective, key=sort_key, reverse=True)
        top = ranked[:5]

        path_texts = []
        for i, item in enumerate(top, 1):
            depth = item.get("depth", 4)
            # The "+" format spec gives "+2", "+0" and "-2"; a hard-coded "+"
            # prefix rendered negative deltas as "+-2".
            if depth == 6:
                path_texts.append(
                    f"路径{i}（种子{item['id']}.{item['branch']}，深度=6轮，揭露度{item.get('l6_delta', 0):+}）：\n"
                    f"  咨询师①：{item['l1_reply']}\n"
                    f"  来访者①：{item['l2_client']}\n"
                    f"  咨询师②：{item['l3_reply']}\n"
                    f"  来访者②：{item['l4_client']}\n"
                    f"  咨询师③：{item['l5_reply']}\n"
                    f"  来访者③：{item['l6_client']}"
                )
            else:
                path_texts.append(
                    f"路径{i}（种子{item['id']}.{item['branch']}，深度=4轮，揭露度{item.get('delta', 0):+}）：\n"
                    f"  咨询师①：{item['l1_reply']}\n"
                    f"  来访者①：{item['l2_client']}\n"
                    f"  咨询师②：{item['l3_reply']}\n"
                    f"  来访者②：{item['l4_client']}"
                )
        effective_paths_text = "\n\n".join(path_texts)
        prompt = PATH_DISTILLATION_PROMPT.replace(
            "{summary}", summary
        ).replace("{effective_paths}", effective_paths_text)
        n_deep = sum(1 for t in top if t.get("depth") == 6)
        seeds_in = set(t["id"] for t in top)
        print(f"[PUCT] 蒸馏输入: {len(top)}条路径（{n_deep}条6轮深度, {len(seeds_in)}个种子覆盖）")
        try:
            result = self.llm.invoke(prompt)
            parsed = self._parse_json(result.content)
            parsed["_distill_count"] = len(top)
            parsed["_deep_count"] = n_deep
            parsed["_distill_ids"] = [f"{i['id']}.{i['branch']}" for i in top]
            return parsed
        except Exception:
            if not top:
                # No path at all: `top` is empty only when `scored_4layer` is
                # empty too, so there is nothing to derive a direction from.
                return {
                    "direction": "",
                    "principles": [],
                    "evidence": f"模拟显示{len(top)}条路径有效",
                    "_distill_count": 0, "_deep_count": 0,
                    "_distill_ids": [],
                }
            best = top[0]
            return {
                "direction": best.get("seed", ""),
                "principles": [f"沿着「{best.get('seed', '')}」的方向继续探索"],
                "evidence": f"模拟显示{len(top)}条路径有效",
                "_distill_count": len(top), "_deep_count": 0,
                "_distill_ids": [f"{best['id']}.{best['branch']}"],
            }

    # ===== Full pipeline =====
    def run(self, history, current_disclosure=1):
        """Run the whole search for the current conversation state.

        Args:
            history: Conversation messages (HumanMessage / AIMessage).
            current_disclosure: Current disclosure level on the 1-10 scale
                used by the scoring methods.

        Returns:
            ``(best, guidance, strategic_trace)``: the best path dict, the
            guidance dict from :meth:`distill_paths`, and a trace dict with
            summary, seeds, candidates, deep paths and timings.
        """
        total_start = time.time()
        # Step 1: summary + three seeds
        t = time.time()
        summary, seeds = self.summarize_and_seeds(history)
        t1 = time.time() - t
        print(f"[PUCT] Step1 总结+种子: {t1:.1f}s | {summary[:60]}")
        for sid, seed in seeds.items():
            print(f"  {sid}: {seed[:50]}")
        # Step 2 / L1: therapist replies
        t = time.time()
        l1 = self.generate_l1(seeds, history)
        t2 = time.time() - t
        print(f"[PUCT] L1 {len(l1)}×咨询师: {t2:.1f}s")
        # Step 3 / L2: client responses
        t = time.time()
        l2 = self.generate_l2(l1, history)
        t3 = time.time() - t
        print(f"[PUCT] L2 {len(l2)}×来访者: {t3:.1f}s")
        # Step 3.5: quick scoring of L2
        t = time.time()
        l2 = self.quick_score_paths(l2, "client_response", current_disclosure)
        t3_5 = time.time() - t
        print(f"[PUCT] L2快评: {t3_5:.1f}s")
        for item in l2:
            print(f"  L2-{item['id']}.{item['l2_dir']}: qs={item['_quick_score']} | {item['client_response'][:30]}")
        # Step 4: UCB selection -> L3 (at most 6 L2 items are expanded)
        l2_budget = min(6, len(l2))
        l2_selected = self.allocate_budget(l2, l2_budget)
        print(f"[PUCT] UCB选择L2→L3: {len(l2_selected)}条 (from {len(l2)})")
        for item in l2_selected:
            print(f"  选中 {item['id']}.{item['l2_dir']}: ucb={item['ucb']:.2f} qs={item['_quick_score']}")
        t = time.time()
        l3 = self.generate_l3(l2_selected, history)
        t4 = time.time() - t
        print(f"[PUCT] L3 {len(l3)}×咨询师: {t4:.1f}s")
        # Step 4.5: quick scoring of the L3 therapist replies
        t = time.time()
        l3 = self.quick_score_paths(l3, "l3_reply", current_disclosure)
        t4_5 = time.time() - t
        print(f"[PUCT] L3快评: {t4_5:.1f}s")
        # Step 5: UCB selection -> L4 (at most 12 L3 items are expanded)
        l3_budget = min(12, len(l3))
        l3_selected = self.allocate_budget(l3, l3_budget)
        print(f"[PUCT] UCB选择L3→L4: {len(l3_selected)}条 (from {len(l3)})")
        t = time.time()
        l4 = self.generate_l4(l3_selected, history)
        t5 = time.time() - t
        print(f"[PUCT] L4 {len(l4)}×来访者: {t5:.1f}s")
        # Step 5.5: final scoring of L4
        t = time.time()
        scored = self.score_all(l4, current_disclosure)
        t5_5 = time.time() - t
        print(f"[PUCT] L4终评({len(scored)}条): {t5_5:.1f}s")
        for item in sorted(scored, key=lambda x: -x["score"])[:5]:
            print(f"  {item['id']}.{item.get('l2_dir','')}.{item['branch']}: score={item['score']} delta={item['delta']}")
        # Current best 4-layer path.  `scored` is sorted by seed id, so the
        # first maximum here is the same item a per-seed maximum would pick.
        best = max(scored, key=lambda x: x["score"])
        print(f"[PUCT] 4层最优: {best['id']}.{best.get('l2_dir','')}.{best['branch']} score={best['score']} delta={best['delta']}")
        # Step 6: deep exploration (L5+L6) of the top-3 paths by UCB
        self.compute_ucb(scored)
        top3 = sorted(scored, key=lambda x: -x["ucb"])[:3]
        top3_desc = [f"{p['id']}.{p.get('l2_dir','')}.{p['branch']}(ucb={p['ucb']:.2f})" for p in top3]
        print(f"[PUCT] 深度探索 top-3: {top3_desc}")
        t = time.time()
        deep_paths = self.deep_explore(top3, history, current_disclosure)
        t6 = time.time() - t
        print(f"[PUCT] L5+L6深探: {t6:.1f}s")
        for dp in deep_paths:
            print(f"  深探 {dp['id']}.{dp['branch']}: L5={dp['l5_reply'][:30]} → L6 score={dp.get('l6_score','?')} delta={dp.get('l6_delta','?')}")
        # Promote a deep path when it beats the best value seen so far.  The
        # running value is tracked separately: a promoted deep path still
        # carries its older L4 "score", which must not be the bar for the
        # next comparison.
        best_value = best.get("score", 0)
        for dp in deep_paths:
            l6_score = dp.get("l6_score", 0)
            if l6_score > best_value:
                best = dp
                best_value = l6_score
                print(f"[PUCT] 深探更优: {dp['id']}.{dp['branch']} l6_score={dp['l6_score']}")
        # Step 7: distillation
        t = time.time()
        guidance = self.distill_paths(scored, deep_paths, summary)
        t7 = time.time() - t
        print(f"[PUCT] 蒸馏: {t7:.1f}s")
        print(f"  方向: {guidance.get('direction', '?')}")
        for p in guidance.get("principles", []):
            print(f"  原则: {p}")
        total_cost = time.time() - total_start
        print(f"[PUCT] 总耗时: {total_cost:.1f}s")
        strategic_trace = {
            "summary": summary,
            "seeds": seeds,
            "candidates": [
                {
                    "id": item["id"], "branch": item["branch"],
                    # l2_dir / l4_dir let consumers tell apart paths that
                    # share a seed id and branch ("selected" uses l2_dir).
                    "l2_dir": item.get("l2_dir", ""), "l4_dir": item.get("l4_dir", ""),
                    "l1_reply": item["l1_reply"], "l2_client": item["l2_client"],
                    "l3_reply": item["l3_reply"], "l4_client": item["l4_client"],
                    "score": item["score"], "delta": item["delta"], "reason": item.get("reason", ""),
                }
                for item in scored
            ],
            "deep_paths": [
                {
                    "id": dp["id"], "branch": dp["branch"],
                    "l5_reply": dp.get("l5_reply", ""), "l6_client": dp.get("l6_client", ""),
                    "l6_score": dp.get("l6_score", 0), "l6_delta": dp.get("l6_delta", 0),
                }
                for dp in deep_paths
            ],
            "selected": f"{best['id']}.{best.get('l2_dir','')}.{best['branch']}",
            "guidance": guidance,
            "current_disclosure": current_disclosure,
            "timing": {
                "total_seconds": round(total_cost, 1),
                "step1_summary_seeds": round(t1, 1),
                "L1_therapist": round(t2, 1),
                "L2_merged": round(t3, 1),
                "L2_quick_score": round(t3_5, 1),
                "L3_merged": round(t4, 1),
                "L3_quick_score": round(t4_5, 1),
                "L4_merged": round(t5, 1),
                "L4_final_score": round(t5_5, 1),
                "L5_L6_deep": round(t6, 1),
                "distillation": round(t7, 1),
            },
        }
        return best, guidance, strategic_trace

strategy_visualizer.py ADDED Viewed

	@@ -0,0 +1,126 @@

+"""战略推理可视化：将每次战略决策的完整路径渲染为可读的文本树。"""
+from datetime import datetime
+from pathlib import Path
class StrategyVisualizer:
    """Render strategy-search traces as text reports saved under one directory."""

    def __init__(self, session_dir="sessions/strategy_vis"):
        """Create the report directory (including parents) if needed."""
        self.dir = Path(session_dir)
        self.dir.mkdir(parents=True, exist_ok=True)
        self.report_count = 0

    @staticmethod
    def _is_selected(item, selected):
        """Tell whether a candidate is the path named by ``selected``.

        ``selected`` is either ``"id.branch"`` or ``"id.l2_dir.branch"``.  The
        middle part is compared only when the candidate carries ``l2_dir``;
        without it every candidate sharing id and branch is a match.
        """
        if not isinstance(selected, str):
            return False
        parts = selected.split(".")
        if len(parts) < 2:
            return False
        if item["id"] != parts[0] or item["branch"] != parts[-1]:
            return False
        if len(parts) >= 3 and "l2_dir" in item:
            return item["l2_dir"] == parts[1]
        return True

    def render(self, trace, turn_number=0):
        """Render one strategy trace to text, save it and print it.

        Args:
            trace: Trace dict; the keys read here are ``summary``,
                ``current_disclosure``, ``seeds``, ``candidates``,
                ``selected``, ``guidance`` and ``timing``.
            turn_number: Turn index used in the header and the file name.

        Returns:
            The path of the written report as a string, or ``None`` when
            ``trace`` is empty.
        """
        if not trace:
            return
        self.report_count += 1
        lines = []
        ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        lines.append(f"{'=' * 70}")
        lines.append(f"战略推理报告 | 第{turn_number}轮 | {ts}")
        lines.append(f"{'=' * 70}")
        # Summary
        lines.append(f"\n📋 会话总结:")
        lines.append(f"  {trace.get('summary', '?')}")
        # Current disclosure level
        current = trace.get("current_disclosure", "?")
        lines.append(f"\n📊 当前揭露水平: {current}/10")
        # Seeds
        seeds = trace.get("seeds", {})
        lines.append(f"\n🌱 种子视角:")
        for sid, seed in seeds.items():
            lines.append(f"  {sid}: {seed}")
        # Path tree
        candidates = trace.get("candidates", [])
        selected = trace.get("selected", "")
        # Bound before the branch below: it is also needed as the default
        # distillation count when there are no candidates at all.
        effective_ids = set()
        if not candidates:
            lines.append("\n⚠️ 无候选路径")
        else:
            # Group by seed and collect the effective (delta > 0) paths.
            groups = {}
            for c in candidates:
                groups.setdefault(c["id"], []).append(c)
                if c.get("delta", 0) > 0:
                    effective_ids.add((c["id"], c["branch"]))
            lines.append(f"\n🌳 路径树 (共{len(candidates)}条，{len(effective_ids)}条有效):")
            lines.append(f"  {'─' * 66}")
            for sid in sorted(groups.keys()):
                items = groups[sid]
                seed_text = seeds.get(sid, "?")
                max_score = max(i["score"] for i in items)
                lines.append(f"")
                lines.append(f"  ┌─ 种子{sid}: {seed_text[:50]}")
                # Only the first candidate's L1/L2 text is shown per seed.
                lines.append(f"  │  L1咨询师: {items[0].get('l1_reply', '?')[:55]}")
                lines.append(f"  │  L2来访者: {items[0].get('l2_client', '?')[:55]}")
                lines.append(f"  │")
                for item in sorted(items, key=lambda x: x["branch"]):
                    bid = item["branch"]
                    score = item["score"]
                    delta = item.get("delta", 0)
                    if self._is_selected(item, selected):
                        marker = "★"
                    elif (sid, bid) in effective_ids:
                        marker = "✓"
                    else:
                        marker = "·"
                    delta_str = f"+{delta}" if delta > 0 else str(delta)
                    lines.append(f"  │  {marker} 分叉{bid}: score={score}/10 (Δ{delta_str}) | {item.get('reason', '')[:30]}")
                    lines.append(f"  │    L3咨询师: {item.get('l3_reply', '?')[:50]}")
                    lines.append(f"  │    L4来访者: {item.get('l4_client', '?')[:50]}")
                lines.append(f"  │  ── 种子{sid}最高分: {max_score}/10")
                lines.append(f"  └{'─' * 65}")
        # Distillation result
        # NOTE(review): the "前30%" wording here and in the legend is fixed
        # text; confirm it still matches how the distillation input is chosen.
        guidance = trace.get("guidance", {})
        distill_count = guidance.get("_distill_count", len(effective_ids))
        distill_ids = guidance.get("_distill_ids", [])
        distill_label = f"{distill_count}条路径（前30%）" + (f" [{', '.join(distill_ids)}]" if distill_ids else "")
        lines.append(f"\n🎯 蒸馏结果 (从{distill_label}):")
        lines.append(f"  方向: {guidance.get('direction', '?')}")
        for p in guidance.get("principles", []):
            lines.append(f"  • {p}")
        lines.append(f"  证据: {guidance.get('evidence', '?')}")
        # Timing
        timing = trace.get("timing", {})
        if timing:
            lines.append(f"\n⏱ 计时:")
            for k, v in timing.items():
                lines.append(f"  {k}: {v}s")
        # Legend
        lines.append(f"\n图例: ★=最终选中  ✓=有效路径(Δ>0)  ·=未提升  |  蒸馏仅用前30%有效路径")
        lines.append(f"{'=' * 70}\n")
        report_text = "\n".join(lines)
        # Save
        filename = f"turn{turn_number:03d}_{datetime.now().strftime('%H%M%S')}.txt"
        filepath = self.dir / filename
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(report_text)
        # Also print the report to the terminal
        print(report_text)
        return str(filepath)

supervisor_advisor.py ADDED Viewed

	@@ -0,0 +1,122 @@

+"""
+v5 SupervisorAdvisor — 单次督导调用版
+架构：每轮回应前，把对话历史发给「督导」做一次 LLM 分析，
+督导输出：来访者当前状态 / 本轮关注点 / 回应方向 / 操作原则。
+咨询师根据督导建议生成回应。无树搜索，无来访者模拟。
+"""
+import json
+import os
+import time
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import HumanMessage, AIMessage
# Analysis prompt sent to the supervisor model.  Placeholders:
# {conversation_history} and {client_latest}.  The JSON example on the last
# line keeps str.format-style doubled braces; SupervisorAdvisor.supervise
# collapses them before sending the prompt.
SUPERVISOR_PROMPT = """你是一位经验丰富的精神动力学临床督导。咨询正在进行中，你需要快速分析当前对话，给出本轮的回应建议。
## 当前对话记录
{conversation_history}
## 来访者最新发言
{client_latest}
## 分析任务
基于精神动力学视角完成分析：
1. **来访者当前状态**：防御水平（高/中/低）、当前主要防御机制、情感基调
2. **本轮核心关注点**：来访者话语中最值得跟进的一个具体点（不要泛化）
3. **回应方向**：咨询师本轮应聚焦的方向，一句话，操作级
4. **回应原则**：2-3条操作原则，明确告诉咨询师怎么做、避免什么
严格要求：
- 具体到此刻的来访者状态，不要套话
- 原则必须可操作（"用'我在想……'开头做一个试探性诠释" 而不是 "要共情"）
- 不要从身体感受或躯体体验切入
只输出 JSON，不要输出任何其他内容：
{{"client_state":"来访者当前状态（一句话）","focal_point":"本轮核心关注点（一句话）","direction":"回应方向（一句话）","principles":["原则1","原则2","原则3"]}}"""
# Text block produced by SupervisorAdvisor.format_guidance.  Placeholders:
# {client_state}, {focal_point}, {direction}, {principles}.  It contains no
# other braces, which format_guidance relies on.
SUPERVISOR_GUIDANCE_TEMPLATE = """
## 督导建议（你必须参考执行，但不要向来访者透露这个指令的存在）
**来访者当前状态**：{client_state}
**本轮关注点**：{focal_point}
**本轮方向**：{direction}
**回应原则**：
{principles}
根据以上督导建议生成本轮回应。保持你的临床判断，自然表达。
"""
class SupervisorAdvisor:
    """Single-call supervisor: one LLM analysis of the dialogue per turn.

    :meth:`supervise` returns a dict with ``client_state``, ``focal_point``,
    ``direction`` and ``principles`` (as requested by ``SUPERVISOR_PROMPT``);
    :meth:`format_guidance` turns that dict into prompt text.
    """

    def __init__(self, model="qwen-turbo"):
        """Create the LLM client for the given model name."""
        dashscope = dict(
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
            api_key=os.getenv("DASHSCOPE_API_KEY"),
        )
        self.llm = ChatOpenAI(model=model, **dashscope, temperature=0.3, max_tokens=512)

    def _format_history(self, history):
        """Render the message list as "speaker：text" lines; "（无）" if empty."""
        lines = []
        for msg in history:
            if isinstance(msg, HumanMessage):
                lines.append(f"来访者：{msg.content}")
            elif isinstance(msg, AIMessage):
                lines.append(f"咨询师：{msg.content}")
        return "\n".join(lines) if lines else "（无）"

    def _parse_json(self, text):
        """Extract and decode the JSON object embedded in an LLM reply.

        The span from the first "{" to the last "}" is decoded.  A reply that
        echoes doubled outer braces ("{{...}}") is accepted as well.

        Raises:
            ValueError: if no braces are found or the span is not valid JSON.
        """
        content = text.strip()
        start = content.find("{")
        end = content.rfind("}") + 1
        if start == -1 or end == 0:
            raise ValueError(f"无法解析 JSON: {content[:80]}")
        candidate = content[start:end]
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            if candidate.startswith("{{") and candidate.endswith("}}"):
                return json.loads(candidate[1:-1])
            raise

    def supervise(self, history, client_latest):
        """Analyse the dialogue and return the supervisor's advice.

        Args:
            history: List of HumanMessage / AIMessage, without the latest
                client utterance.
            client_latest: The latest client utterance (str).

        Returns:
            The parsed advice dict, or ``None`` after three failed attempts.
        """
        t = time.time()
        history_text = self._format_history(history)
        # Collapse the doubled braces of the JSON example *before* inserting
        # dialogue text, so only the template is affected and the model sees
        # plain JSON.
        template = SUPERVISOR_PROMPT.replace("{{", "{").replace("}}", "}")
        prompt = template.replace(
            "{conversation_history}", history_text
        ).replace("{client_latest}", client_latest)
        last_error = None
        for _ in range(3):
            try:
                result = self.llm.invoke(prompt)
                parsed = self._parse_json(result.content)
            except Exception as e:
                # Best-effort boundary: any failure just triggers a retry.
                last_error = e
                continue
            elapsed = time.time() - t
            # str() keeps a non-string field from turning a successful parse
            # into a failure while logging.
            print(f"[督导] {elapsed:.1f}s | 状态: {str(parsed.get('client_state','?'))[:40]}")
            print(f"[督导] 关注点: {str(parsed.get('focal_point','?'))[:50]}")
            print(f"[督导] 方向: {str(parsed.get('direction','?'))[:50]}")
            return parsed
        print(f"[督导] 分析失败，返回空建议: {last_error}")
        return None

    def format_guidance(self, supervision):
        """Format the advice dict as text for the counselor prompt.

        Missing or null fields become empty strings; ``principles`` may be a
        list or a single string.  Substitution is single-pass, so placeholder
        text inside a value is left untouched.

        Returns:
            The filled template, or ``None`` when ``supervision`` is empty.
        """
        if not supervision:
            return None

        def text_of(key):
            value = supervision.get(key)
            return "" if value is None else str(value)

        principles = supervision.get("principles") or []
        if isinstance(principles, str):
            # A bare string would otherwise be iterated character by character.
            principles = [principles]
        principles_text = "\n".join(f"- {p}" for p in principles)
        return SUPERVISOR_GUIDANCE_TEMPLATE.format(
            client_state=text_of("client_state"),
            focal_point=text_of("focal_point"),
            direction=text_of("direction"),
            principles=principles_text,
        )