add photo

This commit is contained in:
gcw_4spBpAfv
2026-04-23 15:21:24 +08:00
parent 1550783eef
commit 4e33063a98
44 changed files with 3567 additions and 64 deletions

View File

@@ -6,6 +6,7 @@ import android.content.Context
import android.content.pm.PackageManager
import android.content.res.ColorStateList
import android.graphics.Color
import android.graphics.BitmapFactory
import android.os.Build
import android.os.Bundle
import android.os.Handler
@@ -16,6 +17,7 @@ import android.util.Log
import android.view.MotionEvent
import android.view.ViewGroup
import android.widget.Button
import android.widget.ImageView
import android.widget.TextView
import android.widget.Toast
import androidx.core.app.ActivityCompat
@@ -25,7 +27,6 @@ import android.view.View
import androidx.lifecycle.Lifecycle
import androidx.lifecycle.LifecycleOwner
import androidx.lifecycle.LifecycleRegistry
import android.widget.ImageView
import com.unity3d.player.UnityPlayer
import com.unity3d.player.UnityPlayerActivity
import com.digitalperson.audio.AudioProcessor
@@ -33,6 +34,7 @@ import com.digitalperson.asr.AsrManager
import com.digitalperson.cloud.CloudApiManager
import com.digitalperson.cloud.CloudReflectionHelper
import com.digitalperson.config.AppConfig
import com.digitalperson.embedding.RefImageMatcher
import com.digitalperson.question.QuestionGenerationAgent
import com.digitalperson.data.AppDatabase
import com.digitalperson.face.FaceDetectionPipeline
@@ -48,8 +50,6 @@ import com.digitalperson.tts.TtsController
import com.digitalperson.util.FileHelper
import com.digitalperson.vad.VadManager
import kotlinx.coroutines.*
import com.digitalperson.embedding.RefImageMatcher
import android.graphics.BitmapFactory
class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
@@ -260,6 +260,11 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
recordButtonGlow = chatLayout.findViewById(R.id.record_button_glow)
refMatchImageView = chatLayout.findViewById(R.id.ref_match_image)
if (!AppConfig.SHOW_DEBUG_TEXT) {
chatHistoryText.visibility = View.GONE
chatLayout.findViewById<View>(R.id.scroll_view).visibility = View.GONE
}
// 根据配置设置按钮可见性
if (AppConfig.USE_HOLD_TO_SPEAK) {
holdToSpeakButton.visibility = View.VISIBLE
@@ -366,6 +371,8 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
override fun onLlmCalled(text: String) {
Log.d("UnityDigitalPerson", "LLM called with: $text")
interactionCoordinator.onUserAsrText(text)
// 用用户问题提前匹配:比等 LLM 回复更早显示图片(模拟器/真机通用)
maybeShowMatchedRefImage(text)
}
})
setAudioProcessor(audioProcessor)
@@ -664,6 +671,7 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
}
private fun appendChat(text: String) {
if (!AppConfig.SHOW_DEBUG_TEXT) return
runOnUiThread {
chatHistoryText.append(text + "\n")
}
@@ -696,6 +704,8 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
override fun onSpeak(text: String) {
ttsController.enqueueSegment(text)
ttsController.enqueueEnd()
// 主动发言(问候/主动提问)也尝试匹配参考图片
maybeShowMatchedRefImage(text)
}
override fun onRequestCloudReply(prompt: String) {
@@ -759,13 +769,22 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
private fun maybeShowMatchedRefImage(text: String) {
val imageView = refMatchImageView ?: return
// Unity Activity already has coroutines
CoroutineScope(SupervisorJob() + Dispatchers.IO).launch {
// 每次匹配前先清掉上一张图
runOnUiThread {
imageView.setImageBitmap(null)
imageView.visibility = View.GONE
}
ioScope.launch {
val match = RefImageMatcher.findBestMatch(applicationContext, text)
if (match == null) return@launch
if (match == null) {
Log.d("RefImageMatch", "未找到匹配图片 query=\"${text.take(80)}\"")
return@launch
}
Log.d("RefImageMatch", "匹配成功 score=${match.score} path=${match.pngAssetPath} query=\"${text.take(80)}\"")
val bitmap = try {
assets.open(match.pngAssetPath).use { BitmapFactory.decodeStream(it) }
} catch (_: Throwable) {
} catch (e: Throwable) {
Log.w("RefImageMatch", "图片加载失败 path=${match.pngAssetPath}", e)
null
}
if (bitmap == null) return@launch

View File

@@ -6,6 +6,7 @@ import android.util.Log
import com.digitalperson.BuildConfig
import com.digitalperson.audio.AudioProcessor
import com.digitalperson.config.AppConfig
import com.digitalperson.env.RuntimeEnv
import com.digitalperson.engine.SenseVoiceEngineRKNN
import com.digitalperson.util.FileHelper
import kotlinx.coroutines.Dispatchers
@@ -23,7 +24,6 @@ class AsrManager(private val context: Context) {
private var senseVoice: SenseVoiceEngineRKNN? = null
private val nativeLock = Any()
private val asrQueue = Channel<Pair<FloatArray, FloatArray>>(capacity = Channel.UNLIMITED)
private var audioProcessor: AudioProcessor? = null
@@ -48,6 +48,10 @@ class AsrManager(private val context: Context) {
}
fun initSenseVoiceModel(): Boolean {
if (RuntimeEnv.isEmulator()) {
Log.w(TAG, "ASR: emulator detected; skip local RKNN init and use cloud ASR")
return false
}
return try {
Log.i(TAG, "ASR: init SenseVoice RKNN (scheme A)")
@@ -133,23 +137,47 @@ class AsrManager(private val context: Context) {
Log.d(TAG, "ASR started: processing audio segment")
saveAsrAudio(originalSeg, processedSeg)
val raw = synchronized(nativeLock) {
val localText = synchronized(nativeLock) {
val e = senseVoice
if (e == null || !e.isInitialized) {
Log.e(TAG, "ASR failed: SenseVoice engine not initialized")
""
} else {
try {
e.transcribeBuffer(processedSeg)
} catch (e: Throwable) {
Log.e(TAG, "ASR transcribe failed: ${e.message}")
removeTokens(e.transcribeBuffer(processedSeg))
} catch (t: Throwable) {
Log.e(TAG, "ASR transcribe failed: ${t.message}")
""
}
}
}.trim()
val text = if (localText.isNotBlank()) {
localText
} else {
// 模拟器或本地 RKNN 未就绪使用腾讯云「一句话识别」SDKapp/libs/asr-one-sentence-release.aar
val shouldTryTencent =
BuildConfig.HAS_TENCENT_ASR_SDK && (RuntimeEnv.isEmulator() || !isInitialized())
if (!shouldTryTencent) {
Log.e(
TAG,
"ASR failed: local RKNN not ready and Tencent SDK unavailable " +
"(add libs/asr-one-sentence-release.aar or fix SenseVoice init)"
)
""
} else {
withContext(Dispatchers.IO) {
try {
// 云端 ASR 使用原始录音(未经 AEC/NS
// 模拟器上 AEC/NS 不可用processedSeg 可能被处理成近似静音
TencentOneSentenceAsr.transcribePcm16Mono(originalSeg)
} catch (t: Throwable) {
Log.e(TAG, "Tencent ASR failed: ${t.message}")
""
}
}.trim()
}
}
Log.d(TAG, "ASR raw result: $raw")
val text = removeTokens(raw)
val filterResult = filterText(text)
if (filterResult != null) {
@@ -220,4 +248,5 @@ class AsrManager(private val context: Context) {
}
return null
}
}

View File

@@ -0,0 +1,216 @@
package com.digitalperson.asr
import android.util.Base64
import android.util.Log
import com.digitalperson.config.AppConfig
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.OkHttpClient
import okhttp3.Request
import okhttp3.RequestBody.Companion.toRequestBody
import org.json.JSONObject
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.security.MessageDigest
import java.text.SimpleDateFormat
import java.util.Date
import java.util.Locale
import java.util.TimeZone
import java.util.concurrent.TimeUnit
import javax.crypto.Mac
import javax.crypto.spec.SecretKeySpec
/**
 * Direct REST implementation of Tencent Cloud "one-sentence recognition"
 * using TC3-HMAC-SHA256 request signing.
 *
 * Does not depend on the SDK AAR: it signs and issues the HTTP request itself
 * via OkHttp. The signing timestamp is taken from the server's `Date` response
 * header, which sidesteps AuthFailure.SignatureExpire errors caused by clock
 * drift on emulators.
 *
 * Docs: https://cloud.tencent.com/document/product/1093/35646
 */
object TencentOneSentenceAsr {
    private const val TAG = "TencentOneSentenceAsr"
    private const val HOST = "asr.tencentcloudapi.com"
    private const val ACTION = "SentenceRecognition"
    private const val VERSION = "2019-06-14"

    // One shared client: connection pooling plus fixed timeouts for both the
    // timestamp probe and the recognition request.
    private val client = OkHttpClient.Builder()
        .connectTimeout(10, TimeUnit.SECONDS)
        .readTimeout(30, TimeUnit.SECONDS)
        .build()

    /**
     * Transcribes a FloatArray (16 kHz mono, samples in -1..1) via Tencent
     * Cloud one-sentence recognition. Blocks until the HTTP response returns
     * or times out; call from an IO thread.
     *
     * @return recognized text, or "" on any failure (missing credentials,
     *         empty input, HTTP/signing error, API error response).
     */
    fun transcribePcm16Mono(pcmFloat: FloatArray): String {
        val appId = AppConfig.QCloud.APP_ID.trim()
        val sid = AppConfig.QCloud.SECRET_ID.trim()
        val skey = AppConfig.QCloud.SECRET_KEY.trim()
        if (appId.isEmpty() || sid.isEmpty() || skey.isEmpty()) {
            Log.e(TAG, "APP_ID / SECRET_ID / SECRET_KEY 为空")
            return ""
        }
        if (pcmFloat.isEmpty()) return ""
        val pcmBytes = floatToPcm16Bytes(pcmFloat)
        val pcmBase64 = Base64.encodeToString(pcmBytes, Base64.NO_WRAP)
        // Diagnostics: if RMS is near zero, the microphone most likely captured silence.
        val rms = kotlin.math.sqrt(pcmFloat.fold(0.0) { acc, v -> acc + v * v } / pcmFloat.size)
        val maxAmp = pcmFloat.maxOf { kotlin.math.abs(it) }
        Log.d(TAG, "一句话识别:${pcmFloat.size} 采样点,${pcmFloat.size / 16000.0}s${pcmBytes.size} bytes RMS=${"%.4f".format(rms)} maxAmp=${"%.4f".format(maxAmp)}")
        if (maxAmp < 0.01f) {
            Log.w(TAG, "⚠ 音频幅度极低maxAmp=${"%.5f".format(maxAmp)}),模拟器麦克风可能没有采集到声音!请检查:模拟器扩展控制 → 麦克风 → 使用宿主机麦克风")
        }
        // Use the server clock so emulator clock drift cannot invalidate the signature.
        val timestamp = fetchServerTimestamp()
        val date = utcDate(timestamp)
        val payload = buildPayload(appId, pcmBase64, pcmBytes.size)
        val auth = buildAuthorization(sid, skey, date, timestamp, payload)
        val request = Request.Builder()
            .url("https://$HOST")
            .addHeader("Authorization", auth)
            .addHeader("Content-Type", "application/json; charset=utf-8")
            .addHeader("Host", HOST)
            .addHeader("X-TC-Action", ACTION)
            .addHeader("X-TC-Version", VERSION)
            .addHeader("X-TC-Timestamp", timestamp.toString())
            .post(payload.toRequestBody("application/json; charset=utf-8".toMediaType()))
            .build()
        return try {
            // use {} guarantees the Response is closed even if reading the body throws.
            client.newCall(request).execute().use { response ->
                val body = response.body?.string().orEmpty()
                Log.d(TAG, "API 响应: ${body.take(400)}")
                parseResult(body)
            }
        } catch (e: Exception) {
            Log.e(TAG, "HTTP 请求失败: ${e.message}", e)
            ""
        }
    }

    // ─── Helpers ──────────────────────────────────────────────────────────

    /**
     * Issues a HEAD request and reads the server's Date response header for a
     * trusted timestamp. Falls back to the device clock (possibly skewed) on
     * any failure.
     */
    private fun fetchServerTimestamp(): Long {
        return try {
            val req = Request.Builder().url("https://$HOST").head().build()
            // use {} closes the response on every path, including exceptions.
            val dateHeader = client.newCall(req).execute().use { it.header("Date") }
            if (dateHeader != null) {
                val sdf = SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH)
                val serverTs = sdf.parse(dateHeader)?.time?.div(1000) ?: deviceTimestamp()
                val deviceTs = deviceTimestamp()
                val offset = serverTs - deviceTs
                if (kotlin.math.abs(offset) > 60) {
                    Log.w(TAG, "设备时钟偏差 ${offset}s使用服务器时间修正设备=${deviceTs}, 服务器=${serverTs}")
                }
                serverTs
            } else {
                deviceTimestamp()
            }
        } catch (e: Exception) {
            Log.w(TAG, "获取服务器时间失败: ${e.message},使用设备时间")
            deviceTimestamp()
        }
    }

    /** Device clock, seconds since epoch. */
    private fun deviceTimestamp() = System.currentTimeMillis() / 1000

    /** Formats the credential-scope date (yyyy-MM-dd, UTC) required by TC3 signing. */
    private fun utcDate(timestamp: Long): String {
        val sdf = SimpleDateFormat("yyyy-MM-dd", Locale.US)
        sdf.timeZone = TimeZone.getTimeZone("UTC")
        return sdf.format(Date(timestamp * 1000))
    }

    /** Builds the SentenceRecognition JSON body (16k Mandarin, raw PCM data stream). */
    private fun buildPayload(appId: String, base64: String, dataLen: Int): String =
        JSONObject().apply {
            put("ProjectId", 0)
            put("SubServiceType", 2)
            put("EngSerViceType", "16k_zh")
            put("SourceType", 1) // 1 = inline data stream
            put("VoiceFormat", "pcm")
            put("UsrAudioKey", "digital-person-asr")
            put("FilterDirty", 0)
            put("FilterModal", 0)
            put("FilterPunc", 0)
            put("ConvertNumMode", 1)
            put("Data", base64)
            put("DataLen", dataLen)
        }.toString()

    // ─── TC3-HMAC-SHA256 signing ──────────────────────────────────────────

    /**
     * Builds the Authorization header per the TC3-HMAC-SHA256 spec: canonical
     * request → string-to-sign → derived key chain (date → service → request).
     * Line order and the signed-header list must match the actual request.
     */
    private fun buildAuthorization(
        secretId: String,
        secretKey: String,
        date: String,
        timestamp: Long,
        payload: String,
    ): String {
        val payloadHash = sha256Hex(payload)
        val canonicalRequest = listOf(
            "POST", "/", "",
            "content-type:application/json; charset=utf-8",
            "host:$HOST",
            "",
            "content-type;host",
            payloadHash,
        ).joinToString("\n")
        val credentialScope = "$date/asr/tc3_request"
        val stringToSign = "TC3-HMAC-SHA256\n$timestamp\n$credentialScope\n${sha256Hex(canonicalRequest)}"
        val signingKey = hmacSha256(
            hmacSha256(hmacSha256("TC3$secretKey".toByteArray(), date), "asr"),
            "tc3_request",
        )
        val signature = hmacSha256(signingKey, stringToSign).joinToString("") { "%02x".format(it) }
        return "TC3-HMAC-SHA256 Credential=$secretId/$credentialScope, SignedHeaders=content-type;host, Signature=$signature"
    }

    /** Extracts Response.Result from the API JSON; logs and returns "" on API errors or bad JSON. */
    private fun parseResult(json: String): String {
        if (json.isBlank()) return ""
        return try {
            val response = JSONObject(json).optJSONObject("Response") ?: return ""
            val error = response.optJSONObject("Error")
            if (error != null) {
                Log.e(TAG, "API 错误: ${error.optString("Code")} - ${error.optString("Message")}")
                return ""
            }
            response.optString("Result").also { text ->
                if (text.isNotBlank()) Log.d(TAG, "识别结果: \"$text\"")
            }
        } catch (e: Exception) {
            Log.w(TAG, "解析响应失败: ${json.take(300)}", e)
            ""
        }
    }

    /** Lowercase hex SHA-256 of a UTF-8 string. */
    private fun sha256Hex(data: String): String {
        val md = MessageDigest.getInstance("SHA-256")
        return md.digest(data.toByteArray(Charsets.UTF_8)).joinToString("") { "%02x".format(it) }
    }

    /** HMAC-SHA256 of a UTF-8 string with a raw-byte key. */
    private fun hmacSha256(key: ByteArray, data: String): ByteArray {
        val mac = Mac.getInstance("HmacSHA256")
        mac.init(SecretKeySpec(key, "HmacSHA256"))
        return mac.doFinal(data.toByteArray(Charsets.UTF_8))
    }

    /** Converts float samples (-1..1) to little-endian 16-bit PCM bytes. */
    private fun floatToPcm16Bytes(samples: FloatArray): ByteArray {
        val buf = ByteBuffer.allocate(samples.size * 2).order(ByteOrder.LITTLE_ENDIAN)
        samples.forEach { buf.putShort((it.coerceIn(-1f, 1f) * 32767f).toInt().toShort()) }
        return buf.array()
    }
}

View File

@@ -116,6 +116,19 @@ object AppConfig {
const val MODEL_FILE = "bge-small-zh-v1.5.rknn"
}
/**
* 模拟器上 [RefImageMatcher] 使用编辑距离时的最低归一化分(与 BGE 余弦阈值不可混用)。
* 分数 = 1 - Levenshtein / max(len),越接近 1 越像。
*/
object RefMatchEmulator {
/**
* 模拟器混合评分(路径关键词命中率 + 编辑距离)阈值。
* 路径关键词1 个词命中 ≈ 0.25,已足够确认话题相关性。
* 原 0.82 是纯编辑距离阈值,字面差异大时根本达不到,故降至 0.20。
*/
const val MIN_NORMALIZED_EDIT_SCORE = 0.20f
}
/**
* app/note/ref 通过 Gradle 额外 assets 目录打入 apk 后,在 assets 中的根路径为 `ref/`。
*/

View File

@@ -0,0 +1,49 @@
package com.digitalperson.embedding
import kotlin.math.max
import kotlin.math.min
/**
 * Character-level similarity based on Levenshtein distance (emulator fallback:
 * no semantics, intended only for integration testing / demos).
 *
 * Score: 1 - dist / max(len1, len2). Not directly comparable with cosine
 * similarity thresholds.
 */
object EditDistanceSimilarity {

    /**
     * Normalized similarity of [a] and [b] after trimming whitespace.
     *
     * @return 1.0 when both are blank, 0.0 when exactly one is blank,
     *         otherwise `1 - levenshtein / max(length)` in [0, 1].
     */
    fun normalizedScore(a: String, b: String): Float {
        val s1 = a.trim()
        val s2 = b.trim()
        if (s1.isEmpty() && s2.isEmpty()) return 1f
        if (s1.isEmpty() || s2.isEmpty()) return 0f
        val dist = levenshtein(s1, s2)
        val denom = max(s1.length, s2.length).coerceAtLeast(1)
        return 1f - dist.toFloat() / denom.toFloat()
    }

    /**
     * Classic two-row dynamic-programming Levenshtein distance:
     * O(n·m) time, O(min(n, m)) extra space. Adequate for the
     * medium-sized emulator corpus.
     */
    fun levenshtein(s1: String, s2: String): Int {
        // Fast paths: identical or empty inputs need no DP table.
        if (s1 == s2) return 0
        if (s1.isEmpty()) return s2.length
        if (s2.isEmpty()) return s1.length
        // Distance is symmetric, so keep the shorter string on the column
        // axis to minimize the two working rows.
        val (rows, cols) = if (s1.length >= s2.length) s1 to s2 else s2 to s1
        val m = cols.length
        var prev = IntArray(m + 1) { it }
        var curr = IntArray(m + 1)
        for (i in 1..rows.length) {
            curr[0] = i
            val rowChar = rows[i - 1]
            for (j in 1..m) {
                val cost = if (rowChar == cols[j - 1]) 0 else 1
                curr[j] = min(
                    min(prev[j] + 1, curr[j - 1] + 1),
                    prev[j - 1] + cost
                )
            }
            val tmp = prev
            prev = curr
            curr = tmp
        }
        return prev[m]
    }
}

View File

@@ -4,6 +4,7 @@ import android.content.Context
import android.util.Log
import com.digitalperson.config.AppConfig
import com.digitalperson.data.AppDatabase
import com.digitalperson.data.dao.QuestionDao
import com.digitalperson.data.entity.Question
import com.digitalperson.data.entity.RefTextEmbedding
import com.digitalperson.data.util.floatArrayToEmbeddingBytes
@@ -27,15 +28,15 @@ object RefEmbeddingIndexer {
val dao = db.refTextEmbeddingDao()
val questionDao = db.questionDao()
if (!BgeEmbedding.initialize(app)) {
Log.e(TAG, "[RefEmbed] BGE 初始化失败,跳过 ref 语料索引")
return@withContext
}
val root = AppConfig.RefCorpus.ASSETS_ROOT
val paths = RefCorpusAssetScanner.listTxtFilesUnder(app, root)
Log.i(TAG, "[RefEmbed] 发现 ${paths.size} 个 txtroot=$root")
val bgeOk = BgeEmbedding.initialize(app)
if (!bgeOk) {
Log.w(TAG, "[RefEmbed] BGE 未就绪常见于模拟器仅扫描题库ref 配图匹配可用编辑距离")
}
var skipped = 0
var embedded = 0
var empty = 0
@@ -50,28 +51,9 @@ object RefEmbeddingIndexer {
continue
}
// 题库:遇到包含 ?/ 的行,写入 questions
val subject = extractSubjectFromRaw(raw)
val grade = extractGradeFromPath(path)
val questionLines = extractQuestionLines(raw)
for (line in questionLines) {
val content = line.trim()
if (content.isEmpty()) continue
val exists = questionDao.findByContentSubjectGrade(content, subject, grade)
if (exists == null) {
questionDao.insert(
Question(
id = 0,
content = content,
answer = null,
subject = subject,
grade = grade,
difficulty = 1,
createdAt = System.currentTimeMillis()
)
)
}
}
ingestQuestionsFromRaw(raw, path, questionDao)
if (!bgeOk) continue
val embedText = RefTxtEmbedText.fromRawFileContent(raw)
if (embedText.isEmpty()) {
@@ -110,10 +92,34 @@ object RefEmbeddingIndexer {
Log.i(
TAG,
"[RefEmbed] 完成 embedded=$embedded skipped=$skipped empty=$empty failed=$failed cacheSize=${RefEmbeddingMemoryCache.size()}"
"[RefEmbed] 完成 embedded=$embedded skipped=$skipped empty=$empty failed=$failed cacheSize=${RefEmbeddingMemoryCache.size()} bgeOk=$bgeOk"
)
}
/**
 * Extracts question lines from a raw ref txt and stores each one in the
 * question bank, skipping entries that already exist with the same
 * content/subject/grade.
 */
private fun ingestQuestionsFromRaw(raw: String, path: String, questionDao: QuestionDao) {
    val subject = extractSubjectFromRaw(raw)
    val grade = extractGradeFromPath(path)
    extractQuestionLines(raw)
        .asSequence()
        .map { it.trim() }
        .filter { it.isNotEmpty() }
        .forEach { content ->
            // Insert only when no identical question is already stored.
            val alreadyStored =
                questionDao.findByContentSubjectGrade(content, subject, grade) != null
            if (!alreadyStored) {
                questionDao.insert(
                    Question(
                        id = 0,
                        content = content,
                        answer = null,
                        subject = subject,
                        grade = grade,
                        difficulty = 1,
                        createdAt = System.currentTimeMillis()
                    )
                )
            }
        }
}
private fun extractSubjectFromRaw(raw: String): String? {
val line = raw.lineSequence()
.map { it.trimEnd() }

View File

@@ -3,6 +3,7 @@ package com.digitalperson.embedding
import android.content.Context
import android.util.Log
import com.digitalperson.config.AppConfig
import com.digitalperson.env.RuntimeEnv
import kotlin.math.sqrt
data class RefImageMatch(
@@ -16,7 +17,8 @@ object RefImageMatcher {
private const val TAG = AppConfig.TAG
/**
* @param threshold 余弦相似度阈值(向量已归一化时等价于 dot product
* @param threshold 真机 BGE余弦相似度阈值(向量已归一化时等价于 dot product
* 模拟器:忽略该参数,使用 [AppConfig.RefMatchEmulator.MIN_NORMALIZED_EDIT_SCORE](编辑距离归一化分)。
*/
fun findBestMatch(
context: Context,
@@ -26,6 +28,10 @@ object RefImageMatcher {
val query = text.trim()
if (query.isEmpty()) return null
if (RuntimeEnv.isEmulator()) {
return findBestMatchEditDistance(context, query)
}
if (!BgeEmbedding.isReady()) {
val ok = BgeEmbedding.initialize(context.applicationContext)
if (!ok) {
@@ -78,6 +84,203 @@ object RefImageMatcher {
)
}
/**
 * Emulator path: no BGE model loaded; uses a hybrid score of **path-keyword
 * hit ratio** (primary) plus substring/edit-distance content matching
 * (secondary).
 *
 * Path keywords: the deepest directory name split on "-", e.g.
 * "一年级上-生活适应-社会生活-元旦" → ["一年级上", "生活适应", "社会生活", "元旦"].
 * Hit ratio = hits / keyword count (one hit ≈ 0.25, enough to pass the 0.20
 * threshold).
 *
 * Pure edit distance originally used a 0.82 threshold, but LLM reply text
 * differs heavily from the short reference sentences, so even same-topic
 * pairs rarely reached it; the keyword scheme improved accuracy markedly.
 *
 * Exposed as `internal` so androidTest can regress
 * "should-have-matched-but-didn't" cases.
 */
internal fun findBestMatchEditDistance(context: Context, query: String): RefImageMatch? {
    val app = context.applicationContext
    val root = AppConfig.RefCorpus.ASSETS_ROOT
    val paths = RefCorpusAssetScanner.listTxtFilesUnder(app, root)
    val minScore = AppConfig.RefMatchEmulator.MIN_NORMALIZED_EDIT_SCORE
    // Normalize punctuation width first, so substring checks are not defeated
    // by full-width vs half-width differences.
    val qNorm = normalizeTextForEmuMatch(query.trim())
    if (qNorm.isEmpty()) return null
    var bestPath: String? = null
    var bestScore = -1f
    var bestSubstr = -1f
    var bestEdit = -1f
    for (path in paths) {
        // Primary: path-keyword hit ratio (no IO, O(1)).
        val kwScore = pathKeywordMatchScore(path, qNorm)
        // Secondary: content match (does IO).
        // Strategy: (1) substring containment (a query sentence ⊆ candidate,
        //               or a candidate sentence ⊆ a query sentence);
        //           (2) sentence-by-sentence edit distance.
        // `candidate` is the txt content with '#' lines stripped (it may hold
        // both the question and the answer); if a query sentence appears
        // verbatim inside it, the topic is a definite hit.
        // Every txt must be content-scored: gating the read on bestScore<0 or
        // kwScore>0 lets a weak edit score from another file "claim the slot"
        // first, so a file whose body contains the exact sentence is never
        // even opened (e.g. the "上厕所" directory: no keyword hit, but its
        // body contains the original sentence).
        var substrScore = 0f
        var editScore = 0f
        try {
            val raw = app.assets.open(path).bufferedReader(Charsets.UTF_8).use { it.readText() }
            val candidate = normalizeTextForEmuMatch(RefTxtEmbedText.fromRawFileContent(raw))
            if (candidate.isNotEmpty()) {
                val querySentences = splitSentences(qNorm)
                val candidateSentences = splitSentences(candidate)
                // (1) Substring: query sentence ⊆ candidate, or candidate
                //     sentence ⊆ query sentence. Very short fragments (e.g.
                //     "小朋友") occur in many lessons, so a flat high score
                //     would mismatch; grade the score by match length.
                substrScore = querySentences.maxOfOrNull { qs ->
                    var s = 0f
                    if (qs.length >= 4 && candidate.contains(qs)) {
                        s = maxOf(s, emulatorSubstringScoreForLength(qs.length))
                    }
                    for (cs in candidateSentences) {
                        if (cs.length >= 6 && qs.contains(cs)) {
                            // Slight discount: candidate-in-query is weaker evidence.
                            s = maxOf(s, emulatorSubstringScoreForLength(cs.length) * 0.92f)
                        }
                    }
                    s
                } ?: 0f
                // (2) Edit distance: best sentence-vs-sentence normalized score.
                editScore = querySentences.maxOfOrNull { qs ->
                    candidateSentences.maxOfOrNull { cs ->
                        EditDistanceSimilarity.normalizedScore(qs, cs)
                    } ?: 0f
                } ?: 0f
            }
        } catch (e: Exception) {
            Log.w(TAG, "[RefMatchEmu] read fail $path: ${e.message}")
        }
        val score = maxOf(kwScore, substrScore, editScore)
        if (score > 0f) {
            Log.v(TAG, "[RefMatchEmu] candidate score=$score (kw=$kwScore substr=$substrScore edit=$editScore) path=$path")
        }
        if (isBetterEmulatorCandidate(score, substrScore, editScore, bestScore, bestSubstr, bestEdit)) {
            bestScore = score
            bestSubstr = substrScore
            bestEdit = editScore
            bestPath = path
        }
    }
    val txtPath = bestPath ?: run {
        Log.d(TAG, "[RefMatchEmu] 无候选文件 query=${qNorm.take(60)}")
        return null
    }
    if (bestScore < minScore) {
        Log.d(TAG, "[RefMatchEmu] 分数不足 bestScore=$bestScore minScore=$minScore bestPath=$txtPath query=${qNorm.take(60)}")
        return null
    }
    // The matching image sits next to the txt with the same basename.
    val pngPath = if (txtPath.endsWith(".txt", ignoreCase = true)) {
        txtPath.dropLast(4) + ".png"
    } else {
        "$txtPath.png"
    }
    // Best-effort existence probe; a missing png means there is nothing to show.
    val exists = try {
        context.assets.open(pngPath).close()
        true
    } catch (_: Throwable) {
        false
    }
    if (!exists) return null
    Log.d(TAG, "[RefMatchEmu] best=$txtPath score=$bestScore query=${qNorm.take(30)}")
    return RefImageMatch(
        txtAssetPath = txtPath,
        pngAssetPath = pngPath,
        score = bestScore
    )
}
/**
 * Extracts topic keywords from the file path and returns the fraction of
 * them matched by the query text. E.g. a path containing the directory
 * "一年级上-生活适应-社会生活-元旦" yields the keywords
 * ["一年级上","生活适应","社会生活","元旦"].
 */
private fun pathKeywordMatchScore(path: String, query: String): Float {
    val keywords = extractPathTopicKeywords(path)
    if (keywords.isEmpty()) return 0f
    var hits = 0
    for (kw in keywords) {
        if (queryMatchesPathKeyword(query, kw)) hits++
    }
    return hits.toFloat() / keywords.size
}
/**
 * Unifies full-width and half-width punctuation before matching, so a
 * half-width `:` from code or ASR output still matches a full-width `:`
 * in the corpus (otherwise long-sentence substring matching fails).
 */
private fun normalizeTextForEmuMatch(s: String): String {
    val out = StringBuilder(s.length)
    for (ch in s) {
        val mapped = when (ch) {
            '\uFF1A', '\uFE55', ':' -> ':'
            '\uFF0C' -> ','
            '\uFF01' -> '!'
            '\uFF1F' -> '?'
            '\uFF1B' -> ';'
            else -> ch
        }
        out.append(mapped)
    }
    return out.toString()
}
/**
 * Tie-break ordering for emulator candidates: compare total score first,
 * then substring score, then edit score (all with a small epsilon). This
 * prevents a common prefix like "元旦到了,小朋友" that scores equally across
 * several lessons from always favoring whichever file was scanned first.
 */
private fun isBetterEmulatorCandidate(
    score: Float,
    substr: Float,
    edit: Float,
    bestScore: Float,
    bestSubstr: Float,
    bestEdit: Float,
): Boolean {
    val eps = 1e-5f
    // No candidate yet: anything wins.
    if (bestScore < 0f) return true
    if (kotlin.math.abs(score - bestScore) > eps) return score > bestScore
    if (kotlin.math.abs(substr - bestSubstr) > eps) return substr > bestSubstr
    return edit > bestEdit + eps
}
/**
 * Containment check between a path fragment and the query; question stems
 * often drop the leading word (directory "上厕所" vs a sentence containing
 * only "厕所"), so a one-character-trimmed variant of the keyword is also
 * tried — but only for keywords of 3+ chars, to keep e.g. "个人生活" from
 * degrading into the overly generic "生活" via a takeLast(2)-style match.
 */
private fun queryMatchesPathKeyword(query: String, kw: String): Boolean = when {
    kw in query -> true
    kw.length >= 3 -> kw.drop(1).let { it.length >= 2 && it in query }
    else -> false
}
/**
 * Score for a substring hit, graded by match length: longer matches are more
 * discriminative; very short ones (e.g. "小朋友") score low to reduce
 * cross-lesson mismatches.
 */
private fun emulatorSubstringScoreForLength(len: Int): Float {
    // Descending (minLength, score) tiers; first tier that fits wins.
    val tiers = listOf(
        18 to 0.95f,
        12 to 0.90f,
        8 to 0.82f,
        6 to 0.68f,
        4 to 0.48f,
    )
    return tiers.firstOrNull { len >= it.first }?.second ?: 0f
}
/**
 * Splits on Chinese/ASCII sentence separators and returns the non-empty
 * sentences (length >= 2). Used by the emulator edit-distance scoring so a
 * leading LLM pleasantry sentence cannot drag the whole score down.
 * Falls back to the original text when nothing survives the filter.
 */
private fun splitSentences(text: String): List<String> {
    val sentences = Regex("[,。!?;,!?;\n]+")
        .split(text)
        .mapNotNull { seg -> seg.trim().takeIf { it.length >= 2 } }
    return if (sentences.isEmpty()) listOf(text) else sentences
}
/**
 * Takes the deepest directory name, splits it on "-", strips digits, and
 * drops single-character or empty fragments; result preserves first-seen
 * order with duplicates removed.
 */
private fun extractPathTopicKeywords(path: String): List<String> {
    // Deepest directory = everything after the last '/' of the parent part;
    // a path without '/' has no directory and yields no keywords.
    val deepestDir = path.substringBeforeLast('/', "").substringAfterLast('/')
    val digits = Regex("\\d+")
    val keywords = LinkedHashSet<String>()
    for (fragment in deepestDir.split("-")) {
        val cleaned = digits.replace(fragment, "").trim()
        if (cleaned.length >= 2) keywords.add(cleaned)
    }
    return keywords.toList()
}
private fun dot(a: FloatArray, b: FloatArray): Float {
var s = 0f
for (i in a.indices) s += a[i] * b[i]

View File

@@ -0,0 +1,34 @@
package com.digitalperson.env
import android.os.Build
object RuntimeEnv {

    // Build.* values are fixed for the process lifetime, so run the heuristic
    // once and cache the verdict: isEmulator() is called on hot paths
    // (per camera frame in FaceDetectionPipeline, per ASR segment).
    private val emulatorDetected: Boolean by lazy { detectEmulator() }

    /**
     * Heuristic emulator check based on android.os.Build fields. Requires at
     * least two independent signals to avoid false positives on unusual ROMs.
     */
    fun isEmulator(): Boolean = emulatorDetected

    private fun detectEmulator(): Boolean {
        val fingerprint = Build.FINGERPRINT.orEmpty()
        val model = Build.MODEL.orEmpty()
        val brand = Build.BRAND.orEmpty()
        val device = Build.DEVICE.orEmpty()
        val product = Build.PRODUCT.orEmpty()
        val hardware = Build.HARDWARE.orEmpty()
        val manufacturer = Build.MANUFACTURER.orEmpty()
        var hits = 0
        fun hit(b: Boolean) { if (b) hits++ }
        hit(fingerprint.startsWith("generic", ignoreCase = true))
        hit(fingerprint.contains("unknown", ignoreCase = true))
        hit(model.contains("google_sdk", ignoreCase = true))
        hit(model.contains("emulator", ignoreCase = true))
        hit(model.contains("android sdk built for", ignoreCase = true))
        hit(manufacturer.contains("genymotion", ignoreCase = true))
        hit(brand.startsWith("generic", ignoreCase = true) && device.startsWith("generic", ignoreCase = true))
        hit(product.contains("sdk", ignoreCase = true))
        hit(product.contains("emulator", ignoreCase = true))
        hit(hardware.contains("goldfish", ignoreCase = true))
        hit(hardware.contains("ranchu", ignoreCase = true))
        // Require multiple signals to avoid false positives on weird ROMs.
        return hits >= 2
    }
}

View File

@@ -5,6 +5,7 @@ import android.graphics.Bitmap
import android.util.Log
import com.digitalperson.config.AppConfig
import com.digitalperson.engine.RetinaFaceEngineRKNN
import com.digitalperson.env.RuntimeEnv
import java.util.ArrayDeque
import java.util.concurrent.atomic.AtomicBoolean
import kotlinx.coroutines.CoroutineScope
@@ -35,6 +36,12 @@ class FaceDetectionPipeline(
private val onResult: (FaceDetectionResult) -> Unit,
private val onPresenceChanged: (present: Boolean, isFrontal: Boolean, faceIdentityId: String?, recognizedName: String?) -> Unit,
) {
companion object {
    /** Fixed face ID used on emulators; corresponds to the userId in UserMemory. */
    const val EMULATOR_FACE_ID = "face_emulator"
    /** Fixed display name on emulators; passed to the coordinator directly as recognizedName. */
    const val EMULATOR_FACE_NAME = "小黑"
}
private val appContext = context.applicationContext
private val engine = RetinaFaceEngineRKNN()
private val recognizer = FaceRecognizer(appContext)
@@ -50,6 +57,11 @@ class FaceDetectionPipeline(
private val fusionQualities = ArrayDeque<Float>()
fun initialize(): Boolean {
if (RuntimeEnv.isEmulator()) {
Log.i(AppConfig.TAG, "[Face] 模拟器模式:跳过 RKNN 初始化,固定返回身份「$EMULATOR_FACE_NAME」")
initialized.set(true)
return true
}
val detectorOk = engine.initialize(appContext)
val recognizerOk = recognizer.initialize()
val ok = detectorOk && recognizerOk
@@ -68,6 +80,31 @@ class FaceDetectionPipeline(
return
}
// 模拟器:跳过 RKNN 检测,固定上报一张居中正脸
if (RuntimeEnv.isEmulator()) {
scope.launch {
try {
val w = bitmap.width
val h = bitmap.height
val fakeBox = FaceBox(
left = w * 0.25f,
top = h * 0.15f,
right = w * 0.75f,
bottom = h * 0.85f,
score = 0.99f,
)
withContext(Dispatchers.Main) {
onPresenceChanged(true, true, EMULATOR_FACE_ID, EMULATOR_FACE_NAME)
onResult(FaceDetectionResult(w, h, listOf(fakeBox)))
}
} finally {
bitmap.recycle()
frameInFlight.set(false)
}
}
return
}
scope.launch {
try {
val width = bitmap.width

View File

@@ -148,6 +148,8 @@ abstract class BaseDigitalPersonCoordinator(
* (i.e. after a cloud LLM response), NOT after greeting / farewell / proactive TTS.
*/
fun onTtsPlaybackCompleted() {
// Let the controller advance its own timers (greeting/proactive/dlg all count as assistant speaking).
controller.onAssistantTtsPlaybackCompleted()
if (pendingDialogueFinish) {
pendingDialogueFinish = false
controller.onDialogueResponseFinished()

View File

@@ -64,6 +64,10 @@ class DigitalHumanInteractionController(
private var memoryJob: Job? = null
private var farewellJob: Job? = null
// 让超时/间隔从 TTS 播放完成后开始计时,而不是从 speak() 调用时开始
private var pendingWaitReplyTimeoutAfterTts: Boolean = false
private var pendingProactiveFollowupAfterTts: Boolean = false
fun start() {
transitionTo(InteractionState.IDLE)
scheduleMemoryMode()
@@ -204,7 +208,7 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
return
}
transitionTo(InteractionState.WAITING_REPLY)
scheduleWaitingReplyTimeout()
scheduleWaitingReplyTimeoutAfterTts()
}
private fun enterGreeting() {
@@ -224,7 +228,7 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
handler.addToChatHistory("assistant", greeting)
handler.addAssistantMessageToCloudHistory(greeting)
transitionTo(InteractionState.WAITING_REPLY)
scheduleWaitingReplyTimeout()
scheduleWaitingReplyTimeoutAfterTts()
} else {
useDefaultGreeting()
}
@@ -243,7 +247,11 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
handler.addAssistantMessageToCloudHistory(greeting)
transitionTo(InteractionState.WAITING_REPLY)
scheduleWaitingReplyTimeout()
scheduleWaitingReplyTimeoutAfterTts()
}
/**
 * Defers the waiting-reply timeout: sets a flag consumed when the current TTS
 * playback completes, so the timeout counts from the end of speech rather
 * than from the speak() call.
 */
private fun scheduleWaitingReplyTimeoutAfterTts() {
    pendingWaitReplyTimeoutAfterTts = true
}
private fun scheduleWaitingReplyTimeout() {
@@ -282,21 +290,34 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
// 触发题目生成检查
handler.onQuestionAsked(currentFaceId ?: "guest")
proactiveJob = scope.launch {
hasPendingUserReply = false
delay(20_000)
if (state != InteractionState.PROACTIVE || hasPendingUserReply) return@launch
if (!facePresent) {
enterFarewell()
return@launch
}
proactiveRound += 1
if (proactiveRound < 3) {
askProactiveTopic()
} else {
transitionTo(InteractionState.WAITING_REPLY)
// handler.playMotion("haru_g_m17.motion3.json")
scheduleWaitingReplyTimeout()
// 不立刻开始 20s 计时;等 TTS 播放完再开始计时,避免“刚说完几秒就又问”
pendingProactiveFollowupAfterTts = true
}
/** 由 Activity 在「本轮 TTS 完整播放完成」时调用(包括问候/主动提问/对话回复)。 */
fun onAssistantTtsPlaybackCompleted() {
if (pendingWaitReplyTimeoutAfterTts && state == InteractionState.WAITING_REPLY) {
pendingWaitReplyTimeoutAfterTts = false
scheduleWaitingReplyTimeout()
}
if (pendingProactiveFollowupAfterTts && state == InteractionState.PROACTIVE) {
pendingProactiveFollowupAfterTts = false
proactiveJob?.cancel()
proactiveJob = scope.launch {
hasPendingUserReply = false
delay(20_000)
if (state != InteractionState.PROACTIVE || hasPendingUserReply) return@launch
if (!facePresent) {
enterFarewell()
return@launch
}
proactiveRound += 1
if (proactiveRound < 3) {
askProactiveTopic()
} else {
transitionTo(InteractionState.WAITING_REPLY)
scheduleWaitingReplyTimeoutAfterTts()
}
}
}
}

View File

@@ -27,6 +27,11 @@ class TtsController(private val context: Context) {
private var callback: TtsCallback? = null
// 防止 WebSocket 重连或多路回调导致同一段文案短时间内重复入队、重复播报
@Volatile private var lastEnqueuedText: String? = null
@Volatile private var lastEnqueuedAtMs: Long = 0L
private val dedupeWindowMs = 2500L
fun setCallback(callback: TtsCallback) {
this.callback = callback
bindCallbacksIfReady()
@@ -147,6 +152,14 @@ class TtsController(private val context: Context) {
fun enqueueSegment(seg: String) {
val cleaned = seg.replace(Regex("\\[.*?\\]"), "").trim()
if (cleaned.isEmpty()) return
val now = System.currentTimeMillis()
val lastText = lastEnqueuedText
if (lastText != null && lastText == cleaned && (now - lastEnqueuedAtMs) <= dedupeWindowMs) {
Log.w(TAG, "Skip duplicate TTS segment within ${dedupeWindowMs}ms: ${cleaned.take(60)}")
return
}
lastEnqueuedText = cleaned
lastEnqueuedAtMs = now
if (useQCloudTts) {
qcloudTts?.enqueueSegment(cleaned)
} else {