add photo
This commit is contained in:
@@ -6,6 +6,7 @@ import android.content.Context
|
||||
import android.content.pm.PackageManager
|
||||
import android.content.res.ColorStateList
|
||||
import android.graphics.Color
|
||||
import android.graphics.BitmapFactory
|
||||
import android.os.Build
|
||||
import android.os.Bundle
|
||||
import android.os.Handler
|
||||
@@ -16,6 +17,7 @@ import android.util.Log
|
||||
import android.view.MotionEvent
|
||||
import android.view.ViewGroup
|
||||
import android.widget.Button
|
||||
import android.widget.ImageView
|
||||
import android.widget.TextView
|
||||
import android.widget.Toast
|
||||
import androidx.core.app.ActivityCompat
|
||||
@@ -25,7 +27,6 @@ import android.view.View
|
||||
import androidx.lifecycle.Lifecycle
|
||||
import androidx.lifecycle.LifecycleOwner
|
||||
import androidx.lifecycle.LifecycleRegistry
|
||||
import android.widget.ImageView
|
||||
import com.unity3d.player.UnityPlayer
|
||||
import com.unity3d.player.UnityPlayerActivity
|
||||
import com.digitalperson.audio.AudioProcessor
|
||||
@@ -33,6 +34,7 @@ import com.digitalperson.asr.AsrManager
|
||||
import com.digitalperson.cloud.CloudApiManager
|
||||
import com.digitalperson.cloud.CloudReflectionHelper
|
||||
import com.digitalperson.config.AppConfig
|
||||
import com.digitalperson.embedding.RefImageMatcher
|
||||
import com.digitalperson.question.QuestionGenerationAgent
|
||||
import com.digitalperson.data.AppDatabase
|
||||
import com.digitalperson.face.FaceDetectionPipeline
|
||||
@@ -48,8 +50,6 @@ import com.digitalperson.tts.TtsController
|
||||
import com.digitalperson.util.FileHelper
|
||||
import com.digitalperson.vad.VadManager
|
||||
import kotlinx.coroutines.*
|
||||
import com.digitalperson.embedding.RefImageMatcher
|
||||
import android.graphics.BitmapFactory
|
||||
|
||||
class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
|
||||
@@ -260,6 +260,11 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
recordButtonGlow = chatLayout.findViewById(R.id.record_button_glow)
|
||||
refMatchImageView = chatLayout.findViewById(R.id.ref_match_image)
|
||||
|
||||
if (!AppConfig.SHOW_DEBUG_TEXT) {
|
||||
chatHistoryText.visibility = View.GONE
|
||||
chatLayout.findViewById<View>(R.id.scroll_view).visibility = View.GONE
|
||||
}
|
||||
|
||||
// 根据配置设置按钮可见性
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
holdToSpeakButton.visibility = View.VISIBLE
|
||||
@@ -366,6 +371,8 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
override fun onLlmCalled(text: String) {
|
||||
Log.d("UnityDigitalPerson", "LLM called with: $text")
|
||||
interactionCoordinator.onUserAsrText(text)
|
||||
// 用用户问题提前匹配:比等 LLM 回复更早显示图片(模拟器/真机通用)
|
||||
maybeShowMatchedRefImage(text)
|
||||
}
|
||||
})
|
||||
setAudioProcessor(audioProcessor)
|
||||
@@ -664,6 +671,7 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
}
|
||||
|
||||
private fun appendChat(text: String) {
|
||||
if (!AppConfig.SHOW_DEBUG_TEXT) return
|
||||
runOnUiThread {
|
||||
chatHistoryText.append(text + "\n")
|
||||
}
|
||||
@@ -696,6 +704,8 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
override fun onSpeak(text: String) {
|
||||
ttsController.enqueueSegment(text)
|
||||
ttsController.enqueueEnd()
|
||||
// 主动发言(问候/主动提问)也尝试匹配参考图片
|
||||
maybeShowMatchedRefImage(text)
|
||||
}
|
||||
|
||||
override fun onRequestCloudReply(prompt: String) {
|
||||
@@ -759,13 +769,22 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
|
||||
private fun maybeShowMatchedRefImage(text: String) {
|
||||
val imageView = refMatchImageView ?: return
|
||||
// Unity Activity already has coroutines
|
||||
CoroutineScope(SupervisorJob() + Dispatchers.IO).launch {
|
||||
// 每次匹配前先清掉上一张图
|
||||
runOnUiThread {
|
||||
imageView.setImageBitmap(null)
|
||||
imageView.visibility = View.GONE
|
||||
}
|
||||
ioScope.launch {
|
||||
val match = RefImageMatcher.findBestMatch(applicationContext, text)
|
||||
if (match == null) return@launch
|
||||
if (match == null) {
|
||||
Log.d("RefImageMatch", "未找到匹配图片 query=\"${text.take(80)}\"")
|
||||
return@launch
|
||||
}
|
||||
Log.d("RefImageMatch", "匹配成功 score=${match.score} path=${match.pngAssetPath} query=\"${text.take(80)}\"")
|
||||
val bitmap = try {
|
||||
assets.open(match.pngAssetPath).use { BitmapFactory.decodeStream(it) }
|
||||
} catch (_: Throwable) {
|
||||
} catch (e: Throwable) {
|
||||
Log.w("RefImageMatch", "图片加载失败 path=${match.pngAssetPath}", e)
|
||||
null
|
||||
}
|
||||
if (bitmap == null) return@launch
|
||||
|
||||
@@ -6,6 +6,7 @@ import android.util.Log
|
||||
import com.digitalperson.BuildConfig
|
||||
import com.digitalperson.audio.AudioProcessor
|
||||
import com.digitalperson.config.AppConfig
|
||||
import com.digitalperson.env.RuntimeEnv
|
||||
import com.digitalperson.engine.SenseVoiceEngineRKNN
|
||||
import com.digitalperson.util.FileHelper
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
@@ -23,7 +24,6 @@ class AsrManager(private val context: Context) {
|
||||
|
||||
private var senseVoice: SenseVoiceEngineRKNN? = null
|
||||
private val nativeLock = Any()
|
||||
|
||||
private val asrQueue = Channel<Pair<FloatArray, FloatArray>>(capacity = Channel.UNLIMITED)
|
||||
|
||||
private var audioProcessor: AudioProcessor? = null
|
||||
@@ -48,6 +48,10 @@ class AsrManager(private val context: Context) {
|
||||
}
|
||||
|
||||
fun initSenseVoiceModel(): Boolean {
|
||||
if (RuntimeEnv.isEmulator()) {
|
||||
Log.w(TAG, "ASR: emulator detected; skip local RKNN init and use cloud ASR")
|
||||
return false
|
||||
}
|
||||
return try {
|
||||
Log.i(TAG, "ASR: init SenseVoice RKNN (scheme A)")
|
||||
|
||||
@@ -133,23 +137,47 @@ class AsrManager(private val context: Context) {
|
||||
Log.d(TAG, "ASR started: processing audio segment")
|
||||
|
||||
saveAsrAudio(originalSeg, processedSeg)
|
||||
|
||||
val raw = synchronized(nativeLock) {
|
||||
|
||||
val localText = synchronized(nativeLock) {
|
||||
val e = senseVoice
|
||||
if (e == null || !e.isInitialized) {
|
||||
Log.e(TAG, "ASR failed: SenseVoice engine not initialized")
|
||||
""
|
||||
} else {
|
||||
try {
|
||||
e.transcribeBuffer(processedSeg)
|
||||
} catch (e: Throwable) {
|
||||
Log.e(TAG, "ASR transcribe failed: ${e.message}")
|
||||
removeTokens(e.transcribeBuffer(processedSeg))
|
||||
} catch (t: Throwable) {
|
||||
Log.e(TAG, "ASR transcribe failed: ${t.message}")
|
||||
""
|
||||
}
|
||||
}
|
||||
}.trim()
|
||||
|
||||
val text = if (localText.isNotBlank()) {
|
||||
localText
|
||||
} else {
|
||||
// 模拟器或本地 RKNN 未就绪:使用腾讯云「一句话识别」SDK(app/libs/asr-one-sentence-release.aar)
|
||||
val shouldTryTencent =
|
||||
BuildConfig.HAS_TENCENT_ASR_SDK && (RuntimeEnv.isEmulator() || !isInitialized())
|
||||
if (!shouldTryTencent) {
|
||||
Log.e(
|
||||
TAG,
|
||||
"ASR failed: local RKNN not ready and Tencent SDK unavailable " +
|
||||
"(add libs/asr-one-sentence-release.aar or fix SenseVoice init)"
|
||||
)
|
||||
""
|
||||
} else {
|
||||
withContext(Dispatchers.IO) {
|
||||
try {
|
||||
// 云端 ASR 使用原始录音(未经 AEC/NS):
|
||||
// 模拟器上 AEC/NS 不可用,processedSeg 可能被处理成近似静音
|
||||
TencentOneSentenceAsr.transcribePcm16Mono(originalSeg)
|
||||
} catch (t: Throwable) {
|
||||
Log.e(TAG, "Tencent ASR failed: ${t.message}")
|
||||
""
|
||||
}
|
||||
}.trim()
|
||||
}
|
||||
}
|
||||
Log.d(TAG, "ASR raw result: $raw")
|
||||
val text = removeTokens(raw)
|
||||
|
||||
val filterResult = filterText(text)
|
||||
if (filterResult != null) {
|
||||
@@ -220,4 +248,5 @@ class AsrManager(private val context: Context) {
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
216
app/src/main/java/com/digitalperson/asr/TencentOneSentenceAsr.kt
Normal file
216
app/src/main/java/com/digitalperson/asr/TencentOneSentenceAsr.kt
Normal file
@@ -0,0 +1,216 @@
|
||||
package com.digitalperson.asr
|
||||
|
||||
import android.util.Base64
|
||||
import android.util.Log
|
||||
import com.digitalperson.config.AppConfig
|
||||
import okhttp3.MediaType.Companion.toMediaType
|
||||
import okhttp3.OkHttpClient
|
||||
import okhttp3.Request
|
||||
import okhttp3.RequestBody.Companion.toRequestBody
|
||||
import org.json.JSONObject
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.ByteOrder
|
||||
import java.security.MessageDigest
|
||||
import java.text.SimpleDateFormat
|
||||
import java.util.Date
|
||||
import java.util.Locale
|
||||
import java.util.TimeZone
|
||||
import java.util.concurrent.TimeUnit
|
||||
import javax.crypto.Mac
|
||||
import javax.crypto.spec.SecretKeySpec
|
||||
|
||||
/**
|
||||
* 腾讯云「一句话识别」REST API 直接实现(TC3-HMAC-SHA256 签名)。
|
||||
*
|
||||
* 不依赖 SDK AAR,而是用 OkHttp 自行签名并发起 HTTP 请求。
|
||||
* 签名时间戳从服务器 Date 响应头获取,彻底规避模拟器时钟偏差导致的
|
||||
* AuthFailure.SignatureExpire 错误。
|
||||
*
|
||||
* 文档:https://cloud.tencent.com/document/product/1093/35646
|
||||
*/
|
||||
object TencentOneSentenceAsr {
|
||||
|
||||
private const val TAG = "TencentOneSentenceAsr"
|
||||
private const val HOST = "asr.tencentcloudapi.com"
|
||||
private const val ACTION = "SentenceRecognition"
|
||||
private const val VERSION = "2019-06-14"
|
||||
|
||||
private val client = OkHttpClient.Builder()
|
||||
.connectTimeout(10, TimeUnit.SECONDS)
|
||||
.readTimeout(30, TimeUnit.SECONDS)
|
||||
.build()
|
||||
|
||||
/**
|
||||
* 将 FloatArray (16kHz mono, -1..1) 通过腾讯云一句话识别转为文字。
|
||||
* 阻塞直到 HTTP 响应返回或超时。请在 IO 线程中调用。
|
||||
*/
|
||||
fun transcribePcm16Mono(pcmFloat: FloatArray): String {
|
||||
val appId = AppConfig.QCloud.APP_ID.trim()
|
||||
val sid = AppConfig.QCloud.SECRET_ID.trim()
|
||||
val skey = AppConfig.QCloud.SECRET_KEY.trim()
|
||||
if (appId.isEmpty() || sid.isEmpty() || skey.isEmpty()) {
|
||||
Log.e(TAG, "APP_ID / SECRET_ID / SECRET_KEY 为空")
|
||||
return ""
|
||||
}
|
||||
if (pcmFloat.isEmpty()) return ""
|
||||
|
||||
val pcmBytes = floatToPcm16Bytes(pcmFloat)
|
||||
val pcmBase64 = Base64.encodeToString(pcmBytes, Base64.NO_WRAP)
|
||||
|
||||
// 诊断:检查音频幅度,若 RMS 接近 0 说明麦克风没采集到声音
|
||||
val rms = kotlin.math.sqrt(pcmFloat.fold(0.0) { acc, v -> acc + v * v } / pcmFloat.size)
|
||||
val maxAmp = pcmFloat.maxOf { kotlin.math.abs(it) }
|
||||
Log.d(TAG, "一句话识别:${pcmFloat.size} 采样点,${pcmFloat.size / 16000.0}s,${pcmBytes.size} bytes RMS=${"%.4f".format(rms)} maxAmp=${"%.4f".format(maxAmp)}")
|
||||
if (maxAmp < 0.01f) {
|
||||
Log.w(TAG, "⚠ 音频幅度极低(maxAmp=${"%.5f".format(maxAmp)}),模拟器麦克风可能没有采集到声音!请检查:模拟器扩展控制 → 麦克风 → 使用宿主机麦克风")
|
||||
}
|
||||
|
||||
// 从服务器取时间,修正模拟器时钟偏差
|
||||
val timestamp = fetchServerTimestamp()
|
||||
val date = utcDate(timestamp)
|
||||
|
||||
val payload = buildPayload(appId, pcmBase64, pcmBytes.size)
|
||||
val auth = buildAuthorization(sid, skey, date, timestamp, payload)
|
||||
|
||||
val request = Request.Builder()
|
||||
.url("https://$HOST")
|
||||
.addHeader("Authorization", auth)
|
||||
.addHeader("Content-Type", "application/json; charset=utf-8")
|
||||
.addHeader("Host", HOST)
|
||||
.addHeader("X-TC-Action", ACTION)
|
||||
.addHeader("X-TC-Version", VERSION)
|
||||
.addHeader("X-TC-Timestamp", timestamp.toString())
|
||||
.post(payload.toRequestBody("application/json; charset=utf-8".toMediaType()))
|
||||
.build()
|
||||
|
||||
return try {
|
||||
val response = client.newCall(request).execute()
|
||||
val body = response.body?.string().orEmpty()
|
||||
Log.d(TAG, "API 响应: ${body.take(400)}")
|
||||
parseResult(body)
|
||||
} catch (e: Exception) {
|
||||
Log.e(TAG, "HTTP 请求失败: ${e.message}", e)
|
||||
""
|
||||
}
|
||||
}
|
||||
|
||||
// ─── 工具方法 ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 向服务器发送 HEAD 请求,从 Date 响应头获取精确时间戳。
|
||||
* 若请求失败则回退到设备时钟(可能有偏差)。
|
||||
*/
|
||||
private fun fetchServerTimestamp(): Long {
|
||||
return try {
|
||||
val req = Request.Builder().url("https://$HOST").head().build()
|
||||
val resp = client.newCall(req).execute()
|
||||
val dateHeader = resp.header("Date")
|
||||
resp.close()
|
||||
if (dateHeader != null) {
|
||||
val sdf = SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH)
|
||||
val serverTs = sdf.parse(dateHeader)?.time?.div(1000) ?: deviceTimestamp()
|
||||
val deviceTs = deviceTimestamp()
|
||||
val offset = serverTs - deviceTs
|
||||
if (kotlin.math.abs(offset) > 60) {
|
||||
Log.w(TAG, "设备时钟偏差 ${offset}s,使用服务器时间修正(设备=${deviceTs}, 服务器=${serverTs})")
|
||||
}
|
||||
serverTs
|
||||
} else {
|
||||
deviceTimestamp()
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
Log.w(TAG, "获取服务器时间失败: ${e.message},使用设备时间")
|
||||
deviceTimestamp()
|
||||
}
|
||||
}
|
||||
|
||||
private fun deviceTimestamp() = System.currentTimeMillis() / 1000
|
||||
|
||||
private fun utcDate(timestamp: Long): String {
|
||||
val sdf = SimpleDateFormat("yyyy-MM-dd", Locale.US)
|
||||
sdf.timeZone = TimeZone.getTimeZone("UTC")
|
||||
return sdf.format(Date(timestamp * 1000))
|
||||
}
|
||||
|
||||
private fun buildPayload(appId: String, base64: String, dataLen: Int): String =
|
||||
JSONObject().apply {
|
||||
put("ProjectId", 0)
|
||||
put("SubServiceType", 2)
|
||||
put("EngSerViceType", "16k_zh")
|
||||
put("SourceType", 1) // 1 = 数据流
|
||||
put("VoiceFormat", "pcm")
|
||||
put("UsrAudioKey", "digital-person-asr")
|
||||
put("FilterDirty", 0)
|
||||
put("FilterModal", 0)
|
||||
put("FilterPunc", 0)
|
||||
put("ConvertNumMode", 1)
|
||||
put("Data", base64)
|
||||
put("DataLen", dataLen)
|
||||
}.toString()
|
||||
|
||||
// ─── TC3-HMAC-SHA256 签名 ──────────────────────────────────────────────
|
||||
|
||||
private fun buildAuthorization(
|
||||
secretId: String,
|
||||
secretKey: String,
|
||||
date: String,
|
||||
timestamp: Long,
|
||||
payload: String,
|
||||
): String {
|
||||
val payloadHash = sha256Hex(payload)
|
||||
val canonicalRequest = listOf(
|
||||
"POST", "/", "",
|
||||
"content-type:application/json; charset=utf-8",
|
||||
"host:$HOST",
|
||||
"",
|
||||
"content-type;host",
|
||||
payloadHash,
|
||||
).joinToString("\n")
|
||||
|
||||
val credentialScope = "$date/asr/tc3_request"
|
||||
val stringToSign = "TC3-HMAC-SHA256\n$timestamp\n$credentialScope\n${sha256Hex(canonicalRequest)}"
|
||||
|
||||
val signingKey = hmacSha256(
|
||||
hmacSha256(hmacSha256("TC3$secretKey".toByteArray(), date), "asr"),
|
||||
"tc3_request",
|
||||
)
|
||||
val signature = hmacSha256(signingKey, stringToSign).joinToString("") { "%02x".format(it) }
|
||||
|
||||
return "TC3-HMAC-SHA256 Credential=$secretId/$credentialScope, SignedHeaders=content-type;host, Signature=$signature"
|
||||
}
|
||||
|
||||
private fun parseResult(json: String): String {
|
||||
if (json.isBlank()) return ""
|
||||
return try {
|
||||
val response = JSONObject(json).optJSONObject("Response") ?: return ""
|
||||
val error = response.optJSONObject("Error")
|
||||
if (error != null) {
|
||||
Log.e(TAG, "API 错误: ${error.optString("Code")} - ${error.optString("Message")}")
|
||||
return ""
|
||||
}
|
||||
response.optString("Result").also { text ->
|
||||
if (text.isNotBlank()) Log.d(TAG, "识别结果: \"$text\"")
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
Log.w(TAG, "解析响应失败: ${json.take(300)}")
|
||||
""
|
||||
}
|
||||
}
|
||||
|
||||
private fun sha256Hex(data: String): String {
|
||||
val md = MessageDigest.getInstance("SHA-256")
|
||||
return md.digest(data.toByteArray(Charsets.UTF_8)).joinToString("") { "%02x".format(it) }
|
||||
}
|
||||
|
||||
private fun hmacSha256(key: ByteArray, data: String): ByteArray {
|
||||
val mac = Mac.getInstance("HmacSHA256")
|
||||
mac.init(SecretKeySpec(key, "HmacSHA256"))
|
||||
return mac.doFinal(data.toByteArray(Charsets.UTF_8))
|
||||
}
|
||||
|
||||
private fun floatToPcm16Bytes(samples: FloatArray): ByteArray {
|
||||
val buf = ByteBuffer.allocate(samples.size * 2).order(ByteOrder.LITTLE_ENDIAN)
|
||||
samples.forEach { buf.putShort((it.coerceIn(-1f, 1f) * 32767f).toInt().toShort()) }
|
||||
return buf.array()
|
||||
}
|
||||
}
|
||||
@@ -116,6 +116,19 @@ object AppConfig {
|
||||
const val MODEL_FILE = "bge-small-zh-v1.5.rknn"
|
||||
}
|
||||
|
||||
/**
|
||||
* 模拟器上 [RefImageMatcher] 使用编辑距离时的最低归一化分(与 BGE 余弦阈值不可混用)。
|
||||
* 分数 = 1 - Levenshtein / max(len),越接近 1 越像。
|
||||
*/
|
||||
object RefMatchEmulator {
|
||||
/**
|
||||
* 模拟器混合评分(路径关键词命中率 + 编辑距离)阈值。
|
||||
* 路径关键词:1 个词命中 ≈ 0.25,已足够确认话题相关性。
|
||||
* 原 0.82 是纯编辑距离阈值,字面差异大时根本达不到,故降至 0.20。
|
||||
*/
|
||||
const val MIN_NORMALIZED_EDIT_SCORE = 0.20f
|
||||
}
|
||||
|
||||
/**
|
||||
* app/note/ref 通过 Gradle 额外 assets 目录打入 apk 后,在 assets 中的根路径为 `ref/`。
|
||||
*/
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
package com.digitalperson.embedding
|
||||
|
||||
import kotlin.math.max
|
||||
import kotlin.math.min
|
||||
|
||||
/**
|
||||
* 基于 Levenshtein 的字符级相似度(模拟器兜底,无语义,仅用于联调/演示)。
|
||||
*
|
||||
* 分数:1 - dist / max(len1, len2),与余弦相似度不可直接对比阈值。
|
||||
*/
|
||||
object EditDistanceSimilarity {
|
||||
|
||||
fun normalizedScore(a: String, b: String): Float {
|
||||
val s1 = a.trim()
|
||||
val s2 = b.trim()
|
||||
if (s1.isEmpty() && s2.isEmpty()) return 1f
|
||||
if (s1.isEmpty() || s2.isEmpty()) return 0f
|
||||
val dist = levenshtein(s1, s2)
|
||||
val denom = max(s1.length, s2.length).coerceAtLeast(1)
|
||||
return 1f - dist.toFloat() / denom.toFloat()
|
||||
}
|
||||
|
||||
/**
|
||||
* 经典双行 DP,O(n·m);仅适用于模拟器上中等规模语料。
|
||||
*/
|
||||
fun levenshtein(s1: String, s2: String): Int {
|
||||
val n = s1.length
|
||||
val m = s2.length
|
||||
if (n == 0) return m
|
||||
if (m == 0) return n
|
||||
var prev = IntArray(m + 1) { it }
|
||||
var curr = IntArray(m + 1)
|
||||
for (i in 1..n) {
|
||||
curr[0] = i
|
||||
val c1 = s1[i - 1]
|
||||
for (j in 1..m) {
|
||||
val cost = if (c1 == s2[j - 1]) 0 else 1
|
||||
curr[j] = min(
|
||||
min(prev[j] + 1, curr[j - 1] + 1),
|
||||
prev[j - 1] + cost
|
||||
)
|
||||
}
|
||||
val tmp = prev
|
||||
prev = curr
|
||||
curr = tmp
|
||||
}
|
||||
return prev[m]
|
||||
}
|
||||
}
|
||||
@@ -4,6 +4,7 @@ import android.content.Context
|
||||
import android.util.Log
|
||||
import com.digitalperson.config.AppConfig
|
||||
import com.digitalperson.data.AppDatabase
|
||||
import com.digitalperson.data.dao.QuestionDao
|
||||
import com.digitalperson.data.entity.Question
|
||||
import com.digitalperson.data.entity.RefTextEmbedding
|
||||
import com.digitalperson.data.util.floatArrayToEmbeddingBytes
|
||||
@@ -27,15 +28,15 @@ object RefEmbeddingIndexer {
|
||||
val dao = db.refTextEmbeddingDao()
|
||||
val questionDao = db.questionDao()
|
||||
|
||||
if (!BgeEmbedding.initialize(app)) {
|
||||
Log.e(TAG, "[RefEmbed] BGE 初始化失败,跳过 ref 语料索引")
|
||||
return@withContext
|
||||
}
|
||||
|
||||
val root = AppConfig.RefCorpus.ASSETS_ROOT
|
||||
val paths = RefCorpusAssetScanner.listTxtFilesUnder(app, root)
|
||||
Log.i(TAG, "[RefEmbed] 发现 ${paths.size} 个 txt(root=$root)")
|
||||
|
||||
val bgeOk = BgeEmbedding.initialize(app)
|
||||
if (!bgeOk) {
|
||||
Log.w(TAG, "[RefEmbed] BGE 未就绪(常见于模拟器),仅扫描题库;ref 配图匹配可用编辑距离")
|
||||
}
|
||||
|
||||
var skipped = 0
|
||||
var embedded = 0
|
||||
var empty = 0
|
||||
@@ -50,28 +51,9 @@ object RefEmbeddingIndexer {
|
||||
continue
|
||||
}
|
||||
|
||||
// 题库:遇到包含 ?/? 的行,写入 questions
|
||||
val subject = extractSubjectFromRaw(raw)
|
||||
val grade = extractGradeFromPath(path)
|
||||
val questionLines = extractQuestionLines(raw)
|
||||
for (line in questionLines) {
|
||||
val content = line.trim()
|
||||
if (content.isEmpty()) continue
|
||||
val exists = questionDao.findByContentSubjectGrade(content, subject, grade)
|
||||
if (exists == null) {
|
||||
questionDao.insert(
|
||||
Question(
|
||||
id = 0,
|
||||
content = content,
|
||||
answer = null,
|
||||
subject = subject,
|
||||
grade = grade,
|
||||
difficulty = 1,
|
||||
createdAt = System.currentTimeMillis()
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
ingestQuestionsFromRaw(raw, path, questionDao)
|
||||
|
||||
if (!bgeOk) continue
|
||||
|
||||
val embedText = RefTxtEmbedText.fromRawFileContent(raw)
|
||||
if (embedText.isEmpty()) {
|
||||
@@ -110,10 +92,34 @@ object RefEmbeddingIndexer {
|
||||
|
||||
Log.i(
|
||||
TAG,
|
||||
"[RefEmbed] 完成 embedded=$embedded skipped=$skipped empty=$empty failed=$failed cacheSize=${RefEmbeddingMemoryCache.size()}"
|
||||
"[RefEmbed] 完成 embedded=$embedded skipped=$skipped empty=$empty failed=$failed cacheSize=${RefEmbeddingMemoryCache.size()} bgeOk=$bgeOk"
|
||||
)
|
||||
}
|
||||
|
||||
private fun ingestQuestionsFromRaw(raw: String, path: String, questionDao: QuestionDao) {
|
||||
val subject = extractSubjectFromRaw(raw)
|
||||
val grade = extractGradeFromPath(path)
|
||||
val questionLines = extractQuestionLines(raw)
|
||||
for (line in questionLines) {
|
||||
val content = line.trim()
|
||||
if (content.isEmpty()) continue
|
||||
val exists = questionDao.findByContentSubjectGrade(content, subject, grade)
|
||||
if (exists == null) {
|
||||
questionDao.insert(
|
||||
Question(
|
||||
id = 0,
|
||||
content = content,
|
||||
answer = null,
|
||||
subject = subject,
|
||||
grade = grade,
|
||||
difficulty = 1,
|
||||
createdAt = System.currentTimeMillis()
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun extractSubjectFromRaw(raw: String): String? {
|
||||
val line = raw.lineSequence()
|
||||
.map { it.trimEnd() }
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.digitalperson.embedding
|
||||
import android.content.Context
|
||||
import android.util.Log
|
||||
import com.digitalperson.config.AppConfig
|
||||
import com.digitalperson.env.RuntimeEnv
|
||||
import kotlin.math.sqrt
|
||||
|
||||
data class RefImageMatch(
|
||||
@@ -16,7 +17,8 @@ object RefImageMatcher {
|
||||
private const val TAG = AppConfig.TAG
|
||||
|
||||
/**
|
||||
* @param threshold 余弦相似度阈值(向量已归一化时等价于 dot product)。
|
||||
* @param threshold 真机 BGE:余弦相似度阈值(向量已归一化时等价于 dot product)。
|
||||
* 模拟器:忽略该参数,使用 [AppConfig.RefMatchEmulator.MIN_NORMALIZED_EDIT_SCORE](编辑距离归一化分)。
|
||||
*/
|
||||
fun findBestMatch(
|
||||
context: Context,
|
||||
@@ -26,6 +28,10 @@ object RefImageMatcher {
|
||||
val query = text.trim()
|
||||
if (query.isEmpty()) return null
|
||||
|
||||
if (RuntimeEnv.isEmulator()) {
|
||||
return findBestMatchEditDistance(context, query)
|
||||
}
|
||||
|
||||
if (!BgeEmbedding.isReady()) {
|
||||
val ok = BgeEmbedding.initialize(context.applicationContext)
|
||||
if (!ok) {
|
||||
@@ -78,6 +84,203 @@ object RefImageMatcher {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* 模拟器:不加载 BGE,用**路径关键词命中率**(主)+ 编辑距离(辅)混合评分。
|
||||
*
|
||||
* 路径关键词:取最深目录名按 "-" 分割,如 "一年级上-生活适应-社会生活-元旦"
|
||||
* → ["一年级上", "生活适应", "社会生活", "元旦"]。
|
||||
* 命中率 = 命中数 / 关键词总数(1 个词命中 ≈ 0.25,足以通过 0.20 阈值)。
|
||||
*
|
||||
* 纯编辑距离原先用 0.82 阈值,但 LLM 回复文本与参考短句字面差异很大,
|
||||
* 即使话题相同也难达标;改为关键词方案后准确率大幅提升。
|
||||
*/
|
||||
/** 模拟器混合匹配逻辑;对 [androidTest] 暴露以便回归「本应命中却未命中」的用例。 */
|
||||
internal fun findBestMatchEditDistance(context: Context, query: String): RefImageMatch? {
|
||||
val app = context.applicationContext
|
||||
val root = AppConfig.RefCorpus.ASSETS_ROOT
|
||||
val paths = RefCorpusAssetScanner.listTxtFilesUnder(app, root)
|
||||
val minScore = AppConfig.RefMatchEmulator.MIN_NORMALIZED_EDIT_SCORE
|
||||
val qNorm = normalizeTextForEmuMatch(query.trim())
|
||||
if (qNorm.isEmpty()) return null
|
||||
|
||||
var bestPath: String? = null
|
||||
var bestScore = -1f
|
||||
var bestSubstr = -1f
|
||||
var bestEdit = -1f
|
||||
|
||||
for (path in paths) {
|
||||
// 主:路径关键词命中率(无 IO,O(1))
|
||||
val kwScore = pathKeywordMatchScore(path, qNorm)
|
||||
|
||||
// 辅:内容匹配(有 IO,仅在关键词有命中或尚无候选时读取)
|
||||
// 策略:① 子串包含(query 句子 ⊆ candidate 或 candidate 句子 ⊆ query 句子)
|
||||
// ② 逐句编辑距离
|
||||
// candidate 是 txt 去掉 # 行后的全文(可能同时含问题和答案),
|
||||
// query 中某句若直接出现在 candidate 里,说明话题完全命中。
|
||||
// 必须对每条 txt 做内容打分:若仅在 bestScore<0 或 kwScore>0 时才读盘,
|
||||
// 会先被其它文件的弱编辑分「占坑」,导致题干与某文件完全一致却从未被打开(如「上厕所」目录 kw 未命中但正文含原句)。
|
||||
var substrScore = 0f
|
||||
var editScore = 0f
|
||||
try {
|
||||
val raw = app.assets.open(path).bufferedReader(Charsets.UTF_8).use { it.readText() }
|
||||
val candidate = normalizeTextForEmuMatch(RefTxtEmbedText.fromRawFileContent(raw))
|
||||
if (candidate.isNotEmpty()) {
|
||||
val querySentences = splitSentences(qNorm)
|
||||
val candidateSentences = splitSentences(candidate)
|
||||
// ① 子串:query 句 ⊆ candidate,或 candidate 句 ⊆ query 句。
|
||||
// 极短片段(如「小朋友」)在多篇课文里都有,一律给高分会错配;按匹配长度分级。
|
||||
substrScore = querySentences.maxOfOrNull { qs ->
|
||||
var s = 0f
|
||||
if (qs.length >= 4 && candidate.contains(qs)) {
|
||||
s = maxOf(s, emulatorSubstringScoreForLength(qs.length))
|
||||
}
|
||||
for (cs in candidateSentences) {
|
||||
if (cs.length >= 6 && qs.contains(cs)) {
|
||||
s = maxOf(s, emulatorSubstringScoreForLength(cs.length) * 0.92f)
|
||||
}
|
||||
}
|
||||
s
|
||||
} ?: 0f
|
||||
// ② 编辑距离(逐句 vs 逐句,取最高分)
|
||||
editScore = querySentences.maxOfOrNull { qs ->
|
||||
candidateSentences.maxOfOrNull { cs ->
|
||||
EditDistanceSimilarity.normalizedScore(qs, cs)
|
||||
} ?: 0f
|
||||
} ?: 0f
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
Log.w(TAG, "[RefMatchEmu] read fail $path: ${e.message}")
|
||||
}
|
||||
|
||||
val score = maxOf(kwScore, substrScore, editScore)
|
||||
if (score > 0f) {
|
||||
Log.v(TAG, "[RefMatchEmu] candidate score=$score (kw=$kwScore substr=$substrScore edit=$editScore) path=$path")
|
||||
}
|
||||
if (isBetterEmulatorCandidate(score, substrScore, editScore, bestScore, bestSubstr, bestEdit)) {
|
||||
bestScore = score
|
||||
bestSubstr = substrScore
|
||||
bestEdit = editScore
|
||||
bestPath = path
|
||||
}
|
||||
}
|
||||
|
||||
val txtPath = bestPath ?: run {
|
||||
Log.d(TAG, "[RefMatchEmu] 无候选文件 query=${qNorm.take(60)}")
|
||||
return null
|
||||
}
|
||||
if (bestScore < minScore) {
|
||||
Log.d(TAG, "[RefMatchEmu] 分数不足 bestScore=$bestScore minScore=$minScore bestPath=$txtPath query=${qNorm.take(60)}")
|
||||
return null
|
||||
}
|
||||
|
||||
val pngPath = if (txtPath.endsWith(".txt", ignoreCase = true)) {
|
||||
txtPath.dropLast(4) + ".png"
|
||||
} else {
|
||||
"$txtPath.png"
|
||||
}
|
||||
val exists = try {
|
||||
context.assets.open(pngPath).close()
|
||||
true
|
||||
} catch (_: Throwable) {
|
||||
false
|
||||
}
|
||||
if (!exists) return null
|
||||
|
||||
Log.d(TAG, "[RefMatchEmu] best=$txtPath score=$bestScore query=${qNorm.take(30)}")
|
||||
return RefImageMatch(
|
||||
txtAssetPath = txtPath,
|
||||
pngAssetPath = pngPath,
|
||||
score = bestScore
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* 从文件路径提取话题关键词,计算与查询文本的关键词命中率。
|
||||
* 如路径含目录 "一年级上-生活适应-社会生活-元旦" → 关键词 ["一年级上","生活适应","社会生活","元旦"]。
|
||||
*/
|
||||
private fun pathKeywordMatchScore(path: String, query: String): Float {
|
||||
val keywords = extractPathTopicKeywords(path)
|
||||
if (keywords.isEmpty()) return 0f
|
||||
val matches = keywords.count { kw -> queryMatchesPathKeyword(query, kw) }
|
||||
return matches.toFloat() / keywords.size
|
||||
}
|
||||
|
||||
/** 统一全角/半角标点后再匹配,避免代码或 ASR 里半角 `:` 与语料全角 `:` 导致长句子串匹配失败。 */
|
||||
private fun normalizeTextForEmuMatch(s: String): String = buildString(s.length) {
|
||||
for (ch in s) {
|
||||
append(
|
||||
when (ch) {
|
||||
'\uFF1A', '\uFE55', ':' -> ':'
|
||||
'\uFF0C' -> ','
|
||||
'\uFF01' -> '!'
|
||||
'\uFF1F' -> '?'
|
||||
'\uFF1B' -> ';'
|
||||
else -> ch
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** 总分相同时优先子串分、再比编辑分,避免「元旦到了,小朋友」等前缀在多篇课文同分却先命中排序靠前者。 */
|
||||
private fun isBetterEmulatorCandidate(
|
||||
score: Float,
|
||||
substr: Float,
|
||||
edit: Float,
|
||||
bestScore: Float,
|
||||
bestSubstr: Float,
|
||||
bestEdit: Float,
|
||||
): Boolean {
|
||||
if (bestScore < 0f) return true
|
||||
when {
|
||||
score > bestScore + 1e-5f -> return true
|
||||
score + 1e-5f < bestScore -> return false
|
||||
substr > bestSubstr + 1e-5f -> return true
|
||||
substr + 1e-5f < bestSubstr -> return false
|
||||
else -> return edit > bestEdit + 1e-5f
|
||||
}
|
||||
}
|
||||
|
||||
/** 路径片段与 query 的包含关系;题干常省略词头(如目录「上厕所」、句子里只有「厕所」)。 */
|
||||
private fun queryMatchesPathKeyword(query: String, kw: String): Boolean {
|
||||
if (query.contains(kw)) return true
|
||||
// 去掉首字再匹配,避免「个人生活」用 takeLast(2) 误匹配到泛泛的「生活」
|
||||
if (kw.length >= 3) {
|
||||
val rest = kw.substring(1)
|
||||
if (rest.length >= 2 && query.contains(rest)) return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/** 子串命中得分:越长说明越具区分度;过短(如「小朋友」)分数低,减少跨课文误配。 */
|
||||
private fun emulatorSubstringScoreForLength(len: Int): Float = when {
|
||||
len >= 18 -> 0.95f
|
||||
len >= 12 -> 0.90f
|
||||
len >= 8 -> 0.82f
|
||||
len >= 6 -> 0.68f
|
||||
len >= 4 -> 0.48f
|
||||
else -> 0f
|
||||
}
|
||||
|
||||
/**
|
||||
* 按中文/英文句子分隔符拆分,返回非空句子列表。
|
||||
* 用于模拟器编辑距离辅助评分:逐句比对,避免 LLM 前导寒暄句拉低得分。
|
||||
*/
|
||||
private fun splitSentences(text: String): List<String> {
|
||||
val parts = text.split(Regex("[,。!?;,!?;\n]+"))
|
||||
.map { it.trim() }
|
||||
.filter { it.length >= 2 }
|
||||
return parts.ifEmpty { listOf(text) }
|
||||
}
|
||||
|
||||
/** 取最深目录名,按 "-" 分割并过滤掉纯数字和单字符片段。 */
|
||||
private fun extractPathTopicKeywords(path: String): List<String> {
|
||||
val deepestDir = path.split("/").dropLast(1).lastOrNull() ?: return emptyList()
|
||||
return deepestDir.split("-")
|
||||
.map { it.replace(Regex("\\d+"), "").trim() }
|
||||
.filter { it.length >= 2 }
|
||||
.distinct()
|
||||
}
|
||||
|
||||
private fun dot(a: FloatArray, b: FloatArray): Float {
|
||||
var s = 0f
|
||||
for (i in a.indices) s += a[i] * b[i]
|
||||
|
||||
34
app/src/main/java/com/digitalperson/env/RuntimeEnv.kt
vendored
Normal file
34
app/src/main/java/com/digitalperson/env/RuntimeEnv.kt
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
package com.digitalperson.env
|
||||
|
||||
import android.os.Build
|
||||
|
||||
object RuntimeEnv {
|
||||
fun isEmulator(): Boolean {
|
||||
val fingerprint = Build.FINGERPRINT.orEmpty()
|
||||
val model = Build.MODEL.orEmpty()
|
||||
val brand = Build.BRAND.orEmpty()
|
||||
val device = Build.DEVICE.orEmpty()
|
||||
val product = Build.PRODUCT.orEmpty()
|
||||
val hardware = Build.HARDWARE.orEmpty()
|
||||
val manufacturer = Build.MANUFACTURER.orEmpty()
|
||||
|
||||
var hits = 0
|
||||
fun hit(b: Boolean) { if (b) hits++ }
|
||||
|
||||
hit(fingerprint.startsWith("generic", ignoreCase = true))
|
||||
hit(fingerprint.contains("unknown", ignoreCase = true))
|
||||
hit(model.contains("google_sdk", ignoreCase = true))
|
||||
hit(model.contains("emulator", ignoreCase = true))
|
||||
hit(model.contains("android sdk built for", ignoreCase = true))
|
||||
hit(manufacturer.contains("genymotion", ignoreCase = true))
|
||||
hit(brand.startsWith("generic", ignoreCase = true) && device.startsWith("generic", ignoreCase = true))
|
||||
hit(product.contains("sdk", ignoreCase = true))
|
||||
hit(product.contains("emulator", ignoreCase = true))
|
||||
hit(hardware.contains("goldfish", ignoreCase = true))
|
||||
hit(hardware.contains("ranchu", ignoreCase = true))
|
||||
|
||||
// Require multiple signals to avoid false positives on weird ROMs.
|
||||
return hits >= 2
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import android.graphics.Bitmap
|
||||
import android.util.Log
|
||||
import com.digitalperson.config.AppConfig
|
||||
import com.digitalperson.engine.RetinaFaceEngineRKNN
|
||||
import com.digitalperson.env.RuntimeEnv
|
||||
import java.util.ArrayDeque
|
||||
import java.util.concurrent.atomic.AtomicBoolean
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
@@ -35,6 +36,12 @@ class FaceDetectionPipeline(
|
||||
private val onResult: (FaceDetectionResult) -> Unit,
|
||||
private val onPresenceChanged: (present: Boolean, isFrontal: Boolean, faceIdentityId: String?, recognizedName: String?) -> Unit,
|
||||
) {
|
||||
companion object {
|
||||
/** 模拟器固定人脸 ID,对应 UserMemory 中的 userId */
|
||||
const val EMULATOR_FACE_ID = "face_emulator"
|
||||
/** 模拟器固定显示名,直接作为 recognizedName 传给 coordinator */
|
||||
const val EMULATOR_FACE_NAME = "小黑"
|
||||
}
|
||||
private val appContext = context.applicationContext
|
||||
private val engine = RetinaFaceEngineRKNN()
|
||||
private val recognizer = FaceRecognizer(appContext)
|
||||
@@ -50,6 +57,11 @@ class FaceDetectionPipeline(
|
||||
private val fusionQualities = ArrayDeque<Float>()
|
||||
|
||||
fun initialize(): Boolean {
|
||||
if (RuntimeEnv.isEmulator()) {
|
||||
Log.i(AppConfig.TAG, "[Face] 模拟器模式:跳过 RKNN 初始化,固定返回身份「$EMULATOR_FACE_NAME」")
|
||||
initialized.set(true)
|
||||
return true
|
||||
}
|
||||
val detectorOk = engine.initialize(appContext)
|
||||
val recognizerOk = recognizer.initialize()
|
||||
val ok = detectorOk && recognizerOk
|
||||
@@ -68,6 +80,31 @@ class FaceDetectionPipeline(
|
||||
return
|
||||
}
|
||||
|
||||
// 模拟器:跳过 RKNN 检测,固定上报一张居中正脸
|
||||
if (RuntimeEnv.isEmulator()) {
|
||||
scope.launch {
|
||||
try {
|
||||
val w = bitmap.width
|
||||
val h = bitmap.height
|
||||
val fakeBox = FaceBox(
|
||||
left = w * 0.25f,
|
||||
top = h * 0.15f,
|
||||
right = w * 0.75f,
|
||||
bottom = h * 0.85f,
|
||||
score = 0.99f,
|
||||
)
|
||||
withContext(Dispatchers.Main) {
|
||||
onPresenceChanged(true, true, EMULATOR_FACE_ID, EMULATOR_FACE_NAME)
|
||||
onResult(FaceDetectionResult(w, h, listOf(fakeBox)))
|
||||
}
|
||||
} finally {
|
||||
bitmap.recycle()
|
||||
frameInFlight.set(false)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
scope.launch {
|
||||
try {
|
||||
val width = bitmap.width
|
||||
|
||||
@@ -148,6 +148,8 @@ abstract class BaseDigitalPersonCoordinator(
|
||||
* (i.e. after a cloud LLM response), NOT after greeting / farewell / proactive TTS.
|
||||
*/
|
||||
fun onTtsPlaybackCompleted() {
|
||||
// Let the controller advance its own timers (greeting/proactive/dlg all count as assistant speaking).
|
||||
controller.onAssistantTtsPlaybackCompleted()
|
||||
if (pendingDialogueFinish) {
|
||||
pendingDialogueFinish = false
|
||||
controller.onDialogueResponseFinished()
|
||||
|
||||
@@ -64,6 +64,10 @@ class DigitalHumanInteractionController(
|
||||
private var memoryJob: Job? = null
|
||||
private var farewellJob: Job? = null
|
||||
|
||||
// 让超时/间隔从 TTS 播放完成后开始计时,而不是从 speak() 调用时开始
|
||||
private var pendingWaitReplyTimeoutAfterTts: Boolean = false
|
||||
private var pendingProactiveFollowupAfterTts: Boolean = false
|
||||
|
||||
fun start() {
|
||||
transitionTo(InteractionState.IDLE)
|
||||
scheduleMemoryMode()
|
||||
@@ -204,7 +208,7 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
|
||||
return
|
||||
}
|
||||
transitionTo(InteractionState.WAITING_REPLY)
|
||||
scheduleWaitingReplyTimeout()
|
||||
scheduleWaitingReplyTimeoutAfterTts()
|
||||
}
|
||||
|
||||
private fun enterGreeting() {
|
||||
@@ -224,7 +228,7 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
|
||||
handler.addToChatHistory("assistant", greeting)
|
||||
handler.addAssistantMessageToCloudHistory(greeting)
|
||||
transitionTo(InteractionState.WAITING_REPLY)
|
||||
scheduleWaitingReplyTimeout()
|
||||
scheduleWaitingReplyTimeoutAfterTts()
|
||||
} else {
|
||||
useDefaultGreeting()
|
||||
}
|
||||
@@ -243,7 +247,11 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
|
||||
handler.addAssistantMessageToCloudHistory(greeting)
|
||||
|
||||
transitionTo(InteractionState.WAITING_REPLY)
|
||||
scheduleWaitingReplyTimeout()
|
||||
scheduleWaitingReplyTimeoutAfterTts()
|
||||
}
|
||||
|
||||
/**
 * Defers the waiting-reply timeout until the current TTS playback finishes.
 *
 * Only raises a flag here; [onAssistantTtsPlaybackCompleted] consumes it and starts
 * the real timer, so the silence window is measured from the end of assistant speech
 * rather than from the moment speak() was invoked.
 */
private fun scheduleWaitingReplyTimeoutAfterTts() {
    pendingWaitReplyTimeoutAfterTts = true
}
|
||||
|
||||
private fun scheduleWaitingReplyTimeout() {
|
||||
@@ -282,21 +290,34 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
|
||||
// 触发题目生成检查
|
||||
handler.onQuestionAsked(currentFaceId ?: "guest")
|
||||
|
||||
proactiveJob = scope.launch {
|
||||
hasPendingUserReply = false
|
||||
delay(20_000)
|
||||
if (state != InteractionState.PROACTIVE || hasPendingUserReply) return@launch
|
||||
if (!facePresent) {
|
||||
enterFarewell()
|
||||
return@launch
|
||||
}
|
||||
proactiveRound += 1
|
||||
if (proactiveRound < 3) {
|
||||
askProactiveTopic()
|
||||
} else {
|
||||
transitionTo(InteractionState.WAITING_REPLY)
|
||||
// handler.playMotion("haru_g_m17.motion3.json")
|
||||
scheduleWaitingReplyTimeout()
|
||||
// 不立刻开始 20s 计时;等 TTS 播放完再开始计时,避免“刚说完几秒就又问”
|
||||
pendingProactiveFollowupAfterTts = true
|
||||
}
|
||||
|
||||
/**
 * Called by the Activity when the current TTS utterance has fully finished playing
 * (covers greeting, proactive-question, and dialogue-reply speech alike).
 *
 * Converts the "start timing after TTS" flags into actual timers, so the silence
 * windows below are measured from the end of assistant speech, not from speak().
 */
fun onAssistantTtsPlaybackCompleted() {
    // Deferred waiting-reply timeout: arm it only if we are still waiting on the user.
    if (pendingWaitReplyTimeoutAfterTts && state == InteractionState.WAITING_REPLY) {
        pendingWaitReplyTimeoutAfterTts = false
        scheduleWaitingReplyTimeout()
    }
    // Deferred proactive follow-up: restart the 20s silence countdown now that speech ended.
    if (pendingProactiveFollowupAfterTts && state == InteractionState.PROACTIVE) {
        pendingProactiveFollowupAfterTts = false
        proactiveJob?.cancel() // drop any countdown that was started when speak() was called
        proactiveJob = scope.launch {
            hasPendingUserReply = false
            delay(20_000) // 20s of user silence before the next proactive action
            // Abort if the state machine moved on or the user replied in the meantime.
            if (state != InteractionState.PROACTIVE || hasPendingUserReply) return@launch
            if (!facePresent) {
                // Nobody in front of the camera any more — say goodbye instead.
                enterFarewell()
                return@launch
            }
            proactiveRound += 1
            if (proactiveRound < 3) {
                askProactiveTopic()
            } else {
                // Out of proactive rounds: fall back to passively waiting for the user.
                transitionTo(InteractionState.WAITING_REPLY)
                // NOTE(review): this only sets the after-TTS flag and no TTS is spoken
                // on this path, so the flag may never be consumed — confirm the
                // waiting-reply timeout still gets armed in this case.
                scheduleWaitingReplyTimeoutAfterTts()
            }
        }
    }
}
|
||||
|
||||
@@ -27,6 +27,11 @@ class TtsController(private val context: Context) {
|
||||
|
||||
private var callback: TtsCallback? = null
|
||||
|
||||
// 防止 WebSocket 重连或多路回调导致同一段文案短时间内重复入队、重复播报
|
||||
@Volatile private var lastEnqueuedText: String? = null
|
||||
@Volatile private var lastEnqueuedAtMs: Long = 0L
|
||||
private val dedupeWindowMs = 2500L
|
||||
|
||||
fun setCallback(callback: TtsCallback) {
|
||||
this.callback = callback
|
||||
bindCallbacksIfReady()
|
||||
@@ -147,6 +152,14 @@ class TtsController(private val context: Context) {
|
||||
fun enqueueSegment(seg: String) {
|
||||
val cleaned = seg.replace(Regex("\\[.*?\\]"), "").trim()
|
||||
if (cleaned.isEmpty()) return
|
||||
val now = System.currentTimeMillis()
|
||||
val lastText = lastEnqueuedText
|
||||
if (lastText != null && lastText == cleaned && (now - lastEnqueuedAtMs) <= dedupeWindowMs) {
|
||||
Log.w(TAG, "Skip duplicate TTS segment within ${dedupeWindowMs}ms: ${cleaned.take(60)}")
|
||||
return
|
||||
}
|
||||
lastEnqueuedText = cleaned
|
||||
lastEnqueuedAtMs = now
|
||||
if (useQCloudTts) {
|
||||
qcloudTts?.enqueueSegment(cleaned)
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user