add photo

This commit is contained in:
gcw_4spBpAfv
2026-04-23 15:21:24 +08:00
parent 1550783eef
commit 4e33063a98
44 changed files with 3567 additions and 64 deletions

View File

@@ -4,6 +4,8 @@ plugins {
id 'kotlin-kapt' id 'kotlin-kapt'
} }
def oneSentenceAsrAar = file('libs/asr-one-sentence-release.aar')
kapt { kapt {
// Room uses javac stubs under kapt; keep parameter names for :bind variables. // Room uses javac stubs under kapt; keep parameter names for :bind variables.
javacOptions { javacOptions {
@@ -62,6 +64,9 @@ android {
buildConfigField "String", "LLM_MODEL", "\"${(project.findProperty('LLM_MODEL') ?: 'doubao-1-5-pro-32k-character-250228').toString()}\"" buildConfigField "String", "LLM_MODEL", "\"${(project.findProperty('LLM_MODEL') ?: 'doubao-1-5-pro-32k-character-250228').toString()}\""
buildConfigField "boolean", "USE_LIVE2D", "${(project.findProperty('USE_LIVE2D') ?: 'true').toString()}" buildConfigField "boolean", "USE_LIVE2D", "${(project.findProperty('USE_LIVE2D') ?: 'true').toString()}"
// 腾讯云「一句话识别」Android SDK:将 asr-one-sentence-release.aar 放入 app/libs/ 后为 true
buildConfigField "boolean", "HAS_TENCENT_ASR_SDK", "${oneSentenceAsrAar.exists()}"
ndk { ndk {
abiFilters "arm64-v8a" abiFilters "arm64-v8a"
} }
@@ -119,4 +124,6 @@ dependencies {
implementation 'com.google.guava:guava:31.1-android' implementation 'com.google.guava:guava:31.1-android'
implementation 'org.ejml:ejml-core:0.43.1' implementation 'org.ejml:ejml-core:0.43.1'
implementation 'org.ejml:ejml-simple:0.43.1' implementation 'org.ejml:ejml-simple:0.43.1'
// 腾讯云「一句话识别」通过 OkHttp 直接实现 TC3 签名,无需 AAR SDK
} }

View File

@@ -0,0 +1,4 @@
1. Open https://console.cloud.tencent.com/asr/download
2. Download the Android SDK package for one-sentence speech recognition (一句话识别).
3. Copy asr-one-sentence-release.aar into this folder (app/libs/).
4. Sync Gradle — BuildConfig.HAS_TENCENT_ASR_SDK will become true.

BIN
app/libs/_asr.zip Normal file

Binary file not shown.

View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.tencent.iot.speech" >
<uses-sdk
android:minSdkVersion="16"
android:targetSdkVersion="33" />
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-permission android:name="android.permission.INTERNET" />
</manifest>

1
app/libs/_asr_out/R.txt Normal file
View File

@@ -0,0 +1 @@
int string app_name 0x0

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,9 @@
# SDK
-keepclasseswithmembernames class com.tencent.aai.** { # 保持 native 方法不被混淆
native <methods>;
}
-keep public class com.tencent.aai.** {*;}
-keep interface com.tencent.aai.audio.data.PcmAudioDataSource {
void start() throws com.tencent.aai.exception.ClientException;
}

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<string name="app_name">aai</string>
</resources>

View File

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.tencent.cloud.qcloudasrsdk.onesentence" >
<uses-sdk
android:minSdkVersion="16"
android:targetSdkVersion="33" />
<uses-permission android:name="android.permission.INTERNET" />
</manifest>

1577
app/libs/_osr_out/R.txt Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,90 @@
-optimizationpasses 5 # 指定代码的压缩级别
-allowaccessmodification #优化时允许访问并修改有修饰符的类和类的成员
-dontusemixedcaseclassnames # 是否使用大小写混合
-dontskipnonpubliclibraryclasses # 是否混淆第三方jar
-dontpreverify # 混淆时是否做预校验
-verbose # 混淆时是否记录日志
-ignorewarnings # 忽略警告,避免打包时某些警告出现
-optimizations !code/simplification/arithmetic,!code/simplification/cast,!field/*,!class/merging/* # 混淆时所采用的算法
-keepattributes *Annotation*
-keepclasseswithmembernames class * { # 保持 native 方法不被混淆
native <methods>;
}
-keepclassmembers public class * extends android.view.View {
void set*(***);
*** get*();
}
-keepclassmembers class * extends android.app.Activity {
public void *(android.view.View);
}
-keepclassmembers enum * { # 保持枚举 enum 类不被混淆
public static **[] values();
public static ** valueOf(java.lang.String);
}
-keep class * implements android.os.Parcelable { # 保持 Parcelable 不被混淆
public static final android.os.Parcelable$Creator *;
}
-keepclassmembers class **.R$* { #不混淆R文件
public static <fields>;
}
-dontwarn android.support.**
##--- End android默认 ---
##--- For:不能被混淆的 ---
-keep public class * extends android.app.Activity
-keep public class * extends android.app.Fragment
-keep public class * extends android.app.Application
-keep public class * extends android.app.Service
-keep public class * extends android.content.BroadcastReceiver
-keep public class * extends android.content.ContentProvider
-keep public class * extends android.app.backup.BackupAgentHelper
-keep public class * extends android.preference.Preference
##--- For:保持自定义控件类不被混淆 ---
-keepclasseswithmembers class * {
public <init>(android.content.Context, android.util.AttributeSet);
}
-keepclasseswithmembers class * {
public <init>(android.content.Context, android.util.AttributeSet, int);
}
##--- For:android-support-v4 ---
-dontwarn android.support.v4.**
-keep class android.support.v4.** { *; }
-keep interface android.support.v4.app.** { *; }
-keep class * extends android.support.v4.** { *; }
-keep public class * extends android.support.v4.**
-keep class * extends android.support.v4.app.** {*;}
-keep class * extends android.support.v4.view.** {*;}
##--- For:Serializable ---
-keep class * implements java.io.Serializable {*;}
-keepnames class * implements java.io.Serializable
-keepclassmembers class * implements java.io.Serializable {*;}
##--- For:Gson ---
-keepattributes *Annotation*
-keep class com.google.gson.stream.** { *; }
##--- For:Remove log ---
-assumenosideeffects class android.util.Log {
public static boolean isLoggable(java.lang.String, int);
public static int v(...);
public static int i(...);
public static int w(...);
public static int d(...);
public static int e(...);
}
##--- For:attributes(未启用) ---
#-keepattributes SourceFile,LineNumberTable # 保持反编译工具能看到代码的行数以及release包安装后出现异常信息可以知道在哪行代码出现异常建议不启用
-keepattributes *Annotation* #使用注解
-keepattributes Signature #过滤泛型 出现类型转换错误时,启用这个
#-keepattributes *Exceptions*,EnclosingMethod #没试过,未知效果

BIN
app/libs/_tts.zip Normal file

Binary file not shown.

View File

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.tencent.cloud.realtime.tts" >
<uses-sdk
android:minSdkVersion="16"
android:targetSdkVersion="33" />
<uses-permission android:name="android.permission.INTERNET" />
</manifest>

0
app/libs/_tts_out/R.txt Normal file
View File

Binary file not shown.

View File

Binary file not shown.

Binary file not shown.

BIN
app/libs/classes.jar Normal file

Binary file not shown.

View File

@@ -19,3 +19,10 @@
# If you keep the line number information, uncomment this to # If you keep the line number information, uncomment this to
# hide the original source file name. # hide the original source file name.
#-renamesourcefileattribute SourceFile #-renamesourcefileattribute SourceFile
# 腾讯云实时语音识别 SDK(asr-realtime-release.aar)
-keepclasseswithmembernames class * {
native <methods>;
}
-keep public class com.tencent.aai.** { *; }
-keep public class com.qq.wx.voice.** { *; }

View File

@@ -0,0 +1,132 @@
package com.digitalperson.embedding
import android.content.Context
import androidx.test.core.app.ApplicationProvider
import androidx.test.ext.junit.runners.AndroidJUnit4
import com.digitalperson.config.AppConfig
import org.junit.Assert.assertNotNull
import org.junit.Assert.assertTrue
import org.junit.Before
import org.junit.Test
import org.junit.runner.RunWith
/**
 * Emulator regression test for reference-image matching
 * (path keywords + substring + edit distance).
 *
 * How to run (requires a device/emulator with the ref corpus packaged in assets):
 * ./gradlew :app:connectedDebugAndroidTest --tests com.digitalperson.embedding.RefImageMatcherEmulatorRegressionTest
 */
@RunWith(AndroidJUnit4::class)
class RefImageMatcherEmulatorRegressionTest {

    private lateinit var context: Context

    @Before
    fun setUp() {
        context = ApplicationProvider.getApplicationContext()
    }

    /** One hand-written regression case. */
    data class ManualCase(
        val label: String,
        val query: String,
        /** The matched txt asset path is expected to contain this substring (e.g. a file-name segment). */
        val expectedPathContains: String,
    )

    @Test
    fun manualQueries_shouldMatchExpectedAsset() {
        val cases = listOf(
            ManualCase(
                label = "上厕所指引(LLM 寒暄前缀)",
                query = "嗨小朋友,可以帮老师个忙吗?同学们能指引图中的小朋友们进入正确的厕所吗?",
                expectedPathContains = "上厕所18",
            ),
            ManualCase(
                label = "刷牙看图(题干在讲卫生6人1 为认读「人」非刷牙)",
                query = "嗨小朋友,可以帮老师个忙吗?这个男生在做什么?",
                expectedPathContains = "讲卫生6",
            ),
            ManualCase(
                label = "元旦短句(生活适应)",
                query = "元旦到了,小朋友可以对爸爸妈妈说:'爸爸妈妈,新年快乐!'",
                expectedPathContains = "元旦14",
            ),
        )
        val failures = mutableListOf<String>()
        for (c in cases) {
            val m = RefImageMatcher.findBestMatchEditDistance(context, c.query)
            when {
                m == null -> failures += "${c.label}: 无匹配,期望路径含「${c.expectedPathContains}」 query=${c.query.take(80)}"
                !m.txtAssetPath.contains(c.expectedPathContains) ->
                    failures += "${c.label}: 得到 ${m.txtAssetPath} score=${m.score},期望路径含「${c.expectedPathContains}」"
            }
        }
        assertTrue(
            "以下用例未命中预期资源:\n${failures.joinToString("\n")}",
            failures.isEmpty(),
        )
    }

    /**
     * Corpus self-check: for every txt that ships a matching png, build a query from
     * "greeting + a long body prefix" and expect it to match back to the same txt.
     *
     * Using only the first line fails in two situations: (1) several texts share the
     * same first sentence; (2) very short fragments (e.g. 「小朋友」) are substrings of
     * many texts and tie on score, so whichever file is scanned first wins. A compacted
     * body prefix (~400 chars) gives enough discrimination.
     */
    @Test
    fun corpus_bodyPrefix_withGreetingPrefix_shouldMatchSameTxt() {
        val root = AppConfig.RefCorpus.ASSETS_ROOT
        val paths = RefCorpusAssetScanner.listTxtFilesUnder(context, root)
        // Fail fast when the corpus is missing from assets (was checked after the loop).
        assertNotNull("语料为空或未打包进 assets", paths.takeIf { it.isNotEmpty() })
        val greeting = "嗨小朋友,可以帮老师个忙吗?"
        // Same order of magnitude as a real LLM question; longer is more unique but slower.
        val maxBodyChars = 400
        // Hoisted loop-invariant pattern (was compiled once per file).
        val whitespace = Regex("\\s+")
        val failures = mutableListOf<String>()
        var skippedNoPng = 0
        var skippedNoBody = 0
        var checked = 0
        for (txtPath in paths) {
            val pngPath = if (txtPath.endsWith(".txt", ignoreCase = true)) {
                txtPath.dropLast(4) + ".png"
            } else {
                "$txtPath.png"
            }
            // Only corpus entries that ship a reference image are relevant to this check.
            val pngOk = try {
                context.assets.open(pngPath).close()
                true
            } catch (_: Throwable) {
                false
            }
            if (!pngOk) {
                skippedNoPng++
                continue
            }
            val raw = context.assets.open(txtPath).bufferedReader(Charsets.UTF_8).use { it.readText() }
            val body = RefTxtEmbedText.fromRawFileContent(raw).trim()
            if (body.length < 8) {
                skippedNoBody++
                continue
            }
            checked++
            val compact = body.replace(whitespace, " ").trim()
            val core = compact.take(maxBodyChars)
            val query = "$greeting$core"
            val m = RefImageMatcher.findBestMatchEditDistance(context, query)
            if (m == null) {
                failures += "无匹配: $txtPath | core=${core.take(50)}"
                continue
            }
            if (m.txtAssetPath != txtPath) {
                failures += "错配: 期望 $txtPath | 得到 ${m.txtAssetPath} score=${m.score} | core=${core.take(50)}"
            }
        }
        assertTrue(
            // Braced templates restore the compile-broken "$checkedskip…" interpolation.
            "语料自检失败条数=${failures.size}(已检查=${checked},skip无png=${skippedNoPng},skip正文过短=${skippedNoBody}):\n" +
                failures.take(50).joinToString("\n") +
                if (failures.size > 50) "\n... 共 ${failures.size}" else "",
            failures.isEmpty(),
        )
    }
}

View File

@@ -0,0 +1,204 @@
package com.digitalperson.question
import android.content.Context
import androidx.test.core.app.ApplicationProvider
import androidx.test.ext.junit.runners.AndroidJUnit4
import com.digitalperson.data.AppDatabase
import com.digitalperson.data.entity.Question
import com.digitalperson.interaction.UserMemoryStore
import kotlinx.coroutines.runBlocking
import org.json.JSONObject
import org.junit.After
import org.junit.Assert.assertEquals
import org.junit.Assert.assertNotNull
import org.junit.Assert.assertTrue
import org.junit.Before
import org.junit.Test
import org.junit.runner.RunWith
import java.io.InputStream
/**
 * Tests for the question-generation agent.
 * Runs on an emulator or device without launching the full application.
 *
 * NOTE: the original version used Kotlin's `assert(...)`, which is a no-op unless
 * the JVM runs with -ea and is disabled on Android ART — every check silently
 * passed. All checks now use JUnit assertions, which always execute.
 */
@RunWith(AndroidJUnit4::class)
class QuestionGenerationAgentTest {

    private lateinit var context: Context
    private lateinit var database: AppDatabase
    private lateinit var userMemoryStore: UserMemoryStore

    @Before
    fun setUp() {
        context = ApplicationProvider.getApplicationContext()
        database = AppDatabase.getInstance(context)
        userMemoryStore = UserMemoryStore(context)
    }

    @After
    fun tearDown() {
        // Clean up test data.
        // database.clearAllTables()
    }

    @Test
    fun testLoadPromptPoolFromJson() {
        // Verify the JSON prompt pool loads and every entry has the required fields.
        val inputStream: InputStream = context.assets.open("question_prompts.json")
        val jsonString = inputStream.bufferedReader().use { it.readText() }
        val jsonArray = org.json.JSONArray(jsonString)
        println("✅ Loaded ${jsonArray.length()} prompts from JSON")
        for (i in 0 until jsonArray.length()) {
            val json = jsonArray.getJSONObject(i)
            assertTrue("Missing subject in prompt $i", json.has("subject"))
            assertTrue("Missing grade in prompt $i", json.has("grade"))
            assertTrue("Missing topic in prompt $i", json.has("topic"))
            assertTrue("Missing difficulty in prompt $i", json.has("difficulty"))
            assertTrue("Missing promptTemplate in prompt $i", json.has("promptTemplate"))
            println("  - Prompt $i: ${json.getString("subject")} / ${json.getString("topic")}")
        }
        assertTrue("Should have at least 1 prompt", jsonArray.length() > 0)
    }

    @Test
    fun testQuestionDatabaseOperations() = runBlocking {
        // Round-trip a question through the Room DAO.
        val questionDao = database.questionDao()
        val testQuestion = Question(
            id = 0,
            content = "测试题目:苹果和香蕉哪个大?",
            answer = "香蕉",
            subject = "生活数学",
            grade = 1,
            difficulty = 1,
            createdAt = System.currentTimeMillis()
        )
        val questionId = questionDao.insert(testQuestion)
        println("✅ Inserted question with ID: $questionId")

        val retrievedQuestion = questionDao.getQuestionById(questionId)
        assertNotNull("Should retrieve inserted question", retrievedQuestion)
        assertEquals("Content should match", testQuestion.content, retrievedQuestion?.content)
        println("✅ Retrieved question: ${retrievedQuestion?.content}")

        // Unanswered-question bookkeeping for a fresh user.
        val userId = "test_user_001"
        val count = questionDao.countUnansweredQuestions(userId)
        println("✅ Unanswered questions count: $count")

        val randomQuestion = questionDao.getRandomUnansweredQuestion(userId)
        println("✅ Random unanswered question: ${randomQuestion?.content?.take(20)}...")
    }

    @Test
    fun testJsonResponseParsing() {
        // Exercise extraction of the JSON object from raw LLM responses,
        // including responses wrapped in surrounding prose.
        val testResponses = listOf(
            """{"content": "苹果和香蕉哪个大?", "answer": "香蕉", "explanation": "香蕉通常比苹果大"}""",
            """
            {
                "content": "2个苹果和5个苹果比谁多",
                "answer": "5个苹果多",
                "explanation": "5大于2"
            }
            """,
            """Some text before {"content": "测试题目", "answer": "答案"} some text after"""
        )
        testResponses.forEachIndexed { index, response ->
            val json = extractJsonFromResponse(response)
            assertNotNull("Test $index: Failed to parse JSON", json)
            println("✅ Test $index: Parsed successfully")
            println("   Content: ${json?.getString("content")}")
            println("   Answer: ${json?.getString("answer")}")
        }
    }

    @Test
    fun testUserMemoryOperations() = runBlocking {
        // Upsert then read back a user memory record.
        val userId = "test_user_001"
        userMemoryStore.upsertUserSeen(userId, "测试小朋友")
        println("✅ Created/updated user: $userId")

        val memory = userMemoryStore.getMemory(userId)
        println("✅ User memory: displayName=${memory?.displayName}")

        val unansweredCount = userMemoryStore.countUnansweredQuestions(userId)
        println("✅ Unanswered questions for $userId: $unansweredCount")
    }

    @Test
    fun testPromptTemplateBuilding() = runBlocking {
        // Build the generation prompt from the user profile; the Chinese template
        // text is runtime data and must stay exactly as shipped.
        val userProfile = userMemoryStore.getMemory("test_user_001")
        val promptTemplate = """
你是一个专门为特殊教育儿童设计题目的教育专家。请根据以下要求生成一个题目:
用户信息:
${userProfile?.displayName?.let { "姓名:$it" } ?: ""}
${userProfile?.age?.let { "年龄:$it" } ?: ""}
学科:生活数学
年级1
主题:比大小
难度1
具体要求:
基于以下学习目标针对一年级小学生出1道题目
1. 初步感知物品的大小
2. 会比较2个物品的大小
通用要求:
1. 题目要贴近生活,适合智力障碍儿童理解
2. 语言简单明了,避免复杂句式
3. 题目内容积极向上
4. 提供标准答案
5. 确保题目没有重复
6. 题目要有趣味性,能吸引学生注意力
请以JSON格式返回格式如下
{
"content": "题目内容",
"answer": "标准答案",
"explanation": "题目解析(可选)"
}
只返回JSON不要其他内容。
        """.trimIndent()
        println("✅ Generated prompt template:")
        println(promptTemplate)
        println("\n✅ Prompt length: ${promptTemplate.length} characters")
        assertTrue("Prompt template should not be blank", promptTemplate.isNotBlank())
    }

    /**
     * Extracts the first `{...}` JSON object embedded in an LLM response.
     *
     * @return the parsed object, or null when no brace pair is found or the
     *         candidate text is not valid JSON (previously this threw JSONException).
     */
    private fun extractJsonFromResponse(response: String): JSONObject? {
        val trimmed = response.trim()
        val start = trimmed.indexOf('{')
        val end = trimmed.lastIndexOf('}')
        if (start < 0 || end <= start) return null
        return try {
            JSONObject(trimmed.substring(start, end + 1))
        } catch (_: org.json.JSONException) {
            null
        }
    }
}

View File

@@ -4,6 +4,7 @@
<uses-permission android:name="android.permission.RECORD_AUDIO" /> <uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-permission android:name="android.permission.CAMERA" /> <uses-permission android:name="android.permission.CAMERA" />
<uses-permission android:name="android.permission.INTERNET" /> <uses-permission android:name="android.permission.INTERNET" />
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" /> <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.VIBRATE" /> <uses-permission android:name="android.permission.VIBRATE" />
<uses-feature android:name="android.hardware.camera.any" /> <uses-feature android:name="android.hardware.camera.any" />

View File

@@ -0,0 +1,264 @@
# Question Generation Flow - Pre-generation Strategy
## Problem Solved
**Before:** First question would fail because database was empty.
**After:** Questions are pre-generated when face appears, giving 20+ seconds before first question is asked.
## Timeline Flow
```
Time: 0s 2s 20s 40s 60s
| | | | |
↓ ↓ ↓ ↓ ↓
Face Greeting First Second Third
Appears Ends Question Question Question
| | | | |
| | | | |
└─► Start │ │ │ │
Gen │ │ │ │
Questions│ │ │ │
(~10-20s)│ │ │ │
│ │ │ │
└───────────┴───────────┴───────────┘
Questions already
in database!
```
## Detailed Flow
### 1. Face Detection (T=0s)
```
User shows face to camera
FaceDetectionPipeline detects frontal face
onFaceSignal(present=true, isFrontal=true)
DigitalHumanInteractionController.onFacePresenceChanged()
Handler.onFaceAppeared(userId) ← NEW TRIGGER
```
### 2. Pre-generation (T=0-20s)
```
QuestionGenerationAgent.onQuestionAsked(userId)
Check unanswered count:
- If 0 questions → Generate 20 questions (2x minUnansweredQuestions)
- If < 10 questions → Generate enough to reach 10
- If >= 10 questions → Skip generation
For each question needed:
1. Get next prompt from pool (avoiding recently used)
2. Build generation prompt with user profile
3. LLM generates question (JSON)
4. LLM reviews question quality
5. If passed → Save to database
Questions ready in database! ✓
```
### 3. Greeting Phase (T=2-5s)
```
Enter greeting state
Digital person waves and says hello
~3 seconds
```
### 4. First Question (T=20s)
```
enterProactive() → askProactiveTopic()
Get random unanswered question from database
✓ QUESTION EXISTS! (Pre-generated in step 2)
Ask question to user
onQuestionAsked(userId) ← Check again
If count < 10 → Generate more questions
```
### 5. Subsequent Questions (T=40s, 60s, ...)
```
Each time a question is asked:
onQuestionAsked(userId) triggered
Check unanswered count
If low → Generate more
Database always stays stocked! ✓
```
## Key Benefits
### ✅ No Empty Database
- Questions generated BEFORE first question is needed
- 20+ second head start during greeting phase
### ✅ Smart Quantity
- **First time:** Generate 20 questions (double the minimum)
- **Replenishment:** Generate only what's needed to reach 10
### ✅ Continuous Supply
- Every question asked triggers a check
- Database never runs empty
### ✅ User-Aware
- Uses user profile for personalization
- Tracks unanswered questions per user
## Configuration
```kotlin
data class AgentConfig(
val minUnansweredQuestions: Int = 10, // Minimum threshold
val batchSize: Int = 5, // (Deprecated - now calculates dynamically)
val generationTimeoutMs: Long = 30000 // LLM timeout
)
```
### Calculated Values
| Scenario | Unanswered Count | Action | Questions Generated |
|----------|-----------------|--------|---------------------|
| First time | 0 | Initial load | 20 (2x min) |
| Running low | 5 | Replenish | 5 (to reach 10) |
| Running low | 8 | Replenish | 2 (to reach 10) |
| Sufficient | 10+ | Skip | 0 |
| After question | 9 | Replenish | 1 (to reach 10) |
## Generation Time Estimates
Assuming ~3 seconds per question (generate + review):
| Questions Needed | Estimated Time |
|-----------------|----------------|
| 5 questions | ~15 seconds |
| 10 questions | ~30 seconds |
| 20 questions | ~60 seconds |
**Good news:** Generation happens in background, doesn't block greeting!
## Trigger Points
### 1. Face Appears (Pre-generation)
```kotlin
// In DigitalHumanInteractionController.kt
if (state == InteractionState.IDLE || state == InteractionState.MEMORY || state == InteractionState.FAREWELL) {
handler.onFaceAppeared(currentFaceId ?: "guest") // ← Pre-generate
enterGreeting()
}
```
### 2. Question Asked (Replenishment)
```kotlin
// In DigitalHumanInteractionController.kt
handler.speak("嗨小朋友,可以帮老师个忙吗?" + topic)
handler.onQuestionAsked(currentFaceId ?: "guest") // ← Replenish
```
## Logging Output
### First Time User
```
I/QuestionGenAgent: Face appeared, triggering question pre-generation for user: user123
I/QuestionGenAgent: User user123 has 0 unanswered questions (initial=true), generating 20 more...
D/QuestionGenAgent: Generating question 1/20 for user user123
I/QuestionGenAgent: Question saved: 请说出你家里有几口人...
D/QuestionGenAgent: Generating question 2/20 for user user123
...
I/QuestionGenAgent: Finished generating questions for user user123
```
### Returning User (Low Questions)
```
I/QuestionGenAgent: User user123 has 3 unanswered questions, generating 7 more...
D/QuestionGenAgent: Generating question 1/7 for user user123
...
I/QuestionGenAgent: Finished generating questions for user user123
```
### Sufficient Questions
```
D/QuestionGenAgent: User user123 has 15 unanswered questions, no need to generate
```
## Edge Cases Handled
### ❌ No Face Detection
- No pre-generation triggered
- User must show face first
### ❌ LLM Generation Fails
- Gracefully skips failed question
- Continues with next question
- Logs error for debugging
### ❌ Review Fails
- Question discarded
- Not saved to database
- Moves to next question
### ❌ Database Empty at Question Time
- Very unlikely (20s head start)
- Falls back to default question: "你喜欢什么颜色呀?"
## Performance Optimization
### Asynchronous Generation
```kotlin
agentScope.launch {
checkAndGenerateForUser(userId) // Non-blocking
}
```
- Doesn't block UI thread
- Doesn't block greeting
- Runs completely in background
### Smart Prompt Selection
- Tracks last 10 used prompts
- Avoids repetition
- Ensures diversity
### Delay Between Questions
```kotlin
delay(1000) // 1 second between each question
```
- Prevents overwhelming LLM
- Allows proper processing time
## Testing Scenarios
### Test 1: First Time User
1. Clear database
2. Show face to camera
3. Check logs: Should see "generating 20 more"
4. Wait 20 seconds
5. First question should work ✓
### Test 2: Returning User
1. User already has 5 unanswered questions
2. Show face to camera
3. Check logs: Should see "generating 5 more"
4. Questions should replenish ✓
### Test 3: Continuous Usage
1. User answers questions continuously
2. Each question triggers check
3. Database should stay above 10 questions ✓
## Future Enhancements
- [ ] Priority-based generation (generate harder questions as user progresses)
- [ ] Track question difficulty and adjust based on user performance
- [ ] Pre-generate during idle time (not just on face appearance)
- [ ] Cache generated questions for offline use
- [ ] Support multiple users with different question pools

View File

@@ -0,0 +1,221 @@
# Question Generation Agent - Prompt Pool Guide
## Overview
The Question Generation Agent uses a JSON-based prompt pool to generate diverse educational questions for special needs children. The system automatically loads prompts from `question_prompts.json` and intelligently selects prompts to ensure diversity.
## File Location
```
app/src/main/assets/question_prompts.json
```
## JSON Structure
Each prompt in the pool follows this structure:
```json
{
"subject": "生活适应",
"grade": 1,
"topic": "认识家庭成员",
"difficulty": 1,
"promptTemplate": "生成一个关于认识家庭成员的题目,适合自闭症儿童,要求题目贴近日常生活,语言简单易懂"
}
```
### Fields Description
| Field | Type | Description | Example |
|-------|------|-------------|---------|
| `subject` | String | Subject category | "生活适应", "生活语文" |
| `grade` | Integer | Grade level (1-6) | 1 |
| `topic` | String | Specific topic name | "认识家庭成员" |
| `difficulty` | Integer | Difficulty level (1-5) | 1 |
| `promptTemplate` | String | Detailed instruction for LLM | "生成一个关于...的题目" |
## How to Add New Prompts
### Step 1: Open the JSON file
Open `app/src/main/assets/question_prompts.json` in any text editor.
### Step 2: Add a new prompt object
Add a new object to the JSON array:
```json
{
"subject": "生活适应",
"grade": 1,
"topic": "你的新主题",
"difficulty": 2,
"promptTemplate": "生成一个关于[主题描述]的题目,要求[具体要求]"
}
```
### Step 3: Save and restart
The prompts are loaded when the app starts. Restart the app to load new prompts.
## Prompt Template Guidelines
### Good Prompt Templates
**Specific and detailed:**
```json
"promptTemplate": "生成一个关于认识家庭成员的题目,包括爸爸、妈妈、爷爷、奶奶等,要求描述家庭成员之间的关系和称呼"
```
**Include context:**
```json
"promptTemplate": "生成一个关于交通安全的题目,例如红绿灯、斑马线、过马路等,要求强调安全规则和实际应用场景"
```
**Specify requirements:**
```json
"promptTemplate": "生成一个关于认识颜色的题目,包括红、黄、蓝、绿等基本颜色,要求联系实际生活中的物品,如红色的苹果、黄色的香蕉等"
```
### Bad Prompt Templates
**Too vague:**
```json
"promptTemplate": "生成一个题目"
```
**Too broad:**
```json
"promptTemplate": "生成一个关于数学的题目"
```
**Not specific to special education:**
```json
"promptTemplate": "生成一个难一点的题目"
```
## Current Prompt Categories
### 生活适应 (Life Adaptation) - 25 prompts
- 认识家庭成员 (Family Members)
- 认识日常用品 (Daily Items)
- 认识天气 (Weather)
- 认识季节 (Seasons)
- 交通安全 (Traffic Safety)
- 认识食物 (Food)
- 个人卫生 (Personal Hygiene)
- 认识动物 (Animals)
- 情绪识别 (Emotion Recognition)
- 社交礼仪 (Social Etiquette)
- 时间概念 (Time Concepts)
- 认识颜色 (Colors)
- 认识形状 (Shapes)
- 数数练习 (Counting)
- 身体部位 (Body Parts)
- 穿衣自理 (Dressing)
- 整理物品 (Organizing)
- 认识数字 (Numbers)
- 简单加减法 (Basic Math)
- 认识钱币 (Money)
- 认识地图 (Maps)
- 紧急情况 (Emergencies)
- 公共场合行为 (Public Behavior)
- 认识职业 (Professions)
- 节日文化 (Festivals)
### 生活语文 (Life Chinese) - 7 prompts
- 认识汉字 (Chinese Characters)
- 简单句子理解 (Sentence Understanding)
- 词语配对 (Word Pairing)
- 看图说话 (Picture Description)
- 反义词 (Antonyms)
- 量词使用 (Measure Words)
- 标点符号 (Punctuation)
## Diversity Mechanism
The system ensures question diversity through:
1. **Prompt Pool Rotation**: Intelligently selects prompts that haven't been used recently
2. **Recent Usage Tracking**: Remembers the last 10 used prompts and avoids them
3. **Topic Tracking**: Records generated topics to prevent duplicate questions
4. **Automatic Replenishment**: Generates new questions when the unanswered count drops below threshold
## Configuration
You can configure the agent in `QuestionGenerationAgent.kt`:
```kotlin
data class AgentConfig(
val minUnansweredQuestions: Int = 10, // Minimum questions before triggering generation
val batchSize: Int = 5, // Number of questions to generate per batch
val generationTimeoutMs: Long = 30000 // LLM generation timeout
)
```
## Trigger Mechanism
Questions are generated **event-driven**, not time-driven:
1. User asks a question to the child
2. `onQuestionAsked(userId)` is triggered
3. System checks if unanswered questions < `minUnansweredQuestions`
4. If yes, generates `batchSize` new questions
5. Each question goes through: Generate → Review → Save to Database
## Testing
To manually trigger question generation:
```kotlin
questionGenerationAgent.triggerGeneration(userId)
```
## Troubleshooting
### Prompts not loading
- Check if `question_prompts.json` exists in `app/src/main/assets/`
- Verify JSON syntax is valid (no trailing commas, proper quotes)
- Check Logcat for error messages
### Questions not generating
- Ensure LLM is properly initialized
- Check if `llmManager` is not null
- Verify the user has been seen by the face detection system
### Low diversity
- Add more prompts to the JSON file
- Increase `maxRecentPrompts` in the agent
- Check if prompts have varied `topic` values
## Best Practices
1. **Be Specific**: Write detailed prompt templates that specify the exact requirements
2. **Include Examples**: Provide examples in the prompt template to guide the LLM
3. **Vary Topics**: Create many different topics within each subject
4. **Consider Difficulty**: Use appropriate difficulty levels (1-5) for special education
5. **Test Prompts**: Test each new prompt to ensure it generates quality questions
6. **Keep Updated**: Regularly review and update prompts based on generated question quality
## Example: Adding a New Subject
```json
{
"subject": "生活数学",
"grade": 1,
"topic": "比较大小",
"difficulty": 1,
"promptTemplate": "生成一个关于比较大小的题目,例如哪个更大、哪个更小,要求使用具体的物品如苹果、球等作为比较对象"
},
{
"subject": "生活数学",
"grade": 1,
"topic": "简单分类",
"difficulty": 2,
"promptTemplate": "生成一个关于分类的题目,例如把水果和蔬菜分开,把动物和植物分开,要求分类标准明确"
}
```
## Support
For questions or issues, check the Logcat output for error messages and review this guide.

View File

@@ -0,0 +1,305 @@
{
"_comment": "This is an EXAMPLE file showing how to add more prompts. Copy the prompts you want to the main question_prompts.json file.",
"_examples": [
{
"_description": "Example 1: Life Math - Comparing Sizes",
"subject": "生活数学",
"grade": 1,
"topic": "比较大小",
"difficulty": 1,
"promptTemplate": "生成一个关于比较大小的题目,例如哪个更大、哪个更小,要求使用具体的物品如苹果、球等作为比较对象"
},
{
"_description": "Example 2: Life Math - Simple Classification",
"subject": "生活数学",
"grade": 1,
"topic": "简单分类",
"difficulty": 2,
"promptTemplate": "生成一个关于分类的题目,例如把水果和蔬菜分开,把动物和植物分开,要求分类标准明确"
},
{
"_description": "Example 3: Life Adaptation - School Rules",
"subject": "生活适应",
"grade": 1,
"topic": "学校规则",
"difficulty": 2,
"promptTemplate": "生成一个关于学校规则的题目,例如上课要举手发言、不能在走廊奔跑等,要求强调遵守规则的重要性"
},
{
"_description": "Example 4: Life Chinese - Simple Reading",
"subject": "生活语文",
"grade": 1,
"topic": "简单阅读",
"difficulty": 3,
"promptTemplate": "生成一个简单阅读理解题目提供2-3句话的短文然后提出一个关于短文内容的问题"
},
{
"_description": "Example 5: Life Adaptation - Healthy Habits",
"subject": "生活适应",
"grade": 1,
"topic": "健康习惯",
"difficulty": 2,
"promptTemplate": "生成一个关于健康习惯的题目,例如早睡早起、多喝水、多运动等,要求解释为什么这些习惯好"
},
{
"_description": "Example 6: Life Adaptation - Using Phone",
"subject": "生活适应",
"grade": 1,
"topic": "使用电话",
"difficulty": 3,
"promptTemplate": "生成一个关于如何使用电话的题目,包括拨号、接听、挂断等基本操作,要求在紧急情况下如何求助"
},
{
"_description": "Example 7: Life Chinese - Sentence Completion",
"subject": "生活语文",
"grade": 1,
"topic": "句子填空",
"difficulty": 2,
"promptTemplate": "生成一个句子填空题,例如'我喜欢吃___',要求提供合适的选项让学生选择"
},
{
"_description": "Example 8: Life Adaptation - Weather Clothing",
"subject": "生活适应",
"grade": 1,
"topic": "天气与穿衣",
"difficulty": 2,
"promptTemplate": "生成一个关于根据天气选择衣服的题目,例如下雨天穿雨衣、冬天穿棉袄等,要求联系实际生活场景"
}
],
"_instructions": "To add these prompts to your system:",
"_step1": "1. Open app/src/main/assets/question_prompts.json",
"_step2": "2. Copy the objects from the _examples array (without the _description field)",
"_step3": "3. Paste them into the main JSON array",
"_step4": "4. Save the file and restart the app"
}
[
{
"subject": "生活适应",
"grade": 1,
"topic": "认识家庭成员",
"difficulty": 1,
"promptTemplate": "生成一个关于认识家庭成员的题目,适合智力障碍儿童,要求题目贴近日常生活,语言简单易懂"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "认识日常用品",
"difficulty": 1,
"promptTemplate": "生成一个关于认识日常用品的题目,例如牙刷、毛巾、杯子等,要求描述物品的用途和使用场景"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "认识天气",
"difficulty": 1,
"promptTemplate": "生成一个关于认识天气的题目,包括晴天、雨天、阴天等,要求联系实际生活场景"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "认识季节",
"difficulty": 2,
"promptTemplate": "生成一个关于认识四季的题目,要求描述不同季节的特点和对应的活动"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "交通安全",
"difficulty": 2,
"promptTemplate": "生成一个关于交通安全的题目,例如红绿灯、斑马线、过马路等,要求强调安全规则"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "认识食物",
"difficulty": 1,
"promptTemplate": "生成一个关于认识常见食物的题目,包括水果、蔬菜、主食等,要求联系实际饮食场景"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "个人卫生",
"difficulty": 1,
"promptTemplate": "生成一个关于个人卫生的题目,例如洗手、刷牙、洗澡等,要求强调卫生习惯的重要性"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "认识动物",
"difficulty": 1,
"promptTemplate": "生成一个关于认识常见动物的题目,包括猫、狗、鸟、鱼等,要求描述动物的特征和习性"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "情绪识别",
"difficulty": 2,
"promptTemplate": "生成一个关于识别情绪的题目,例如高兴、伤心、生气、害怕等,要求联系实际情境"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "社交礼仪",
"difficulty": 2,
"promptTemplate": "生成一个关于社交礼仪的题目,例如打招呼、说谢谢、对不起等,要求强调礼貌用语的使用场景"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "时间概念",
"difficulty": 2,
"promptTemplate": "生成一个关于时间概念的题目,例如早上、中午、晚上、昨天、今天、明天等,要求联系日常生活作息"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "认识颜色",
"difficulty": 1,
"promptTemplate": "生成一个关于认识颜色的题目,包括红、黄、蓝、绿等基本颜色,要求联系实际生活中的物品"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "认识形状",
"difficulty": 1,
"promptTemplate": "生成一个关于认识形状的题目,例如圆形、方形、三角形等,要求联系实际生活中的物品"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "数数练习",
"difficulty": 1,
"promptTemplate": "生成一个关于数数的题目要求10以内的数量联系实际场景如水果、玩具等"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "身体部位",
"difficulty": 1,
"promptTemplate": "生成一个关于认识身体部位的题目,例如手、脚、头、眼睛、耳朵等,要求描述部位的功能"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "穿衣自理",
"difficulty": 2,
"promptTemplate": "生成一个关于穿衣自理的题目,要求描述穿衣的步骤和注意事项"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "整理物品",
"difficulty": 2,
"promptTemplate": "生成一个关于整理物品的题目,例如整理书包、玩具、房间等,要求强调整理的重要性"
},
{
"subject": "生活语文",
"grade": 1,
"topic": "认识汉字",
"difficulty": 1,
"promptTemplate": "生成一个关于认识简单汉字的题目,要求选择日常生活中常用的高频汉字"
},
{
"subject": "生活语文",
"grade": 1,
"topic": "简单句子理解",
"difficulty": 2,
"promptTemplate": "生成一个关于简单句子理解的题目,要求句子简短,贴近生活场景"
},
{
"subject": "生活语文",
"grade": 1,
"topic": "词语配对",
"difficulty": 1,
"promptTemplate": "生成一个关于词语配对的题目,例如苹果-水果、猫-动物等,要求逻辑关系简单明了"
},
{
"subject": "生活语文",
"grade": 1,
"topic": "看图说话",
"difficulty": 2,
"promptTemplate": "生成一个看图说话的题目,用文字描述一个简单场景,让学生用一两句话描述看到的内容"
},
{
"subject": "生活语文",
"grade": 1,
"topic": "反义词",
"difficulty": 2,
"promptTemplate": "生成一个关于反义词的题目,例如大-小、高-矮、快-慢等,要求选择常用的反义词对"
},
{
"subject": "生活语文",
"grade": 1,
"topic": "量词使用",
"difficulty": 2,
"promptTemplate": "生成一个关于量词使用的题目,例如一个苹果、一本书、一只猫等,要求选择常见的量词搭配"
},
{
"subject": "生活语文",
"grade": 1,
"topic": "标点符号",
"difficulty": 2,
"promptTemplate": "生成一个关于标点符号的题目,主要涉及句号、问号、感叹号的基本使用"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "认识数字",
"difficulty": 1,
"promptTemplate": "生成一个关于认识数字的题目要求1-10的数字识别和书写"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "简单加减法",
"difficulty": 2,
"promptTemplate": "生成一个关于简单加减法的题目要求10以内的加减法联系实际场景"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "认识钱币",
"difficulty": 3,
"promptTemplate": "生成一个关于认识钱币的题目,包括元、角、分的认识,要求联系实际购物场景"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "认识地图",
"difficulty": 3,
"promptTemplate": "生成一个关于认识简单地图的题目,例如家庭、学校的平面图,要求理解基本方位"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "紧急情况",
"difficulty": 3,
"promptTemplate": "生成一个关于紧急情况处理的题目,例如着火、地震、迷路等,要求强调安全自救知识"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "公共场合行为",
"difficulty": 2,
"promptTemplate": "生成一个关于公共场合行为规范的题目,例如图书馆、医院、公交车等场景的礼仪"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "认识职业",
"difficulty": 2,
"promptTemplate": "生成一个关于认识常见职业的题目,例如医生、老师、警察、消防员等,要求描述职业的工作内容"
},
{
"subject": "生活适应",
"grade": 1,
"topic": "节日文化",
"difficulty": 2,
"promptTemplate": "生成一个关于传统节日的题目,例如春节、中秋节、端午节等,要求介绍节日的习俗和意义"
}
]

View File

@@ -6,6 +6,7 @@ import android.content.Context
import android.content.pm.PackageManager import android.content.pm.PackageManager
import android.content.res.ColorStateList import android.content.res.ColorStateList
import android.graphics.Color import android.graphics.Color
import android.graphics.BitmapFactory
import android.os.Build import android.os.Build
import android.os.Bundle import android.os.Bundle
import android.os.Handler import android.os.Handler
@@ -16,6 +17,7 @@ import android.util.Log
import android.view.MotionEvent import android.view.MotionEvent
import android.view.ViewGroup import android.view.ViewGroup
import android.widget.Button import android.widget.Button
import android.widget.ImageView
import android.widget.TextView import android.widget.TextView
import android.widget.Toast import android.widget.Toast
import androidx.core.app.ActivityCompat import androidx.core.app.ActivityCompat
@@ -25,7 +27,6 @@ import android.view.View
import androidx.lifecycle.Lifecycle import androidx.lifecycle.Lifecycle
import androidx.lifecycle.LifecycleOwner import androidx.lifecycle.LifecycleOwner
import androidx.lifecycle.LifecycleRegistry import androidx.lifecycle.LifecycleRegistry
import android.widget.ImageView
import com.unity3d.player.UnityPlayer import com.unity3d.player.UnityPlayer
import com.unity3d.player.UnityPlayerActivity import com.unity3d.player.UnityPlayerActivity
import com.digitalperson.audio.AudioProcessor import com.digitalperson.audio.AudioProcessor
@@ -33,6 +34,7 @@ import com.digitalperson.asr.AsrManager
import com.digitalperson.cloud.CloudApiManager import com.digitalperson.cloud.CloudApiManager
import com.digitalperson.cloud.CloudReflectionHelper import com.digitalperson.cloud.CloudReflectionHelper
import com.digitalperson.config.AppConfig import com.digitalperson.config.AppConfig
import com.digitalperson.embedding.RefImageMatcher
import com.digitalperson.question.QuestionGenerationAgent import com.digitalperson.question.QuestionGenerationAgent
import com.digitalperson.data.AppDatabase import com.digitalperson.data.AppDatabase
import com.digitalperson.face.FaceDetectionPipeline import com.digitalperson.face.FaceDetectionPipeline
@@ -48,8 +50,6 @@ import com.digitalperson.tts.TtsController
import com.digitalperson.util.FileHelper import com.digitalperson.util.FileHelper
import com.digitalperson.vad.VadManager import com.digitalperson.vad.VadManager
import kotlinx.coroutines.* import kotlinx.coroutines.*
import com.digitalperson.embedding.RefImageMatcher
import android.graphics.BitmapFactory
class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner { class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
@@ -260,6 +260,11 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
recordButtonGlow = chatLayout.findViewById(R.id.record_button_glow) recordButtonGlow = chatLayout.findViewById(R.id.record_button_glow)
refMatchImageView = chatLayout.findViewById(R.id.ref_match_image) refMatchImageView = chatLayout.findViewById(R.id.ref_match_image)
if (!AppConfig.SHOW_DEBUG_TEXT) {
chatHistoryText.visibility = View.GONE
chatLayout.findViewById<View>(R.id.scroll_view).visibility = View.GONE
}
// 根据配置设置按钮可见性 // 根据配置设置按钮可见性
if (AppConfig.USE_HOLD_TO_SPEAK) { if (AppConfig.USE_HOLD_TO_SPEAK) {
holdToSpeakButton.visibility = View.VISIBLE holdToSpeakButton.visibility = View.VISIBLE
@@ -366,6 +371,8 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
override fun onLlmCalled(text: String) { override fun onLlmCalled(text: String) {
Log.d("UnityDigitalPerson", "LLM called with: $text") Log.d("UnityDigitalPerson", "LLM called with: $text")
interactionCoordinator.onUserAsrText(text) interactionCoordinator.onUserAsrText(text)
// 用用户问题提前匹配:比等 LLM 回复更早显示图片(模拟器/真机通用)
maybeShowMatchedRefImage(text)
} }
}) })
setAudioProcessor(audioProcessor) setAudioProcessor(audioProcessor)
@@ -664,6 +671,7 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
} }
private fun appendChat(text: String) { private fun appendChat(text: String) {
if (!AppConfig.SHOW_DEBUG_TEXT) return
runOnUiThread { runOnUiThread {
chatHistoryText.append(text + "\n") chatHistoryText.append(text + "\n")
} }
@@ -696,6 +704,8 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
override fun onSpeak(text: String) { override fun onSpeak(text: String) {
ttsController.enqueueSegment(text) ttsController.enqueueSegment(text)
ttsController.enqueueEnd() ttsController.enqueueEnd()
// 主动发言(问候/主动提问)也尝试匹配参考图片
maybeShowMatchedRefImage(text)
} }
override fun onRequestCloudReply(prompt: String) { override fun onRequestCloudReply(prompt: String) {
@@ -759,13 +769,22 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
private fun maybeShowMatchedRefImage(text: String) { private fun maybeShowMatchedRefImage(text: String) {
val imageView = refMatchImageView ?: return val imageView = refMatchImageView ?: return
// Unity Activity already has coroutines // 每次匹配前先清掉上一张图
CoroutineScope(SupervisorJob() + Dispatchers.IO).launch { runOnUiThread {
imageView.setImageBitmap(null)
imageView.visibility = View.GONE
}
ioScope.launch {
val match = RefImageMatcher.findBestMatch(applicationContext, text) val match = RefImageMatcher.findBestMatch(applicationContext, text)
if (match == null) return@launch if (match == null) {
Log.d("RefImageMatch", "未找到匹配图片 query=\"${text.take(80)}\"")
return@launch
}
Log.d("RefImageMatch", "匹配成功 score=${match.score} path=${match.pngAssetPath} query=\"${text.take(80)}\"")
val bitmap = try { val bitmap = try {
assets.open(match.pngAssetPath).use { BitmapFactory.decodeStream(it) } assets.open(match.pngAssetPath).use { BitmapFactory.decodeStream(it) }
} catch (_: Throwable) { } catch (e: Throwable) {
Log.w("RefImageMatch", "图片加载失败 path=${match.pngAssetPath}", e)
null null
} }
if (bitmap == null) return@launch if (bitmap == null) return@launch

View File

@@ -6,6 +6,7 @@ import android.util.Log
import com.digitalperson.BuildConfig import com.digitalperson.BuildConfig
import com.digitalperson.audio.AudioProcessor import com.digitalperson.audio.AudioProcessor
import com.digitalperson.config.AppConfig import com.digitalperson.config.AppConfig
import com.digitalperson.env.RuntimeEnv
import com.digitalperson.engine.SenseVoiceEngineRKNN import com.digitalperson.engine.SenseVoiceEngineRKNN
import com.digitalperson.util.FileHelper import com.digitalperson.util.FileHelper
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers
@@ -23,7 +24,6 @@ class AsrManager(private val context: Context) {
private var senseVoice: SenseVoiceEngineRKNN? = null private var senseVoice: SenseVoiceEngineRKNN? = null
private val nativeLock = Any() private val nativeLock = Any()
private val asrQueue = Channel<Pair<FloatArray, FloatArray>>(capacity = Channel.UNLIMITED) private val asrQueue = Channel<Pair<FloatArray, FloatArray>>(capacity = Channel.UNLIMITED)
private var audioProcessor: AudioProcessor? = null private var audioProcessor: AudioProcessor? = null
@@ -48,6 +48,10 @@ class AsrManager(private val context: Context) {
} }
fun initSenseVoiceModel(): Boolean { fun initSenseVoiceModel(): Boolean {
if (RuntimeEnv.isEmulator()) {
Log.w(TAG, "ASR: emulator detected; skip local RKNN init and use cloud ASR")
return false
}
return try { return try {
Log.i(TAG, "ASR: init SenseVoice RKNN (scheme A)") Log.i(TAG, "ASR: init SenseVoice RKNN (scheme A)")
@@ -134,22 +138,46 @@ class AsrManager(private val context: Context) {
saveAsrAudio(originalSeg, processedSeg) saveAsrAudio(originalSeg, processedSeg)
val raw = synchronized(nativeLock) { val localText = synchronized(nativeLock) {
val e = senseVoice val e = senseVoice
if (e == null || !e.isInitialized) { if (e == null || !e.isInitialized) {
Log.e(TAG, "ASR failed: SenseVoice engine not initialized")
"" ""
} else { } else {
try { try {
e.transcribeBuffer(processedSeg) removeTokens(e.transcribeBuffer(processedSeg))
} catch (e: Throwable) { } catch (t: Throwable) {
Log.e(TAG, "ASR transcribe failed: ${e.message}") Log.e(TAG, "ASR transcribe failed: ${t.message}")
"" ""
} }
} }
}.trim()
val text = if (localText.isNotBlank()) {
localText
} else {
// 模拟器或本地 RKNN 未就绪使用腾讯云「一句话识别」SDKapp/libs/asr-one-sentence-release.aar
val shouldTryTencent =
BuildConfig.HAS_TENCENT_ASR_SDK && (RuntimeEnv.isEmulator() || !isInitialized())
if (!shouldTryTencent) {
Log.e(
TAG,
"ASR failed: local RKNN not ready and Tencent SDK unavailable " +
"(add libs/asr-one-sentence-release.aar or fix SenseVoice init)"
)
""
} else {
withContext(Dispatchers.IO) {
try {
// 云端 ASR 使用原始录音(未经 AEC/NS
// 模拟器上 AEC/NS 不可用processedSeg 可能被处理成近似静音
TencentOneSentenceAsr.transcribePcm16Mono(originalSeg)
} catch (t: Throwable) {
Log.e(TAG, "Tencent ASR failed: ${t.message}")
""
}
}.trim()
}
} }
Log.d(TAG, "ASR raw result: $raw")
val text = removeTokens(raw)
val filterResult = filterText(text) val filterResult = filterText(text)
if (filterResult != null) { if (filterResult != null) {
@@ -220,4 +248,5 @@ class AsrManager(private val context: Context) {
} }
return null return null
} }
} }

View File

@@ -0,0 +1,216 @@
package com.digitalperson.asr
import android.util.Base64
import android.util.Log
import com.digitalperson.config.AppConfig
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.OkHttpClient
import okhttp3.Request
import okhttp3.RequestBody.Companion.toRequestBody
import org.json.JSONObject
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.security.MessageDigest
import java.text.SimpleDateFormat
import java.util.Date
import java.util.Locale
import java.util.TimeZone
import java.util.concurrent.TimeUnit
import javax.crypto.Mac
import javax.crypto.spec.SecretKeySpec
/**
 * Direct REST implementation of Tencent Cloud "one-sentence recognition"
 * (SentenceRecognition) with TC3-HMAC-SHA256 request signing.
 *
 * Deliberately does not depend on the SDK AAR: the request is signed and sent
 * with OkHttp. The signing timestamp is taken from the server's `Date` response
 * header, which sidesteps AuthFailure.SignatureExpire errors caused by emulator
 * clock skew.
 *
 * Docs: https://cloud.tencent.com/document/product/1093/35646
 */
object TencentOneSentenceAsr {
    private const val TAG = "TencentOneSentenceAsr"
    private const val HOST = "asr.tencentcloudapi.com"
    private const val ACTION = "SentenceRecognition"
    private const val VERSION = "2019-06-14"

    // Shared client: 10s to connect, up to 30s to wait for the recognition response.
    private val client = OkHttpClient.Builder()
        .connectTimeout(10, TimeUnit.SECONDS)
        .readTimeout(30, TimeUnit.SECONDS)
        .build()

    /**
     * Transcribes a FloatArray (16 kHz mono, samples in -1..1) to text via the
     * one-sentence recognition API. Blocks until the HTTP response returns or
     * times out — call from an IO thread.
     *
     * @return recognized text, or "" on any failure (missing credentials, empty
     *         input, HTTP/auth error, unparsable response).
     */
    fun transcribePcm16Mono(pcmFloat: FloatArray): String {
        val appId = AppConfig.QCloud.APP_ID.trim()
        val sid = AppConfig.QCloud.SECRET_ID.trim()
        val skey = AppConfig.QCloud.SECRET_KEY.trim()
        if (appId.isEmpty() || sid.isEmpty() || skey.isEmpty()) {
            Log.e(TAG, "APP_ID / SECRET_ID / SECRET_KEY 为空")
            return ""
        }
        if (pcmFloat.isEmpty()) return ""
        val pcmBytes = floatToPcm16Bytes(pcmFloat)
        val pcmBase64 = Base64.encodeToString(pcmBytes, Base64.NO_WRAP)
        // Diagnostics: check the signal level — an RMS near 0 means the microphone
        // captured (almost) no sound.
        val rms = kotlin.math.sqrt(pcmFloat.fold(0.0) { acc, v -> acc + v * v } / pcmFloat.size)
        val maxAmp = pcmFloat.maxOf { kotlin.math.abs(it) }
        Log.d(TAG, "一句话识别:${pcmFloat.size} 采样点,${pcmFloat.size / 16000.0}s,${pcmBytes.size} bytes,RMS=${"%.4f".format(rms)} maxAmp=${"%.4f".format(maxAmp)}")
        if (maxAmp < 0.01f) {
            Log.w(TAG, "⚠ 音频幅度极低(maxAmp=${"%.5f".format(maxAmp)}),模拟器麦克风可能没有采集到声音!请检查:模拟器扩展控制 → 麦克风 → 使用宿主机麦克风")
        }
        // Use server time for signing, correcting for emulator clock skew.
        val timestamp = fetchServerTimestamp()
        val date = utcDate(timestamp)
        val payload = buildPayload(appId, pcmBase64, pcmBytes.size)
        val auth = buildAuthorization(sid, skey, date, timestamp, payload)
        val request = Request.Builder()
            .url("https://$HOST")
            .addHeader("Authorization", auth)
            .addHeader("Content-Type", "application/json; charset=utf-8")
            .addHeader("Host", HOST)
            .addHeader("X-TC-Action", ACTION)
            .addHeader("X-TC-Version", VERSION)
            .addHeader("X-TC-Timestamp", timestamp.toString())
            .post(payload.toRequestBody("application/json; charset=utf-8".toMediaType()))
            .build()
        return try {
            val response = client.newCall(request).execute()
            // ResponseBody.string() consumes and closes the body.
            val body = response.body?.string().orEmpty()
            Log.d(TAG, "API 响应: ${body.take(400)}")
            parseResult(body)
        } catch (e: Exception) {
            Log.e(TAG, "HTTP 请求失败: ${e.message}", e)
            ""
        }
    }

    // ─── Helpers ──────────────────────────────────────────────────────────

    /**
     * Issues a HEAD request and reads the `Date` response header to obtain an
     * accurate epoch-second timestamp. Falls back to the device clock (which may
     * be skewed) when the request fails or the header is missing.
     */
    private fun fetchServerTimestamp(): Long {
        return try {
            val req = Request.Builder().url("https://$HOST").head().build()
            val resp = client.newCall(req).execute()
            val dateHeader = resp.header("Date")
            resp.close()
            if (dateHeader != null) {
                // HTTP-date format, e.g. "Tue, 15 Nov 1994 08:12:31 GMT".
                val sdf = SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH)
                val serverTs = sdf.parse(dateHeader)?.time?.div(1000) ?: deviceTimestamp()
                val deviceTs = deviceTimestamp()
                val offset = serverTs - deviceTs
                if (kotlin.math.abs(offset) > 60) {
                    Log.w(TAG, "设备时钟偏差 ${offset}s,使用服务器时间修正(设备=${deviceTs}, 服务器=${serverTs})")
                }
                serverTs
            } else {
                deviceTimestamp()
            }
        } catch (e: Exception) {
            Log.w(TAG, "获取服务器时间失败: ${e.message},使用设备时间")
            deviceTimestamp()
        }
    }

    /** Device wall-clock time in epoch seconds. */
    private fun deviceTimestamp() = System.currentTimeMillis() / 1000

    /** Formats the timestamp as a UTC `yyyy-MM-dd` date (the TC3 credential-scope date). */
    private fun utcDate(timestamp: Long): String {
        val sdf = SimpleDateFormat("yyyy-MM-dd", Locale.US)
        sdf.timeZone = TimeZone.getTimeZone("UTC")
        return sdf.format(Date(timestamp * 1000))
    }

    /**
     * Builds the SentenceRecognition JSON request body (field semantics per the API docs).
     * NOTE(review): [appId] is currently unused here — the API appears to derive the
     * account from the signed credentials; confirm before removing the parameter.
     */
    private fun buildPayload(appId: String, base64: String, dataLen: Int): String =
        JSONObject().apply {
            put("ProjectId", 0)
            put("SubServiceType", 2)
            put("EngSerViceType", "16k_zh")
            put("SourceType", 1) // 1 = inline audio data (not a URL)
            put("VoiceFormat", "pcm")
            put("UsrAudioKey", "digital-person-asr")
            put("FilterDirty", 0)
            put("FilterModal", 0)
            put("FilterPunc", 0)
            put("ConvertNumMode", 1)
            put("Data", base64)
            put("DataLen", dataLen)
        }.toString()

    // ─── TC3-HMAC-SHA256 signing ──────────────────────────────────────────

    /**
     * Builds the TC3-HMAC-SHA256 `Authorization` header value:
     * canonical request → string-to-sign → derived signing key → signature.
     * Signed headers are fixed to `content-type;host`.
     */
    private fun buildAuthorization(
        secretId: String,
        secretKey: String,
        date: String,
        timestamp: Long,
        payload: String,
    ): String {
        val payloadHash = sha256Hex(payload)
        // Canonical request layout: method, URI, query string, canonical headers
        // (the empty element supplies the required trailing newline), signed headers,
        // hex-encoded payload hash.
        val canonicalRequest = listOf(
            "POST", "/", "",
            "content-type:application/json; charset=utf-8",
            "host:$HOST",
            "",
            "content-type;host",
            payloadHash,
        ).joinToString("\n")
        val credentialScope = "$date/asr/tc3_request"
        val stringToSign = "TC3-HMAC-SHA256\n$timestamp\n$credentialScope\n${sha256Hex(canonicalRequest)}"
        // Key derivation chain: "TC3" + secretKey → date → service ("asr") → "tc3_request".
        val signingKey = hmacSha256(
            hmacSha256(hmacSha256("TC3$secretKey".toByteArray(), date), "asr"),
            "tc3_request",
        )
        val signature = hmacSha256(signingKey, stringToSign).joinToString("") { "%02x".format(it) }
        return "TC3-HMAC-SHA256 Credential=$secretId/$credentialScope, SignedHeaders=content-type;host, Signature=$signature"
    }

    /**
     * Extracts `Response.Result` from the API JSON. Logs and returns "" when the
     * payload carries a `Response.Error` object or cannot be parsed.
     */
    private fun parseResult(json: String): String {
        if (json.isBlank()) return ""
        return try {
            val response = JSONObject(json).optJSONObject("Response") ?: return ""
            val error = response.optJSONObject("Error")
            if (error != null) {
                Log.e(TAG, "API 错误: ${error.optString("Code")} - ${error.optString("Message")}")
                return ""
            }
            response.optString("Result").also { text ->
                if (text.isNotBlank()) Log.d(TAG, "识别结果: \"$text\"")
            }
        } catch (e: Exception) {
            Log.w(TAG, "解析响应失败: ${json.take(300)}")
            ""
        }
    }

    /** Lower-case hex SHA-256 of the UTF-8 bytes of [data]. */
    private fun sha256Hex(data: String): String {
        val md = MessageDigest.getInstance("SHA-256")
        return md.digest(data.toByteArray(Charsets.UTF_8)).joinToString("") { "%02x".format(it) }
    }

    /** HMAC-SHA256 of the UTF-8 bytes of [data] under [key]. */
    private fun hmacSha256(key: ByteArray, data: String): ByteArray {
        val mac = Mac.getInstance("HmacSHA256")
        mac.init(SecretKeySpec(key, "HmacSHA256"))
        return mac.doFinal(data.toByteArray(Charsets.UTF_8))
    }

    /** Converts float samples (-1..1, clamped) to little-endian signed 16-bit PCM bytes. */
    private fun floatToPcm16Bytes(samples: FloatArray): ByteArray {
        val buf = ByteBuffer.allocate(samples.size * 2).order(ByteOrder.LITTLE_ENDIAN)
        samples.forEach { buf.putShort((it.coerceIn(-1f, 1f) * 32767f).toInt().toShort()) }
        return buf.array()
    }
}

View File

@@ -116,6 +116,19 @@ object AppConfig {
const val MODEL_FILE = "bge-small-zh-v1.5.rknn" const val MODEL_FILE = "bge-small-zh-v1.5.rknn"
} }
/**
 * Minimum normalized score used by [RefImageMatcher] when it falls back to
 * edit-distance matching on the emulator (NOT comparable to the BGE cosine threshold).
 * Score = 1 - Levenshtein / max(len); closer to 1 means more similar.
 */
object RefMatchEmulator {
    /**
     * Threshold for the emulator's hybrid score (path-keyword hit ratio + edit distance).
     * Path keywords: a single keyword hit ≈ 0.25, already enough to confirm topical relevance.
     * The former 0.82 was a pure edit-distance threshold and was unreachable when the surface
     * text differed substantially, hence the drop to 0.20.
     */
    const val MIN_NORMALIZED_EDIT_SCORE = 0.20f
}
/** /**
* app/note/ref 通过 Gradle 额外 assets 目录打入 apk 后,在 assets 中的根路径为 `ref/`。 * app/note/ref 通过 Gradle 额外 assets 目录打入 apk 后,在 assets 中的根路径为 `ref/`。
*/ */

View File

@@ -0,0 +1,49 @@
package com.digitalperson.embedding
import kotlin.math.max
import kotlin.math.min
/**
 * Levenshtein-based character-level similarity (emulator fallback; carries no semantic
 * meaning and is intended only for integration testing / demos).
 *
 * Score: 1 - dist / max(len1, len2). Not directly comparable to cosine-similarity thresholds.
 */
object EditDistanceSimilarity {

    /** Normalized similarity in [0, 1] between the trimmed inputs; 1 means identical. */
    fun normalizedScore(a: String, b: String): Float {
        val left = a.trim()
        val right = b.trim()
        return when {
            left.isEmpty() && right.isEmpty() -> 1f
            left.isEmpty() || right.isEmpty() -> 0f
            else -> {
                val longest = maxOf(left.length, right.length, 1)
                1f - levenshtein(left, right).toFloat() / longest.toFloat()
            }
        }
    }

    /**
     * Classic two-row dynamic-programming edit distance, O(n*m);
     * adequate for the medium-sized corpus scanned on the emulator.
     */
    fun levenshtein(s1: String, s2: String): Int {
        if (s1.isEmpty()) return s2.length
        if (s2.isEmpty()) return s1.length
        var previous = IntArray(s2.length + 1) { it }
        var current = IntArray(s2.length + 1)
        s1.forEachIndexed { i, ch ->
            current[0] = i + 1
            for (j in 1..s2.length) {
                val substitution = previous[j - 1] + if (ch == s2[j - 1]) 0 else 1
                current[j] = minOf(previous[j] + 1, current[j - 1] + 1, substitution)
            }
            val swap = previous
            previous = current
            current = swap
        }
        return previous[s2.length]
    }
}

View File

@@ -4,6 +4,7 @@ import android.content.Context
import android.util.Log import android.util.Log
import com.digitalperson.config.AppConfig import com.digitalperson.config.AppConfig
import com.digitalperson.data.AppDatabase import com.digitalperson.data.AppDatabase
import com.digitalperson.data.dao.QuestionDao
import com.digitalperson.data.entity.Question import com.digitalperson.data.entity.Question
import com.digitalperson.data.entity.RefTextEmbedding import com.digitalperson.data.entity.RefTextEmbedding
import com.digitalperson.data.util.floatArrayToEmbeddingBytes import com.digitalperson.data.util.floatArrayToEmbeddingBytes
@@ -27,15 +28,15 @@ object RefEmbeddingIndexer {
val dao = db.refTextEmbeddingDao() val dao = db.refTextEmbeddingDao()
val questionDao = db.questionDao() val questionDao = db.questionDao()
if (!BgeEmbedding.initialize(app)) {
Log.e(TAG, "[RefEmbed] BGE 初始化失败,跳过 ref 语料索引")
return@withContext
}
val root = AppConfig.RefCorpus.ASSETS_ROOT val root = AppConfig.RefCorpus.ASSETS_ROOT
val paths = RefCorpusAssetScanner.listTxtFilesUnder(app, root) val paths = RefCorpusAssetScanner.listTxtFilesUnder(app, root)
Log.i(TAG, "[RefEmbed] 发现 ${paths.size} 个 txtroot=$root") Log.i(TAG, "[RefEmbed] 发现 ${paths.size} 个 txtroot=$root")
val bgeOk = BgeEmbedding.initialize(app)
if (!bgeOk) {
Log.w(TAG, "[RefEmbed] BGE 未就绪常见于模拟器仅扫描题库ref 配图匹配可用编辑距离")
}
var skipped = 0 var skipped = 0
var embedded = 0 var embedded = 0
var empty = 0 var empty = 0
@@ -50,28 +51,9 @@ object RefEmbeddingIndexer {
continue continue
} }
// 题库:遇到包含 ?/ 的行,写入 questions ingestQuestionsFromRaw(raw, path, questionDao)
val subject = extractSubjectFromRaw(raw)
val grade = extractGradeFromPath(path) if (!bgeOk) continue
val questionLines = extractQuestionLines(raw)
for (line in questionLines) {
val content = line.trim()
if (content.isEmpty()) continue
val exists = questionDao.findByContentSubjectGrade(content, subject, grade)
if (exists == null) {
questionDao.insert(
Question(
id = 0,
content = content,
answer = null,
subject = subject,
grade = grade,
difficulty = 1,
createdAt = System.currentTimeMillis()
)
)
}
}
val embedText = RefTxtEmbedText.fromRawFileContent(raw) val embedText = RefTxtEmbedText.fromRawFileContent(raw)
if (embedText.isEmpty()) { if (embedText.isEmpty()) {
@@ -110,10 +92,34 @@ object RefEmbeddingIndexer {
Log.i( Log.i(
TAG, TAG,
"[RefEmbed] 完成 embedded=$embedded skipped=$skipped empty=$empty failed=$failed cacheSize=${RefEmbeddingMemoryCache.size()}" "[RefEmbed] 完成 embedded=$embedded skipped=$skipped empty=$empty failed=$failed cacheSize=${RefEmbeddingMemoryCache.size()} bgeOk=$bgeOk"
) )
} }
/**
 * Parses question lines out of a raw ref-corpus txt file and inserts any that are
 * not already present into the questions table.
 *
 * @param raw         full text content of the txt asset
 * @param path        asset path of the file; used to derive the grade
 * @param questionDao DAO used for the exists-check and the insert
 */
private fun ingestQuestionsFromRaw(raw: String, path: String, questionDao: QuestionDao) {
    val subject = extractSubjectFromRaw(raw)
    val grade = extractGradeFromPath(path)
    val questionLines = extractQuestionLines(raw)
    for (line in questionLines) {
        val content = line.trim()
        if (content.isEmpty()) continue
        // Dedupe on (content, subject, grade) so re-indexing does not create duplicates.
        val exists = questionDao.findByContentSubjectGrade(content, subject, grade)
        if (exists == null) {
            questionDao.insert(
                Question(
                    id = 0, // 0: presumably lets the persistence layer auto-generate the key — confirm against the entity
                    content = content,
                    answer = null, // no answer is extracted from the corpus here
                    subject = subject,
                    grade = grade,
                    difficulty = 1, // default; the corpus carries no difficulty information
                    createdAt = System.currentTimeMillis()
                )
            )
        }
    }
}
private fun extractSubjectFromRaw(raw: String): String? { private fun extractSubjectFromRaw(raw: String): String? {
val line = raw.lineSequence() val line = raw.lineSequence()
.map { it.trimEnd() } .map { it.trimEnd() }

View File

@@ -3,6 +3,7 @@ package com.digitalperson.embedding
import android.content.Context import android.content.Context
import android.util.Log import android.util.Log
import com.digitalperson.config.AppConfig import com.digitalperson.config.AppConfig
import com.digitalperson.env.RuntimeEnv
import kotlin.math.sqrt import kotlin.math.sqrt
data class RefImageMatch( data class RefImageMatch(
@@ -16,7 +17,8 @@ object RefImageMatcher {
private const val TAG = AppConfig.TAG private const val TAG = AppConfig.TAG
/** /**
* @param threshold 余弦相似度阈值(向量已归一化时等价于 dot product * @param threshold 真机 BGE余弦相似度阈值(向量已归一化时等价于 dot product
* 模拟器:忽略该参数,使用 [AppConfig.RefMatchEmulator.MIN_NORMALIZED_EDIT_SCORE](编辑距离归一化分)。
*/ */
fun findBestMatch( fun findBestMatch(
context: Context, context: Context,
@@ -26,6 +28,10 @@ object RefImageMatcher {
val query = text.trim() val query = text.trim()
if (query.isEmpty()) return null if (query.isEmpty()) return null
if (RuntimeEnv.isEmulator()) {
return findBestMatchEditDistance(context, query)
}
if (!BgeEmbedding.isReady()) { if (!BgeEmbedding.isReady()) {
val ok = BgeEmbedding.initialize(context.applicationContext) val ok = BgeEmbedding.initialize(context.applicationContext)
if (!ok) { if (!ok) {
@@ -78,6 +84,203 @@ object RefImageMatcher {
) )
} }
/**
 * Emulator fallback matcher: BGE is not loaded; candidates are ranked with a hybrid of
 * **path-keyword hit ratio** (primary) + edit distance (secondary).
 *
 * Path keywords: the deepest directory name split on "-", e.g.
 * "一年级上-生活适应-社会生活-元旦" → ["一年级上", "生活适应", "社会生活", "元旦"].
 * Hit ratio = hits / total keywords (a single hit ≈ 0.25, enough to pass the 0.20 threshold).
 *
 * Pure edit distance originally used a 0.82 threshold, but LLM reply text differs
 * greatly from the short reference sentences even when the topic matches, so it
 * rarely qualified; the keyword scheme raised accuracy substantially.
 *
 * Exposed as `internal` so androidTest can regress "should-match-but-didn't" cases.
 *
 * @return the best-scoring txt/png pair above the emulator threshold, or null when
 *         nothing qualifies or the matching png asset does not exist.
 */
internal fun findBestMatchEditDistance(context: Context, query: String): RefImageMatch? {
    val app = context.applicationContext
    val root = AppConfig.RefCorpus.ASSETS_ROOT
    val paths = RefCorpusAssetScanner.listTxtFilesUnder(app, root)
    val minScore = AppConfig.RefMatchEmulator.MIN_NORMALIZED_EDIT_SCORE
    val qNorm = normalizeTextForEmuMatch(query.trim())
    if (qNorm.isEmpty()) return null
    var bestPath: String? = null
    var bestScore = -1f
    var bestSubstr = -1f
    var bestEdit = -1f
    for (path in paths) {
        // Primary: path-keyword hit ratio (no IO, O(1)).
        val kwScore = pathKeywordMatchScore(path, qNorm)
        // Secondary: content match (does IO).
        // Strategy: ① substring containment (a query sentence ⊆ candidate, or a
        //    candidate sentence ⊆ a query sentence); ② per-sentence edit distance.
        // `candidate` is the txt body with '#' lines stripped (may contain both the
        // question and the answer); a query sentence appearing verbatim inside it
        // means the topic is an exact hit.
        // Every txt MUST be content-scored: gating the read on bestScore<0 or
        // kwScore>0 lets another file's weak edit score "squat" first, so a file
        // whose body contains the exact question is never even opened (e.g. the
        // "上厕所" directory: kw misses but the body contains the original sentence).
        var substrScore = 0f
        var editScore = 0f
        try {
            val raw = app.assets.open(path).bufferedReader(Charsets.UTF_8).use { it.readText() }
            val candidate = normalizeTextForEmuMatch(RefTxtEmbedText.fromRawFileContent(raw))
            if (candidate.isNotEmpty()) {
                val querySentences = splitSentences(qNorm)
                val candidateSentences = splitSentences(candidate)
                // ① Substring: query sentence ⊆ candidate, or candidate sentence ⊆ query
                // sentence. Very short fragments (e.g. "小朋友") occur in many texts, so a
                // flat high score would misfire — score is graded by matched length.
                substrScore = querySentences.maxOfOrNull { qs ->
                    var s = 0f
                    if (qs.length >= 4 && candidate.contains(qs)) {
                        s = maxOf(s, emulatorSubstringScoreForLength(qs.length))
                    }
                    for (cs in candidateSentences) {
                        if (cs.length >= 6 && qs.contains(cs)) {
                            s = maxOf(s, emulatorSubstringScoreForLength(cs.length) * 0.92f)
                        }
                    }
                    s
                } ?: 0f
                // ② Edit distance (sentence vs. sentence, best score wins).
                editScore = querySentences.maxOfOrNull { qs ->
                    candidateSentences.maxOfOrNull { cs ->
                        EditDistanceSimilarity.normalizedScore(qs, cs)
                    } ?: 0f
                } ?: 0f
            }
        } catch (e: Exception) {
            Log.w(TAG, "[RefMatchEmu] read fail $path: ${e.message}")
        }
        val score = maxOf(kwScore, substrScore, editScore)
        if (score > 0f) {
            Log.v(TAG, "[RefMatchEmu] candidate score=$score (kw=$kwScore substr=$substrScore edit=$editScore) path=$path")
        }
        if (isBetterEmulatorCandidate(score, substrScore, editScore, bestScore, bestSubstr, bestEdit)) {
            bestScore = score
            bestSubstr = substrScore
            bestEdit = editScore
            bestPath = path
        }
    }
    val txtPath = bestPath ?: run {
        Log.d(TAG, "[RefMatchEmu] 无候选文件 query=${qNorm.take(60)}")
        return null
    }
    if (bestScore < minScore) {
        Log.d(TAG, "[RefMatchEmu] 分数不足 bestScore=$bestScore minScore=$minScore bestPath=$txtPath query=${qNorm.take(60)}")
        return null
    }
    // The image shares the txt's path with a .png extension.
    val pngPath = if (txtPath.endsWith(".txt", ignoreCase = true)) {
        txtPath.dropLast(4) + ".png"
    } else {
        "$txtPath.png"
    }
    // Probe asset existence by opening and immediately closing it.
    val exists = try {
        context.assets.open(pngPath).close()
        true
    } catch (_: Throwable) {
        false
    }
    if (!exists) return null
    Log.d(TAG, "[RefMatchEmu] best=$txtPath score=$bestScore query=${qNorm.take(30)}")
    return RefImageMatch(
        txtAssetPath = txtPath,
        pngAssetPath = pngPath,
        score = bestScore
    )
}
/**
 * Topic-keyword hit ratio between the query text and the keywords extracted
 * from a file path, e.g. a path containing the directory
 * "一年级上-生活适应-社会生活-元旦" yields keywords ["一年级上","生活适应","社会生活","元旦"].
 *
 * @return hits / keyword-count in [0, 1]; 0 when the path yields no keywords.
 */
private fun pathKeywordMatchScore(path: String, query: String): Float {
    val topics = extractPathTopicKeywords(path)
    if (topics.isEmpty()) return 0f
    var hits = 0
    for (topic in topics) {
        if (queryMatchesPathKeyword(query, topic)) hits++
    }
    return hits.toFloat() / topics.size
}
/**
 * Unifies full-width and half-width punctuation before matching, so that a
 * half-width `:` in code or ASR output still matches the full-width `:` used
 * in the corpus (otherwise long-sentence substring matches fail).
 */
private fun normalizeTextForEmuMatch(s: String): String {
    val out = StringBuilder(s.length)
    for (ch in s) {
        val mapped = when (ch) {
            '\uFF1A', '\uFE55', ':' -> ':'
            '\uFF0C' -> ','
            '\uFF01' -> '!'
            '\uFF1F' -> '?'
            '\uFF1B' -> ';'
            else -> ch
        }
        out.append(mapped)
    }
    return out.toString()
}
/**
 * Decides whether a new candidate beats the current best. When total scores tie
 * (within epsilon), the higher substring score wins, then the higher edit score —
 * this keeps a shared prefix like "元旦到了，小朋友" from landing on whichever
 * equally-scored lesson happens to sort first.
 */
private fun isBetterEmulatorCandidate(
    score: Float,
    substr: Float,
    edit: Float,
    bestScore: Float,
    bestSubstr: Float,
    bestEdit: Float,
): Boolean {
    // Negative sentinel means "no best yet" — any candidate wins.
    if (bestScore < 0f) return true
    val eps = 1e-5f
    if (score > bestScore + eps) return true
    if (score + eps < bestScore) return false
    if (substr > bestSubstr + eps) return true
    if (substr + eps < bestSubstr) return false
    return edit > bestEdit + eps
}
/**
 * True when the path segment [kw] relates to [query]. Question text often drops
 * the leading character of a directory name (directory "上厕所" vs sentence "厕所"),
 * so longer keywords also match on their first-character-stripped tail.
 */
private fun queryMatchesPathKeyword(query: String, kw: String): Boolean {
    if (kw in query) return true
    // Only strip the head of keywords of length >= 3 so that e.g. "个人生活"
    // cannot degrade into the overly generic "生活" and match everything.
    if (kw.length >= 3) {
        val tail = kw.drop(1)
        if (tail.length >= 2 && tail in query) return true
    }
    return false
}
/**
 * Maps a substring-hit length to a confidence score: longer matches are more
 * discriminative; very short ones (e.g. "小朋友") score low to reduce
 * cross-lesson false matches, and below 4 characters count as no hit.
 */
private fun emulatorSubstringScoreForLength(len: Int): Float {
    if (len >= 18) return 0.95f
    if (len >= 12) return 0.90f
    if (len >= 8) return 0.82f
    if (len >= 6) return 0.68f
    if (len >= 4) return 0.48f
    return 0f
}
/**
 * Splits [text] on Chinese/ASCII sentence delimiters and keeps the non-trivial
 * pieces (length >= 2 after trimming). Used by the emulator edit-distance scoring:
 * comparing sentence-by-sentence keeps an LLM greeting prefix from dragging the
 * score down. Falls back to the whole text when nothing survives the filter.
 */
private fun splitSentences(text: String): List<String> {
    val pieces = text
        .split(Regex("[,。!?;,!?;\n]+"))
        .mapNotNull { raw ->
            val trimmed = raw.trim()
            if (trimmed.length >= 2) trimmed else null
        }
    return pieces.ifEmpty { listOf(text) }
}
/**
 * Returns topic keywords from the deepest directory name in [path]: the directory
 * is split on "-", digits are stripped, and pure-number or single-character pieces
 * are discarded. A path without a directory component yields an empty list.
 */
private fun extractPathTopicKeywords(path: String): List<String> {
    val segments = path.split("/")
    // Need at least one directory above the file name.
    if (segments.size < 2) return emptyList()
    val deepestDir = segments[segments.size - 2]
    return deepestDir.split("-")
        .map { it.replace(Regex("\\d+"), "").trim() }
        .filter { it.length >= 2 }
        .distinct()
}
private fun dot(a: FloatArray, b: FloatArray): Float { private fun dot(a: FloatArray, b: FloatArray): Float {
var s = 0f var s = 0f
for (i in a.indices) s += a[i] * b[i] for (i in a.indices) s += a[i] * b[i]

View File

@@ -0,0 +1,34 @@
package com.digitalperson.env
import android.os.Build
object RuntimeEnv {
    /**
     * Heuristic emulator detection from Build.* properties.
     * Each suspicious property value counts as one signal; at least two signals
     * are required so that unusual vendor ROMs are not misclassified.
     */
    fun isEmulator(): Boolean {
        val fp = Build.FINGERPRINT.orEmpty()
        val model = Build.MODEL.orEmpty()
        val brand = Build.BRAND.orEmpty()
        val device = Build.DEVICE.orEmpty()
        val product = Build.PRODUCT.orEmpty()
        val hardware = Build.HARDWARE.orEmpty()
        val manufacturer = Build.MANUFACTURER.orEmpty()
        val signals = listOf(
            fp.startsWith("generic", ignoreCase = true),
            fp.contains("unknown", ignoreCase = true),
            model.contains("google_sdk", ignoreCase = true),
            model.contains("emulator", ignoreCase = true),
            model.contains("android sdk built for", ignoreCase = true),
            manufacturer.contains("genymotion", ignoreCase = true),
            brand.startsWith("generic", ignoreCase = true) &&
                device.startsWith("generic", ignoreCase = true),
            product.contains("sdk", ignoreCase = true),
            product.contains("emulator", ignoreCase = true),
            hardware.contains("goldfish", ignoreCase = true),
            hardware.contains("ranchu", ignoreCase = true),
        )
        // Require multiple signals to avoid false positives on weird ROMs.
        return signals.count { it } >= 2
    }
}

View File

@@ -5,6 +5,7 @@ import android.graphics.Bitmap
import android.util.Log import android.util.Log
import com.digitalperson.config.AppConfig import com.digitalperson.config.AppConfig
import com.digitalperson.engine.RetinaFaceEngineRKNN import com.digitalperson.engine.RetinaFaceEngineRKNN
import com.digitalperson.env.RuntimeEnv
import java.util.ArrayDeque import java.util.ArrayDeque
import java.util.concurrent.atomic.AtomicBoolean import java.util.concurrent.atomic.AtomicBoolean
import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.CoroutineScope
@@ -35,6 +36,12 @@ class FaceDetectionPipeline(
private val onResult: (FaceDetectionResult) -> Unit, private val onResult: (FaceDetectionResult) -> Unit,
private val onPresenceChanged: (present: Boolean, isFrontal: Boolean, faceIdentityId: String?, recognizedName: String?) -> Unit, private val onPresenceChanged: (present: Boolean, isFrontal: Boolean, faceIdentityId: String?, recognizedName: String?) -> Unit,
) { ) {
companion object {
/** 模拟器固定人脸 ID对应 UserMemory 中的 userId */
const val EMULATOR_FACE_ID = "face_emulator"
/** 模拟器固定显示名,直接作为 recognizedName 传给 coordinator */
const val EMULATOR_FACE_NAME = "小黑"
}
private val appContext = context.applicationContext private val appContext = context.applicationContext
private val engine = RetinaFaceEngineRKNN() private val engine = RetinaFaceEngineRKNN()
private val recognizer = FaceRecognizer(appContext) private val recognizer = FaceRecognizer(appContext)
@@ -50,6 +57,11 @@ class FaceDetectionPipeline(
private val fusionQualities = ArrayDeque<Float>() private val fusionQualities = ArrayDeque<Float>()
fun initialize(): Boolean { fun initialize(): Boolean {
if (RuntimeEnv.isEmulator()) {
Log.i(AppConfig.TAG, "[Face] 模拟器模式:跳过 RKNN 初始化,固定返回身份「$EMULATOR_FACE_NAME」")
initialized.set(true)
return true
}
val detectorOk = engine.initialize(appContext) val detectorOk = engine.initialize(appContext)
val recognizerOk = recognizer.initialize() val recognizerOk = recognizer.initialize()
val ok = detectorOk && recognizerOk val ok = detectorOk && recognizerOk
@@ -68,6 +80,31 @@ class FaceDetectionPipeline(
return return
} }
// 模拟器:跳过 RKNN 检测,固定上报一张居中正脸
if (RuntimeEnv.isEmulator()) {
scope.launch {
try {
val w = bitmap.width
val h = bitmap.height
val fakeBox = FaceBox(
left = w * 0.25f,
top = h * 0.15f,
right = w * 0.75f,
bottom = h * 0.85f,
score = 0.99f,
)
withContext(Dispatchers.Main) {
onPresenceChanged(true, true, EMULATOR_FACE_ID, EMULATOR_FACE_NAME)
onResult(FaceDetectionResult(w, h, listOf(fakeBox)))
}
} finally {
bitmap.recycle()
frameInFlight.set(false)
}
}
return
}
scope.launch { scope.launch {
try { try {
val width = bitmap.width val width = bitmap.width

View File

@@ -148,6 +148,8 @@ abstract class BaseDigitalPersonCoordinator(
* (i.e. after a cloud LLM response), NOT after greeting / farewell / proactive TTS. * (i.e. after a cloud LLM response), NOT after greeting / farewell / proactive TTS.
*/ */
fun onTtsPlaybackCompleted() { fun onTtsPlaybackCompleted() {
// Let the controller advance its own timers (greeting/proactive/dlg all count as assistant speaking).
controller.onAssistantTtsPlaybackCompleted()
if (pendingDialogueFinish) { if (pendingDialogueFinish) {
pendingDialogueFinish = false pendingDialogueFinish = false
controller.onDialogueResponseFinished() controller.onDialogueResponseFinished()

View File

@@ -64,6 +64,10 @@ class DigitalHumanInteractionController(
private var memoryJob: Job? = null private var memoryJob: Job? = null
private var farewellJob: Job? = null private var farewellJob: Job? = null
// 让超时/间隔从 TTS 播放完成后开始计时,而不是从 speak() 调用时开始
private var pendingWaitReplyTimeoutAfterTts: Boolean = false
private var pendingProactiveFollowupAfterTts: Boolean = false
fun start() { fun start() {
transitionTo(InteractionState.IDLE) transitionTo(InteractionState.IDLE)
scheduleMemoryMode() scheduleMemoryMode()
@@ -204,7 +208,7 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
return return
} }
transitionTo(InteractionState.WAITING_REPLY) transitionTo(InteractionState.WAITING_REPLY)
scheduleWaitingReplyTimeout() scheduleWaitingReplyTimeoutAfterTts()
} }
private fun enterGreeting() { private fun enterGreeting() {
@@ -224,7 +228,7 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
handler.addToChatHistory("assistant", greeting) handler.addToChatHistory("assistant", greeting)
handler.addAssistantMessageToCloudHistory(greeting) handler.addAssistantMessageToCloudHistory(greeting)
transitionTo(InteractionState.WAITING_REPLY) transitionTo(InteractionState.WAITING_REPLY)
scheduleWaitingReplyTimeout() scheduleWaitingReplyTimeoutAfterTts()
} else { } else {
useDefaultGreeting() useDefaultGreeting()
} }
@@ -243,7 +247,11 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
handler.addAssistantMessageToCloudHistory(greeting) handler.addAssistantMessageToCloudHistory(greeting)
transitionTo(InteractionState.WAITING_REPLY) transitionTo(InteractionState.WAITING_REPLY)
scheduleWaitingReplyTimeout() scheduleWaitingReplyTimeoutAfterTts()
}
private fun scheduleWaitingReplyTimeoutAfterTts() {
pendingWaitReplyTimeoutAfterTts = true
} }
private fun scheduleWaitingReplyTimeout() { private fun scheduleWaitingReplyTimeout() {
@@ -282,6 +290,19 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
// 触发题目生成检查 // 触发题目生成检查
handler.onQuestionAsked(currentFaceId ?: "guest") handler.onQuestionAsked(currentFaceId ?: "guest")
// 不立刻开始 20s 计时;等 TTS 播放完再开始计时,避免“刚说完几秒就又问”
pendingProactiveFollowupAfterTts = true
}
/** 由 Activity 在「本轮 TTS 完整播放完成」时调用(包括问候/主动提问/对话回复)。 */
fun onAssistantTtsPlaybackCompleted() {
if (pendingWaitReplyTimeoutAfterTts && state == InteractionState.WAITING_REPLY) {
pendingWaitReplyTimeoutAfterTts = false
scheduleWaitingReplyTimeout()
}
if (pendingProactiveFollowupAfterTts && state == InteractionState.PROACTIVE) {
pendingProactiveFollowupAfterTts = false
proactiveJob?.cancel()
proactiveJob = scope.launch { proactiveJob = scope.launch {
hasPendingUserReply = false hasPendingUserReply = false
delay(20_000) delay(20_000)
@@ -295,8 +316,8 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
askProactiveTopic() askProactiveTopic()
} else { } else {
transitionTo(InteractionState.WAITING_REPLY) transitionTo(InteractionState.WAITING_REPLY)
// handler.playMotion("haru_g_m17.motion3.json") scheduleWaitingReplyTimeoutAfterTts()
scheduleWaitingReplyTimeout() }
} }
} }
} }

View File

@@ -27,6 +27,11 @@ class TtsController(private val context: Context) {
private var callback: TtsCallback? = null private var callback: TtsCallback? = null
// 防止 WebSocket 重连或多路回调导致同一段文案短时间内重复入队、重复播报
@Volatile private var lastEnqueuedText: String? = null
@Volatile private var lastEnqueuedAtMs: Long = 0L
private val dedupeWindowMs = 2500L
fun setCallback(callback: TtsCallback) { fun setCallback(callback: TtsCallback) {
this.callback = callback this.callback = callback
bindCallbacksIfReady() bindCallbacksIfReady()
@@ -147,6 +152,14 @@ class TtsController(private val context: Context) {
fun enqueueSegment(seg: String) { fun enqueueSegment(seg: String) {
val cleaned = seg.replace(Regex("\\[.*?\\]"), "").trim() val cleaned = seg.replace(Regex("\\[.*?\\]"), "").trim()
if (cleaned.isEmpty()) return if (cleaned.isEmpty()) return
val now = System.currentTimeMillis()
val lastText = lastEnqueuedText
if (lastText != null && lastText == cleaned && (now - lastEnqueuedAtMs) <= dedupeWindowMs) {
Log.w(TAG, "Skip duplicate TTS segment within ${dedupeWindowMs}ms: ${cleaned.take(60)}")
return
}
lastEnqueuedText = cleaned
lastEnqueuedAtMs = now
if (useQCloudTts) { if (useQCloudTts) {
qcloudTts?.enqueueSegment(cleaned) qcloudTts?.enqueueSegment(cleaned)
} else { } else {