add photo
This commit is contained in:
@@ -4,6 +4,8 @@ plugins {
|
||||
id 'kotlin-kapt'
|
||||
}
|
||||
|
||||
def oneSentenceAsrAar = file('libs/asr-one-sentence-release.aar')
|
||||
|
||||
kapt {
|
||||
// Room uses javac stubs under kapt; keep parameter names for :bind variables.
|
||||
javacOptions {
|
||||
@@ -62,6 +64,9 @@ android {
|
||||
buildConfigField "String", "LLM_MODEL", "\"${(project.findProperty('LLM_MODEL') ?: 'doubao-1-5-pro-32k-character-250228').toString()}\""
|
||||
buildConfigField "boolean", "USE_LIVE2D", "${(project.findProperty('USE_LIVE2D') ?: 'true').toString()}"
|
||||
|
||||
// 腾讯云「一句话识别」Android SDK:将 asr-one-sentence-release.aar 放入 app/libs/ 后为 true
|
||||
buildConfigField "boolean", "HAS_TENCENT_ASR_SDK", "${oneSentenceAsrAar.exists()}"
|
||||
|
||||
ndk {
|
||||
abiFilters "arm64-v8a"
|
||||
}
|
||||
@@ -119,4 +124,6 @@ dependencies {
|
||||
implementation 'com.google.guava:guava:31.1-android'
|
||||
implementation 'org.ejml:ejml-core:0.43.1'
|
||||
implementation 'org.ejml:ejml-simple:0.43.1'
|
||||
|
||||
// 腾讯云「一句话识别」通过 OkHttp 直接实现 TC3 签名,无需 AAR SDK
|
||||
}
|
||||
|
||||
4
app/libs/ASR_SDK_PLACE_AAR_HERE.txt
Normal file
4
app/libs/ASR_SDK_PLACE_AAR_HERE.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
1. Open https://console.cloud.tencent.com/asr/download
|
||||
2. Download the Android package for real-time speech recognition (实时语音识别).
|
||||
3. Copy asr-realtime-release.aar into this folder (app/libs/).
|
||||
4. Sync Gradle — BuildConfig.HAS_TENCENT_ASR_SDK will become true.
|
||||
BIN
app/libs/_asr.zip
Normal file
BIN
app/libs/_asr.zip
Normal file
Binary file not shown.
13
app/libs/_asr_out/AndroidManifest.xml
Normal file
13
app/libs/_asr_out/AndroidManifest.xml
Normal file
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
package="com.tencent.iot.speech" >
|
||||
|
||||
<uses-sdk
|
||||
android:minSdkVersion="16"
|
||||
android:targetSdkVersion="33" />
|
||||
|
||||
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
|
||||
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
||||
<uses-permission android:name="android.permission.INTERNET" />
|
||||
|
||||
</manifest>
|
||||
1
app/libs/_asr_out/R.txt
Normal file
1
app/libs/_asr_out/R.txt
Normal file
@@ -0,0 +1 @@
|
||||
int string app_name 0x0
|
||||
BIN
app/libs/_asr_out/classes.jar
Normal file
BIN
app/libs/_asr_out/classes.jar
Normal file
Binary file not shown.
BIN
app/libs/_asr_out/jni/arm64-v8a/libqcloud_asr_realtime.so
Normal file
BIN
app/libs/_asr_out/jni/arm64-v8a/libqcloud_asr_realtime.so
Normal file
Binary file not shown.
BIN
app/libs/_asr_out/jni/armeabi-v7a/libqcloud_asr_realtime.so
Normal file
BIN
app/libs/_asr_out/jni/armeabi-v7a/libqcloud_asr_realtime.so
Normal file
Binary file not shown.
BIN
app/libs/_asr_out/jni/x86/libqcloud_asr_realtime.so
Normal file
BIN
app/libs/_asr_out/jni/x86/libqcloud_asr_realtime.so
Normal file
Binary file not shown.
BIN
app/libs/_asr_out/jni/x86_64/libqcloud_asr_realtime.so
Normal file
BIN
app/libs/_asr_out/jni/x86_64/libqcloud_asr_realtime.so
Normal file
Binary file not shown.
9
app/libs/_asr_out/proguard.txt
Normal file
9
app/libs/_asr_out/proguard.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
# SDK
|
||||
-keepclasseswithmembernames class com.tencent.aai.** { # 保持 native 方法不被混淆
|
||||
native <methods>;
|
||||
}
|
||||
|
||||
-keep public class com.tencent.aai.** {*;}
|
||||
-keep interface com.tencent.aai.audio.data.PcmAudioDataSource {
|
||||
void start(); throws com.tencent.aai.exception.ClientException;
|
||||
}
|
||||
4
app/libs/_asr_out/res/values/values.xml
Normal file
4
app/libs/_asr_out/res/values/values.xml
Normal file
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<resources>
|
||||
<string name="app_name">aai</string>
|
||||
</resources>
|
||||
11
app/libs/_osr_out/AndroidManifest.xml
Normal file
11
app/libs/_osr_out/AndroidManifest.xml
Normal file
@@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
package="com.tencent.cloud.qcloudasrsdk.onesentence" >
|
||||
|
||||
<uses-sdk
|
||||
android:minSdkVersion="16"
|
||||
android:targetSdkVersion="33" />
|
||||
|
||||
<uses-permission android:name="android.permission.INTERNET" />
|
||||
|
||||
</manifest>
|
||||
1577
app/libs/_osr_out/R.txt
Normal file
1577
app/libs/_osr_out/R.txt
Normal file
File diff suppressed because it is too large
Load Diff
BIN
app/libs/_osr_out/asr-one-sentence.zip
Normal file
BIN
app/libs/_osr_out/asr-one-sentence.zip
Normal file
Binary file not shown.
BIN
app/libs/_osr_out/classes.jar
Normal file
BIN
app/libs/_osr_out/classes.jar
Normal file
Binary file not shown.
90
app/libs/_osr_out/proguard.txt
Normal file
90
app/libs/_osr_out/proguard.txt
Normal file
@@ -0,0 +1,90 @@
|
||||
-optimizationpasses 5 # 指定代码的压缩级别
|
||||
-allowaccessmodification #优化时允许访问并修改有修饰符的类和类的成员
|
||||
-dontusemixedcaseclassnames # 是否使用大小写混合
|
||||
-dontskipnonpubliclibraryclasses # 是否混淆第三方jar
|
||||
-dontpreverify # 混淆时是否做预校验
|
||||
-verbose # 混淆时是否记录日志
|
||||
-ignorewarnings # 忽略警告,避免打包时某些警告出现
|
||||
-optimizations !code/simplification/arithmetic,!code/simplification/cast,!field/*,!class/merging/* # 混淆时所采用的算法
|
||||
|
||||
-keepattributes *Annotation*
|
||||
-keepclasseswithmembernames class * { # 保持 native 方法不被混淆
|
||||
native <methods>;
|
||||
}
|
||||
|
||||
-keepclassmembers public class * extends android.view.View {
|
||||
void set*(***);
|
||||
*** get*();
|
||||
}
|
||||
|
||||
-keepclassmembers class * extends android.app.Activity {
|
||||
public void *(android.view.View);
|
||||
}
|
||||
|
||||
-keepclassmembers enum * { # 保持枚举 enum 类不被混淆
|
||||
public static **[] values();
|
||||
public static ** valueOf(java.lang.String);
|
||||
}
|
||||
|
||||
-keep class * implements android.os.Parcelable { # 保持 Parcelable 不被混淆
|
||||
public static final android.os.Parcelable$Creator *;
|
||||
}
|
||||
|
||||
-keepclassmembers class **.R$* { #不混淆R文件
|
||||
public static <fields>;
|
||||
}
|
||||
|
||||
-dontwarn android.support.**
|
||||
##--- End android默认 ---
|
||||
|
||||
##--- For:不能被混淆的 ---
|
||||
-keep public class * extends android.app.Activity
|
||||
-keep public class * extends android.app.Fragment
|
||||
-keep public class * extends android.app.Application
|
||||
-keep public class * extends android.app.Service
|
||||
-keep public class * extends android.content.BroadcastReceiver
|
||||
-keep public class * extends android.content.ContentProvider
|
||||
-keep public class * extends android.app.backup.BackupAgentHelper
|
||||
-keep public class * extends android.preference.Preference
|
||||
|
||||
##--- For:保持自定义控件类不被混淆 ---
|
||||
-keepclasseswithmembers class * {
|
||||
public <init>(android.content.Context, android.util.AttributeSet);
|
||||
}
|
||||
-keepclasseswithmembers class * {
|
||||
public <init>(android.content.Context, android.util.AttributeSet, int);
|
||||
}
|
||||
##--- For:android-support-v4 ---
|
||||
-dontwarn android.support.v4.**
|
||||
-keep class android.support.v4.** { *; }
|
||||
-keep interface android.support.v4.app.** { *; }
|
||||
-keep class * extends android.support.v4.** { *; }
|
||||
-keep public class * extends android.support.v4.**
|
||||
-keep class * extends android.support.v4.app.** {*;}
|
||||
-keep class * extends android.support.v4.view.** {*;}
|
||||
|
||||
##--- For:Serializable ---
|
||||
-keep class * implements java.io.Serializable {*;}
|
||||
-keepnames class * implements java.io.Serializable
|
||||
-keepclassmembers class * implements java.io.Serializable {*;}
|
||||
|
||||
##--- For:Gson ---
|
||||
-keepattributes *Annotation*
|
||||
-keep class com.google.gson.stream.** { *; }
|
||||
|
||||
|
||||
##--- For:Remove log ---
|
||||
-assumenosideeffects class android.util.Log {
|
||||
public static boolean isLoggable(java.lang.String, int);
|
||||
public static int v(...);
|
||||
public static int i(...);
|
||||
public static int w(...);
|
||||
public static int d(...);
|
||||
public static int e(...);
|
||||
}
|
||||
|
||||
##--- For:attributes(未启用) ---
|
||||
#-keepattributes SourceFile,LineNumberTable # 保持反编译工具能看到代码的行数,以及release包安装后出现异常信息可以知道在哪行代码出现异常,建议不启用
|
||||
-keepattributes *Annotation* #使用注解
|
||||
-keepattributes Signature #过滤泛型 出现类型转换错误时,启用这个
|
||||
#-keepattributes *Exceptions*,EnclosingMethod #没试过,未知效果
|
||||
BIN
app/libs/_tts.zip
Normal file
BIN
app/libs/_tts.zip
Normal file
Binary file not shown.
11
app/libs/_tts_out/AndroidManifest.xml
Normal file
11
app/libs/_tts_out/AndroidManifest.xml
Normal file
@@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
package="com.tencent.cloud.realtime.tts" >
|
||||
|
||||
<uses-sdk
|
||||
android:minSdkVersion="16"
|
||||
android:targetSdkVersion="33" />
|
||||
|
||||
<uses-permission android:name="android.permission.INTERNET" />
|
||||
|
||||
</manifest>
|
||||
0
app/libs/_tts_out/R.txt
Normal file
0
app/libs/_tts_out/R.txt
Normal file
BIN
app/libs/_tts_out/classes.jar
Normal file
BIN
app/libs/_tts_out/classes.jar
Normal file
Binary file not shown.
0
app/libs/_tts_out/proguard.txt
Normal file
0
app/libs/_tts_out/proguard.txt
Normal file
BIN
app/libs/asr-one-sentence-release.aar
Normal file
BIN
app/libs/asr-one-sentence-release.aar
Normal file
Binary file not shown.
BIN
app/libs/asr-realtime-release.aar
Normal file
BIN
app/libs/asr-realtime-release.aar
Normal file
Binary file not shown.
BIN
app/libs/classes.jar
Normal file
BIN
app/libs/classes.jar
Normal file
Binary file not shown.
9
app/proguard-rules.pro
vendored
9
app/proguard-rules.pro
vendored
@@ -18,4 +18,11 @@
|
||||
|
||||
# If you keep the line number information, uncomment this to
|
||||
# hide the original source file name.
|
||||
#-renamesourcefileattribute SourceFile
|
||||
#-renamesourcefileattribute SourceFile
|
||||
|
||||
# 腾讯云实时语音识别 SDK(asr-realtime-release.aar)
|
||||
-keepclasseswithmembernames class * {
|
||||
native <methods>;
|
||||
}
|
||||
-keep public class com.tencent.aai.** { *; }
|
||||
-keep public class com.qq.wx.voice.** { *; }
|
||||
@@ -0,0 +1,132 @@
|
||||
package com.digitalperson.embedding
|
||||
|
||||
import android.content.Context
|
||||
import androidx.test.core.app.ApplicationProvider
|
||||
import androidx.test.ext.junit.runners.AndroidJUnit4
|
||||
import com.digitalperson.config.AppConfig
|
||||
import org.junit.Assert.assertNotNull
|
||||
import org.junit.Assert.assertTrue
|
||||
import org.junit.Before
|
||||
import org.junit.Test
|
||||
import org.junit.runner.RunWith
|
||||
|
||||
/**
|
||||
* 模拟器参考图匹配(路径关键词 + 子串 + 编辑距离)回归测试。
|
||||
*
|
||||
* 运行方式(需设备/模拟器,且 assets 含 ref 语料):
|
||||
* ./gradlew :app:connectedDebugAndroidTest --tests com.digitalperson.embedding.RefImageMatcherEmulatorRegressionTest
|
||||
*/
|
||||
@RunWith(AndroidJUnit4::class)
|
||||
class RefImageMatcherEmulatorRegressionTest {
|
||||
|
||||
private lateinit var context: Context
|
||||
|
||||
@Before
|
||||
fun setUp() {
|
||||
context = ApplicationProvider.getApplicationContext()
|
||||
}
|
||||
|
||||
data class ManualCase(
|
||||
val label: String,
|
||||
val query: String,
|
||||
/** 期望命中的 txt 路径中应包含该子串(如文件名一段) */
|
||||
val expectedPathContains: String,
|
||||
)
|
||||
|
||||
@Test
|
||||
fun manualQueries_shouldMatchExpectedAsset() {
|
||||
val cases = listOf(
|
||||
ManualCase(
|
||||
label = "上厕所指引(LLM 寒暄前缀)",
|
||||
query = "嗨小朋友,可以帮老师个忙吗?同学们能指引图中的小朋友们进入正确的厕所吗?",
|
||||
expectedPathContains = "上厕所18",
|
||||
),
|
||||
ManualCase(
|
||||
label = "刷牙看图(题干在讲卫生6,人1 为认读「人」非刷牙)",
|
||||
query = "嗨小朋友,可以帮老师个忙吗?这个男生在做什么?",
|
||||
expectedPathContains = "讲卫生6",
|
||||
),
|
||||
ManualCase(
|
||||
label = "元旦短句(生活适应)",
|
||||
query = "元旦到了,小朋友可以对爸爸妈妈说:'爸爸妈妈,新年快乐!'",
|
||||
expectedPathContains = "元旦14",
|
||||
),
|
||||
)
|
||||
val failures = mutableListOf<String>()
|
||||
for (c in cases) {
|
||||
val m = RefImageMatcher.findBestMatchEditDistance(context, c.query)
|
||||
when {
|
||||
m == null -> failures += "${c.label}: 无匹配,期望路径含「${c.expectedPathContains}」 query=${c.query.take(80)}"
|
||||
!m.txtAssetPath.contains(c.expectedPathContains) ->
|
||||
failures += "${c.label}: 得到 ${m.txtAssetPath} score=${m.score},期望路径含「${c.expectedPathContains}」"
|
||||
}
|
||||
}
|
||||
assertTrue(
|
||||
"以下用例未命中预期资源:\n${failures.joinToString("\n")}",
|
||||
failures.isEmpty(),
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* 语料自检:每条带 png 的 txt,用「寒暄 + 正文长片段」构造 query,应匹配回该 txt。
|
||||
*
|
||||
* 仅用首行会失败两类情况:(1)多篇课文首句相同(如男生女生 5/6);(2)首句拆出「小朋友」等
|
||||
* 极短片段在多篇里子串命中同分,先遍历到的文件胜出。故这里用去空白后的正文前缀(约 400 字)提高区分度。
|
||||
*/
|
||||
@Test
|
||||
fun corpus_bodyPrefix_withGreetingPrefix_shouldMatchSameTxt() {
|
||||
val root = AppConfig.RefCorpus.ASSETS_ROOT
|
||||
val paths = RefCorpusAssetScanner.listTxtFilesUnder(context, root)
|
||||
val greeting = "嗨小朋友,可以帮老师个忙吗?"
|
||||
/** 与真实 LLM 提问长度同量级;更长更易唯一,但单测耗时略增 */
|
||||
val maxBodyChars = 400
|
||||
val failures = mutableListOf<String>()
|
||||
var skippedNoPng = 0
|
||||
var skippedNoBody = 0
|
||||
var checked = 0
|
||||
|
||||
for (txtPath in paths) {
|
||||
val pngPath = if (txtPath.endsWith(".txt", ignoreCase = true)) {
|
||||
txtPath.dropLast(4) + ".png"
|
||||
} else {
|
||||
"$txtPath.png"
|
||||
}
|
||||
val pngOk = try {
|
||||
context.assets.open(pngPath).close()
|
||||
true
|
||||
} catch (_: Throwable) {
|
||||
false
|
||||
}
|
||||
if (!pngOk) {
|
||||
skippedNoPng++
|
||||
continue
|
||||
}
|
||||
val raw = context.assets.open(txtPath).bufferedReader(Charsets.UTF_8).use { it.readText() }
|
||||
val body = RefTxtEmbedText.fromRawFileContent(raw).trim()
|
||||
if (body.length < 8) {
|
||||
skippedNoBody++
|
||||
continue
|
||||
}
|
||||
checked++
|
||||
val compact = body.replace(Regex("\\s+"), " ").trim()
|
||||
val core = compact.take(maxBodyChars)
|
||||
val query = "$greeting$core"
|
||||
val m = RefImageMatcher.findBestMatchEditDistance(context, query)
|
||||
if (m == null) {
|
||||
failures += "无匹配: $txtPath | core=${core.take(50)}"
|
||||
continue
|
||||
}
|
||||
if (m.txtAssetPath != txtPath) {
|
||||
failures += "错配: 期望 $txtPath | 得到 ${m.txtAssetPath} score=${m.score} | core=${core.take(50)}"
|
||||
}
|
||||
}
|
||||
|
||||
assertNotNull("语料为空或未打包进 assets", paths.takeIf { it.isNotEmpty() })
|
||||
assertTrue(
|
||||
"语料自检失败条数=${failures.size}(已检查=$checked,skip无png=$skippedNoPng,skip正文过短=$skippedNoBody):\n" +
|
||||
failures.take(50).joinToString("\n") +
|
||||
if (failures.size > 50) "\n... 共 ${failures.size} 条" else "",
|
||||
failures.isEmpty(),
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,204 @@
|
||||
package com.digitalperson.question
|
||||
|
||||
import android.content.Context
|
||||
import androidx.test.core.app.ApplicationProvider
|
||||
import androidx.test.ext.junit.runners.AndroidJUnit4
|
||||
import com.digitalperson.data.AppDatabase
|
||||
import com.digitalperson.data.entity.Question
|
||||
import com.digitalperson.interaction.UserMemoryStore
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import org.json.JSONObject
|
||||
import org.junit.After
|
||||
import org.junit.Before
|
||||
import org.junit.Test
|
||||
import org.junit.runner.RunWith
|
||||
import java.io.InputStream
|
||||
|
||||
/**
|
||||
* 题目生成智能体测试
|
||||
* 可以在模拟器或本地运行,不需要完整启动应用
|
||||
*/
|
||||
@RunWith(AndroidJUnit4::class)
|
||||
class QuestionGenerationAgentTest {
|
||||
|
||||
private lateinit var context: Context
|
||||
private lateinit var database: AppDatabase
|
||||
private lateinit var userMemoryStore: UserMemoryStore
|
||||
|
||||
@Before
|
||||
fun setUp() {
|
||||
context = ApplicationProvider.getApplicationContext()
|
||||
database = AppDatabase.getInstance(context)
|
||||
userMemoryStore = UserMemoryStore(context)
|
||||
}
|
||||
|
||||
@After
|
||||
fun tearDown() {
|
||||
// 清理测试数据
|
||||
// database.clearAllTables()
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testLoadPromptPoolFromJson() {
|
||||
// 测试JSON提示词池加载
|
||||
val inputStream: InputStream = context.assets.open("question_prompts.json")
|
||||
val jsonString = inputStream.bufferedReader().use { it.readText() }
|
||||
val jsonArray = org.json.JSONArray(jsonString)
|
||||
|
||||
println("✅ Loaded ${jsonArray.length()} prompts from JSON")
|
||||
|
||||
// 验证每个提示词的格式
|
||||
for (i in 0 until jsonArray.length()) {
|
||||
val json = jsonArray.getJSONObject(i)
|
||||
assert(json.has("subject")) { "Missing subject in prompt $i" }
|
||||
assert(json.has("grade")) { "Missing grade in prompt $i" }
|
||||
assert(json.has("topic")) { "Missing topic in prompt $i" }
|
||||
assert(json.has("difficulty")) { "Missing difficulty in prompt $i" }
|
||||
assert(json.has("promptTemplate")) { "Missing promptTemplate in prompt $i" }
|
||||
|
||||
println(" - Prompt $i: ${json.getString("subject")} / ${json.getString("topic")}")
|
||||
}
|
||||
|
||||
assert(jsonArray.length() > 0) { "Should have at least 1 prompt" }
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testQuestionDatabaseOperations() = runBlocking {
|
||||
// 测试数据库操作
|
||||
val questionDao = database.questionDao()
|
||||
|
||||
// 插入测试题目
|
||||
val testQuestion = Question(
|
||||
id = 0,
|
||||
content = "测试题目:苹果和香蕉哪个大?",
|
||||
answer = "香蕉",
|
||||
subject = "生活数学",
|
||||
grade = 1,
|
||||
difficulty = 1,
|
||||
createdAt = System.currentTimeMillis()
|
||||
)
|
||||
|
||||
val questionId = questionDao.insert(testQuestion)
|
||||
println("✅ Inserted question with ID: $questionId")
|
||||
|
||||
// 查询题目
|
||||
val retrievedQuestion = questionDao.getQuestionById(questionId)
|
||||
assert(retrievedQuestion != null) { "Should retrieve inserted question" }
|
||||
assert(retrievedQuestion?.content == testQuestion.content) { "Content should match" }
|
||||
println("✅ Retrieved question: ${retrievedQuestion?.content}")
|
||||
|
||||
// 测试未答题计数
|
||||
val userId = "test_user_001"
|
||||
val count = questionDao.countUnansweredQuestions(userId)
|
||||
println("✅ Unanswered questions count: $count")
|
||||
|
||||
// 测试获取随机未答题
|
||||
val randomQuestion = questionDao.getRandomUnansweredQuestion(userId)
|
||||
println("✅ Random unanswered question: ${randomQuestion?.content?.take(20)}...")
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testJsonResponseParsing() {
|
||||
// 测试LLM JSON响应解析
|
||||
val testResponses = listOf(
|
||||
"""{"content": "苹果和香蕉哪个大?", "answer": "香蕉", "explanation": "香蕉通常比苹果大"}""",
|
||||
"""
|
||||
{
|
||||
"content": "2个苹果和5个苹果比,谁多?",
|
||||
"answer": "5个苹果多",
|
||||
"explanation": "5大于2"
|
||||
}
|
||||
""",
|
||||
"""Some text before {"content": "测试题目", "answer": "答案"} some text after"""
|
||||
)
|
||||
|
||||
testResponses.forEachIndexed { index, response ->
|
||||
val json = extractJsonFromResponse(response)
|
||||
if (json != null) {
|
||||
println("✅ Test $index: Parsed successfully")
|
||||
println(" Content: ${json.getString("content")}")
|
||||
println(" Answer: ${json.getString("answer")}")
|
||||
} else {
|
||||
println("❌ Test $index: Failed to parse JSON")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testUserMemoryOperations() = runBlocking {
|
||||
// 测试用户记忆操作
|
||||
val userId = "test_user_001"
|
||||
|
||||
// 创建或更新用户
|
||||
userMemoryStore.upsertUserSeen(userId, "测试小朋友")
|
||||
println("✅ Created/updated user: $userId")
|
||||
|
||||
// 获取用户信息
|
||||
val memory = userMemoryStore.getMemory(userId)
|
||||
println("✅ User memory: displayName=${memory?.displayName}")
|
||||
|
||||
// 测试未答题计数
|
||||
val unansweredCount = userMemoryStore.countUnansweredQuestions(userId)
|
||||
println("✅ Unanswered questions for $userId: $unansweredCount")
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testPromptTemplateBuilding() = runBlocking {
|
||||
// 测试提示词模板构建
|
||||
val userProfile = userMemoryStore.getMemory("test_user_001")
|
||||
|
||||
val promptTemplate = """
|
||||
你是一个专门为特殊教育儿童设计题目的教育专家。请根据以下要求生成一个题目:
|
||||
|
||||
用户信息:
|
||||
${userProfile?.displayName?.let { "姓名:$it," } ?: ""}
|
||||
${userProfile?.age?.let { "年龄:$it," } ?: ""}
|
||||
|
||||
学科:生活数学
|
||||
年级:1
|
||||
主题:比大小
|
||||
难度:1
|
||||
|
||||
具体要求:
|
||||
基于以下学习目标,针对一年级小学生出1道题目:
|
||||
1. 初步感知物品的大小
|
||||
2. 会比较2个物品的大小
|
||||
|
||||
通用要求:
|
||||
1. 题目要贴近生活,适合智力障碍儿童理解
|
||||
2. 语言简单明了,避免复杂句式
|
||||
3. 题目内容积极向上
|
||||
4. 提供标准答案
|
||||
5. 确保题目没有重复
|
||||
6. 题目要有趣味性,能吸引学生注意力
|
||||
|
||||
请以JSON格式返回,格式如下:
|
||||
{
|
||||
"content": "题目内容",
|
||||
"answer": "标准答案",
|
||||
"explanation": "题目解析(可选)"
|
||||
}
|
||||
|
||||
只返回JSON,不要其他内容。
|
||||
""".trimIndent()
|
||||
|
||||
println("✅ Generated prompt template:")
|
||||
println(promptTemplate)
|
||||
println("\n✅ Prompt length: ${promptTemplate.length} characters")
|
||||
}
|
||||
|
||||
/**
|
||||
* 从响应中提取JSON
|
||||
*/
|
||||
private fun extractJsonFromResponse(response: String): JSONObject? {
|
||||
val trimmed = response.trim()
|
||||
val start = trimmed.indexOf('{')
|
||||
val end = trimmed.lastIndexOf('}')
|
||||
|
||||
if (start >= 0 && end > start) {
|
||||
val jsonStr = trimmed.substring(start, end + 1)
|
||||
return JSONObject(jsonStr)
|
||||
}
|
||||
return null
|
||||
}
|
||||
}
|
||||
@@ -4,6 +4,7 @@
|
||||
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
||||
<uses-permission android:name="android.permission.CAMERA" />
|
||||
<uses-permission android:name="android.permission.INTERNET" />
|
||||
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
|
||||
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
|
||||
<uses-permission android:name="android.permission.VIBRATE" />
|
||||
<uses-feature android:name="android.hardware.camera.any" />
|
||||
|
||||
264
app/src/main/assets/question_generation_flow.md
Normal file
264
app/src/main/assets/question_generation_flow.md
Normal file
@@ -0,0 +1,264 @@
|
||||
# Question Generation Flow - Pre-generation Strategy
|
||||
|
||||
## Problem Solved
|
||||
|
||||
**Before:** First question would fail because database was empty.
|
||||
|
||||
**After:** Questions are pre-generated when face appears, giving 20+ seconds before first question is asked.
|
||||
|
||||
## Timeline Flow
|
||||
|
||||
```
|
||||
Time: 0s 2s 20s 40s 60s
|
||||
| | | | |
|
||||
↓ ↓ ↓ ↓ ↓
|
||||
Face Greeting First Second Third
|
||||
Appears Ends Question Question Question
|
||||
| | | | |
|
||||
| | | | |
|
||||
└─► Start │ │ │ │
|
||||
Gen │ │ │ │
|
||||
Questions│ │ │ │
|
||||
(~10-20s)│ │ │ │
|
||||
│ │ │ │
|
||||
└───────────┴───────────┴───────────┘
|
||||
Questions already
|
||||
in database!
|
||||
```
|
||||
|
||||
## Detailed Flow
|
||||
|
||||
### 1. Face Detection (T=0s)
|
||||
```
|
||||
User shows face to camera
|
||||
↓
|
||||
FaceDetectionPipeline detects frontal face
|
||||
↓
|
||||
onFaceSignal(present=true, isFrontal=true)
|
||||
↓
|
||||
DigitalHumanInteractionController.onFacePresenceChanged()
|
||||
↓
|
||||
Handler.onFaceAppeared(userId) ← NEW TRIGGER
|
||||
```
|
||||
|
||||
### 2. Pre-generation (T=0-20s)
|
||||
```
|
||||
QuestionGenerationAgent.onQuestionAsked(userId)
|
||||
↓
|
||||
Check unanswered count:
|
||||
- If 0 questions → Generate 20 questions (2x minUnansweredQuestions)
|
||||
- If < 10 questions → Generate enough to reach 10
|
||||
- If >= 10 questions → Skip generation
|
||||
↓
|
||||
For each question needed:
|
||||
1. Get next prompt from pool (avoiding recently used)
|
||||
2. Build generation prompt with user profile
|
||||
3. LLM generates question (JSON)
|
||||
4. LLM reviews question quality
|
||||
5. If passed → Save to database
|
||||
↓
|
||||
Questions ready in database! ✓
|
||||
```
|
||||
|
||||
### 3. Greeting Phase (T=2-5s)
|
||||
```
|
||||
Enter greeting state
|
||||
↓
|
||||
Digital person waves and says hello
|
||||
↓
|
||||
~3 seconds
|
||||
```
|
||||
|
||||
### 4. First Question (T=20s)
|
||||
```
|
||||
enterProactive() → askProactiveTopic()
|
||||
↓
|
||||
Get random unanswered question from database
|
||||
↓
|
||||
✓ QUESTION EXISTS! (Pre-generated in step 2)
|
||||
↓
|
||||
Ask question to user
|
||||
↓
|
||||
onQuestionAsked(userId) ← Check again
|
||||
↓
|
||||
If count < 10 → Generate more questions
|
||||
```
|
||||
|
||||
### 5. Subsequent Questions (T=40s, 60s, ...)
|
||||
```
|
||||
Each time a question is asked:
|
||||
↓
|
||||
onQuestionAsked(userId) triggered
|
||||
↓
|
||||
Check unanswered count
|
||||
↓
|
||||
If low → Generate more
|
||||
↓
|
||||
Database always stays stocked! ✓
|
||||
```
|
||||
|
||||
## Key Benefits
|
||||
|
||||
### ✅ No Empty Database
|
||||
- Questions generated BEFORE first question is needed
|
||||
- 20+ second head start during greeting phase
|
||||
|
||||
### ✅ Smart Quantity
|
||||
- **First time:** Generate 20 questions (double the minimum)
|
||||
- **Replenishment:** Generate only what's needed to reach 10
|
||||
|
||||
### ✅ Continuous Supply
|
||||
- Every question asked triggers a check
|
||||
- Database never runs empty
|
||||
|
||||
### ✅ User-Aware
|
||||
- Uses user profile for personalization
|
||||
- Tracks unanswered questions per user
|
||||
|
||||
## Configuration
|
||||
|
||||
```kotlin
|
||||
data class AgentConfig(
|
||||
val minUnansweredQuestions: Int = 10, // Minimum threshold
|
||||
val batchSize: Int = 5, // (Deprecated - now calculates dynamically)
|
||||
val generationTimeoutMs: Long = 30000 // LLM timeout
|
||||
)
|
||||
```
|
||||
|
||||
### Calculated Values
|
||||
|
||||
| Scenario | Unanswered Count | Action | Questions Generated |
|
||||
|----------|-----------------|--------|---------------------|
|
||||
| First time | 0 | Initial load | 20 (2x min) |
|
||||
| Running low | 5 | Replenish | 5 (to reach 10) |
|
||||
| Running low | 8 | Replenish | 2 (to reach 10) |
|
||||
| Sufficient | 10+ | Skip | 0 |
|
||||
| After question | 9 | Replenish | 1 (to reach 10) |
|
||||
|
||||
## Generation Time Estimates
|
||||
|
||||
Assuming ~3 seconds per question (generate + review):
|
||||
|
||||
| Questions Needed | Estimated Time |
|
||||
|-----------------|----------------|
|
||||
| 5 questions | ~15 seconds |
|
||||
| 10 questions | ~30 seconds |
|
||||
| 20 questions | ~60 seconds |
|
||||
|
||||
**Good news:** Generation happens in background, doesn't block greeting!
|
||||
|
||||
## Trigger Points
|
||||
|
||||
### 1. Face Appears (Pre-generation)
|
||||
```kotlin
|
||||
// In DigitalHumanInteractionController.kt
|
||||
if (state == InteractionState.IDLE || state == InteractionState.MEMORY || state == InteractionState.FAREWELL) {
|
||||
handler.onFaceAppeared(currentFaceId ?: "guest") // ← Pre-generate
|
||||
enterGreeting()
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Question Asked (Replenishment)
|
||||
```kotlin
|
||||
// In DigitalHumanInteractionController.kt
|
||||
handler.speak("嗨小朋友,可以帮老师个忙吗?" + topic)
|
||||
handler.onQuestionAsked(currentFaceId ?: "guest") // ← Replenish
|
||||
```
|
||||
|
||||
## Logging Output
|
||||
|
||||
### First Time User
|
||||
```
|
||||
I/QuestionGenAgent: Face appeared, triggering question pre-generation for user: user123
|
||||
I/QuestionGenAgent: User user123 has 0 unanswered questions (initial=true), generating 20 more...
|
||||
D/QuestionGenAgent: Generating question 1/20 for user user123
|
||||
I/QuestionGenAgent: Question saved: 请说出你家里有几口人...
|
||||
D/QuestionGenAgent: Generating question 2/20 for user user123
|
||||
...
|
||||
I/QuestionGenAgent: Finished generating questions for user user123
|
||||
```
|
||||
|
||||
### Returning User (Low Questions)
|
||||
```
|
||||
I/QuestionGenAgent: User user123 has 3 unanswered questions, generating 7 more...
|
||||
D/QuestionGenAgent: Generating question 1/7 for user user123
|
||||
...
|
||||
I/QuestionGenAgent: Finished generating questions for user user123
|
||||
```
|
||||
|
||||
### Sufficient Questions
|
||||
```
|
||||
D/QuestionGenAgent: User user123 has 15 unanswered questions, no need to generate
|
||||
```
|
||||
|
||||
## Edge Cases Handled
|
||||
|
||||
### ❌ No Face Detection
|
||||
- No pre-generation triggered
|
||||
- User must show face first
|
||||
|
||||
### ❌ LLM Generation Fails
|
||||
- Gracefully skips failed question
|
||||
- Continues with next question
|
||||
- Logs error for debugging
|
||||
|
||||
### ❌ Review Fails
|
||||
- Question discarded
|
||||
- Not saved to database
|
||||
- Moves to next question
|
||||
|
||||
### ❌ Database Empty at Question Time
|
||||
- Very unlikely (20s head start)
|
||||
- Falls back to default question: "你喜欢什么颜色呀?"
|
||||
|
||||
## Performance Optimization
|
||||
|
||||
### Asynchronous Generation
|
||||
```kotlin
|
||||
agentScope.launch {
|
||||
checkAndGenerateForUser(userId) // Non-blocking
|
||||
}
|
||||
```
|
||||
- Doesn't block UI thread
|
||||
- Doesn't block greeting
|
||||
- Runs completely in background
|
||||
|
||||
### Smart Prompt Selection
|
||||
- Tracks last 10 used prompts
|
||||
- Avoids repetition
|
||||
- Ensures diversity
|
||||
|
||||
### Delay Between Questions
|
||||
```kotlin
|
||||
delay(1000) // 1 second between each question
|
||||
```
|
||||
- Prevents overwhelming LLM
|
||||
- Allows proper processing time
|
||||
|
||||
## Testing Scenarios
|
||||
|
||||
### Test 1: First Time User
|
||||
1. Clear database
|
||||
2. Show face to camera
|
||||
3. Check logs: Should see "generating 20 more"
|
||||
4. Wait 20 seconds
|
||||
5. First question should work ✓
|
||||
|
||||
### Test 2: Returning User
|
||||
1. User already has 5 unanswered questions
|
||||
2. Show face to camera
|
||||
3. Check logs: Should see "generating 5 more"
|
||||
4. Questions should replenish ✓
|
||||
|
||||
### Test 3: Continuous Usage
|
||||
1. User answers questions continuously
|
||||
2. Each question triggers check
|
||||
3. Database should stay above 10 questions ✓
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
- [ ] Priority-based generation (generate harder questions as user progresses)
|
||||
- [ ] Track question difficulty and adjust based on user performance
|
||||
- [ ] Pre-generate during idle time (not just on face appearance)
|
||||
- [ ] Cache generated questions for offline use
|
||||
- [ ] Support multiple users with different question pools
|
||||
221
app/src/main/assets/question_prompts_README.md
Normal file
221
app/src/main/assets/question_prompts_README.md
Normal file
@@ -0,0 +1,221 @@
|
||||
# Question Generation Agent - Prompt Pool Guide
|
||||
|
||||
## Overview
|
||||
|
||||
The Question Generation Agent uses a JSON-based prompt pool to generate diverse educational questions for special needs children. The system automatically loads prompts from `question_prompts.json` and intelligently selects prompts to ensure diversity.
|
||||
|
||||
## File Location
|
||||
|
||||
```
|
||||
app/src/main/assets/question_prompts.json
|
||||
```
|
||||
|
||||
## JSON Structure
|
||||
|
||||
Each prompt in the pool follows this structure:
|
||||
|
||||
```json
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识家庭成员",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于认识家庭成员的题目,适合自闭症儿童,要求题目贴近日常生活,语言简单易懂"
|
||||
}
|
||||
```
|
||||
|
||||
### Fields Description
|
||||
|
||||
| Field | Type | Description | Example |
|
||||
|-------|------|-------------|---------|
|
||||
| `subject` | String | Subject category | "生活适应", "生活语文" |
|
||||
| `grade` | Integer | Grade level (1-6) | 1 |
|
||||
| `topic` | String | Specific topic name | "认识家庭成员" |
|
||||
| `difficulty` | Integer | Difficulty level (1-5) | 1 |
|
||||
| `promptTemplate` | String | Detailed instruction for LLM | "生成一个关于...的题目" |
|
||||
|
||||
## How to Add New Prompts
|
||||
|
||||
### Step 1: Open the JSON file
|
||||
|
||||
Open `app/src/main/assets/question_prompts.json` in any text editor.
|
||||
|
||||
### Step 2: Add a new prompt object
|
||||
|
||||
Add a new object to the JSON array:
|
||||
|
||||
```json
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "你的新主题",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于[主题描述]的题目,要求[具体要求]"
|
||||
}
|
||||
```
|
||||
|
||||
### Step 3: Save and restart
|
||||
|
||||
The prompts are loaded when the app starts. Restart the app to load new prompts.
|
||||
|
||||
## Prompt Template Guidelines
|
||||
|
||||
### Good Prompt Templates
|
||||
|
||||
✅ **Specific and detailed:**
|
||||
```json
|
||||
"promptTemplate": "生成一个关于认识家庭成员的题目,包括爸爸、妈妈、爷爷、奶奶等,要求描述家庭成员之间的关系和称呼"
|
||||
```
|
||||
|
||||
✅ **Include context:**
|
||||
```json
|
||||
"promptTemplate": "生成一个关于交通安全的题目,例如红绿灯、斑马线、过马路等,要求强调安全规则和实际应用场景"
|
||||
```
|
||||
|
||||
✅ **Specify requirements:**
|
||||
```json
|
||||
"promptTemplate": "生成一个关于认识颜色的题目,包括红、黄、蓝、绿等基本颜色,要求联系实际生活中的物品,如红色的苹果、黄色的香蕉等"
|
||||
```
|
||||
|
||||
### Bad Prompt Templates
|
||||
|
||||
❌ **Too vague:**
|
||||
```json
|
||||
"promptTemplate": "生成一个题目"
|
||||
```
|
||||
|
||||
❌ **Too broad:**
|
||||
```json
|
||||
"promptTemplate": "生成一个关于数学的题目"
|
||||
```
|
||||
|
||||
❌ **Not specific to special education:**
|
||||
```json
|
||||
"promptTemplate": "生成一个难一点的题目"
|
||||
```
|
||||
|
||||
## Current Prompt Categories
|
||||
|
||||
### 生活适应 (Life Adaptation) - 25 prompts
|
||||
- 认识家庭成员 (Family Members)
|
||||
- 认识日常用品 (Daily Items)
|
||||
- 认识天气 (Weather)
|
||||
- 认识季节 (Seasons)
|
||||
- 交通安全 (Traffic Safety)
|
||||
- 认识食物 (Food)
|
||||
- 个人卫生 (Personal Hygiene)
|
||||
- 认识动物 (Animals)
|
||||
- 情绪识别 (Emotion Recognition)
|
||||
- 社交礼仪 (Social Etiquette)
|
||||
- 时间概念 (Time Concepts)
|
||||
- 认识颜色 (Colors)
|
||||
- 认识形状 (Shapes)
|
||||
- 数数练习 (Counting)
|
||||
- 身体部位 (Body Parts)
|
||||
- 穿衣自理 (Dressing)
|
||||
- 整理物品 (Organizing)
|
||||
- 认识数字 (Numbers)
|
||||
- 简单加减法 (Basic Math)
|
||||
- 认识钱币 (Money)
|
||||
- 认识地图 (Maps)
|
||||
- 紧急情况 (Emergencies)
|
||||
- 公共场合行为 (Public Behavior)
|
||||
- 认识职业 (Professions)
|
||||
- 节日文化 (Festivals)
|
||||
|
||||
### 生活语文 (Life Chinese) - 6 prompts
|
||||
- 认识汉字 (Chinese Characters)
|
||||
- 简单句子理解 (Sentence Understanding)
|
||||
- 词语配对 (Word Pairing)
|
||||
- 看图说话 (Picture Description)
|
||||
- 反义词 (Antonyms)
|
||||
- 量词使用 (Measure Words)
|
||||
- 标点符号 (Punctuation)
|
||||
|
||||
## Diversity Mechanism
|
||||
|
||||
The system ensures question diversity through:
|
||||
|
||||
1. **Prompt Pool Rotation**: Intelligently selects prompts that haven't been used recently
|
||||
2. **Recent Usage Tracking**: Remembers the last 10 used prompts and avoids them
|
||||
3. **Topic Tracking**: Records generated topics to prevent duplicate questions
|
||||
4. **Automatic Replenishment**: Generates new questions when the unanswered count drops below threshold
|
||||
|
||||
## Configuration
|
||||
|
||||
You can configure the agent in `QuestionGenerationAgent.kt`:
|
||||
|
||||
```kotlin
|
||||
data class AgentConfig(
|
||||
val minUnansweredQuestions: Int = 10, // Minimum questions before triggering generation
|
||||
val batchSize: Int = 5, // Number of questions to generate per batch
|
||||
val generationTimeoutMs: Long = 30000 // LLM generation timeout
|
||||
)
|
||||
```
|
||||
|
||||
## Trigger Mechanism
|
||||
|
||||
Questions are generated **event-driven**, not time-driven:
|
||||
|
||||
1. User asks a question to the child
|
||||
2. `onQuestionAsked(userId)` is triggered
|
||||
3. System checks if unanswered questions < `minUnansweredQuestions`
|
||||
4. If yes, generates `batchSize` new questions
|
||||
5. Each question goes through: Generate → Review → Save to Database
|
||||
|
||||
## Testing
|
||||
|
||||
To manually trigger question generation:
|
||||
|
||||
```kotlin
|
||||
questionGenerationAgent.triggerGeneration(userId)
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Prompts not loading
|
||||
- Check if `question_prompts.json` exists in `app/src/main/assets/`
|
||||
- Verify JSON syntax is valid (no trailing commas, proper quotes)
|
||||
- Check Logcat for error messages
|
||||
|
||||
### Questions not generating
|
||||
- Ensure LLM is properly initialized
|
||||
- Check if `llmManager` is not null
|
||||
- Verify the user has been seen by the face detection system
|
||||
|
||||
### Low diversity
|
||||
- Add more prompts to the JSON file
|
||||
- Increase `maxRecentPrompts` in the agent
|
||||
- Check if prompts have varied `topic` values
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Be Specific**: Write detailed prompt templates that specify the exact requirements
|
||||
2. **Include Examples**: Provide examples in the prompt template to guide the LLM
|
||||
3. **Vary Topics**: Create many different topics within each subject
|
||||
4. **Consider Difficulty**: Use appropriate difficulty levels (1-5) for special education
|
||||
5. **Test Prompts**: Test each new prompt to ensure it generates quality questions
|
||||
6. **Keep Updated**: Regularly review and update prompts based on generated question quality
|
||||
|
||||
## Example: Adding a New Subject
|
||||
|
||||
```json
|
||||
{
|
||||
"subject": "生活数学",
|
||||
"grade": 1,
|
||||
"topic": "比较大小",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于比较大小的题目,例如哪个更大、哪个更小,要求使用具体的物品如苹果、球等作为比较对象"
|
||||
},
|
||||
{
|
||||
"subject": "生活数学",
|
||||
"grade": 1,
|
||||
"topic": "简单分类",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于分类的题目,例如把水果和蔬菜分开,把动物和植物分开,要求分类标准明确"
|
||||
}
|
||||
```
|
||||
|
||||
## Support
|
||||
|
||||
For questions or issues, check the Logcat output for error messages and review this guide.
|
||||
305
app/src/main/assets/question_prompts_example.json
Normal file
305
app/src/main/assets/question_prompts_example.json
Normal file
@@ -0,0 +1,305 @@
|
||||
{
|
||||
"_comment": "This is an EXAMPLE file showing how to add more prompts. Copy the prompts you want to the main question_prompts.json file.",
|
||||
|
||||
"_examples": [
|
||||
{
|
||||
"_description": "Example 1: Life Math - Comparing Sizes",
|
||||
"subject": "生活数学",
|
||||
"grade": 1,
|
||||
"topic": "比较大小",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于比较大小的题目,例如哪个更大、哪个更小,要求使用具体的物品如苹果、球等作为比较对象"
|
||||
},
|
||||
{
|
||||
"_description": "Example 2: Life Math - Simple Classification",
|
||||
"subject": "生活数学",
|
||||
"grade": 1,
|
||||
"topic": "简单分类",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于分类的题目,例如把水果和蔬菜分开,把动物和植物分开,要求分类标准明确"
|
||||
},
|
||||
{
|
||||
"_description": "Example 3: Life Adaptation - School Rules",
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "学校规则",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于学校规则的题目,例如上课要举手发言、不能在走廊奔跑等,要求强调遵守规则的重要性"
|
||||
},
|
||||
{
|
||||
"_description": "Example 4: Life Chinese - Simple Reading",
|
||||
"subject": "生活语文",
|
||||
"grade": 1,
|
||||
"topic": "简单阅读",
|
||||
"difficulty": 3,
|
||||
"promptTemplate": "生成一个简单阅读理解题目,提供2-3句话的短文,然后提出一个关于短文内容的问题"
|
||||
},
|
||||
{
|
||||
"_description": "Example 5: Life Adaptation - Healthy Habits",
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "健康习惯",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于健康习惯的题目,例如早睡早起、多喝水、多运动等,要求解释为什么这些习惯好"
|
||||
},
|
||||
{
|
||||
"_description": "Example 6: Life Adaptation - Using Phone",
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "使用电话",
|
||||
"difficulty": 3,
|
||||
"promptTemplate": "生成一个关于如何使用电话的题目,包括拨号、接听、挂断等基本操作,要求在紧急情况下如何求助"
|
||||
},
|
||||
{
|
||||
"_description": "Example 7: Life Chinese - Sentence Completion",
|
||||
"subject": "生活语文",
|
||||
"grade": 1,
|
||||
"topic": "句子填空",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个句子填空题,例如'我喜欢吃___',要求提供合适的选项让学生选择"
|
||||
},
|
||||
{
|
||||
"_description": "Example 8: Life Adaptation - Weather Clothing",
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "天气与穿衣",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于根据天气选择衣服的题目,例如下雨天穿雨衣、冬天穿棉袄等,要求联系实际生活场景"
|
||||
}
|
||||
],
|
||||
|
||||
"_instructions": "To add these prompts to your system:",
|
||||
"_step1": "1. Open app/src/main/assets/question_prompts.json",
|
||||
"_step2": "2. Copy the objects from the _examples array (without the _description field)",
|
||||
"_step3": "3. Paste them into the main JSON array",
|
||||
"_step4": "4. Save the file and restart the app"
|
||||
}
|
||||
|
||||
|
||||
|
||||
[
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识家庭成员",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于认识家庭成员的题目,适合智力障碍儿童,要求题目贴近日常生活,语言简单易懂"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识日常用品",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于认识日常用品的题目,例如牙刷、毛巾、杯子等,要求描述物品的用途和使用场景"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识天气",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于认识天气的题目,包括晴天、雨天、阴天等,要求联系实际生活场景"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识季节",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于认识四季的题目,要求描述不同季节的特点和对应的活动"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "交通安全",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于交通安全的题目,例如红绿灯、斑马线、过马路等,要求强调安全规则"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识食物",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于认识常见食物的题目,包括水果、蔬菜、主食等,要求联系实际饮食场景"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "个人卫生",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于个人卫生的题目,例如洗手、刷牙、洗澡等,要求强调卫生习惯的重要性"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识动物",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于认识常见动物的题目,包括猫、狗、鸟、鱼等,要求描述动物的特征和习性"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "情绪识别",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于识别情绪的题目,例如高兴、伤心、生气、害怕等,要求联系实际情境"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "社交礼仪",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于社交礼仪的题目,例如打招呼、说谢谢、对不起等,要求强调礼貌用语的使用场景"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "时间概念",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于时间概念的题目,例如早上、中午、晚上、昨天、今天、明天等,要求联系日常生活作息"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识颜色",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于认识颜色的题目,包括红、黄、蓝、绿等基本颜色,要求联系实际生活中的物品"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识形状",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于认识形状的题目,例如圆形、方形、三角形等,要求联系实际生活中的物品"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "数数练习",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于数数的题目,要求10以内的数量,联系实际场景如水果、玩具等"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "身体部位",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于认识身体部位的题目,例如手、脚、头、眼睛、耳朵等,要求描述部位的功能"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "穿衣自理",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于穿衣自理的题目,要求描述穿衣的步骤和注意事项"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "整理物品",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于整理物品的题目,例如整理书包、玩具、房间等,要求强调整理的重要性"
|
||||
},
|
||||
{
|
||||
"subject": "生活语文",
|
||||
"grade": 1,
|
||||
"topic": "认识汉字",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于认识简单汉字的题目,要求选择日常生活中常用的高频汉字"
|
||||
},
|
||||
{
|
||||
"subject": "生活语文",
|
||||
"grade": 1,
|
||||
"topic": "简单句子理解",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于简单句子理解的题目,要求句子简短,贴近生活场景"
|
||||
},
|
||||
{
|
||||
"subject": "生活语文",
|
||||
"grade": 1,
|
||||
"topic": "词语配对",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于词语配对的题目,例如苹果-水果、猫-动物等,要求逻辑关系简单明了"
|
||||
},
|
||||
{
|
||||
"subject": "生活语文",
|
||||
"grade": 1,
|
||||
"topic": "看图说话",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个看图说话的题目,用文字描述一个简单场景,让学生用一两句话描述看到的内容"
|
||||
},
|
||||
{
|
||||
"subject": "生活语文",
|
||||
"grade": 1,
|
||||
"topic": "反义词",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于反义词的题目,例如大-小、高-矮、快-慢等,要求选择常用的反义词对"
|
||||
},
|
||||
{
|
||||
"subject": "生活语文",
|
||||
"grade": 1,
|
||||
"topic": "量词使用",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于量词使用的题目,例如一个苹果、一本书、一只猫等,要求选择常见的量词搭配"
|
||||
},
|
||||
{
|
||||
"subject": "生活语文",
|
||||
"grade": 1,
|
||||
"topic": "标点符号",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于标点符号的题目,主要涉及句号、问号、感叹号的基本使用"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识数字",
|
||||
"difficulty": 1,
|
||||
"promptTemplate": "生成一个关于认识数字的题目,要求1-10的数字识别和书写"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "简单加减法",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于简单加减法的题目,要求10以内的加减法,联系实际场景"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识钱币",
|
||||
"difficulty": 3,
|
||||
"promptTemplate": "生成一个关于认识钱币的题目,包括元、角、分的认识,要求联系实际购物场景"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识地图",
|
||||
"difficulty": 3,
|
||||
"promptTemplate": "生成一个关于认识简单地图的题目,例如家庭、学校的平面图,要求理解基本方位"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "紧急情况",
|
||||
"difficulty": 3,
|
||||
"promptTemplate": "生成一个关于紧急情况处理的题目,例如着火、地震、迷路等,要求强调安全自救知识"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "公共场合行为",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于公共场合行为规范的题目,例如图书馆、医院、公交车等场景的礼仪"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "认识职业",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于认识常见职业的题目,例如医生、老师、警察、消防员等,要求描述职业的工作内容"
|
||||
},
|
||||
{
|
||||
"subject": "生活适应",
|
||||
"grade": 1,
|
||||
"topic": "节日文化",
|
||||
"difficulty": 2,
|
||||
"promptTemplate": "生成一个关于传统节日的题目,例如春节、中秋节、端午节等,要求介绍节日的习俗和意义"
|
||||
}
|
||||
]
|
||||
@@ -6,6 +6,7 @@ import android.content.Context
|
||||
import android.content.pm.PackageManager
|
||||
import android.content.res.ColorStateList
|
||||
import android.graphics.Color
|
||||
import android.graphics.BitmapFactory
|
||||
import android.os.Build
|
||||
import android.os.Bundle
|
||||
import android.os.Handler
|
||||
@@ -16,6 +17,7 @@ import android.util.Log
|
||||
import android.view.MotionEvent
|
||||
import android.view.ViewGroup
|
||||
import android.widget.Button
|
||||
import android.widget.ImageView
|
||||
import android.widget.TextView
|
||||
import android.widget.Toast
|
||||
import androidx.core.app.ActivityCompat
|
||||
@@ -25,7 +27,6 @@ import android.view.View
|
||||
import androidx.lifecycle.Lifecycle
|
||||
import androidx.lifecycle.LifecycleOwner
|
||||
import androidx.lifecycle.LifecycleRegistry
|
||||
import android.widget.ImageView
|
||||
import com.unity3d.player.UnityPlayer
|
||||
import com.unity3d.player.UnityPlayerActivity
|
||||
import com.digitalperson.audio.AudioProcessor
|
||||
@@ -33,6 +34,7 @@ import com.digitalperson.asr.AsrManager
|
||||
import com.digitalperson.cloud.CloudApiManager
|
||||
import com.digitalperson.cloud.CloudReflectionHelper
|
||||
import com.digitalperson.config.AppConfig
|
||||
import com.digitalperson.embedding.RefImageMatcher
|
||||
import com.digitalperson.question.QuestionGenerationAgent
|
||||
import com.digitalperson.data.AppDatabase
|
||||
import com.digitalperson.face.FaceDetectionPipeline
|
||||
@@ -48,8 +50,6 @@ import com.digitalperson.tts.TtsController
|
||||
import com.digitalperson.util.FileHelper
|
||||
import com.digitalperson.vad.VadManager
|
||||
import kotlinx.coroutines.*
|
||||
import com.digitalperson.embedding.RefImageMatcher
|
||||
import android.graphics.BitmapFactory
|
||||
|
||||
class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
|
||||
@@ -260,6 +260,11 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
recordButtonGlow = chatLayout.findViewById(R.id.record_button_glow)
|
||||
refMatchImageView = chatLayout.findViewById(R.id.ref_match_image)
|
||||
|
||||
if (!AppConfig.SHOW_DEBUG_TEXT) {
|
||||
chatHistoryText.visibility = View.GONE
|
||||
chatLayout.findViewById<View>(R.id.scroll_view).visibility = View.GONE
|
||||
}
|
||||
|
||||
// 根据配置设置按钮可见性
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
holdToSpeakButton.visibility = View.VISIBLE
|
||||
@@ -366,6 +371,8 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
override fun onLlmCalled(text: String) {
|
||||
Log.d("UnityDigitalPerson", "LLM called with: $text")
|
||||
interactionCoordinator.onUserAsrText(text)
|
||||
// 用用户问题提前匹配:比等 LLM 回复更早显示图片(模拟器/真机通用)
|
||||
maybeShowMatchedRefImage(text)
|
||||
}
|
||||
})
|
||||
setAudioProcessor(audioProcessor)
|
||||
@@ -664,6 +671,7 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
}
|
||||
|
||||
private fun appendChat(text: String) {
|
||||
if (!AppConfig.SHOW_DEBUG_TEXT) return
|
||||
runOnUiThread {
|
||||
chatHistoryText.append(text + "\n")
|
||||
}
|
||||
@@ -696,6 +704,8 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
override fun onSpeak(text: String) {
|
||||
ttsController.enqueueSegment(text)
|
||||
ttsController.enqueueEnd()
|
||||
// 主动发言(问候/主动提问)也尝试匹配参考图片
|
||||
maybeShowMatchedRefImage(text)
|
||||
}
|
||||
|
||||
override fun onRequestCloudReply(prompt: String) {
|
||||
@@ -759,13 +769,22 @@ class UnityDigitalPersonActivity : UnityPlayerActivity(), LifecycleOwner {
|
||||
|
||||
private fun maybeShowMatchedRefImage(text: String) {
|
||||
val imageView = refMatchImageView ?: return
|
||||
// Unity Activity already has coroutines
|
||||
CoroutineScope(SupervisorJob() + Dispatchers.IO).launch {
|
||||
// 每次匹配前先清掉上一张图
|
||||
runOnUiThread {
|
||||
imageView.setImageBitmap(null)
|
||||
imageView.visibility = View.GONE
|
||||
}
|
||||
ioScope.launch {
|
||||
val match = RefImageMatcher.findBestMatch(applicationContext, text)
|
||||
if (match == null) return@launch
|
||||
if (match == null) {
|
||||
Log.d("RefImageMatch", "未找到匹配图片 query=\"${text.take(80)}\"")
|
||||
return@launch
|
||||
}
|
||||
Log.d("RefImageMatch", "匹配成功 score=${match.score} path=${match.pngAssetPath} query=\"${text.take(80)}\"")
|
||||
val bitmap = try {
|
||||
assets.open(match.pngAssetPath).use { BitmapFactory.decodeStream(it) }
|
||||
} catch (_: Throwable) {
|
||||
} catch (e: Throwable) {
|
||||
Log.w("RefImageMatch", "图片加载失败 path=${match.pngAssetPath}", e)
|
||||
null
|
||||
}
|
||||
if (bitmap == null) return@launch
|
||||
|
||||
@@ -6,6 +6,7 @@ import android.util.Log
|
||||
import com.digitalperson.BuildConfig
|
||||
import com.digitalperson.audio.AudioProcessor
|
||||
import com.digitalperson.config.AppConfig
|
||||
import com.digitalperson.env.RuntimeEnv
|
||||
import com.digitalperson.engine.SenseVoiceEngineRKNN
|
||||
import com.digitalperson.util.FileHelper
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
@@ -23,7 +24,6 @@ class AsrManager(private val context: Context) {
|
||||
|
||||
private var senseVoice: SenseVoiceEngineRKNN? = null
|
||||
private val nativeLock = Any()
|
||||
|
||||
private val asrQueue = Channel<Pair<FloatArray, FloatArray>>(capacity = Channel.UNLIMITED)
|
||||
|
||||
private var audioProcessor: AudioProcessor? = null
|
||||
@@ -48,6 +48,10 @@ class AsrManager(private val context: Context) {
|
||||
}
|
||||
|
||||
fun initSenseVoiceModel(): Boolean {
|
||||
if (RuntimeEnv.isEmulator()) {
|
||||
Log.w(TAG, "ASR: emulator detected; skip local RKNN init and use cloud ASR")
|
||||
return false
|
||||
}
|
||||
return try {
|
||||
Log.i(TAG, "ASR: init SenseVoice RKNN (scheme A)")
|
||||
|
||||
@@ -133,23 +137,47 @@ class AsrManager(private val context: Context) {
|
||||
Log.d(TAG, "ASR started: processing audio segment")
|
||||
|
||||
saveAsrAudio(originalSeg, processedSeg)
|
||||
|
||||
val raw = synchronized(nativeLock) {
|
||||
|
||||
val localText = synchronized(nativeLock) {
|
||||
val e = senseVoice
|
||||
if (e == null || !e.isInitialized) {
|
||||
Log.e(TAG, "ASR failed: SenseVoice engine not initialized")
|
||||
""
|
||||
} else {
|
||||
try {
|
||||
e.transcribeBuffer(processedSeg)
|
||||
} catch (e: Throwable) {
|
||||
Log.e(TAG, "ASR transcribe failed: ${e.message}")
|
||||
removeTokens(e.transcribeBuffer(processedSeg))
|
||||
} catch (t: Throwable) {
|
||||
Log.e(TAG, "ASR transcribe failed: ${t.message}")
|
||||
""
|
||||
}
|
||||
}
|
||||
}.trim()
|
||||
|
||||
val text = if (localText.isNotBlank()) {
|
||||
localText
|
||||
} else {
|
||||
// 模拟器或本地 RKNN 未就绪:使用腾讯云「一句话识别」SDK(app/libs/asr-one-sentence-release.aar)
|
||||
val shouldTryTencent =
|
||||
BuildConfig.HAS_TENCENT_ASR_SDK && (RuntimeEnv.isEmulator() || !isInitialized())
|
||||
if (!shouldTryTencent) {
|
||||
Log.e(
|
||||
TAG,
|
||||
"ASR failed: local RKNN not ready and Tencent SDK unavailable " +
|
||||
"(add libs/asr-one-sentence-release.aar or fix SenseVoice init)"
|
||||
)
|
||||
""
|
||||
} else {
|
||||
withContext(Dispatchers.IO) {
|
||||
try {
|
||||
// 云端 ASR 使用原始录音(未经 AEC/NS):
|
||||
// 模拟器上 AEC/NS 不可用,processedSeg 可能被处理成近似静音
|
||||
TencentOneSentenceAsr.transcribePcm16Mono(originalSeg)
|
||||
} catch (t: Throwable) {
|
||||
Log.e(TAG, "Tencent ASR failed: ${t.message}")
|
||||
""
|
||||
}
|
||||
}.trim()
|
||||
}
|
||||
}
|
||||
Log.d(TAG, "ASR raw result: $raw")
|
||||
val text = removeTokens(raw)
|
||||
|
||||
val filterResult = filterText(text)
|
||||
if (filterResult != null) {
|
||||
@@ -220,4 +248,5 @@ class AsrManager(private val context: Context) {
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
216
app/src/main/java/com/digitalperson/asr/TencentOneSentenceAsr.kt
Normal file
216
app/src/main/java/com/digitalperson/asr/TencentOneSentenceAsr.kt
Normal file
@@ -0,0 +1,216 @@
|
||||
package com.digitalperson.asr
|
||||
|
||||
import android.util.Base64
|
||||
import android.util.Log
|
||||
import com.digitalperson.config.AppConfig
|
||||
import okhttp3.MediaType.Companion.toMediaType
|
||||
import okhttp3.OkHttpClient
|
||||
import okhttp3.Request
|
||||
import okhttp3.RequestBody.Companion.toRequestBody
|
||||
import org.json.JSONObject
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.ByteOrder
|
||||
import java.security.MessageDigest
|
||||
import java.text.SimpleDateFormat
|
||||
import java.util.Date
|
||||
import java.util.Locale
|
||||
import java.util.TimeZone
|
||||
import java.util.concurrent.TimeUnit
|
||||
import javax.crypto.Mac
|
||||
import javax.crypto.spec.SecretKeySpec
|
||||
|
||||
/**
|
||||
* 腾讯云「一句话识别」REST API 直接实现(TC3-HMAC-SHA256 签名)。
|
||||
*
|
||||
* 不依赖 SDK AAR,而是用 OkHttp 自行签名并发起 HTTP 请求。
|
||||
* 签名时间戳从服务器 Date 响应头获取,彻底规避模拟器时钟偏差导致的
|
||||
* AuthFailure.SignatureExpire 错误。
|
||||
*
|
||||
* 文档:https://cloud.tencent.com/document/product/1093/35646
|
||||
*/
|
||||
object TencentOneSentenceAsr {
|
||||
|
||||
private const val TAG = "TencentOneSentenceAsr"
|
||||
private const val HOST = "asr.tencentcloudapi.com"
|
||||
private const val ACTION = "SentenceRecognition"
|
||||
private const val VERSION = "2019-06-14"
|
||||
|
||||
private val client = OkHttpClient.Builder()
|
||||
.connectTimeout(10, TimeUnit.SECONDS)
|
||||
.readTimeout(30, TimeUnit.SECONDS)
|
||||
.build()
|
||||
|
||||
/**
|
||||
* 将 FloatArray (16kHz mono, -1..1) 通过腾讯云一句话识别转为文字。
|
||||
* 阻塞直到 HTTP 响应返回或超时。请在 IO 线程中调用。
|
||||
*/
|
||||
fun transcribePcm16Mono(pcmFloat: FloatArray): String {
|
||||
val appId = AppConfig.QCloud.APP_ID.trim()
|
||||
val sid = AppConfig.QCloud.SECRET_ID.trim()
|
||||
val skey = AppConfig.QCloud.SECRET_KEY.trim()
|
||||
if (appId.isEmpty() || sid.isEmpty() || skey.isEmpty()) {
|
||||
Log.e(TAG, "APP_ID / SECRET_ID / SECRET_KEY 为空")
|
||||
return ""
|
||||
}
|
||||
if (pcmFloat.isEmpty()) return ""
|
||||
|
||||
val pcmBytes = floatToPcm16Bytes(pcmFloat)
|
||||
val pcmBase64 = Base64.encodeToString(pcmBytes, Base64.NO_WRAP)
|
||||
|
||||
// 诊断:检查音频幅度,若 RMS 接近 0 说明麦克风没采集到声音
|
||||
val rms = kotlin.math.sqrt(pcmFloat.fold(0.0) { acc, v -> acc + v * v } / pcmFloat.size)
|
||||
val maxAmp = pcmFloat.maxOf { kotlin.math.abs(it) }
|
||||
Log.d(TAG, "一句话识别:${pcmFloat.size} 采样点,${pcmFloat.size / 16000.0}s,${pcmBytes.size} bytes RMS=${"%.4f".format(rms)} maxAmp=${"%.4f".format(maxAmp)}")
|
||||
if (maxAmp < 0.01f) {
|
||||
Log.w(TAG, "⚠ 音频幅度极低(maxAmp=${"%.5f".format(maxAmp)}),模拟器麦克风可能没有采集到声音!请检查:模拟器扩展控制 → 麦克风 → 使用宿主机麦克风")
|
||||
}
|
||||
|
||||
// 从服务器取时间,修正模拟器时钟偏差
|
||||
val timestamp = fetchServerTimestamp()
|
||||
val date = utcDate(timestamp)
|
||||
|
||||
val payload = buildPayload(appId, pcmBase64, pcmBytes.size)
|
||||
val auth = buildAuthorization(sid, skey, date, timestamp, payload)
|
||||
|
||||
val request = Request.Builder()
|
||||
.url("https://$HOST")
|
||||
.addHeader("Authorization", auth)
|
||||
.addHeader("Content-Type", "application/json; charset=utf-8")
|
||||
.addHeader("Host", HOST)
|
||||
.addHeader("X-TC-Action", ACTION)
|
||||
.addHeader("X-TC-Version", VERSION)
|
||||
.addHeader("X-TC-Timestamp", timestamp.toString())
|
||||
.post(payload.toRequestBody("application/json; charset=utf-8".toMediaType()))
|
||||
.build()
|
||||
|
||||
return try {
|
||||
val response = client.newCall(request).execute()
|
||||
val body = response.body?.string().orEmpty()
|
||||
Log.d(TAG, "API 响应: ${body.take(400)}")
|
||||
parseResult(body)
|
||||
} catch (e: Exception) {
|
||||
Log.e(TAG, "HTTP 请求失败: ${e.message}", e)
|
||||
""
|
||||
}
|
||||
}
|
||||
|
||||
// ─── 工具方法 ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 向服务器发送 HEAD 请求,从 Date 响应头获取精确时间戳。
|
||||
* 若请求失败则回退到设备时钟(可能有偏差)。
|
||||
*/
|
||||
private fun fetchServerTimestamp(): Long {
|
||||
return try {
|
||||
val req = Request.Builder().url("https://$HOST").head().build()
|
||||
val resp = client.newCall(req).execute()
|
||||
val dateHeader = resp.header("Date")
|
||||
resp.close()
|
||||
if (dateHeader != null) {
|
||||
val sdf = SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH)
|
||||
val serverTs = sdf.parse(dateHeader)?.time?.div(1000) ?: deviceTimestamp()
|
||||
val deviceTs = deviceTimestamp()
|
||||
val offset = serverTs - deviceTs
|
||||
if (kotlin.math.abs(offset) > 60) {
|
||||
Log.w(TAG, "设备时钟偏差 ${offset}s,使用服务器时间修正(设备=${deviceTs}, 服务器=${serverTs})")
|
||||
}
|
||||
serverTs
|
||||
} else {
|
||||
deviceTimestamp()
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
Log.w(TAG, "获取服务器时间失败: ${e.message},使用设备时间")
|
||||
deviceTimestamp()
|
||||
}
|
||||
}
|
||||
|
||||
private fun deviceTimestamp() = System.currentTimeMillis() / 1000
|
||||
|
||||
private fun utcDate(timestamp: Long): String {
|
||||
val sdf = SimpleDateFormat("yyyy-MM-dd", Locale.US)
|
||||
sdf.timeZone = TimeZone.getTimeZone("UTC")
|
||||
return sdf.format(Date(timestamp * 1000))
|
||||
}
|
||||
|
||||
private fun buildPayload(appId: String, base64: String, dataLen: Int): String =
|
||||
JSONObject().apply {
|
||||
put("ProjectId", 0)
|
||||
put("SubServiceType", 2)
|
||||
put("EngSerViceType", "16k_zh")
|
||||
put("SourceType", 1) // 1 = 数据流
|
||||
put("VoiceFormat", "pcm")
|
||||
put("UsrAudioKey", "digital-person-asr")
|
||||
put("FilterDirty", 0)
|
||||
put("FilterModal", 0)
|
||||
put("FilterPunc", 0)
|
||||
put("ConvertNumMode", 1)
|
||||
put("Data", base64)
|
||||
put("DataLen", dataLen)
|
||||
}.toString()
|
||||
|
||||
// ─── TC3-HMAC-SHA256 签名 ──────────────────────────────────────────────
|
||||
|
||||
private fun buildAuthorization(
|
||||
secretId: String,
|
||||
secretKey: String,
|
||||
date: String,
|
||||
timestamp: Long,
|
||||
payload: String,
|
||||
): String {
|
||||
val payloadHash = sha256Hex(payload)
|
||||
val canonicalRequest = listOf(
|
||||
"POST", "/", "",
|
||||
"content-type:application/json; charset=utf-8",
|
||||
"host:$HOST",
|
||||
"",
|
||||
"content-type;host",
|
||||
payloadHash,
|
||||
).joinToString("\n")
|
||||
|
||||
val credentialScope = "$date/asr/tc3_request"
|
||||
val stringToSign = "TC3-HMAC-SHA256\n$timestamp\n$credentialScope\n${sha256Hex(canonicalRequest)}"
|
||||
|
||||
val signingKey = hmacSha256(
|
||||
hmacSha256(hmacSha256("TC3$secretKey".toByteArray(), date), "asr"),
|
||||
"tc3_request",
|
||||
)
|
||||
val signature = hmacSha256(signingKey, stringToSign).joinToString("") { "%02x".format(it) }
|
||||
|
||||
return "TC3-HMAC-SHA256 Credential=$secretId/$credentialScope, SignedHeaders=content-type;host, Signature=$signature"
|
||||
}
|
||||
|
||||
private fun parseResult(json: String): String {
|
||||
if (json.isBlank()) return ""
|
||||
return try {
|
||||
val response = JSONObject(json).optJSONObject("Response") ?: return ""
|
||||
val error = response.optJSONObject("Error")
|
||||
if (error != null) {
|
||||
Log.e(TAG, "API 错误: ${error.optString("Code")} - ${error.optString("Message")}")
|
||||
return ""
|
||||
}
|
||||
response.optString("Result").also { text ->
|
||||
if (text.isNotBlank()) Log.d(TAG, "识别结果: \"$text\"")
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
Log.w(TAG, "解析响应失败: ${json.take(300)}")
|
||||
""
|
||||
}
|
||||
}
|
||||
|
||||
private fun sha256Hex(data: String): String {
|
||||
val md = MessageDigest.getInstance("SHA-256")
|
||||
return md.digest(data.toByteArray(Charsets.UTF_8)).joinToString("") { "%02x".format(it) }
|
||||
}
|
||||
|
||||
private fun hmacSha256(key: ByteArray, data: String): ByteArray {
|
||||
val mac = Mac.getInstance("HmacSHA256")
|
||||
mac.init(SecretKeySpec(key, "HmacSHA256"))
|
||||
return mac.doFinal(data.toByteArray(Charsets.UTF_8))
|
||||
}
|
||||
|
||||
private fun floatToPcm16Bytes(samples: FloatArray): ByteArray {
|
||||
val buf = ByteBuffer.allocate(samples.size * 2).order(ByteOrder.LITTLE_ENDIAN)
|
||||
samples.forEach { buf.putShort((it.coerceIn(-1f, 1f) * 32767f).toInt().toShort()) }
|
||||
return buf.array()
|
||||
}
|
||||
}
|
||||
@@ -116,6 +116,19 @@ object AppConfig {
|
||||
const val MODEL_FILE = "bge-small-zh-v1.5.rknn"
|
||||
}
|
||||
|
||||
/**
|
||||
* 模拟器上 [RefImageMatcher] 使用编辑距离时的最低归一化分(与 BGE 余弦阈值不可混用)。
|
||||
* 分数 = 1 - Levenshtein / max(len),越接近 1 越像。
|
||||
*/
|
||||
object RefMatchEmulator {
|
||||
/**
|
||||
* 模拟器混合评分(路径关键词命中率 + 编辑距离)阈值。
|
||||
* 路径关键词:1 个词命中 ≈ 0.25,已足够确认话题相关性。
|
||||
* 原 0.82 是纯编辑距离阈值,字面差异大时根本达不到,故降至 0.20。
|
||||
*/
|
||||
const val MIN_NORMALIZED_EDIT_SCORE = 0.20f
|
||||
}
|
||||
|
||||
/**
|
||||
* app/note/ref 通过 Gradle 额外 assets 目录打入 apk 后,在 assets 中的根路径为 `ref/`。
|
||||
*/
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
package com.digitalperson.embedding
|
||||
|
||||
import kotlin.math.max
|
||||
import kotlin.math.min
|
||||
|
||||
/**
 * Character-level similarity based on Levenshtein distance. Emulator-only
 * fallback: no semantics, intended purely for integration tests and demos.
 *
 * Score = 1 - dist / max(len1, len2); not directly comparable against cosine
 * similarity thresholds.
 */
object EditDistanceSimilarity {

    /**
     * Normalized similarity of [a] and [b] after trimming, in [0, 1].
     * Two blank strings count as identical (1); exactly one blank scores 0.
     */
    fun normalizedScore(a: String, b: String): Float {
        val left = a.trim()
        val right = b.trim()
        return when {
            left.isEmpty() && right.isEmpty() -> 1f
            left.isEmpty() || right.isEmpty() -> 0f
            else -> {
                val longest = max(left.length, right.length).coerceAtLeast(1)
                1f - levenshtein(left, right) / longest.toFloat()
            }
        }
    }

    /**
     * Classic two-row dynamic program: O(n·m) time, O(m) space. Adequate for
     * the medium-sized corpus scanned on the emulator.
     */
    fun levenshtein(s1: String, s2: String): Int {
        if (s1.isEmpty()) return s2.length
        if (s2.isEmpty()) return s1.length
        var previous = IntArray(s2.length + 1) { it }
        var current = IntArray(s2.length + 1)
        for (i in 1..s1.length) {
            current[0] = i
            val left = s1[i - 1]
            for (j in 1..s2.length) {
                val substitution = previous[j - 1] + if (left == s2[j - 1]) 0 else 1
                current[j] = minOf(previous[j] + 1, current[j - 1] + 1, substitution)
            }
            val swap = previous
            previous = current
            current = swap
        }
        return previous[s2.length]
    }
}
|
||||
@@ -4,6 +4,7 @@ import android.content.Context
|
||||
import android.util.Log
|
||||
import com.digitalperson.config.AppConfig
|
||||
import com.digitalperson.data.AppDatabase
|
||||
import com.digitalperson.data.dao.QuestionDao
|
||||
import com.digitalperson.data.entity.Question
|
||||
import com.digitalperson.data.entity.RefTextEmbedding
|
||||
import com.digitalperson.data.util.floatArrayToEmbeddingBytes
|
||||
@@ -27,15 +28,15 @@ object RefEmbeddingIndexer {
|
||||
val dao = db.refTextEmbeddingDao()
|
||||
val questionDao = db.questionDao()
|
||||
|
||||
if (!BgeEmbedding.initialize(app)) {
|
||||
Log.e(TAG, "[RefEmbed] BGE 初始化失败,跳过 ref 语料索引")
|
||||
return@withContext
|
||||
}
|
||||
|
||||
val root = AppConfig.RefCorpus.ASSETS_ROOT
|
||||
val paths = RefCorpusAssetScanner.listTxtFilesUnder(app, root)
|
||||
Log.i(TAG, "[RefEmbed] 发现 ${paths.size} 个 txt(root=$root)")
|
||||
|
||||
val bgeOk = BgeEmbedding.initialize(app)
|
||||
if (!bgeOk) {
|
||||
Log.w(TAG, "[RefEmbed] BGE 未就绪(常见于模拟器),仅扫描题库;ref 配图匹配可用编辑距离")
|
||||
}
|
||||
|
||||
var skipped = 0
|
||||
var embedded = 0
|
||||
var empty = 0
|
||||
@@ -50,28 +51,9 @@ object RefEmbeddingIndexer {
|
||||
continue
|
||||
}
|
||||
|
||||
// 题库:遇到包含 ?/? 的行,写入 questions
|
||||
val subject = extractSubjectFromRaw(raw)
|
||||
val grade = extractGradeFromPath(path)
|
||||
val questionLines = extractQuestionLines(raw)
|
||||
for (line in questionLines) {
|
||||
val content = line.trim()
|
||||
if (content.isEmpty()) continue
|
||||
val exists = questionDao.findByContentSubjectGrade(content, subject, grade)
|
||||
if (exists == null) {
|
||||
questionDao.insert(
|
||||
Question(
|
||||
id = 0,
|
||||
content = content,
|
||||
answer = null,
|
||||
subject = subject,
|
||||
grade = grade,
|
||||
difficulty = 1,
|
||||
createdAt = System.currentTimeMillis()
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
ingestQuestionsFromRaw(raw, path, questionDao)
|
||||
|
||||
if (!bgeOk) continue
|
||||
|
||||
val embedText = RefTxtEmbedText.fromRawFileContent(raw)
|
||||
if (embedText.isEmpty()) {
|
||||
@@ -110,10 +92,34 @@ object RefEmbeddingIndexer {
|
||||
|
||||
Log.i(
|
||||
TAG,
|
||||
"[RefEmbed] 完成 embedded=$embedded skipped=$skipped empty=$empty failed=$failed cacheSize=${RefEmbeddingMemoryCache.size()}"
|
||||
"[RefEmbed] 完成 embedded=$embedded skipped=$skipped empty=$empty failed=$failed cacheSize=${RefEmbeddingMemoryCache.size()} bgeOk=$bgeOk"
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Inserts any question lines found in one raw ref-corpus txt file into the
 * questions table, skipping exact duplicates.
 *
 * @param raw         full text content of the txt asset
 * @param path        asset path of the file; used to derive the grade
 * @param questionDao Room DAO used for the duplicate lookup and the insert
 */
private fun ingestQuestionsFromRaw(raw: String, path: String, questionDao: QuestionDao) {
    val subject = extractSubjectFromRaw(raw)
    val grade = extractGradeFromPath(path)
    val questionLines = extractQuestionLines(raw)
    for (line in questionLines) {
        val content = line.trim()
        if (content.isEmpty()) continue
        // Dedupe on the (content, subject, grade) triple before inserting;
        // re-running the indexer must not duplicate rows.
        val exists = questionDao.findByContentSubjectGrade(content, subject, grade)
        if (exists == null) {
            questionDao.insert(
                Question(
                    id = 0,  // 0 lets Room auto-generate the primary key — presumably; confirm against the entity definition.
                    content = content,
                    answer = null,  // answers are not extracted from the corpus here
                    subject = subject,
                    grade = grade,
                    difficulty = 1,  // NOTE(review): default difficulty; meaning of the scale not visible here.
                    createdAt = System.currentTimeMillis()
                )
            )
        }
    }
}
|
||||
|
||||
private fun extractSubjectFromRaw(raw: String): String? {
|
||||
val line = raw.lineSequence()
|
||||
.map { it.trimEnd() }
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.digitalperson.embedding
|
||||
import android.content.Context
|
||||
import android.util.Log
|
||||
import com.digitalperson.config.AppConfig
|
||||
import com.digitalperson.env.RuntimeEnv
|
||||
import kotlin.math.sqrt
|
||||
|
||||
data class RefImageMatch(
|
||||
@@ -16,7 +17,8 @@ object RefImageMatcher {
|
||||
private const val TAG = AppConfig.TAG
|
||||
|
||||
/**
|
||||
* @param threshold 余弦相似度阈值(向量已归一化时等价于 dot product)。
|
||||
* @param threshold 真机 BGE:余弦相似度阈值(向量已归一化时等价于 dot product)。
|
||||
* 模拟器:忽略该参数,使用 [AppConfig.RefMatchEmulator.MIN_NORMALIZED_EDIT_SCORE](编辑距离归一化分)。
|
||||
*/
|
||||
fun findBestMatch(
|
||||
context: Context,
|
||||
@@ -26,6 +28,10 @@ object RefImageMatcher {
|
||||
val query = text.trim()
|
||||
if (query.isEmpty()) return null
|
||||
|
||||
if (RuntimeEnv.isEmulator()) {
|
||||
return findBestMatchEditDistance(context, query)
|
||||
}
|
||||
|
||||
if (!BgeEmbedding.isReady()) {
|
||||
val ok = BgeEmbedding.initialize(context.applicationContext)
|
||||
if (!ok) {
|
||||
@@ -78,6 +84,203 @@ object RefImageMatcher {
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Emulator path: BGE is not loaded; candidates are scored with a blend of
 * **path-keyword hit rate** (primary) and content matching (secondary).
 *
 * Path keywords: the deepest directory name is split on "-", e.g.
 * "一年级上-生活适应-社会生活-元旦" → ["一年级上", "生活适应", "社会生活", "元旦"].
 * Hit rate = hits / keyword count (one hit ≈ 0.25, enough to pass the 0.20
 * threshold).
 *
 * Pure edit distance previously used a 0.82 threshold, but LLM replies differ
 * greatly from the short reference sentences literally, so even same-topic
 * texts rarely qualified; the keyword scheme raised accuracy substantially.
 *
 * Exposed as `internal` so androidTest can regress "should-match-but-missed"
 * cases.
 */
internal fun findBestMatchEditDistance(context: Context, query: String): RefImageMatch? {
    val app = context.applicationContext
    val root = AppConfig.RefCorpus.ASSETS_ROOT
    val paths = RefCorpusAssetScanner.listTxtFilesUnder(app, root)
    val minScore = AppConfig.RefMatchEmulator.MIN_NORMALIZED_EDIT_SCORE
    val qNorm = normalizeTextForEmuMatch(query.trim())
    if (qNorm.isEmpty()) return null

    var bestPath: String? = null
    var bestScore = -1f
    var bestSubstr = -1f
    var bestEdit = -1f

    for (path in paths) {
        // Primary: path-keyword hit rate (no IO, O(1)).
        val kwScore = pathKeywordMatchScore(path, qNorm)

        // Secondary: content matching (does IO).
        // Strategy: ① substring containment (a query sentence ⊆ candidate, or
        //             a candidate sentence ⊆ a query sentence)
        //           ② sentence-by-sentence edit distance
        // `candidate` is the txt body with "#" lines stripped (may contain both
        // question and answer); if a query sentence appears verbatim in it the
        // topic is a definite hit.
        // Every txt MUST be content-scored: gating the read on bestScore < 0 or
        // kwScore > 0 lets a weak edit score from another file "claim the slot"
        // first, so a file whose body contains the exact question is never even
        // opened (e.g. the "上厕所" directory: no keyword hit, but the body has
        // the original sentence).
        var substrScore = 0f
        var editScore = 0f
        try {
            val raw = app.assets.open(path).bufferedReader(Charsets.UTF_8).use { it.readText() }
            val candidate = normalizeTextForEmuMatch(RefTxtEmbedText.fromRawFileContent(raw))
            if (candidate.isNotEmpty()) {
                val querySentences = splitSentences(qNorm)
                val candidateSentences = splitSentences(candidate)
                // ① Substring: query sentence ⊆ candidate, or candidate sentence
                //   ⊆ query sentence. Very short fragments ("小朋友") occur in
                //   many lessons, so the score is graded by match length instead
                //   of a flat high value.
                substrScore = querySentences.maxOfOrNull { qs ->
                    var s = 0f
                    if (qs.length >= 4 && candidate.contains(qs)) {
                        s = maxOf(s, emulatorSubstringScoreForLength(qs.length))
                    }
                    for (cs in candidateSentences) {
                        if (cs.length >= 6 && qs.contains(cs)) {
                            // Slightly discounted: the weaker direction of containment.
                            s = maxOf(s, emulatorSubstringScoreForLength(cs.length) * 0.92f)
                        }
                    }
                    s
                } ?: 0f
                // ② Edit distance, sentence vs sentence, keeping the best pair.
                editScore = querySentences.maxOfOrNull { qs ->
                    candidateSentences.maxOfOrNull { cs ->
                        EditDistanceSimilarity.normalizedScore(qs, cs)
                    } ?: 0f
                } ?: 0f
            }
        } catch (e: Exception) {
            // Unreadable asset: keep scanning; this file just scores 0 on content.
            Log.w(TAG, "[RefMatchEmu] read fail $path: ${e.message}")
        }

        val score = maxOf(kwScore, substrScore, editScore)
        if (score > 0f) {
            Log.v(TAG, "[RefMatchEmu] candidate score=$score (kw=$kwScore substr=$substrScore edit=$editScore) path=$path")
        }
        if (isBetterEmulatorCandidate(score, substrScore, editScore, bestScore, bestSubstr, bestEdit)) {
            bestScore = score
            bestSubstr = substrScore
            bestEdit = editScore
            bestPath = path
        }
    }

    val txtPath = bestPath ?: run {
        Log.d(TAG, "[RefMatchEmu] 无候选文件 query=${qNorm.take(60)}")
        return null
    }
    if (bestScore < minScore) {
        Log.d(TAG, "[RefMatchEmu] 分数不足 bestScore=$bestScore minScore=$minScore bestPath=$txtPath query=${qNorm.take(60)}")
        return null
    }

    // The illustration is the sibling .png of the matched .txt.
    val pngPath = if (txtPath.endsWith(".txt", ignoreCase = true)) {
        txtPath.dropLast(4) + ".png"
    } else {
        "$txtPath.png"
    }
    // Probe the asset by opening it; no lighter existence check in AssetManager.
    val exists = try {
        context.assets.open(pngPath).close()
        true
    } catch (_: Throwable) {
        false
    }
    if (!exists) return null

    Log.d(TAG, "[RefMatchEmu] best=$txtPath score=$bestScore query=${qNorm.take(30)}")
    return RefImageMatch(
        txtAssetPath = txtPath,
        pngAssetPath = pngPath,
        score = bestScore
    )
}
|
||||
|
||||
/**
 * Keyword hit rate between the query text and topic keywords extracted from
 * the file path. E.g. a directory "一年级上-生活适应-社会生活-元旦" yields the
 * keywords ["一年级上","生活适应","社会生活","元旦"]; the score is
 * hits / keyword count, or 0 when the path has no usable keywords.
 */
private fun pathKeywordMatchScore(path: String, query: String): Float {
    val topics = extractPathTopicKeywords(path)
    if (topics.isEmpty()) return 0f
    var hits = 0
    for (topic in topics) {
        if (queryMatchesPathKeyword(query, topic)) hits++
    }
    return hits.toFloat() / topics.size
}
|
||||
|
||||
/**
 * Maps a few full-width punctuation marks to their half-width forms before
 * matching, so a half-width `:` coming from code or ASR output still matches
 * the full-width `:` used in the corpus and long-sentence substring matching
 * does not fail on punctuation width alone.
 */
private fun normalizeTextForEmuMatch(s: String): String {
    val out = CharArray(s.length)
    for (i in s.indices) {
        out[i] = when (val ch = s[i]) {
            '\uFF1A', '\uFE55' -> ':'
            '\uFF0C' -> ','
            '\uFF01' -> '!'
            '\uFF1F' -> '?'
            '\uFF1B' -> ';'
            else -> ch
        }
    }
    return String(out)
}
|
||||
|
||||
/**
 * Epsilon-tolerant lexicographic comparison: total score first, then the
 * substring score, then the edit score. The tie-breaks stop a common prefix
 * (e.g. "元旦到了,小朋友") that scores equally across several lessons from
 * matching whichever file happens to come first in scan order.
 * Always true when there is no incumbent yet (bestScore < 0).
 */
private fun isBetterEmulatorCandidate(
    score: Float,
    substr: Float,
    edit: Float,
    bestScore: Float,
    bestSubstr: Float,
    bestEdit: Float,
): Boolean {
    val eps = 1e-5f
    if (bestScore < 0f) return true
    return when {
        score > bestScore + eps -> true
        score + eps < bestScore -> false
        substr > bestSubstr + eps -> true
        substr + eps < bestSubstr -> false
        else -> edit > bestEdit + eps
    }
}
|
||||
|
||||
/**
 * Containment test between the query and a path segment. Question text often
 * drops the leading word of a directory name (directory "上厕所" vs a sentence
 * containing only "厕所"), so keywords of length >= 3 also match with the
 * first character removed — dropping the head rather than taking a tail keeps
 * a keyword like "个人生活" from degenerating into the overly generic "生活".
 */
private fun queryMatchesPathKeyword(query: String, kw: String): Boolean =
    query.contains(kw) ||
        (kw.length >= 3 && kw.drop(1).let { it.length >= 2 && query.contains(it) })
|
||||
|
||||
/**
 * Score for a substring hit, graded by match length: long matches are highly
 * distinctive, while very short fragments (e.g. "小朋友") appear in many
 * lessons and would cause cross-lesson mismatches if scored highly.
 * Lengths below 4 score 0.
 */
private fun emulatorSubstringScoreForLength(len: Int): Float =
    when {
        len < 4 -> 0f
        len < 6 -> 0.48f
        len < 8 -> 0.68f
        len < 12 -> 0.82f
        len < 18 -> 0.90f
        else -> 0.95f
    }
|
||||
|
||||
/** Sentence delimiters (CJK + ASCII). Hoisted so the per-file scan loop does not recompile the pattern on every call. */
private val SENTENCE_DELIMITERS = Regex("[,。!?;,!?;\n]+")

/**
 * Splits on Chinese/ASCII sentence delimiters and returns the non-empty
 * sentences (length >= 2 after trimming). Used by the emulator edit-distance
 * scoring so leading small talk in an LLM reply cannot drag down the
 * per-sentence score. Falls back to the whole input when nothing survives
 * the filter, so callers always receive at least one element.
 */
private fun splitSentences(text: String): List<String> {
    val sentences = text.split(SENTENCE_DELIMITERS)
        .map { it.trim() }
        .filter { it.length >= 2 }
    return sentences.ifEmpty { listOf(text) }
}
|
||||
|
||||
/** Digit-run pattern stripped from path segments. Hoisted so the per-path scan loop does not recompile it on every call. */
private val PATH_DIGITS = Regex("\\d+")

/**
 * Topic keywords from the deepest directory of an asset path: the directory
 * name is split on "-", digits are stripped from each piece, and pieces
 * shorter than 2 characters (or duplicates) are dropped.
 * Returns an empty list for paths that have no parent directory.
 */
private fun extractPathTopicKeywords(path: String): List<String> {
    val deepestDir = path.split("/").dropLast(1).lastOrNull() ?: return emptyList()
    return deepestDir.split("-")
        .map { it.replace(PATH_DIGITS, "").trim() }
        .filter { it.length >= 2 }
        .distinct()
}
|
||||
|
||||
private fun dot(a: FloatArray, b: FloatArray): Float {
|
||||
var s = 0f
|
||||
for (i in a.indices) s += a[i] * b[i]
|
||||
|
||||
34
app/src/main/java/com/digitalperson/env/RuntimeEnv.kt
vendored
Normal file
34
app/src/main/java/com/digitalperson/env/RuntimeEnv.kt
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
package com.digitalperson.env
|
||||
|
||||
import android.os.Build
|
||||
|
||||
/**
 * Heuristic runtime-environment probes based on android.os.Build properties.
 */
object RuntimeEnv {
    /**
     * True when the process appears to run on an Android emulator
     * (AVD, Genymotion, generic SDK images).
     * Requires at least two independent Build signals so one odd property on
     * an unusual vendor ROM cannot flip a real device into emulator mode.
     */
    fun isEmulator(): Boolean {
        val fingerprint = Build.FINGERPRINT.orEmpty()
        val model = Build.MODEL.orEmpty()
        val brand = Build.BRAND.orEmpty()
        val device = Build.DEVICE.orEmpty()
        val product = Build.PRODUCT.orEmpty()
        val hardware = Build.HARDWARE.orEmpty()
        val manufacturer = Build.MANUFACTURER.orEmpty()

        val signals = listOf(
            fingerprint.startsWith("generic", ignoreCase = true),
            fingerprint.contains("unknown", ignoreCase = true),
            model.contains("google_sdk", ignoreCase = true),
            model.contains("emulator", ignoreCase = true),
            model.contains("android sdk built for", ignoreCase = true),
            manufacturer.contains("genymotion", ignoreCase = true),
            brand.startsWith("generic", ignoreCase = true) && device.startsWith("generic", ignoreCase = true),
            product.contains("sdk", ignoreCase = true),
            product.contains("emulator", ignoreCase = true),
            hardware.contains("goldfish", ignoreCase = true),
            hardware.contains("ranchu", ignoreCase = true),
        )
        return signals.count { it } >= 2
    }
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import android.graphics.Bitmap
|
||||
import android.util.Log
|
||||
import com.digitalperson.config.AppConfig
|
||||
import com.digitalperson.engine.RetinaFaceEngineRKNN
|
||||
import com.digitalperson.env.RuntimeEnv
|
||||
import java.util.ArrayDeque
|
||||
import java.util.concurrent.atomic.AtomicBoolean
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
@@ -35,6 +36,12 @@ class FaceDetectionPipeline(
|
||||
private val onResult: (FaceDetectionResult) -> Unit,
|
||||
private val onPresenceChanged: (present: Boolean, isFrontal: Boolean, faceIdentityId: String?, recognizedName: String?) -> Unit,
|
||||
) {
|
||||
companion object {
|
||||
/** 模拟器固定人脸 ID,对应 UserMemory 中的 userId */
|
||||
const val EMULATOR_FACE_ID = "face_emulator"
|
||||
/** 模拟器固定显示名,直接作为 recognizedName 传给 coordinator */
|
||||
const val EMULATOR_FACE_NAME = "小黑"
|
||||
}
|
||||
private val appContext = context.applicationContext
|
||||
private val engine = RetinaFaceEngineRKNN()
|
||||
private val recognizer = FaceRecognizer(appContext)
|
||||
@@ -50,6 +57,11 @@ class FaceDetectionPipeline(
|
||||
private val fusionQualities = ArrayDeque<Float>()
|
||||
|
||||
fun initialize(): Boolean {
|
||||
if (RuntimeEnv.isEmulator()) {
|
||||
Log.i(AppConfig.TAG, "[Face] 模拟器模式:跳过 RKNN 初始化,固定返回身份「$EMULATOR_FACE_NAME」")
|
||||
initialized.set(true)
|
||||
return true
|
||||
}
|
||||
val detectorOk = engine.initialize(appContext)
|
||||
val recognizerOk = recognizer.initialize()
|
||||
val ok = detectorOk && recognizerOk
|
||||
@@ -68,6 +80,31 @@ class FaceDetectionPipeline(
|
||||
return
|
||||
}
|
||||
|
||||
// 模拟器:跳过 RKNN 检测,固定上报一张居中正脸
|
||||
if (RuntimeEnv.isEmulator()) {
|
||||
scope.launch {
|
||||
try {
|
||||
val w = bitmap.width
|
||||
val h = bitmap.height
|
||||
val fakeBox = FaceBox(
|
||||
left = w * 0.25f,
|
||||
top = h * 0.15f,
|
||||
right = w * 0.75f,
|
||||
bottom = h * 0.85f,
|
||||
score = 0.99f,
|
||||
)
|
||||
withContext(Dispatchers.Main) {
|
||||
onPresenceChanged(true, true, EMULATOR_FACE_ID, EMULATOR_FACE_NAME)
|
||||
onResult(FaceDetectionResult(w, h, listOf(fakeBox)))
|
||||
}
|
||||
} finally {
|
||||
bitmap.recycle()
|
||||
frameInFlight.set(false)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
scope.launch {
|
||||
try {
|
||||
val width = bitmap.width
|
||||
|
||||
@@ -148,6 +148,8 @@ abstract class BaseDigitalPersonCoordinator(
|
||||
* (i.e. after a cloud LLM response), NOT after greeting / farewell / proactive TTS.
|
||||
*/
|
||||
fun onTtsPlaybackCompleted() {
|
||||
// Let the controller advance its own timers (greeting/proactive/dlg all count as assistant speaking).
|
||||
controller.onAssistantTtsPlaybackCompleted()
|
||||
if (pendingDialogueFinish) {
|
||||
pendingDialogueFinish = false
|
||||
controller.onDialogueResponseFinished()
|
||||
|
||||
@@ -64,6 +64,10 @@ class DigitalHumanInteractionController(
|
||||
private var memoryJob: Job? = null
|
||||
private var farewellJob: Job? = null
|
||||
|
||||
// 让超时/间隔从 TTS 播放完成后开始计时,而不是从 speak() 调用时开始
|
||||
private var pendingWaitReplyTimeoutAfterTts: Boolean = false
|
||||
private var pendingProactiveFollowupAfterTts: Boolean = false
|
||||
|
||||
fun start() {
|
||||
transitionTo(InteractionState.IDLE)
|
||||
scheduleMemoryMode()
|
||||
@@ -204,7 +208,7 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
|
||||
return
|
||||
}
|
||||
transitionTo(InteractionState.WAITING_REPLY)
|
||||
scheduleWaitingReplyTimeout()
|
||||
scheduleWaitingReplyTimeoutAfterTts()
|
||||
}
|
||||
|
||||
private fun enterGreeting() {
|
||||
@@ -224,7 +228,7 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
|
||||
handler.addToChatHistory("assistant", greeting)
|
||||
handler.addAssistantMessageToCloudHistory(greeting)
|
||||
transitionTo(InteractionState.WAITING_REPLY)
|
||||
scheduleWaitingReplyTimeout()
|
||||
scheduleWaitingReplyTimeoutAfterTts()
|
||||
} else {
|
||||
useDefaultGreeting()
|
||||
}
|
||||
@@ -243,7 +247,11 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
|
||||
handler.addAssistantMessageToCloudHistory(greeting)
|
||||
|
||||
transitionTo(InteractionState.WAITING_REPLY)
|
||||
scheduleWaitingReplyTimeout()
|
||||
scheduleWaitingReplyTimeoutAfterTts()
|
||||
}
|
||||
|
||||
/**
 * Arms the waiting-reply timeout so it starts counting only after the
 * current TTS playback finishes (consumed in onAssistantTtsPlaybackCompleted),
 * instead of from the moment speak() was invoked.
 */
private fun scheduleWaitingReplyTimeoutAfterTts() {
    pendingWaitReplyTimeoutAfterTts = true
}
|
||||
|
||||
private fun scheduleWaitingReplyTimeout() {
|
||||
@@ -282,21 +290,34 @@ fun onFacePresenceChanged(present: Boolean, isFrontal: Boolean = true) { // 添
|
||||
// 触发题目生成检查
|
||||
handler.onQuestionAsked(currentFaceId ?: "guest")
|
||||
|
||||
proactiveJob = scope.launch {
|
||||
hasPendingUserReply = false
|
||||
delay(20_000)
|
||||
if (state != InteractionState.PROACTIVE || hasPendingUserReply) return@launch
|
||||
if (!facePresent) {
|
||||
enterFarewell()
|
||||
return@launch
|
||||
}
|
||||
proactiveRound += 1
|
||||
if (proactiveRound < 3) {
|
||||
askProactiveTopic()
|
||||
} else {
|
||||
transitionTo(InteractionState.WAITING_REPLY)
|
||||
// handler.playMotion("haru_g_m17.motion3.json")
|
||||
scheduleWaitingReplyTimeout()
|
||||
// 不立刻开始 20s 计时;等 TTS 播放完再开始计时,避免“刚说完几秒就又问”
|
||||
pendingProactiveFollowupAfterTts = true
|
||||
}
|
||||
|
||||
/**
 * Called by the Activity once the current TTS utterance has fully finished
 * playing (greeting, proactive question, and dialogue replies alike).
 *
 * Starts the timers that were deferred while speech was in progress: the
 * waiting-reply timeout and the next proactive follow-up round, so the user
 * is never "timed out" while the assistant is still talking.
 */
fun onAssistantTtsPlaybackCompleted() {
    // Waiting-reply timeout was armed by scheduleWaitingReplyTimeoutAfterTts();
    // only start counting now that playback has ended.
    if (pendingWaitReplyTimeoutAfterTts && state == InteractionState.WAITING_REPLY) {
        pendingWaitReplyTimeoutAfterTts = false
        scheduleWaitingReplyTimeout()
    }
    if (pendingProactiveFollowupAfterTts && state == InteractionState.PROACTIVE) {
        pendingProactiveFollowupAfterTts = false
        // Replace any previously scheduled follow-up with a fresh 20s window.
        proactiveJob?.cancel()
        proactiveJob = scope.launch {
            hasPendingUserReply = false
            delay(20_000)
            // A reply arrived or the state moved on — drop this follow-up.
            if (state != InteractionState.PROACTIVE || hasPendingUserReply) return@launch
            if (!facePresent) {
                // The user left during the pause: say goodbye instead of asking again.
                enterFarewell()
                return@launch
            }
            proactiveRound += 1
            if (proactiveRound < 3) {
                askProactiveTopic()
            } else {
                // After the third proactive round, stop asking and passively
                // wait for a reply (timeout again deferred until TTS ends).
                transitionTo(InteractionState.WAITING_REPLY)
                scheduleWaitingReplyTimeoutAfterTts()
            }
        }
    }
}
|
||||
|
||||
@@ -27,6 +27,11 @@ class TtsController(private val context: Context) {
|
||||
|
||||
private var callback: TtsCallback? = null
|
||||
|
||||
// 防止 WebSocket 重连或多路回调导致同一段文案短时间内重复入队、重复播报
|
||||
@Volatile private var lastEnqueuedText: String? = null
|
||||
@Volatile private var lastEnqueuedAtMs: Long = 0L
|
||||
private val dedupeWindowMs = 2500L
|
||||
|
||||
fun setCallback(callback: TtsCallback) {
|
||||
this.callback = callback
|
||||
bindCallbacksIfReady()
|
||||
@@ -147,6 +152,14 @@ class TtsController(private val context: Context) {
|
||||
fun enqueueSegment(seg: String) {
|
||||
val cleaned = seg.replace(Regex("\\[.*?\\]"), "").trim()
|
||||
if (cleaned.isEmpty()) return
|
||||
val now = System.currentTimeMillis()
|
||||
val lastText = lastEnqueuedText
|
||||
if (lastText != null && lastText == cleaned && (now - lastEnqueuedAtMs) <= dedupeWindowMs) {
|
||||
Log.w(TAG, "Skip duplicate TTS segment within ${dedupeWindowMs}ms: ${cleaned.take(60)}")
|
||||
return
|
||||
}
|
||||
lastEnqueuedText = cleaned
|
||||
lastEnqueuedAtMs = now
|
||||
if (useQCloudTts) {
|
||||
qcloudTts?.enqueueSegment(cleaned)
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user