face recognition refine

This commit is contained in:
gcw_4spBpAfv
2026-03-10 15:40:05 +08:00
parent d5767156b9
commit ec1f7d2e72
1579 changed files with 4286 additions and 319 deletions

View File

@@ -6,7 +6,10 @@ import android.graphics.Bitmap
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.appcompat.app.AlertDialog
import androidx.camera.core.CameraSelector
import com.digitalperson.engine.RetinaFaceEngineRKNN
import com.digitalperson.face.FaceBox
import androidx.camera.core.ImageAnalysis
import androidx.camera.core.ImageProxy
import androidx.camera.core.Preview
@@ -28,13 +31,19 @@ import com.digitalperson.metrics.TraceManager
import com.digitalperson.metrics.TraceSession
import com.digitalperson.tts.TtsController
import com.digitalperson.interaction.DigitalHumanInteractionController
import com.digitalperson.data.DatabaseInitializer
import com.digitalperson.interaction.InteractionActionHandler
import com.digitalperson.interaction.InteractionState
import com.digitalperson.interaction.UserMemoryStore
import com.digitalperson.llm.LLMManager
import com.digitalperson.llm.LLMManagerCallback
import com.digitalperson.util.FileHelper
import com.digitalperson.data.AppDatabase
import com.digitalperson.data.entity.ChatMessage
import com.digitalperson.interaction.ConversationBufferMemory
import com.digitalperson.interaction.ConversationSummaryMemory
import java.io.File
import android.graphics.BitmapFactory
import org.json.JSONObject
import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors
@@ -92,6 +101,8 @@ class Live2DChatActivity : AppCompatActivity() {
private lateinit var faceDetectionPipeline: FaceDetectionPipeline
private lateinit var interactionController: DigitalHumanInteractionController
private lateinit var userMemoryStore: UserMemoryStore
private lateinit var conversationBufferMemory: ConversationBufferMemory
private lateinit var conversationSummaryMemory: ConversationSummaryMemory
private var facePipelineReady: Boolean = false
private var cameraProvider: ProcessCameraProvider? = null
private lateinit var cameraAnalyzerExecutor: ExecutorService
@@ -102,6 +113,8 @@ class Live2DChatActivity : AppCompatActivity() {
private val recentConversationLines = ArrayList<String>()
private var recentConversationDirty: Boolean = false
private var lastFacePresent: Boolean = false
private var lastFaceIdentityId: String? = null
private var lastFaceRecognizedName: String? = null
override fun onRequestPermissionsResult(
requestCode: Int,
@@ -152,12 +165,20 @@ class Live2DChatActivity : AppCompatActivity() {
speakingPlayerViewId = 0,
live2dViewId = R.id.live2d_view
)
cameraPreviewView = findViewById(R.id.camera_preview)
cameraPreviewView.implementationMode = PreviewView.ImplementationMode.COMPATIBLE
faceOverlayView = findViewById(R.id.face_overlay)
cameraAnalyzerExecutor = Executors.newSingleThreadExecutor()
// 初始化数据库
val databaseInitializer = DatabaseInitializer(applicationContext)
databaseInitializer.initialize()
userMemoryStore = UserMemoryStore(applicationContext)
val database = AppDatabase.getInstance(applicationContext)
conversationBufferMemory = ConversationBufferMemory(database)
conversationSummaryMemory = ConversationSummaryMemory(database, llmManager)
interactionController = DigitalHumanInteractionController(
scope = ioScope,
handler = object : InteractionActionHandler {
@@ -165,8 +186,10 @@ class Live2DChatActivity : AppCompatActivity() {
runOnUiThread {
uiManager.appendToUi("\n[State] $state\n")
}
Log.i(TAG_ACTIVITY, "\n[State] $state\n")
if (state == InteractionState.IDLE) {
analyzeUserProfileInIdleIfNeeded()
Log.i(TAG_ACTIVITY, "[analyze] done")
}
}
@@ -205,6 +228,8 @@ class Live2DChatActivity : AppCompatActivity() {
// Returns whatever userMemoryStore.getLatestThought() yields (may be null).
override fun loadLatestThought(): String? = userMemoryStore.getLatestThought()
// Forwards timeRangeMs unchanged to userMemoryStore.getRecentThoughts() and returns its result.
override fun loadRecentThoughts(timeRangeMs: Long): List<String> = userMemoryStore.getRecentThoughts(timeRangeMs)
// Records one chat line by forwarding role and content to the activity's appendConversationLine().
override fun addToChatHistory(role: String, content: String) {
appendConversationLine(role, content)
}
@@ -212,7 +237,14 @@ class Live2DChatActivity : AppCompatActivity() {
// Forwards the assistant's reply text to cloudApiManager.addAssistantMessage().
override fun addAssistantMessageToCloudHistory(content: String) {
cloudApiManager.addAssistantMessage(content)
}
}
// Asks the memory store for a question not yet answered by this faceId;
// falls back to a fixed default question when the store yields nothing.
override fun getRandomQuestion(faceId: String): String =
    userMemoryStore.getRandomUnansweredQuestion(faceId)?.content ?: "你喜欢什么颜色呀?"
},
context = applicationContext
)
faceDetectionPipeline = FaceDetectionPipeline(
context = applicationContext,
@@ -220,10 +252,21 @@ class Live2DChatActivity : AppCompatActivity() {
faceOverlayView.updateResult(result)
},
onPresenceChanged = { present, faceIdentityId, recognizedName ->
if (present == lastFacePresent) return@FaceDetectionPipeline
lastFacePresent = present
Log.d(TAG_ACTIVITY, "present=$present, faceIdentityId=$faceIdentityId, recognized=$recognizedName")
interactionController.onFacePresenceChanged(present, faceIdentityId, recognizedName)
if (present != lastFacePresent) {
lastFacePresent = present
Log.d(TAG_ACTIVITY, "presence changed: present=$present")
interactionController.onFacePresenceChanged(present)
if (!present) {
lastFaceIdentityId = null
lastFaceRecognizedName = null
}
}
if (present && (faceIdentityId != lastFaceIdentityId || recognizedName != lastFaceRecognizedName)) {
lastFaceIdentityId = faceIdentityId
lastFaceRecognizedName = recognizedName
Log.d(TAG_ACTIVITY, "identity update: faceIdentityId=$faceIdentityId, recognized=$recognizedName")
interactionController.onFaceIdentityUpdated(faceIdentityId, recognizedName)
}
}
)
@@ -261,7 +304,7 @@ class Live2DChatActivity : AppCompatActivity() {
try {
val ttsModeSwitch = findViewById<android.widget.Switch>(R.id.tts_mode_switch)
ttsModeSwitch.isChecked = false // 默认使用本地TTS
ttsModeSwitch.isChecked = true // 默认使用本地TTS
ttsModeSwitch.setOnCheckedChangeListener { _, isChecked ->
ttsController.setUseQCloudTts(isChecked)
uiManager.showToast("TTS模式已切换到${if (isChecked) "腾讯云" else "本地"}")
@@ -297,10 +340,6 @@ class Live2DChatActivity : AppCompatActivity() {
vadManager = VadManager(this)
vadManager.setCallback(createVadCallback())
// Initialize the local LLM (used for memory state)
initLLM()
interactionController.start()
// 检查是否需要下载模型
if (!FileHelper.isLocalLLMAvailable(this)) {
// 显示下载进度对话框
@@ -332,19 +371,31 @@ class Live2DChatActivity : AppCompatActivity() {
Log.i(AppConfig.TAG, "Local LLM is available, enabling local LLM switch")
// 显示本地 LLM 开关,并同步状态
uiManager.showLLMSwitch(false)
// 初始化本地 LLM
initLLM()
// 重新初始化 ConversationSummaryMemory
conversationSummaryMemory = ConversationSummaryMemory(database, llmManager)
// 启动交互控制器
interactionController.start()
// 下载完成后初始化其他组件
initializeOtherComponents()
}
} else {
Log.e(AppConfig.TAG, "Failed to download model files: $message")
uiManager.showToast("模型下载失败: $message", Toast.LENGTH_LONG)
// 显示错误弹窗,阻止应用继续运行
showModelDownloadErrorDialog(message)
}
// 下载完成后初始化其他组件
initializeOtherComponents()
}
}
)
} else {
// 模型已存在,直接初始化其他组件
// 模型已存在,初始化本地 LLM
initLLM()
// 重新初始化 ConversationSummaryMemory
conversationSummaryMemory = ConversationSummaryMemory(database, llmManager)
// 启动交互控制器
interactionController.start()
// 直接初始化其他组件
initializeOtherComponents()
// 显示本地 LLM 开关,并同步状态
uiManager.showLLMSwitch(false)
@@ -404,6 +455,304 @@ class Live2DChatActivity : AppCompatActivity() {
ioScope.launch {
asrManager.runAsrWorker()
}
// 测试人脸识别(延迟执行,确保所有组件初始化完成)
// ioScope.launch {
// kotlinx.coroutines.delay(10000) // wait 10 seconds so all components finish initializing
// runOnUiThread {
// runFaceRecognitionTest()
// }
// }
}
/**
 * Shows a non-cancelable error dialog after the local LLM model download has failed.
 *
 * The dialog offers a single button which finishes this activity.
 *
 * The dialog is built on the UI thread via [runOnUiThread], so the function may be
 * called from a background callback. It is skipped when the activity is already
 * finishing or destroyed, because showing a dialog on a dead window throws
 * `WindowManager.BadTokenException`.
 *
 * @param errorMessage failure description embedded into the dialog message.
 */
private fun showModelDownloadErrorDialog(errorMessage: String) {
    runOnUiThread {
        // The download callback can arrive after the user has already left the screen.
        if (isFinishing || isDestroyed) return@runOnUiThread
        AlertDialog.Builder(this)
            .setTitle("模型下载失败")
            .setMessage("本地 LLM 模型下载失败,应用无法正常运行。\n\n错误信息:$errorMessage\n\n请检查网络连接后重启应用。")
            .setCancelable(false)
            .setPositiveButton("退出应用") { _, _ ->
                finish()
            }
            .show()
    }
}
/**
 * Runs a pairwise face-similarity test over images listed by a test server.
 *
 * Steps, all executed inside a coroutine launched on [ioScope]:
 * 1. Fetch the image URL list with [fetchImageListFromServer].
 * 2. Download every image with [downloadImage]; failed downloads are skipped.
 * 3. Run [detectFace] once per downloaded image.
 * 4. For every unordered pair of images, log and display two scores: the value
 *    returned by the recognizer's `testSimilarityBetween`, and a cosine similarity
 *    computed here from the recognizer's `extractEmbedding` output.
 *
 * Progress and results are written to the log and appended to the UI. Any exception
 * is caught, logged and reported in the UI; nothing is thrown to the caller.
 */
private fun runFaceRecognitionTest() {
    Log.i(TAG_ACTIVITY, "Starting face recognition test...")
    uiManager.appendToUi("\n[测试] 开始人脸识别相似度测试...\n")

    ioScope.launch {
        try {
            val imageUrls = fetchImageListFromServer("http://192.168.1.19:5000/api/face_test_images")
            if (imageUrls.isEmpty()) {
                Log.e(AppConfig.TAG, "No images found in server directory")
                runOnUiThread {
                    uiManager.appendToUi("\n[测试] 服务器目录中没有找到图片文件\n")
                }
                return@launch
            }
            Log.i(AppConfig.TAG, "[测试]Found ${imageUrls.size} images: $imageUrls")
            runOnUiThread {
                uiManager.appendToUi("\n[测试] 发现 ${imageUrls.size} 张测试图片\n")
            }

            // Download all images, keeping the file name next to each bitmap.
            val bitmaps = mutableListOf<Pair<String, Bitmap>>()
            for (url in imageUrls) {
                Log.d(AppConfig.TAG, "[测试]Downloading test image: $url")
                val bitmap = downloadImage(url)
                if (bitmap != null) {
                    val fileName = url.substringAfterLast("/")
                    bitmaps.add(fileName to bitmap)
                    Log.d(AppConfig.TAG, "[测试]Downloaded image $fileName successfully")
                } else {
                    Log.e(AppConfig.TAG, "[测试]Failed to download image: $url")
                }
            }
            if (bitmaps.size < 2) {
                Log.e(AppConfig.TAG, "[测试]Not enough test images downloaded")
                runOnUiThread {
                    uiManager.appendToUi("\n[测试] 测试图片下载失败,无法进行测试\n")
                }
                return@launch
            }

            // Detect each image's face exactly once. detectFace() builds and initializes
            // a detection engine on every call, so repeating it for every pair would
            // redo the same work many times over.
            val faces = bitmaps.map { (_, bitmap) -> detectFace(bitmap) }
            val recognizer = faceDetectionPipeline.getRecognizer()

            // Compare every unordered pair of images.
            Log.i(AppConfig.TAG, "[测试]Starting similarity comparison for ${bitmaps.size} images...")
            for (i in bitmaps.indices) {
                for (j in i + 1 until bitmaps.size) {
                    val (fileName1, bitmap1) = bitmaps[i]
                    val (fileName2, bitmap2) = bitmaps[j]
                    Log.d(AppConfig.TAG, "[测试]Comparing $fileName1 with $fileName2")

                    val face1 = faces[i]
                    val face2 = faces[j]
                    Log.d(AppConfig.TAG, "[测试]Face detection result: face1=$face1, face2=$face2")

                    if (face1 != null && face2 != null) {
                        Log.d(AppConfig.TAG, "[测试]Detected faces, calculating similarity...")
                        val similarity = recognizer?.testSimilarityBetween(
                            bitmap1, face1, bitmap2, face2
                        )
                        // Cosine similarity of the two embeddings; -1 marks "not computable".
                        val similarityRaw = recognizer?.let { rec ->
                            val emb1 = rec.extractEmbedding(bitmap1, face1)
                            val emb2 = rec.extractEmbedding(bitmap2, face2)
                            // The size check keeps emb2[k] in bounds for every k in emb1.indices.
                            if (emb1.isNotEmpty() && emb2.isNotEmpty() && emb1.size == emb2.size) {
                                var dot = 0f
                                var n1 = 0f
                                var n2 = 0f
                                for (k in emb1.indices) {
                                    dot += emb1[k] * emb2[k]
                                    n1 += emb1[k] * emb1[k]
                                    n2 += emb2[k] * emb2[k]
                                }
                                // Guard against dividing by a (near-)zero norm.
                                if (n1 > 1e-12f && n2 > 1e-12f) {
                                    (dot / (kotlin.math.sqrt(n1) * kotlin.math.sqrt(n2))).coerceIn(-1f, 1f)
                                } else -1f
                            } else -1f
                        }
                        Log.d(AppConfig.TAG, "[测试]Similarity result: $similarity")
                        if (similarity != null && similarity >= 0) {
                            val message = "[测试] 图片 $fileName1 和 $fileName2 的相似度: $similarity"
                            val compareMessage = "[测试] 对齐后=$similarity, 原始裁剪=$similarityRaw"
                            Log.i(AppConfig.TAG, message)
                            Log.i(AppConfig.TAG, compareMessage)
                            runOnUiThread {
                                uiManager.appendToUi("\n$message\n")
                                uiManager.appendToUi("$compareMessage\n")
                            }
                        } else {
                            Log.w(AppConfig.TAG, "[测试]Failed to calculate similarity: $similarity")
                            runOnUiThread {
                                uiManager.appendToUi("\n[测试] 计算相似度失败: $similarity\n")
                            }
                        }
                    } else {
                        val message = "[测试] 无法检测到人脸: $fileName1 和 $fileName2"
                        Log.w(AppConfig.TAG, message)
                        runOnUiThread {
                            uiManager.appendToUi("\n$message\n")
                        }
                    }
                }
            }

            Log.i(AppConfig.TAG, "[测试]Face recognition test completed")
            runOnUiThread {
                uiManager.appendToUi("\n[测试] 人脸识别相似度测试完成\n")
            }
        } catch (e: Exception) {
            Log.e(AppConfig.TAG, "Error during face recognition test: ${e.message}", e)
            runOnUiThread {
                uiManager.appendToUi("\n[测试] 测试过程中发生错误: ${e.message}\n")
            }
        }
    }
}
/**
 * Asks the test server which face-test images it offers and returns their URLs.
 *
 * Performs a blocking HTTP GET on [apiUrl] and reads the JSON body's "images" array
 * of file names. Each name is appended to a base URL obtained by replacing
 * "/api/face_test_images" in [apiUrl] with "/shared_files/face_test".
 *
 * @param apiUrl address of the image-list endpoint.
 * @return the full image URLs; an empty list when the response code is not 200
 *   or when any exception occurs (the exception is logged, not rethrown).
 */
private fun fetchImageListFromServer(apiUrl: String): List<String> =
    try {
        val http = (java.net.URL(apiUrl).openConnection() as java.net.HttpURLConnection).apply {
            requestMethod = "GET"
            connectTimeout = 10000
            readTimeout = 10000
            setRequestProperty("Accept", "application/json")
        }
        try {
            val status = http.responseCode
            if (status != 200) {
                Log.e(AppConfig.TAG, "API request failed with code: $status")
                emptyList()
            } else {
                val body = http.inputStream.use { stream ->
                    stream.bufferedReader().use { reader -> reader.readText() }
                }
                Log.d(AppConfig.TAG, "API response: $body")

                // The payload carries bare file names under the "images" key.
                val names = org.json.JSONObject(body).getJSONArray("images")
                // Images are served from a sibling path of the API endpoint.
                val baseUrl = apiUrl.replace("/api/face_test_images", "/shared_files/face_test")
                (0 until names.length()).map { index ->
                    val fullUrl = "$baseUrl/${names.getString(index)}"
                    Log.d(AppConfig.TAG, "Added image URL: $fullUrl")
                    fullUrl
                }
            }
        } finally {
            http.disconnect()
        }
    } catch (e: Exception) {
        Log.e(AppConfig.TAG, "Failed to fetch image list: ${e.message}", e)
        // Any failure is reported to the caller as "no images".
        emptyList()
    }
/**
 * Checks whether [url] answers an HTTP HEAD request with status 200.
 *
 * This is a blocking network call with 3-second connect and read timeouts.
 * The connection is always disconnected, including when the request itself fails.
 *
 * @param url address to probe.
 * @return `true` only for a 200 response; `false` for any other status or when
 *   any exception occurs (malformed URL, non-HTTP URL, I/O error).
 */
private fun checkUrlExists(url: String): Boolean {
    return try {
        val connection = java.net.URL(url).openConnection() as java.net.HttpURLConnection
        try {
            connection.requestMethod = "HEAD"
            connection.connectTimeout = 3000
            connection.readTimeout = 3000
            connection.responseCode == 200
        } finally {
            // Runs even when responseCode throws, so the connection is not leaked.
            connection.disconnect()
        }
    } catch (e: Exception) {
        false
    }
}
/**
 * Downloads an image from [url] and decodes it into a [Bitmap].
 *
 * The data is first written to a uniquely named temporary file in the cache
 * directory via `FileHelper.downloadTestImage`, then decoded with
 * [BitmapFactory.decodeFile]. The temporary file is removed on every path,
 * including failed downloads and exceptions.
 *
 * @param url address of the image to download.
 * @return the decoded bitmap, or `null` when the download fails, the file cannot
 *   be decoded, or an exception occurs (the exception is logged, not rethrown).
 */
private fun downloadImage(url: String): Bitmap? {
    val tempFile = File(cacheDir, "temp_test_image_${System.currentTimeMillis()}.jpg")
    return try {
        val success = FileHelper.downloadTestImage(url, tempFile)
        if (success && tempFile.exists()) {
            BitmapFactory.decodeFile(tempFile.absolutePath)
        } else {
            Log.e(AppConfig.TAG, "Failed to download image: $url")
            null
        }
    } catch (e: Exception) {
        Log.e(AppConfig.TAG, "Failed to download image: ${e.message}", e)
        null
    } finally {
        // Also covers a partially written file left behind by a failed download.
        tempFile.delete()
    }
}
/**
 * Detects the first face in [bitmap] using a freshly created [RetinaFaceEngineRKNN].
 *
 * The engine output is read as a flat array of per-face records. The record length
 * is taken to be 15 when the array size is a multiple of 15, otherwise 5 when it is
 * a multiple of 5. Within a record, indices 0..4 are used as left, top, right,
 * bottom and score; in the 15-value layout, indices 5..14 are used as landmarks.
 * Landmarks are kept only when all ten values are non-negative.
 *
 * Only the first record is converted; further detections are ignored.
 * The engine is released after detection even when detection throws.
 *
 * @param bitmap image to analyze.
 * @return the first detected face, or `null` when the engine fails to initialize,
 *   nothing is detected, the output size fits neither layout, or an exception
 *   occurs (the exception is logged, not rethrown).
 */
private fun detectFace(bitmap: Bitmap): FaceBox? {
    Log.d(AppConfig.TAG, "[测试]Detecting face in bitmap: ${bitmap.width}x${bitmap.height}")
    return try {
        val engine = RetinaFaceEngineRKNN()
        Log.d(AppConfig.TAG, "[测试]Initializing RetinaFace engine...")
        if (!engine.initialize(applicationContext)) {
            Log.e(AppConfig.TAG, "[测试]Failed to initialize RetinaFace engine")
            return null
        }
        Log.d(AppConfig.TAG, "[测试]RetinaFace engine initialized successfully")

        val raw = try {
            engine.detect(bitmap)
        } finally {
            // Release the initialized engine even if detect() throws.
            engine.release()
        }
        Log.d(AppConfig.TAG, "[测试]Face detection result: ${raw.joinToString(", ")}")
        if (raw.isEmpty()) {
            Log.w(AppConfig.TAG, "[测试]No faces detected in bitmap")
            return null
        }

        val stride = when {
            raw.size % 15 == 0 -> 15
            raw.size % 5 == 0 -> 5
            else -> 0
        }
        Log.d(AppConfig.TAG, "[测试]Stride: $stride, raw size: ${raw.size}")
        if (stride == 0) return null

        // raw is non-empty and a multiple of stride, so there is at least one record.
        val faceCount = raw.size / stride
        Log.d(AppConfig.TAG, "[测试]Detected $faceCount faces")

        // Only the first record (offset 0) is used.
        val offset = 0
        val lm = if (stride >= 15) raw.copyOfRange(offset + 5, offset + 15) else null
        val hasLm = lm?.all { it >= 0f } == true
        val faceBox = FaceBox(
            left = raw[offset],
            top = raw[offset + 1],
            right = raw[offset + 2],
            bottom = raw[offset + 3],
            score = raw[offset + 4],
            hasLandmarks = hasLm,
            landmarks = if (hasLm) lm else null
        )
        Log.d(AppConfig.TAG, "[测试]Created face box: $faceBox")
        faceBox
    } catch (e: Exception) {
        Log.e(AppConfig.TAG, "[测试]Failed to detect face: ${e.message}", e)
        null
    }
}
private fun createAsrCallback() = object : AsrManager.AsrCallback {
@@ -557,7 +906,6 @@ class Live2DChatActivity : AppCompatActivity() {
try { cameraAnalyzerExecutor.shutdown() } catch (_: Throwable) {}
try { ttsController.release() } catch (_: Throwable) {}
try { llmManager?.destroy() } catch (_: Throwable) {}
try { userMemoryStore.close() } catch (_: Throwable) {}
try { uiManager.release() } catch (_: Throwable) {}
try { audioProcessor.release() } catch (_: Throwable) {}
}
@@ -686,6 +1034,9 @@ class Live2DChatActivity : AppCompatActivity() {
uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
}
// 通知状态机用户开始说话,立即进入对话状态
interactionController.onUserStartSpeaking()
if (!audioProcessor.initMicrophone(micPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
uiManager.showToast("麦克风初始化失败/无权限")
return
@@ -844,6 +1195,14 @@ class Live2DChatActivity : AppCompatActivity() {
recentConversationLines.removeAt(0)
}
recentConversationDirty = true
// 同时添加到对话记忆中
val memoryRole = if (role == "用户") "user" else "assistant"
conversationBufferMemory.addMessage(activeUserId, memoryRole, text.trim())
// 定期保存到数据库
if (recentConversationLines.size % 5 == 0) {
conversationBufferMemory.saveToDatabase(activeUserId)
}
}
private fun buildCloudPromptWithUserProfile(userText: String): String {
@@ -854,6 +1213,13 @@ class Live2DChatActivity : AppCompatActivity() {
profile.gender?.takeIf { it.isNotBlank() }?.let { profileParts.add("性别:$it") }
profile.hobbies?.takeIf { it.isNotBlank() }?.let { profileParts.add("爱好:$it") }
profile.profileSummary?.takeIf { it.isNotBlank() }?.let { profileParts.add("画像:$it") }
// 添加对话摘要
val conversationSummary = conversationSummaryMemory.getSummary(activeUserId)
if (conversationSummary.isNotBlank()) {
profileParts.add("对话摘要:$conversationSummary")
}
if (profileParts.isEmpty()) return userText
return buildString {
append("[用户画像]\n")
@@ -864,9 +1230,26 @@ class Live2DChatActivity : AppCompatActivity() {
}
private fun analyzeUserProfileInIdleIfNeeded() {
if (!recentConversationDirty || !activeUserId.startsWith("face_")) return
if (recentConversationLines.isEmpty()) return
val dialogue = recentConversationLines.joinToString("\n")
if (!activeUserId.startsWith("face_")) {
Log.d(AppConfig.TAG, "faceID is not face_")
return
}
// 使用 conversationBufferMemory 获取对话消息
val messages = conversationBufferMemory.getMessages(activeUserId)
Log.d(AppConfig.TAG, "msg is empty? ${messages.isEmpty()}")
val hasUserMessages = messages.any { it.role == "user" }
Log.d(AppConfig.TAG, "msg has user messages? $hasUserMessages")
if (messages.isEmpty() || !hasUserMessages) return
// 生成对话摘要
conversationSummaryMemory.generateSummary(activeUserId, messages) { summary ->
Log.d(AppConfig.TAG, "Generated conversation summary for $activeUserId: $summary")
}
// 使用 conversationBufferMemory 的对话记录提取用户信息
val dialogue = messages.joinToString("\n") { "${it.role}: ${it.content}" }
requestLocalProfileExtraction(dialogue) { raw ->
try {
val json = parseFirstJsonObject(raw)
@@ -879,7 +1262,10 @@ class Live2DChatActivity : AppCompatActivity() {
userMemoryStore.updateDisplayName(activeUserId, name)
}
userMemoryStore.updateProfile(activeUserId, age, gender, hobbies, summary)
recentConversationDirty = false
// 清空已处理的对话记录
conversationBufferMemory.clear(activeUserId)
runOnUiThread {
uiManager.appendToUi("\n[Memory] 已更新用户画像: $activeUserId\n")
}
@@ -901,7 +1287,7 @@ class Live2DChatActivity : AppCompatActivity() {
Log.i(TAG_LLM, "Routing profile extraction to LOCAL")
local.generateResponseWithSystem(
"你是信息抽取器。仅输出JSON对象不要其他文字。字段为name,age,gender,hobbies,summary。",
"请从以下对话提取用户信息,未知填空字符串:\n$dialogue"
"请从以下对话提取用户信息,未知填空字符串,注意不需要\n$dialogue"
)
} catch (e: Exception) {
pendingLocalProfileCallback = null