Add local LLM (RKLLM) support with runtime switching between local and cloud backends

This commit is contained in:
gcw_4spBpAfv
2026-03-05 13:55:57 +08:00
parent 1701ecfb7f
commit bd07a7526a
43 changed files with 4258 additions and 115 deletions

View File

@@ -2,10 +2,15 @@ package com.digitalperson
import android.content.Intent
import android.os.Bundle
import android.util.Log
import androidx.appcompat.app.AppCompatActivity
import com.digitalperson.config.AppConfig
class EntryActivity : AppCompatActivity() {
companion object {
private const val TAG = "EntryActivity"
}
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
@@ -14,6 +19,7 @@ class EntryActivity : AppCompatActivity() {
} else {
MainActivity::class.java
}
Log.i(TAG, "USE_LIVE2D=${AppConfig.Avatar.USE_LIVE2D}, target=${target.simpleName}")
startActivity(Intent(this, target))
finish()
}

View File

@@ -2,20 +2,37 @@ package com.digitalperson
import android.Manifest
import android.content.pm.PackageManager
import android.graphics.Bitmap
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.camera.core.CameraSelector
import androidx.camera.core.ImageAnalysis
import androidx.camera.core.ImageProxy
import androidx.camera.core.Preview
import androidx.camera.lifecycle.ProcessCameraProvider
import androidx.camera.view.PreviewView
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import com.digitalperson.cloud.CloudApiManager
import com.digitalperson.audio.AudioProcessor
import com.digitalperson.vad.VadManager
import com.digitalperson.asr.AsrManager
import com.digitalperson.tts.TtsManager
import com.digitalperson.ui.Live2DUiManager
import com.digitalperson.config.AppConfig
import com.digitalperson.face.FaceDetectionPipeline
import com.digitalperson.face.FaceOverlayView
import com.digitalperson.face.ImageProxyBitmapConverter
import com.digitalperson.metrics.TraceManager
import com.digitalperson.metrics.TraceSession
import com.digitalperson.tts.TtsController
import com.digitalperson.llm.LLMManager
import com.digitalperson.llm.LLMManagerCallback
import com.digitalperson.util.FileHelper
import java.io.File
import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
@@ -26,14 +43,24 @@ import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
class Live2DChatActivity : AppCompatActivity() {
companion object {
private const val TAG_ACTIVITY = "Live2DChatActivity"
private const val TAG_LLM = "LLM_ROUTE"
}
private lateinit var uiManager: Live2DUiManager
private lateinit var vadManager: VadManager
private lateinit var asrManager: AsrManager
private lateinit var ttsManager: TtsManager
private lateinit var ttsController: TtsController
private lateinit var audioProcessor: AudioProcessor
private var llmManager: LLMManager? = null
private var useLocalLLM = false // 默认使用云端 LLM
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
private val appPermissions: Array<String> = arrayOf(
Manifest.permission.RECORD_AUDIO,
Manifest.permission.CAMERA
)
private val micPermissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
@Volatile
private var isRecording: Boolean = false
@@ -55,23 +82,46 @@ class Live2DChatActivity : AppCompatActivity() {
@Volatile private var llmInFlight: Boolean = false
private var enableStreaming = false
private lateinit var cameraPreviewView: PreviewView
private lateinit var faceOverlayView: FaceOverlayView
private lateinit var faceDetectionPipeline: FaceDetectionPipeline
private var facePipelineReady: Boolean = false
private var cameraProvider: ProcessCameraProvider? = null
private lateinit var cameraAnalyzerExecutor: ExecutorService
override fun onRequestPermissionsResult(
requestCode: Int,
permissions: Array<String>,
grantResults: IntArray
) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults)
val ok = requestCode == AppConfig.REQUEST_RECORD_AUDIO_PERMISSION &&
grantResults.isNotEmpty() &&
grantResults[0] == PackageManager.PERMISSION_GRANTED
if (!ok) {
if (requestCode != AppConfig.REQUEST_RECORD_AUDIO_PERMISSION) return
if (grantResults.isEmpty()) {
finish()
return
}
val granted = permissions.zip(grantResults.toTypedArray()).associate { it.first to it.second }
val micGranted = granted[Manifest.permission.RECORD_AUDIO] == PackageManager.PERMISSION_GRANTED
val cameraGranted = granted[Manifest.permission.CAMERA] == PackageManager.PERMISSION_GRANTED
if (!micGranted) {
Log.e(AppConfig.TAG, "Audio record is disallowed")
finish()
return
}
if (!cameraGranted) {
uiManager.showToast("未授予相机权限,暂不启用人脸检测")
Log.w(AppConfig.TAG, "Camera permission denied")
return
}
if (facePipelineReady) {
startCameraPreviewAndDetection()
}
}
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
Log.i(TAG_ACTIVITY, "onCreate")
setContentView(R.layout.activity_live2d_chat)
uiManager = Live2DUiManager(this)
@@ -82,10 +132,28 @@ class Live2DChatActivity : AppCompatActivity() {
stopButtonId = R.id.stop_button,
recordButtonId = R.id.record_button,
traditionalButtonsId = R.id.traditional_buttons,
llmModeSwitchId = R.id.llm_mode_switch,
llmModeSwitchRowId = R.id.llm_mode_switch_row,
silentPlayerViewId = 0,
speakingPlayerViewId = 0,
live2dViewId = R.id.live2d_view
)
cameraPreviewView = findViewById(R.id.camera_preview)
cameraPreviewView.implementationMode = PreviewView.ImplementationMode.COMPATIBLE
faceOverlayView = findViewById(R.id.face_overlay)
cameraAnalyzerExecutor = Executors.newSingleThreadExecutor()
faceDetectionPipeline = FaceDetectionPipeline(
context = applicationContext,
onResult = { result ->
faceOverlayView.updateResult(result)
},
onGreeting = { greeting ->
uiManager.appendToUi("\n[Face] $greeting\n")
ttsController.enqueueSegment(greeting)
ttsController.enqueueEnd()
}
)
// 根据配置选择交互方式
uiManager.setUseHoldToSpeak(AppConfig.USE_HOLD_TO_SPEAK)
@@ -105,7 +173,7 @@ class Live2DChatActivity : AppCompatActivity() {
uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
}
ActivityCompat.requestPermissions(this, permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)
ActivityCompat.requestPermissions(this, appPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)
try {
val streamingSwitch = findViewById<android.widget.Switch>(R.id.streaming_switch)
@@ -119,6 +187,27 @@ class Live2DChatActivity : AppCompatActivity() {
Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
}
try {
val ttsModeSwitch = findViewById<android.widget.Switch>(R.id.tts_mode_switch)
ttsModeSwitch.isChecked = false // 默认使用本地TTS
ttsModeSwitch.setOnCheckedChangeListener { _, isChecked ->
ttsController.setUseQCloudTts(isChecked)
uiManager.showToast("TTS模式已切换到${if (isChecked) "腾讯云" else "本地"}")
}
} catch (e: Exception) {
Log.w(AppConfig.TAG, "TTS mode switch not found in layout: ${e.message}")
}
// 设置 LLM 模式开关
uiManager.setLLMSwitchListener { isChecked ->
useLocalLLM = isChecked
Log.i(TAG_LLM, "LLM mode switched: useLocalLLM=$useLocalLLM")
uiManager.showToast("LLM模式已切换到${if (isChecked) "本地" else "云端"}")
// 重新初始化 LLM
initLLM()
}
// 默认不显示 LLM 开关,等模型下载完成后再显示
if (AppConfig.USE_HOLD_TO_SPEAK) {
uiManager.setButtonsEnabled(recordEnabled = false)
} else {
@@ -127,8 +216,8 @@ class Live2DChatActivity : AppCompatActivity() {
uiManager.setText("初始化中…")
audioProcessor = AudioProcessor(this)
ttsManager = TtsManager(this)
ttsManager.setCallback(createTtsCallback())
ttsController = TtsController(this)
ttsController.setCallback(createTtsCallback())
asrManager = AsrManager(this)
asrManager.setAudioProcessor(audioProcessor)
@@ -137,6 +226,64 @@ class Live2DChatActivity : AppCompatActivity() {
vadManager = VadManager(this)
vadManager.setCallback(createVadCallback())
// 初始化 LLM 管理器
initLLM()
// 检查是否需要下载模型
if (!FileHelper.isLocalLLMAvailable(this)) {
// 显示下载进度对话框
uiManager.showDownloadProgressDialog()
// 异步下载模型文件
FileHelper.downloadModelFilesWithProgress(
this,
onProgress = { fileName, downloaded, total, progress ->
runOnUiThread {
val downloadedMB = downloaded / (1024 * 1024)
val totalMB = total / (1024 * 1024)
uiManager.updateDownloadProgress(
fileName,
downloadedMB,
totalMB,
progress
)
}
},
onComplete = { success, message ->
runOnUiThread {
uiManager.dismissDownloadProgressDialog()
if (success) {
Log.i(AppConfig.TAG, "Model files downloaded successfully")
uiManager.showToast("模型下载完成", Toast.LENGTH_SHORT)
// 检查本地 LLM 是否可用
if (FileHelper.isLocalLLMAvailable(this)) {
Log.i(AppConfig.TAG, "Local LLM is available, enabling local LLM switch")
// 显示本地 LLM 开关,并同步状态
uiManager.showLLMSwitch(true)
uiManager.setLLMSwitchChecked(useLocalLLM)
}
} else {
Log.e(AppConfig.TAG, "Failed to download model files: $message")
uiManager.showToast("模型下载失败: $message", Toast.LENGTH_LONG)
}
// 下载完成后初始化其他组件
initializeOtherComponents()
}
}
)
} else {
// 模型已存在,直接初始化其他组件
initializeOtherComponents()
// 显示本地 LLM 开关,并同步状态
uiManager.showLLMSwitch(true)
uiManager.setLLMSwitchChecked(useLocalLLM)
}
}
/**
* 初始化其他组件VAD、ASR、TTS、人脸检测等
*/
private fun initializeOtherComponents() {
ioScope.launch {
try {
Log.i(AppConfig.TAG, "Init VAD + SenseVoice(RKNN) + TTS (background)")
@@ -144,7 +291,8 @@ class Live2DChatActivity : AppCompatActivity() {
vadManager.initVadModel()
asrManager.initSenseVoiceModel()
}
val ttsOk = ttsManager.initTtsAndAudioTrack()
val ttsOk = ttsController.init()
facePipelineReady = faceDetectionPipeline.initialize()
withContext(Dispatchers.Main) {
if (!ttsOk) {
uiManager.showToast(
@@ -152,6 +300,11 @@ class Live2DChatActivity : AppCompatActivity() {
Toast.LENGTH_LONG
)
}
if (!facePipelineReady) {
uiManager.showToast("RetinaFace 初始化失败,请检查模型和 rknn 运行库", Toast.LENGTH_LONG)
} else if (allPermissionsGranted()) {
startCameraPreviewAndDetection()
}
uiManager.setText(getString(R.string.hint))
if (AppConfig.USE_HOLD_TO_SPEAK) {
uiManager.setButtonsEnabled(recordEnabled = true)
@@ -203,14 +356,22 @@ class Live2DChatActivity : AppCompatActivity() {
Log.d(AppConfig.TAG, "ASR segment skipped: $reason")
}
override fun shouldSkipAsr(): Boolean = ttsManager.isPlaying()
override fun shouldSkipAsr(): Boolean = ttsController.isPlaying()
override fun isLlmInFlight(): Boolean = llmInFlight
override fun onLlmCalled(text: String) {
llmInFlight = true
Log.d(AppConfig.TAG, "Calling LLM with text: $text")
cloudApiManager.callLLM(text)
if (useLocalLLM) {
Log.i(TAG_LLM, "Routing to LOCAL LLM")
// 使用本地 LLM 生成回复
generateResponse(text)
} else {
Log.i(TAG_LLM, "Routing to CLOUD LLM")
// 使用云端 LLM 生成回复
cloudApiManager.callLLM(text)
}
}
}
@@ -220,7 +381,7 @@ class Live2DChatActivity : AppCompatActivity() {
asrManager.enqueueAudioSegment(originalAudio, processedAudio)
}
override fun shouldSkipProcessing(): Boolean = ttsManager.isPlaying() || llmInFlight
override fun shouldSkipProcessing(): Boolean = ttsController.isPlaying() || llmInFlight
}
private fun createCloudApiListener() = object : CloudApiManager.CloudApiListener {
@@ -232,9 +393,9 @@ class Live2DChatActivity : AppCompatActivity() {
if (enableStreaming) {
for (seg in segmenter.flush()) {
ttsManager.enqueueSegment(seg)
ttsController.enqueueSegment(seg)
}
ttsManager.enqueueEnd()
ttsController.enqueueEnd()
} else {
val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(response)
@@ -247,8 +408,8 @@ class Live2DChatActivity : AppCompatActivity() {
runOnUiThread {
uiManager.appendToUi("${filteredText}\n")
}
ttsManager.enqueueSegment(filteredText)
ttsManager.enqueueEnd()
ttsController.enqueueSegment(filteredText)
ttsController.enqueueEnd()
}
}
@@ -271,7 +432,7 @@ class Live2DChatActivity : AppCompatActivity() {
val segments = segmenter.processChunk(filteredText)
for (seg in segments) {
ttsManager.enqueueSegment(seg)
ttsController.enqueueSegment(seg)
}
}
}
@@ -285,7 +446,7 @@ class Live2DChatActivity : AppCompatActivity() {
}
}
private fun createTtsCallback() = object : TtsManager.TtsCallback {
private fun createTtsCallback() = object : TtsController.TtsCallback {
override fun onTtsStarted(text: String) {
runOnUiThread {
uiManager.appendToUi("\n[TTS] 开始合成...\n")
@@ -310,32 +471,6 @@ class Live2DChatActivity : AppCompatActivity() {
uiManager.setSpeaking(speaking)
}
override fun getCurrentTrace(): TraceSession? = currentTrace
override fun onTraceMarkTtsRequestEnqueued() {
currentTrace?.markTtsRequestEnqueued()
}
override fun onTraceMarkTtsSynthesisStart() {
currentTrace?.markTtsSynthesisStart()
}
override fun onTraceMarkTtsFirstPcmReady() {
currentTrace?.markTtsFirstPcmReady()
}
override fun onTraceMarkTtsFirstAudioPlay() {
currentTrace?.markTtsFirstAudioPlay()
}
override fun onTraceMarkTtsDone() {
currentTrace?.markTtsDone()
}
override fun onTraceAddDuration(name: String, value: Long) {
currentTrace?.addDuration(name, value)
}
override fun onEndTurn() {
TraceManager.getInstance().endTurn()
currentTrace = null
@@ -344,27 +479,97 @@ class Live2DChatActivity : AppCompatActivity() {
override fun onDestroy() {
super.onDestroy()
stopCameraPreviewAndDetection()
onStopClicked(userInitiated = false)
ioScope.cancel()
synchronized(nativeLock) {
try { vadManager.release() } catch (_: Throwable) {}
try { asrManager.release() } catch (_: Throwable) {}
}
try { ttsManager.release() } catch (_: Throwable) {}
try { faceDetectionPipeline.release() } catch (_: Throwable) {}
try { cameraAnalyzerExecutor.shutdown() } catch (_: Throwable) {}
try { ttsController.release() } catch (_: Throwable) {}
try { llmManager?.destroy() } catch (_: Throwable) {}
try { uiManager.release() } catch (_: Throwable) {}
try { audioProcessor.release() } catch (_: Throwable) {}
}
override fun onResume() {
super.onResume()
Log.i(TAG_ACTIVITY, "onResume")
uiManager.onResume()
if (facePipelineReady && allPermissionsGranted()) {
startCameraPreviewAndDetection()
}
}
override fun onPause() {
Log.i(TAG_ACTIVITY, "onPause")
stopCameraPreviewAndDetection()
uiManager.onPause()
super.onPause()
}
/** True when every runtime permission in [appPermissions] (microphone + camera) is granted. */
private fun allPermissionsGranted(): Boolean {
    for (permission in appPermissions) {
        val state = ContextCompat.checkSelfPermission(this, permission)
        if (state != PackageManager.PERMISSION_GRANTED) {
            return false
        }
    }
    return true
}
/**
 * Binds CameraX Preview + ImageAnalysis to the front camera.
 *
 * Analysis frames are delivered on [cameraAnalyzerExecutor] to
 * [analyzeCameraFrame]; STRATEGY_KEEP_ONLY_LATEST drops frames while one is
 * still being processed. Any binding failure is logged and swallowed so the
 * rest of the activity keeps working without face detection.
 */
private fun startCameraPreviewAndDetection() {
    val cameraProviderFuture = ProcessCameraProvider.getInstance(this)
    cameraProviderFuture.addListener({
        try {
            val provider = cameraProviderFuture.get()
            cameraProvider = provider
            // Rebind from scratch in case a previous session is still attached.
            provider.unbindAll()
            val preview = Preview.Builder().build().apply {
                setSurfaceProvider(cameraPreviewView.surfaceProvider)
            }
            cameraPreviewView.scaleType = PreviewView.ScaleType.FIT_CENTER
            val analyzer = ImageAnalysis.Builder()
                .setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST)
                .build()
            analyzer.setAnalyzer(cameraAnalyzerExecutor) { imageProxy ->
                analyzeCameraFrame(imageProxy)
            }
            val selector = CameraSelector.Builder()
                .requireLensFacing(CameraSelector.LENS_FACING_FRONT)
                .build()
            provider.bindToLifecycle(this, selector, preview, analyzer)
        } catch (t: Throwable) {
            Log.e(AppConfig.TAG, "startCameraPreviewAndDetection failed: ${t.message}", t)
        }
    }, ContextCompat.getMainExecutor(this))
}
/** Unbinds all CameraX use cases (best-effort) and clears the provider reference. */
private fun stopCameraPreviewAndDetection() {
    val provider = cameraProvider
    cameraProvider = null
    if (provider == null) return
    try {
        provider.unbindAll()
    } catch (_: Throwable) {
        // Best-effort teardown: ignore unbind failures during shutdown.
    }
}
/**
 * ImageAnalysis callback: converts one camera frame to a Bitmap and hands it
 * to the face pipeline (which takes ownership and recycles it). Always closes
 * [imageProxy] — even on conversion errors — so CameraX keeps delivering frames.
 */
private fun analyzeCameraFrame(imageProxy: ImageProxy) {
    try {
        val bitmap: Bitmap? = ImageProxyBitmapConverter.toBitmap(imageProxy)
        if (bitmap != null) {
            faceDetectionPipeline.submitFrame(bitmap)
        }
    } catch (t: Throwable) {
        Log.w(AppConfig.TAG, "analyzeCameraFrame error: ${t.message}")
    } finally {
        imageProxy.close()
    }
}
private fun onStartClicked() {
Log.d(AppConfig.TAG, "onStartClicked called")
if (isRecording) {
@@ -372,7 +577,7 @@ class Live2DChatActivity : AppCompatActivity() {
return
}
if (!audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
if (!audioProcessor.initMicrophone(micPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
uiManager.showToast("麦克风初始化失败/无权限")
return
}
@@ -383,8 +588,7 @@ class Live2DChatActivity : AppCompatActivity() {
uiManager.clearText()
ttsManager.reset()
ttsManager.setCurrentTrace(currentTrace)
ttsController.reset()
segmenter.reset()
vadManager.reset()
@@ -409,12 +613,12 @@ class Live2DChatActivity : AppCompatActivity() {
}
// 如果TTS正在播放打断它
val interrupted = ttsManager.interruptForNewTurn()
val interrupted = ttsController.interruptForNewTurn()
if (interrupted) {
uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
}
if (!audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
if (!audioProcessor.initMicrophone(micPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
uiManager.showToast("麦克风初始化失败/无权限")
return
}
@@ -427,7 +631,7 @@ class Live2DChatActivity : AppCompatActivity() {
// interruptForNewTurn() already prepared TTS state for next turn.
// Keep reset() only for non-interrupt entry points.
ttsManager.setCurrentTrace(currentTrace)
segmenter.reset()
// 启动按住说话的动作
@@ -479,7 +683,7 @@ class Live2DChatActivity : AppCompatActivity() {
recordingJob?.cancel()
recordingJob = null
ttsManager.stop()
ttsController.stop()
if (AppConfig.USE_HOLD_TO_SPEAK) {
uiManager.setButtonsEnabled(recordEnabled = true)
@@ -515,10 +719,10 @@ class Live2DChatActivity : AppCompatActivity() {
while (isRecording && ioScope.coroutineContext.isActive) {
loopCount++
if (loopCount % 100 == 0) {
Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsController.isPlaying()}")
}
if (ttsManager.isPlaying()) {
if (ttsController.isPlaying()) {
if (vadManager.isInSpeech()) {
Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
vadManager.clearState()
@@ -546,11 +750,134 @@ class Live2DChatActivity : AppCompatActivity() {
}
val forced = segmenter.maybeForceByTime()
for (seg in forced) ttsManager.enqueueSegment(seg)
for (seg in forced) ttsController.enqueueSegment(seg)
}
vadManager.forceFinalize()
}
Log.d(AppConfig.TAG, "processSamplesLoop stopped")
}
/**
 * (Re)initializes the LLM backend according to [useLocalLLM].
 *
 * Any existing local [LLMManager] is destroyed first. In local mode the RKLLM
 * model file must already exist on disk; a new manager is created whose
 * callback buffers streamed tokens and, when generation finishes, clears
 * [llmInFlight] and enqueues the final text to TTS. In cloud mode no local
 * manager is created. On any failure the activity falls back to cloud mode
 * and unchecks the UI switch.
 */
private fun initLLM() {
    try {
        Log.i(TAG_LLM, "initLLM called, useLocalLLM=$useLocalLLM")
        llmManager?.destroy()
        llmManager = null
        if (useLocalLLM) {
            // // Disabled experiment: pause/release heavyweight modules before local LLM init.
            // Log.i(AppConfig.TAG, "Pausing camera and releasing face detection before LLM initialization")
            // stopCameraPreviewAndDetection()
            // try {
            //     faceDetectionPipeline.release()
            //     Log.i(AppConfig.TAG, "Face detection pipeline released")
            // } catch (e: Exception) {
            //     Log.w(AppConfig.TAG, "Failed to release face detection pipeline: ${e.message}")
            // }
            // // Release the VAD manager
            // try {
            //     vadManager.release()
            //     Log.i(AppConfig.TAG, "VAD manager released")
            // } catch (e: Exception) {
            //     Log.w(AppConfig.TAG, "Failed to release VAD manager: ${e.message}")
            // }
            val modelPath = FileHelper.getLLMModelPath(applicationContext)
            if (!File(modelPath).exists()) {
                throw IllegalStateException("RKLLM model file missing: $modelPath")
            }
            Log.i(AppConfig.TAG, "Initializing LLM with model path: $modelPath")
            // Accumulates streamed chunks so the complete reply can be sent to TTS at the end.
            val localLlmResponseBuffer = StringBuilder()
            llmManager = LLMManager(modelPath, object : LLMManagerCallback {
                override fun onThinking(msg: String, finished: Boolean) {
                    // Intermediate "thinking" output: only surfaced in streaming mode.
                    Log.d(TAG_LLM, "LOCAL onThinking finished=$finished msg=${msg.take(60)}")
                    runOnUiThread {
                        if (!finished && enableStreaming) {
                            uiManager.appendToUi("\n[LLM] 思考中: $msg\n")
                        }
                    }
                }
                override fun onResult(msg: String, finished: Boolean) {
                    // Generation output: buffer chunks until finished, then hand the
                    // full reply to TTS and clear the in-flight flag.
                    Log.d(TAG_LLM, "LOCAL onResult finished=$finished len=${msg.length}")
                    runOnUiThread {
                        if (!finished) {
                            localLlmResponseBuffer.append(msg)
                            if (enableStreaming) {
                                uiManager.appendToUi(msg)
                            }
                        } else {
                            val finalText = localLlmResponseBuffer.toString().trim()
                            localLlmResponseBuffer.setLength(0)
                            if (!enableStreaming && finalText.isNotEmpty()) {
                                uiManager.appendToUi("$finalText\n")
                            }
                            uiManager.appendToUi("\n\n[LLM] 生成完成\n")
                            llmInFlight = false
                            if (finalText.isNotEmpty()) {
                                ttsController.enqueueSegment(finalText)
                                ttsController.enqueueEnd()
                            } else {
                                Log.w(TAG_LLM, "LOCAL final text is empty, skip TTS enqueue")
                            }
                        }
                    }
                }
            })
            Log.i(AppConfig.TAG, "LLM initialized successfully")
            Log.i(TAG_LLM, "LOCAL LLM initialized")
        } else {
            // Cloud mode: requests are routed through cloudApiManager instead.
            Log.i(AppConfig.TAG, "Using cloud LLM, skipping local LLM initialization")
            Log.i(TAG_LLM, "CLOUD mode active")
        }
    } catch (e: Exception) {
        Log.e(AppConfig.TAG, "Failed to initialize LLM: ${e.message}", e)
        Log.e(TAG_LLM, "LOCAL init failed: ${e.message}", e)
        // Fall back to cloud mode and reflect that in the UI switch.
        useLocalLLM = false
        runOnUiThread {
            uiManager.setLLMSwitchChecked(false)
            uiManager.showToast("LLM 初始化失败: ${e.message}", Toast.LENGTH_LONG)
            uiManager.appendToUi("\n[错误] LLM 初始化失败: ${e.message}\n")
        }
    }
}
/**
 * Produces a reply for [userInput] via the active LLM backend.
 *
 * Local mode requires [llmManager]; if it is unexpectedly null the request
 * falls back to the cloud API. On error the failure is surfaced in the UI and
 * [llmInFlight] is cleared so the pipeline can accept the next turn.
 */
private fun generateResponse(userInput: String) {
    try {
        if (useLocalLLM) {
            // Fixed persona/system prompt for the local model.
            val systemPrompt = "你是一个友好的数字人助手,回答要简洁明了。"
            Log.d(AppConfig.TAG, "Generating response for: $userInput")
            val local = llmManager
            if (local == null) {
                Log.e(TAG_LLM, "LOCAL LLM manager is null, fallback to CLOUD")
                cloudApiManager.callLLM(userInput)
                return
            }
            Log.i(TAG_LLM, "LOCAL generateResponseWithSystem")
            local.generateResponseWithSystem(systemPrompt, userInput)
        } else {
            // Cloud backend.
            Log.d(AppConfig.TAG, "Using cloud LLM for response: $userInput")
            Log.i(TAG_LLM, "CLOUD callLLM")
            cloudApiManager.callLLM(userInput)
        }
    } catch (e: Exception) {
        Log.e(AppConfig.TAG, "Failed to generate response: ${e.message}", e)
        Log.e(TAG_LLM, "generateResponse failed: ${e.message}", e)
        runOnUiThread {
            uiManager.appendToUi("\n\n[Error] LLM 生成失败: ${e.message}\n")
            llmInFlight = false
        }
    }
}
}

View File

@@ -34,6 +34,25 @@ object AppConfig {
const val MAX_TEXT_LENGTH = 50
const val MODEL_DIR = "sensevoice_models"
}
// Face-detection (RetinaFace) model and tracking/greeting tuning constants.
object Face {
    const val MODEL_DIR = "RetinaFace"                 // asset subdirectory holding the model
    const val MODEL_NAME = "RetinaFace_mobile320.rknn" // RKNN model file name
    const val INPUT_SIZE = 320                         // square input resolution passed to native init
    const val SCORE_THRESHOLD = 0.6f                   // minimum detection confidence to keep a box
    const val NMS_THRESHOLD = 0.4f                     // IoU threshold for non-max suppression
    const val TRACK_IOU_THRESHOLD = 0.45f              // IoU below this starts a new face track
    const val STABLE_MS = 1000L                        // a track must persist this long before greeting
    const val FRONTAL_MIN_FACE_SIZE = 90f              // minimum box side (px) for the frontal heuristic
    const val FRONTAL_MAX_ASPECT_DIFF = 0.35f          // max |w/h - 1| for a roughly square (frontal) box
}
// Face-recognition (ArcFace / InsightFace) model and matching constants.
object FaceRecognition {
    const val MODEL_DIR = "Insightface"              // asset subdirectory holding the model
    const val MODEL_NAME = "ms1mv3_arcface_r18.rknn" // RKNN embedding model file name
    const val SIMILARITY_THRESHOLD = 0.5f            // match cutoff — presumably cosine similarity; TODO confirm in recognizer
    const val GREETING_COOLDOWN_MS = 6000L           // minimum interval between spoken greetings
}
object Audio {
const val GAIN_SMOOTHING_FACTOR = 0.1f
@@ -48,4 +67,10 @@ object AppConfig {
const val MODEL_DIR = "live2d_model/Haru_pro_jp"
const val MODEL_JSON = "haru_greeter_t05.model3.json"
}
// SECURITY(review): real Tencent Cloud credentials are committed here in plain
// text. They are now part of repository history and must be treated as leaked:
// rotate them in the Tencent Cloud console and load replacements from untracked
// local/CI configuration (e.g. BuildConfig fields fed from a gitignored
// properties file) instead of hard-coding them in source.
object QCloud {
    const val APP_ID = "1302849512" // replace with your Tencent Cloud APP_ID
    const val SECRET_ID = "AKIDbBdyBGE5oPuIGA1iDlDYlFallaJ0YODB" // replace with your Tencent Cloud SECRET_ID
    const val SECRET_KEY = "32vhIl9OQIRclmLjvuleLp9LLAnFVYEp" // replace with your Tencent Cloud SECRET_KEY
}
}

View File

@@ -0,0 +1,79 @@
package com.digitalperson.engine;
import android.content.Context;
import android.graphics.Bitmap;
import android.util.Log;
import com.digitalperson.config.AppConfig;
import com.digitalperson.util.FileHelper;
import java.io.File;
/**
 * JNI wrapper around the native RKNN ArcFace embedding engine.
 *
 * Lifecycle: construct -> initialize(Context) -> extractEmbedding(...) -> release().
 * Not designed for concurrent use; callers must serialize access. After
 * release() the instance is permanently unusable.
 */
public class ArcFaceEngineRKNN {
    private static final String TAG = "ArcFaceEngineRKNN";

    static {
        try {
            // rknnrt must be resident before the engine library that links against it.
            System.loadLibrary("rknnrt");
            System.loadLibrary("sensevoiceEngine");
            Log.d(TAG, "Loaded native libs for ArcFace RKNN");
        } catch (UnsatisfiedLinkError e) {
            Log.e(TAG, "Failed to load native libraries for ArcFace", e);
            throw e;
        }
    }

    private final long nativePtr; // opaque handle owned by native code
    private boolean initialized = false;
    private boolean released = false;

    public ArcFaceEngineRKNN() {
        nativePtr = createEngineNative();
        if (nativePtr == 0) {
            throw new RuntimeException("Failed to create native ArcFace engine");
        }
    }

    /**
     * Copies the InsightFace assets to local storage and initializes the
     * native engine with the resulting model path.
     *
     * @return true if the native engine initialized successfully
     */
    public boolean initialize(Context context) {
        if (released) return false;
        File modelDir = FileHelper.copyInsightFaceAssets(context);
        File modelFile = new File(modelDir, AppConfig.FaceRecognition.MODEL_NAME);
        // Fail fast with a clear log instead of handing a missing path to native code.
        if (!modelFile.isFile()) {
            Log.e(TAG, "ArcFace model file missing: " + modelFile.getAbsolutePath());
            initialized = false;
            return false;
        }
        int ret = initNative(nativePtr, modelFile.getAbsolutePath());
        initialized = ret == 0;
        if (!initialized) {
            Log.e(TAG, "ArcFace init failed, code=" + ret + ", model=" + modelFile.getAbsolutePath());
        }
        return initialized;
    }

    /**
     * Extracts a face-embedding vector for the face at the given box
     * coordinates within the bitmap.
     *
     * @return the embedding, or an empty array when the engine is not
     *         initialized, already released, the bitmap is null, or the
     *         native call returns null
     */
    public float[] extractEmbedding(Bitmap bitmap, float left, float top, float right, float bottom) {
        Log.d(TAG, "extractEmbedding called: initialized=" + initialized + ", released=" + released + ", bitmap=" + (bitmap != null));
        if (!initialized || released || bitmap == null) {
            Log.w(TAG, "extractEmbedding failed: initialized=" + initialized + ", released=" + released + ", bitmap=" + (bitmap != null));
            return new float[0];
        }
        float[] emb = extractEmbeddingNative(nativePtr, bitmap, left, top, right, bottom);
        Log.d(TAG, "extractEmbeddingNative returned: " + (emb != null ? emb.length : "null"));
        return emb != null ? emb : new float[0];
    }

    /** Releases native resources; idempotent. The instance cannot be reused afterwards. */
    public void release() {
        if (!released && nativePtr != 0) {
            releaseNative(nativePtr);
        }
        released = true;
        initialized = false;
    }

    private native long createEngineNative();
    private native int initNative(long ptr, String modelPath);
    private native float[] extractEmbeddingNative(
            long ptr,
            Bitmap bitmap,
            float left,
            float top,
            float right,
            float bottom
    );
    private native void releaseNative(long ptr);
}

View File

@@ -0,0 +1,77 @@
package com.digitalperson.engine;
import android.content.Context;
import android.graphics.Bitmap;
import android.util.Log;
import com.digitalperson.config.AppConfig;
import com.digitalperson.util.FileHelper;
import java.io.File;
/**
 * JNI wrapper around the native RKNN RetinaFace detector.
 *
 * Lifecycle: construct -> initialize(Context) -> detect(...) -> release().
 * Not designed for concurrent use; callers must serialize access. After
 * release() the instance is permanently unusable.
 */
public class RetinaFaceEngineRKNN {
    private static final String TAG = "RetinaFaceEngineRKNN";

    static {
        try {
            // rknnrt must be resident before the engine library that links against it.
            System.loadLibrary("rknnrt");
            System.loadLibrary("sensevoiceEngine");
            Log.d(TAG, "Loaded native libs for RetinaFace RKNN");
        } catch (UnsatisfiedLinkError e) {
            Log.e(TAG, "Failed to load native libraries for RetinaFace", e);
            throw e;
        }
    }

    private final long nativePtr; // opaque handle owned by native code
    private boolean initialized = false;
    private boolean released = false;

    public RetinaFaceEngineRKNN() {
        nativePtr = createEngineNative();
        if (nativePtr == 0) {
            throw new RuntimeException("Failed to create native RetinaFace engine");
        }
    }

    /**
     * Copies the RetinaFace assets to local storage and initializes the native
     * engine with the input size and thresholds from {@code AppConfig.Face}.
     *
     * @return true if the native engine initialized successfully
     */
    public boolean initialize(Context context) {
        if (released) {
            return false;
        }
        File modelDir = FileHelper.copyRetinaFaceAssets(context);
        File modelFile = new File(modelDir, AppConfig.Face.MODEL_NAME);
        // Fail fast with a clear log instead of handing a missing path to native code.
        if (!modelFile.isFile()) {
            Log.e(TAG, "RetinaFace model file missing: " + modelFile.getAbsolutePath());
            initialized = false;
            return false;
        }
        int ret = initNative(
                nativePtr,
                modelFile.getAbsolutePath(),
                AppConfig.Face.INPUT_SIZE,
                AppConfig.Face.SCORE_THRESHOLD,
                AppConfig.Face.NMS_THRESHOLD
        );
        initialized = ret == 0;
        if (!initialized) {
            Log.e(TAG, "RetinaFace init failed, code=" + ret + ", model=" + modelFile.getAbsolutePath());
        }
        return initialized;
    }

    /**
     * Runs face detection on the bitmap.
     *
     * @return a flat float array of detections (consumers read 5 floats per
     *         face: left, top, right, bottom, score); empty when the engine is
     *         not initialized, already released, the bitmap is null, or the
     *         native call returns null
     */
    public float[] detect(Bitmap bitmap) {
        if (!initialized || released || bitmap == null) {
            return new float[0];
        }
        float[] raw = detectNative(nativePtr, bitmap);
        return raw != null ? raw : new float[0];
    }

    /** Releases native resources; idempotent. The instance cannot be reused afterwards. */
    public void release() {
        if (!released && nativePtr != 0) {
            releaseNative(nativePtr);
        }
        released = true;
        initialized = false;
    }

    private native long createEngineNative();
    private native int initNative(long ptr, String modelPath, int inputSize, float scoreThreshold, float nmsThreshold);
    private native float[] detectNative(long ptr, Bitmap bitmap);
    private native void releaseNative(long ptr);
}

View File

@@ -0,0 +1,223 @@
package com.digitalperson.face
import android.content.Context
import android.graphics.Bitmap
import android.util.Log
import com.digitalperson.config.AppConfig
import com.digitalperson.engine.RetinaFaceEngineRKNN
import java.util.concurrent.atomic.AtomicBoolean
import kotlin.math.abs
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.cancel
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
/**
 * One detected face: bounding box in source-frame pixel coordinates plus the
 * detector's confidence score.
 */
data class FaceBox(
    val left: Float,
    val top: Float,
    val right: Float,
    val bottom: Float,
    val score: Float,
)
/**
 * Detection output for one frame: the analyzed frame's dimensions and the
 * faces that survived size filtering (coordinates are relative to that frame).
 */
data class FaceDetectionResult(
    val sourceWidth: Int,
    val sourceHeight: Int,
    val faces: List<FaceBox>,
)
class FaceDetectionPipeline(
private val context: Context,
private val onResult: (FaceDetectionResult) -> Unit,
private val onGreeting: (String) -> Unit,
) {
private val engine = RetinaFaceEngineRKNN()
private val recognizer = FaceRecognizer(context)
private val scope = CoroutineScope(SupervisorJob() + Dispatchers.Default)
private val frameInFlight = AtomicBoolean(false)
private val initialized = AtomicBoolean(false)
private var trackFace: FaceBox? = null
private var trackId: Long = 0
private var trackStableSinceMs: Long = 0
private var greetedTrackId: Long = -1
private var lastGreetMs: Long = 0
fun initialize(): Boolean {
val detectorOk = engine.initialize(context)
val recognizerOk = recognizer.initialize()
val ok = detectorOk && recognizerOk
initialized.set(ok)
Log.i(AppConfig.TAG, "Face pipeline initialize result=$ok detector=$detectorOk recognizer=$recognizerOk")
return ok
}
fun submitFrame(bitmap: Bitmap) {
if (!initialized.get()) {
bitmap.recycle()
return
}
if (!frameInFlight.compareAndSet(false, true)) {
bitmap.recycle()
return
}
scope.launch {
try {
val width = bitmap.width
val height = bitmap.height
val raw = engine.detect(bitmap)
val faceCount = raw.size / 5
val faces = ArrayList<FaceBox>(faceCount)
var i = 0
while (i + 4 < raw.size) {
faces.add(
FaceBox(
left = raw[i],
top = raw[i + 1],
right = raw[i + 2],
bottom = raw[i + 3],
score = raw[i + 4],
)
)
i += 5
}
// 过滤太小的人脸
val minFaceSize = 50 // 最小人脸大小(像素)
val filteredFaces = faces.filter { face ->
val width = face.right - face.left
val height = face.bottom - face.top
width > minFaceSize && height > minFaceSize
}
// if (filteredFaces.isNotEmpty()) {
// Log.d(
// AppConfig.TAG,"[Face] filtered detected ${filteredFaces.size} face(s)"
// )
// }
maybeRecognizeAndGreet(bitmap, filteredFaces)
withContext(Dispatchers.Main) {
onResult(FaceDetectionResult(width, height, filteredFaces))
}
} catch (t: Throwable) {
Log.e(AppConfig.TAG, "Face detection pipeline failed: ${t.message}", t)
} finally {
bitmap.recycle()
frameInFlight.set(false)
}
}
}
private suspend fun maybeRecognizeAndGreet(bitmap: Bitmap, faces: List<FaceBox>) {
val now = System.currentTimeMillis()
if (faces.isEmpty()) {
trackFace = null
trackStableSinceMs = 0
return
}
val primary = faces.maxByOrNull { (it.right - it.left) * (it.bottom - it.top) } ?: return
val prev = trackFace
if (prev == null || iou(prev, primary) < AppConfig.Face.TRACK_IOU_THRESHOLD) {
trackId += 1
greetedTrackId = -1
trackStableSinceMs = now
}
trackFace = primary
val stableMs = now - trackStableSinceMs
val frontal = isFrontal(primary, bitmap.width, bitmap.height)
val coolingDown = (now - lastGreetMs) < AppConfig.FaceRecognition.GREETING_COOLDOWN_MS
if (stableMs < AppConfig.Face.STABLE_MS || !frontal || greetedTrackId == trackId || coolingDown) {
return
}
val match = recognizer.identify(bitmap, primary)
Log.d(AppConfig.TAG, "[Face] Recognition result: matchedName=${match.matchedName}, similarity=${match.similarity}")
// 检查是否需要保存新人脸
if (match.matchedName.isNullOrBlank()) {
Log.d(AppConfig.TAG, "[Face] No match found, attempting to add new face")
// 提取人脸特征
val embedding = extractEmbedding(bitmap, primary)
Log.d(AppConfig.TAG, "[Face] Extracted embedding size: ${embedding.size}")
if (embedding.isNotEmpty()) {
// 尝试添加新人脸
val added = recognizer.addNewFace(embedding)
Log.d(AppConfig.TAG, "[Face] Add new face result: $added")
if (added) {
Log.i(AppConfig.TAG, "[Face] New face added to database")
} else {
Log.i(AppConfig.TAG, "[Face] Face already exists in database (similar face found)")
}
} else {
Log.w(AppConfig.TAG, "[Face] Failed to extract embedding")
}
} else {
Log.d(AppConfig.TAG, "[Face] Matched existing face: ${match.matchedName}")
}
val greeting = if (!match.matchedName.isNullOrBlank()) {
"你好,${match.matchedName}"
} else {
"你好,很高兴见到你。"
}
greetedTrackId = trackId
lastGreetMs = now
Log.i(
AppConfig.TAG,
"[Face] greeting track=$trackId stable=${stableMs}ms frontal=$frontal matched=${match.matchedName} score=${match.similarity}"
)
withContext(Dispatchers.Main) {
onGreeting(greeting)
}
}
private fun extractEmbedding(bitmap: Bitmap, face: FaceBox): FloatArray {
return recognizer.extractEmbedding(bitmap, face)
}
/**
 * Cheap frontal-pose heuristic: the face box must be large enough, roughly
 * square, and centred inside the middle 70% of the frame.
 */
private fun isFrontal(face: FaceBox, frameW: Int, frameH: Int): Boolean {
    val width = face.right - face.left
    val height = face.bottom - face.top
    // Too small to judge pose reliably.
    if (minOf(width, height) < AppConfig.Face.FRONTAL_MIN_FACE_SIZE) {
        return false
    }
    // A near-square box is used as a proxy for a frontal face.
    if (abs(width / height - 1f) > AppConfig.Face.FRONTAL_MAX_ASPECT_DIFF) {
        return false
    }
    // Face centre must sit away from the frame edges (central 70% band).
    val centerX = (face.left + face.right) * 0.5f
    val centerY = (face.top + face.bottom) * 0.5f
    return centerX in (frameW * 0.15f)..(frameW * 0.85f) &&
        centerY in (frameH * 0.15f)..(frameH * 0.85f)
}
/** Intersection-over-union of two boxes; 0 when they are disjoint or degenerate. */
private fun iou(a: FaceBox, b: FaceBox): Float {
    // Overlap extents collapse to zero when the boxes do not intersect.
    val overlapW = (minOf(a.right, b.right) - maxOf(a.left, b.left)).coerceAtLeast(0f)
    val overlapH = (minOf(a.bottom, b.bottom) - maxOf(a.top, b.top)).coerceAtLeast(0f)
    val intersection = overlapW * overlapH
    val areaA = (a.right - a.left).coerceAtLeast(0f) * (a.bottom - a.top).coerceAtLeast(0f)
    val areaB = (b.right - b.left).coerceAtLeast(0f) * (b.bottom - b.top).coerceAtLeast(0f)
    val union = areaA + areaB - intersection
    // Guard against empty boxes so we never divide by zero.
    return if (union > 0f) intersection / union else 0f
}
// Tears down the pipeline: cancels in-flight coroutines first so no detection
// callback runs against a released engine, then frees the native detector and
// recognizer, and finally marks the pipeline uninitialised.
fun release() {
    scope.cancel()
    engine.release()
    recognizer.release()
    initialized.set(false)
}
}

View File

@@ -0,0 +1,93 @@
package com.digitalperson.face
import android.content.ContentValues
import android.content.Context
import android.database.sqlite.SQLiteDatabase
import android.database.sqlite.SQLiteOpenHelper
import android.util.Log
import com.digitalperson.config.AppConfig
import java.nio.ByteBuffer
import java.nio.ByteOrder
/**
 * A persisted face identity.
 *
 * @property id database row id, or -1 for cache-only entries that were added
 *   in memory (see FaceRecognizer.addOrUpdateProfile)
 * @property name display name; may be empty for faces enrolled without a name
 * @property embedding feature vector (expected to be L2-normalised by the writer)
 *
 * equals/hashCode are overridden because the default data-class implementation
 * compares a [FloatArray] property by reference, which breaks structural
 * equality for value objects holding arrays.
 */
data class FaceProfile(
    val id: Long,
    val name: String,
    val embedding: FloatArray,
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is FaceProfile) return false
        return id == other.id && name == other.name && embedding.contentEquals(other.embedding)
    }

    override fun hashCode(): Int {
        var result = id.hashCode()
        result = 31 * result + name.hashCode()
        result = 31 * result + embedding.contentHashCode()
        return result
    }
}
/**
 * SQLite-backed store for face profiles (name + embedding).
 *
 * Embeddings are serialised as consecutive little-endian float32 values.
 *
 * NOTE(review): the `name` column is UNIQUE and unnamed faces are stored under
 * the empty string, so with CONFLICT_REPLACE at most one unnamed profile can
 * survive in the table — confirm this is intended.
 */
class FaceFeatureStore(context: Context) : SQLiteOpenHelper(context, DB_NAME, null, DB_VERSION) {

    override fun onCreate(db: SQLiteDatabase) {
        db.execSQL(
            """
            CREATE TABLE IF NOT EXISTS face_profiles (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL UNIQUE,
                embedding BLOB NOT NULL,
                updated_at INTEGER NOT NULL
            )
            """.trimIndent()
        )
    }

    // Destructive upgrade: drops every stored profile and recreates the schema.
    override fun onUpgrade(db: SQLiteDatabase, oldVersion: Int, newVersion: Int) {
        db.execSQL("DROP TABLE IF EXISTS face_profiles")
        onCreate(db)
    }

    /** Reads every stored profile; rows whose embedding blob is NULL are skipped. */
    fun loadAllProfiles(): List<FaceProfile> {
        val profiles = ArrayList<FaceProfile>()
        readableDatabase.rawQuery("SELECT id, name, embedding FROM face_profiles", null).use { cursor ->
            val idCol = cursor.getColumnIndexOrThrow("id")
            val nameCol = cursor.getColumnIndexOrThrow("name")
            val embCol = cursor.getColumnIndexOrThrow("embedding")
            while (cursor.moveToNext()) {
                val blob = cursor.getBlob(embCol) ?: continue
                profiles += FaceProfile(
                    id = cursor.getLong(idCol),
                    name = cursor.getString(nameCol),
                    embedding = blobToFloatArray(blob),
                )
            }
        }
        return profiles
    }

    /** Inserts or replaces (keyed by the unique name) a profile; blank names collapse to "". */
    fun upsertProfile(name: String, embedding: FloatArray) {
        // Never store NULL in the NOT NULL name column; blank maps to "".
        val safeName = if (name.isBlank()) "" else name
        val row = ContentValues().apply {
            put("name", safeName)
            put("embedding", floatArrayToBlob(embedding))
            put("updated_at", System.currentTimeMillis())
        }
        val rowId = writableDatabase.insertWithOnConflict(
            "face_profiles",
            null,
            row,
            SQLiteDatabase.CONFLICT_REPLACE
        )
        Log.i(AppConfig.TAG, "[FaceFeatureStore] upsertProfile name='$safeName' rowId=$rowId dim=${embedding.size}")
    }

    // Serialises floats as little-endian 32-bit values.
    private fun floatArrayToBlob(values: FloatArray): ByteArray {
        val buffer = ByteBuffer.allocate(values.size * 4).order(ByteOrder.LITTLE_ENDIAN)
        buffer.asFloatBuffer().put(values)
        return buffer.array()
    }

    // Inverse of [floatArrayToBlob]; trailing bytes that do not form a full float are ignored.
    private fun blobToFloatArray(blob: ByteArray): FloatArray {
        if (blob.isEmpty()) return FloatArray(0)
        val floats = FloatArray(blob.size / 4)
        ByteBuffer.wrap(blob).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer().get(floats)
        return floats
    }

    companion object {
        private const val DB_NAME = "face_feature.db"
        private const val DB_VERSION = 1
    }
}

View File

@@ -0,0 +1,61 @@
package com.digitalperson.face
import android.content.Context
import android.graphics.Canvas
import android.graphics.Color
import android.graphics.Paint
import android.graphics.RectF
import android.util.AttributeSet
import android.view.View
import java.util.Locale
/**
 * Transparent overlay that draws face detection boxes and scores on top of the
 * camera preview. Results are scaled from the analyzer frame to the view using
 * a centred letterbox fit.
 */
class FaceOverlayView @JvmOverloads constructor(
    context: Context,
    attrs: AttributeSet? = null,
) : View(context, attrs) {

    private val boxPaint = Paint(Paint.ANTI_ALIAS_FLAG).apply {
        color = Color.GREEN
        style = Paint.Style.STROKE
        strokeWidth = 4f
    }

    private val textPaint = Paint(Paint.ANTI_ALIAS_FLAG).apply {
        color = Color.GREEN
        textSize = 28f
    }

    // Written from the analysis thread, read on the UI thread in onDraw.
    @Volatile
    private var latestResult: FaceDetectionResult? = null

    /** Publishes a new detection result and schedules a redraw on the next frame. */
    fun updateResult(result: FaceDetectionResult) {
        latestResult = result
        postInvalidateOnAnimation()
    }

    override fun onDraw(canvas: Canvas) {
        super.onDraw(canvas)
        val result = latestResult ?: return
        if (result.sourceWidth <= 0 || result.sourceHeight <= 0) return
        val srcW = result.sourceWidth.toFloat()
        val srcH = result.sourceHeight.toFloat()
        val viewW = width.toFloat()
        val viewH = height.toFloat()
        if (viewW <= 0f || viewH <= 0f) return
        // Letterbox fit: uniform scale, centred inside the view.
        val scale = minOf(viewW / srcW, viewH / srcH)
        val dx = (viewW - srcW * scale) / 2f
        val dy = (viewH - srcH * scale) / 2f
        for (face in result.faces) {
            val rect = RectF(
                dx + face.left * scale,
                dy + face.top * scale,
                dx + face.right * scale,
                dy + face.bottom * scale,
            )
            canvas.drawRect(rect, boxPaint)
            // Locale.US keeps the score text stable ("0.97") regardless of the
            // device locale (some locales format floats with a decimal comma).
            canvas.drawText(String.format(Locale.US, "%.2f", face.score), rect.left, rect.top - 8f, textPaint)
        }
    }
}

View File

@@ -0,0 +1,129 @@
package com.digitalperson.face
import android.content.Context
import android.graphics.Bitmap
import android.util.Log
import com.digitalperson.config.AppConfig
import com.digitalperson.engine.ArcFaceEngineRKNN
import kotlin.math.sqrt
/**
 * Outcome of one identification attempt.
 *
 * @property matchedName name of the best matching stored profile, or null when
 *   no profile reached the similarity threshold (the raw best score is still
 *   reported in [similarity])
 * @property similarity cosine similarity of the best candidate; 0 when the
 *   recognizer is uninitialised or embedding extraction fails, -1 when the
 *   best comparison involved a degenerate (near-zero) vector
 * @property embeddingDim length of the extracted embedding, 0 on failure
 */
data class FaceRecognitionResult(
    val matchedName: String?,
    val similarity: Float,
    val embeddingDim: Int,
)
/**
 * On-device face identification against a locally persisted profile database.
 *
 * [initialize] loads the RKNN ArcFace engine and mirrors all stored profiles
 * into [cache]; [identify] compares a freshly extracted embedding against that
 * cache by cosine similarity.
 *
 * NOTE(review): [cache] is an unsynchronised ArrayList read by [identify] and
 * mutated by [initialize]/[addOrUpdateProfile] — confirm all calls run on a
 * single thread.
 */
class FaceRecognizer(context: Context) {
    private val appContext = context.applicationContext
    // Native embedding extractor (RKNN ArcFace).
    private val engine = ArcFaceEngineRKNN()
    // Persistent name -> embedding store backing the in-memory cache.
    private val store = FaceFeatureStore(appContext)
    // In-memory mirror of the stored profiles, scanned linearly on each identify().
    private val cache = ArrayList<FaceProfile>()

    // True once the engine is up and the cache is loaded; flipped by initialize()/release().
    @Volatile
    private var initialized = false

    /**
     * Initialises the native engine and loads all persisted profiles.
     *
     * @return true on success; false when the engine fails to initialise, in
     *   which case the recognizer stays unusable until called again.
     */
    fun initialize(): Boolean {
        Log.d(AppConfig.TAG, "[FaceRecognizer] initialize: starting...")
        val ok = engine.initialize(appContext)
        Log.d(AppConfig.TAG, "[FaceRecognizer] initialize: engine.initialize() returned $ok")
        if (!ok) {
            initialized = false
            Log.e(AppConfig.TAG, "[FaceRecognizer] initialize: failed - engine initialization failed")
            return false
        }
        cache.clear()
        val profiles = store.loadAllProfiles()
        cache.addAll(profiles)
        initialized = true
        Log.i(AppConfig.TAG, "[FaceRecognizer] initialized, profiles=${cache.size}")
        return true
    }

    /**
     * Extracts an embedding for [face] and returns the best cache match.
     *
     * matchedName is non-null only when the best cosine similarity reaches
     * SIMILARITY_THRESHOLD; the raw best score is reported either way.
     * Returns (null, 0, 0) when uninitialised or when extraction fails.
     */
    fun identify(bitmap: Bitmap, face: FaceBox): FaceRecognitionResult {
        if (!initialized) return FaceRecognitionResult(null, 0f, 0)
        val embedding = extractEmbedding(bitmap, face)
        if (embedding.isEmpty()) return FaceRecognitionResult(null, 0f, 0)
        var bestName: String? = null
        var bestScore = -1f
        for (p in cache) {
            // Skip profiles produced with a different embedding dimensionality.
            if (p.embedding.size != embedding.size) continue
            val score = cosineSimilarity(embedding, p.embedding)
            if (score > bestScore) {
                bestScore = score
                bestName = p.name
            }
        }
        if (bestScore >= AppConfig.FaceRecognition.SIMILARITY_THRESHOLD) {
            return FaceRecognitionResult(bestName, bestScore, embedding.size)
        }
        return FaceRecognitionResult(null, bestScore, embedding.size)
    }

    /** Raw embedding for [face]; empty array when the recognizer is uninitialised. */
    fun extractEmbedding(bitmap: Bitmap, face: FaceBox): FloatArray {
        if (!initialized) return FloatArray(0)
        return engine.extractEmbedding(bitmap, face.left, face.top, face.right, face.bottom)
    }

    /**
     * Persists (name, embedding) and refreshes the cache entry.
     *
     * The embedding is L2-normalised before storage; a null name is stored as "".
     *
     * NOTE(review): unnamed (null-name) entries are never removed from the
     * cache here, while the store keeps at most one row per unique name — the
     * cache and the DB can diverge for unnamed profiles; confirm intended.
     */
    fun addOrUpdateProfile(name: String?, embedding: FloatArray) {
        val normalized = normalize(embedding)
        store.upsertProfile(name ?: "", normalized)
        // Drop the stale cache entry for this name (if any) before re-adding.
        if (name != null) {
            cache.removeAll { it.name == name }
        }
        cache.add(FaceProfile(id = -1L, name = name ?: "", embedding = normalized))
    }

    /**
     * Enrols [embedding] as a new, unnamed profile unless a sufficiently
     * similar profile already exists.
     *
     * @return true when a new profile was added; false when a similar face was
     *   already enrolled.
     */
    fun addNewFace(embedding: FloatArray): Boolean {
        Log.d(AppConfig.TAG, "[FaceRecognizer] addNewFace: embedding size=${embedding.size}, cache size=${cache.size}")
        // Check whether a similar face is already enrolled.
        for (p in cache) {
            if (p.embedding.size != embedding.size) {
                Log.d(AppConfig.TAG, "[FaceRecognizer] Skipping profile with different embedding size: ${p.embedding.size}")
                continue
            }
            val score = cosineSimilarity(embedding, p.embedding)
            Log.d(AppConfig.TAG, "[FaceRecognizer] Comparing with profile '${p.name}': similarity=$score, threshold=${AppConfig.FaceRecognition.SIMILARITY_THRESHOLD}")
            if (score >= AppConfig.FaceRecognition.SIMILARITY_THRESHOLD) {
                // A similar face already exists; nothing to add.
                Log.i(AppConfig.TAG, "[FaceRecognizer] Similar face found: ${p.name} with similarity=$score, not adding new face")
                return false
            }
        }
        // No similar face found: enrol it without a name.
        Log.i(AppConfig.TAG, "[FaceRecognizer] No similar face found, adding new face")
        addOrUpdateProfile(null, embedding)
        return true
    }

    /** Releases the native engine and closes the store; cache contents are kept in memory. */
    fun release() {
        initialized = false
        engine.release()
        store.close()
    }

    // Cosine similarity clamped to [-1, 1]; returns -1 for near-zero vectors.
    // Assumes a.size == b.size (callers filter by size first).
    private fun cosineSimilarity(a: FloatArray, b: FloatArray): Float {
        var dot = 0f
        var na = 0f
        var nb = 0f
        for (i in a.indices) {
            dot += a[i] * b[i]
            na += a[i] * a[i]
            nb += b[i] * b[i]
        }
        if (na <= 1e-12f || nb <= 1e-12f) return -1f
        return (dot / (sqrt(na) * sqrt(nb))).coerceIn(-1f, 1f)
    }

    // L2-normalises v; the epsilon floor keeps a zero vector from producing NaNs.
    private fun normalize(v: FloatArray): FloatArray {
        var sum = 0f
        for (x in v) sum += x * x
        val norm = sqrt(sum.coerceAtLeast(1e-12f))
        val out = FloatArray(v.size)
        for (i in v.indices) out[i] = v[i] / norm
        return out
    }
}

View File

@@ -0,0 +1,87 @@
package com.digitalperson.face
import android.graphics.Bitmap
import android.graphics.BitmapFactory
import android.graphics.ImageFormat
import android.graphics.Matrix
import android.graphics.Rect
import android.graphics.YuvImage
import androidx.camera.core.ImageProxy
import java.io.ByteArrayOutputStream
/**
 * Converts a CameraX [ImageProxy] (YUV_420_888) into an ARGB_8888 [Bitmap].
 *
 * Conversion path: YUV planes -> NV21 -> JPEG (quality 80) -> Bitmap, followed
 * by a horizontal mirror and a rotation by the frame's rotationDegrees.
 */
object ImageProxyBitmapConverter {
    /**
     * @return the converted, mirrored and rotated bitmap, or null when the
     *   frame cannot be converted or decoded.
     */
    fun toBitmap(image: ImageProxy): Bitmap? {
        val nv21 = yuv420ToNv21(image) ?: return null
        val yuvImage = YuvImage(nv21, ImageFormat.NV21, image.width, image.height, null)
        val out = ByteArrayOutputStream()
        // JPEG round-trip is lossy (quality 80) but avoids a manual YUV->RGB conversion.
        if (!yuvImage.compressToJpeg(Rect(0, 0, image.width, image.height), 80, out)) {
            return null
        }
        val bytes = out.toByteArray()
        var bitmap = BitmapFactory.decodeByteArray(bytes, 0, bytes.size) ?: return null
        if (bitmap.config != Bitmap.Config.ARGB_8888) {
            val converted = bitmap.copy(Bitmap.Config.ARGB_8888, false)
            bitmap.recycle()
            bitmap = converted
        }
        val matrix = Matrix()
        // Front-camera frames need a horizontal flip.
        // NOTE: this assumes the front camera is in use; supporting the back
        // camera would require making the mirror conditional on lens facing.
        matrix.postScale(-1f, 1f, bitmap.width / 2f, bitmap.height / 2f)
        // Apply the sensor-to-display rotation.
        // NOTE(review): the mirror is applied before the rotation; for 90/270
        // degree rotations the order changes the result — confirm against the preview.
        val rotation = image.imageInfo.rotationDegrees
        if (rotation != 0) {
            matrix.postRotate(rotation.toFloat())
        }
        // Apply the combined transform in one pass.
        val transformed = Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
        bitmap.recycle()
        bitmap = transformed
        return bitmap
    }

    /**
     * Packs the three YUV_420_888 planes into a single NV21 buffer
     * (full-resolution Y followed by interleaved V/U), honouring each plane's
     * row and pixel strides.
     *
     * NOTE(review): assumes even width/height; odd dimensions would leave the
     * last chroma row/column unhandled — confirm the analyzer resolution.
     *
     * @return null when the frame does not carry three planes.
     */
    private fun yuv420ToNv21(image: ImageProxy): ByteArray? {
        val planes = image.planes
        if (planes.size < 3) return null
        val width = image.width
        val height = image.height
        val ySize = width * height
        val uvSize = width * height / 4
        val nv21 = ByteArray(ySize + uvSize * 2)
        val yPlane = planes[0]
        val yBuffer = yPlane.buffer
        val yRowStride = yPlane.rowStride
        var dst = 0
        // Copy the Y plane row by row, skipping any row-stride padding.
        for (row in 0 until height) {
            yBuffer.position(row * yRowStride)
            yBuffer.get(nv21, dst, width)
            dst += width
        }
        val uPlane = planes[1]
        val vPlane = planes[2]
        val uBuffer = uPlane.buffer
        val vBuffer = vPlane.buffer
        val uRowStride = uPlane.rowStride
        val vRowStride = vPlane.rowStride
        val uPixelStride = uPlane.pixelStride
        val vPixelStride = vPlane.pixelStride
        // NV21 wants V first, then U, for each 2x2 chroma sample.
        for (row in 0 until height / 2) {
            for (col in 0 until width / 2) {
                val uIndex = row * uRowStride + col * uPixelStride
                val vIndex = row * vRowStride + col * vPixelStride
                nv21[dst++] = vBuffer.get(vIndex)
                nv21[dst++] = uBuffer.get(uIndex)
            }
        }
        return nv21
    }
}

View File

@@ -7,6 +7,7 @@ class Live2DAvatarManager(private val glSurfaceView: GLSurfaceView) {
init {
glSurfaceView.setEGLContextClientVersion(2)
glSurfaceView.setPreserveEGLContextOnPause(true)
glSurfaceView.setRenderer(renderer)
glSurfaceView.renderMode = GLSurfaceView.RENDERMODE_CONTINUOUSLY
}
@@ -16,11 +17,15 @@ class Live2DAvatarManager(private val glSurfaceView: GLSurfaceView) {
}
fun setMood(mood: String) {
renderer.setMood(mood)
glSurfaceView.queueEvent {
renderer.setMood(mood)
}
}
fun startSpecificMotion(motionName: String) {
renderer.startSpecificMotion(motionName)
glSurfaceView.queueEvent {
renderer.startSpecificMotion(motionName)
}
}
fun onResume() {
@@ -32,6 +37,8 @@ class Live2DAvatarManager(private val glSurfaceView: GLSurfaceView) {
}
fun release() {
renderer.release()
glSurfaceView.queueEvent {
renderer.release()
}
}
}

View File

@@ -214,32 +214,8 @@ class Live2DCharacter : CubismUserModel() {
}
private fun loadMoodMotions(assets: AssetManager, modelDir: String) {
// 开心心情动作
moodMotions["开心"] = listOf(
"haru_g_m22.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m22.motion3.json"),
"haru_g_m21.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m21.motion3.json"),
"haru_g_m18.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m18.motion3.json")
).mapNotNull { (fileName, motion) ->
motion?.let {
motionFileMap[it] = fileName
it
}
}
// 伤心心情动作
moodMotions["伤心"] = listOf(
"haru_g_m25.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m25.motion3.json"),
"haru_g_m24.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m24.motion3.json"),
"haru_g_m05.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m05.motion3.json")
).mapNotNull { (fileName, motion) ->
motion?.let {
motionFileMap[it] = fileName
it
}
}
// 平和心情动作
moodMotions["平和"] = listOf(
// 中性心情动作
moodMotions["中性"] = listOf(
"haru_g_m15.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m15.motion3.json"),
"haru_g_m07.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m07.motion3.json"),
"haru_g_m06.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m06.motion3.json"),
@@ -252,8 +228,50 @@ class Live2DCharacter : CubismUserModel() {
}
}
// 惊讶心情动作
moodMotions["惊讶"] = listOf(
// 悲伤心情动作
moodMotions["悲伤"] = listOf(
"haru_g_m25.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m25.motion3.json"),
"haru_g_m24.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m24.motion3.json"),
"haru_g_m05.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m05.motion3.json"),
"haru_g_m16.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m16.motion3.json"),
"haru_g_m20.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m20.motion3.json"),
).mapNotNull { (fileName, motion) ->
motion?.let {
motionFileMap[it] = fileName
it
}
}
// 高兴心情动作
moodMotions["高兴"] = listOf(
"haru_g_m22.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m22.motion3.json"),
"haru_g_m21.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m21.motion3.json"),
"haru_g_m18.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m18.motion3.json"),
"haru_g_m09.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m09.motion3.json"),
"haru_g_m08.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m08.motion3.json")
).mapNotNull { (fileName, motion) ->
motion?.let {
motionFileMap[it] = fileName
it
}
}
// 生气心情动作
moodMotions["生气"] = listOf(
"haru_g_m10.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m10.motion3.json"),
"haru_g_m11.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m11.motion3.json"),
"haru_g_m04.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m04.motion3.json"),
"haru_g_m03.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m03.motion3.json"),
).mapNotNull { (fileName, motion) ->
motion?.let {
motionFileMap[it] = fileName
it
}
}
// 恐惧心情动作
moodMotions["恐惧"] = listOf(
"haru_g_m26.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m26.motion3.json"),
"haru_g_m12.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m12.motion3.json")
).mapNotNull { (fileName, motion) ->
@@ -263,18 +281,8 @@ class Live2DCharacter : CubismUserModel() {
}
}
// 关心心情动作
moodMotions["关心"] = listOf(
"haru_g_m17.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m17.motion3.json")
).mapNotNull { (fileName, motion) ->
motion?.let {
motionFileMap[it] = fileName
it
}
}
// 害羞心情动作
moodMotions["害羞"] = listOf(
// 撒娇心情动作
moodMotions["撒娇"] = listOf(
"haru_g_m19.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m19.motion3.json")
).mapNotNull { (fileName, motion) ->
motion?.let {
@@ -282,6 +290,38 @@ class Live2DCharacter : CubismUserModel() {
it
}
}
// 震惊心情动作
moodMotions["震惊"] = listOf(
"haru_g_m26.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m26.motion3.json"),
"haru_g_m12.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m12.motion3.json")
).mapNotNull { (fileName, motion) ->
motion?.let {
motionFileMap[it] = fileName
it
}
}
// 厌恶心情动作
moodMotions["厌恶"] = listOf(
"haru_g_m14.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m14.motion3.json"),
"haru_g_m13.motion3.json" to loadMotionByName(assets, modelDir, "haru_g_m13.motion3.json")
).mapNotNull { (fileName, motion) ->
motion?.let {
motionFileMap[it] = fileName
it
}
}
// 兼容旧的心情名称
moodMotions["开心"] = moodMotions["高兴"] ?: emptyList()
moodMotions["伤心"] = moodMotions["悲伤"] ?: emptyList()
moodMotions["平和"] = moodMotions["平静"] ?: emptyList()
moodMotions["惊讶"] = moodMotions["震惊"] ?: emptyList()
moodMotions["关心"] = moodMotions["中性"] ?: emptyList()
moodMotions["害羞"] = moodMotions["撒娇"] ?: emptyList()
}
private fun loadSpecificMotions(assets: AssetManager, modelDir: String) {

View File

@@ -14,6 +14,9 @@ import javax.microedition.khronos.opengles.GL10
class Live2DRenderer(
private val context: Context
) : GLSurfaceView.Renderer {
companion object {
private const val TAG = "Live2DRenderer"
}
@Volatile
private var speaking = false
@@ -25,6 +28,7 @@ class Live2DRenderer(
GLES20.glClearColor(0f, 0f, 0f, 0f)
ensureFrameworkInitialized()
startTimeMs = SystemClock.elapsedRealtime()
Log.i(TAG, "onSurfaceCreated")
runCatching {
val model = Live2DCharacter()
@@ -35,6 +39,7 @@ class Live2DRenderer(
)
model.bindTextures(context.assets, AppConfig.Avatar.MODEL_DIR)
character = model
Log.i(TAG, "Live2D model loaded and textures bound")
}.onFailure {
Log.e(AppConfig.TAG, "Load Live2D model failed: ${it.message}", it)
character = null

View File

@@ -0,0 +1,46 @@
package com.digitalperson.llm
/**
 * Streaming consumer for LLM output, demultiplexed into "thinking" tokens
 * (the content of <think>…</think> sections) and answer tokens.
 */
interface LLMManagerCallback {
    // One chunk produced inside a <think> section; finished=true marks the section/stream end.
    fun onThinking(msg: String, finished: Boolean)
    // One answer chunk outside <think>; finished=true marks the end of generation.
    fun onResult(msg: String, finished: Boolean)
}
/**
 * High-level wrapper around [RKLLM] that splits the raw token stream into the
 * "thinking" and answer channels of [LLMManagerCallback].
 *
 * The <think> / </think> marker tokens toggle the thinking state and are never
 * forwarded themselves; bare newline tokens in the answer stream are dropped.
 */
class LLMManager(modelPath: String, callback: LLMManagerCallback) :
    RKLLM(modelPath, object : LLMCallback {
        // True while the stream is inside a <think>…</think> section.
        var inThinking = false

        override fun onCallback(data: String, state: LLMCallback.State) {
            if (state == LLMCallback.State.NORMAL) {
                when (data) {
                    "<think>" -> inThinking = true
                    "</think>" -> {
                        inThinking = false
                        callback.onThinking("", true)
                    }
                    else -> if (inThinking) {
                        callback.onThinking(data, false)
                    } else {
                        // Bare newlines between answer tokens are not forwarded.
                        if (data != "\n") callback.onResult(data, false)
                    }
                }
            } else {
                // FINISH or ERROR: reset the thinking flag so a stream that
                // ended mid-<think> cannot poison the next generation, then
                // signal completion on both channels.
                inThinking = false
                callback.onThinking("", true)
                callback.onResult("", true)
            }
        }
    })
{
    /** Runs one user turn without a system prompt. */
    fun generateResponse(prompt: String) {
        val msg = "<User>$prompt<Assistant>"
        say(msg)
    }

    /** Runs one user turn with an explicit system prompt. */
    fun generateResponseWithSystem(systemPrompt: String, userPrompt: String) {
        val msg = "<System>$systemPrompt<User>$userPrompt<Assistant>"
        say(msg)
    }
}

View File

@@ -0,0 +1,52 @@
package com.digitalperson.llm
/** Low-level token callback fed from the native RKLLM runtime. */
interface LLMCallback {
    // ERROR: native failure; NORMAL: one generated token; FINISH: end of generation.
    enum class State {
        ERROR, NORMAL, FINISH
    }
    fun onCallback(data: String, state: State)
}
/**
 * Thin JNI wrapper around the Rockchip RKLLM runtime.
 *
 * The constructor loads the model synchronously and throws
 * [IllegalStateException] when the native runtime returns a null handle.
 */
open class RKLLM(modelPath: String, callback: LLMCallback) {
    companion object {
        init {
            // Native inference runtime; must be loaded before initLLM is called.
            System.loadLibrary("rkllmrt")
        }
    }

    // Opaque native handle; 0 means "not initialised / already destroyed".
    private var mInstance: Long = initLLM(modelPath)
    private val mCallback: LLMCallback = callback

    init {
        check(mInstance != 0L) { "RKLLM init failed: native handle is null" }
    }

    /**
     * Frees the native handle. Idempotent: a second call is a no-op instead of
     * passing a null handle to the native deinit (which the original did).
     */
    fun destroy() {
        if (mInstance != 0L) {
            deinitLLM(mInstance)
            mInstance = 0
        }
    }

    /** Submits [text] for inference; reports ERROR when already destroyed. */
    protected fun say(text: String) {
        if (mInstance == 0L) {
            mCallback.onCallback("RKLLM is not initialized", LLMCallback.State.ERROR)
            return
        }
        infer(mInstance, text)
    }

    /** Invoked from JNI: maps the native state code onto [LLMCallback.State]. */
    fun callbackFromNative(data: String, state: Int) {
        val mapped = when {
            state == 0 -> LLMCallback.State.FINISH
            state < 0 -> LLMCallback.State.ERROR
            else -> LLMCallback.State.NORMAL
        }
        mCallback.onCallback(data, mapped)
    }

    private external fun initLLM(modelPath: String): Long
    private external fun deinitLLM(handle: Long)
    private external fun infer(handle: Long, text: String)
}

View File

@@ -0,0 +1,330 @@
package com.digitalperson.tts
import android.content.Context
import android.media.AudioAttributes
import android.media.AudioFormat
import android.media.AudioManager
import android.media.AudioTrack
import android.util.Log
import com.digitalperson.config.AppConfig
import com.digitalperson.mood.MoodManager
import com.tencent.cloud.realtime.tts.RealTimeSpeechSynthesizer
import com.tencent.cloud.realtime.tts.RealTimeSpeechSynthesizerListener
import com.tencent.cloud.realtime.tts.RealTimeSpeechSynthesizerRequest
import com.tencent.cloud.realtime.tts.SpeechSynthesizerResponse
import com.tencent.cloud.realtime.tts.core.ws.Credential
import com.tencent.cloud.realtime.tts.core.ws.SpeechClient
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.launch
import java.nio.ByteBuffer
import java.util.UUID
import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.atomic.AtomicBoolean
/**
 * Streaming TTS playback backed by Tencent Cloud real-time speech synthesis.
 *
 * Text segments are queued via [enqueueSegment]; a single background worker
 * drains the queue, synthesises each segment through the cloud SDK and writes
 * the returned PCM into a 16 kHz mono [AudioTrack]. [enqueueEnd] marks the end
 * of a conversational turn.
 */
class QCloudTtsManager(private val context: Context) {
    companion object {
        private const val TAG = "QCloudTtsManager"
        // PCM sample rate requested from the cloud and used for playback.
        private const val SAMPLE_RATE = 16000
        // Shared websocket client, reused across synthesizer instances.
        private val proxy = SpeechClient()
    }
    private var audioTrack: AudioTrack? = null
    private var synthesizer: RealTimeSpeechSynthesizer? = null
    // Work items for the worker: a text segment, or an end-of-turn marker.
    private sealed class TtsQueueItem {
        data class Segment(val text: String) : TtsQueueItem()
        data object End : TtsQueueItem()
    }
    private val ttsQueue = LinkedBlockingQueue<TtsQueueItem>()
    // Set to abort the current turn; the worker bails on the next dequeued item.
    private val ttsStopped = AtomicBoolean(false)
    // Ensures at most one worker coroutine is draining the queue.
    private val ttsWorkerRunning = AtomicBoolean(false)
    private val ttsPlaying = AtomicBoolean(false)
    // Re-entrancy guard for interruptForNewTurn().
    private val interrupting = AtomicBoolean(false)
    private val ioScope = CoroutineScope(Dispatchers.IO)
    /** Hooks back into the conversation loop (speaking state, turn boundaries). */
    interface TtsCallback {
        fun onTtsStarted(text: String)
        fun onTtsCompleted()
        fun onTtsSegmentCompleted(durationMs: Long)
        fun isTtsStopped(): Boolean
        fun onClearAsrQueue()
        fun onSetSpeaking(speaking: Boolean)
        fun onEndTurn()
    }
    private var callback: TtsCallback? = null
    fun setCallback(callback: TtsCallback) {
        this.callback = callback
    }
    /** Prepares the playback AudioTrack; false when creation fails. */
    fun init(): Boolean {
        return try {
            initAudioTrack()
            true
        } catch (e: Exception) {
            Log.e(TAG, "Init QCloud TTS failed: ${e.message}", e)
            false
        }
    }
    // 16 kHz / mono / 16-bit streaming track for the synthesised PCM.
    private fun initAudioTrack() {
        val bufferSize = AudioTrack.getMinBufferSize(
            SAMPLE_RATE,
            AudioFormat.CHANNEL_OUT_MONO,
            AudioFormat.ENCODING_PCM_16BIT
        )
        val attr = AudioAttributes.Builder()
            .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
            .setUsage(AudioAttributes.USAGE_MEDIA)
            .build()
        val format = AudioFormat.Builder()
            .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
            .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
            .setSampleRate(SAMPLE_RATE)
            .build()
        audioTrack = AudioTrack(
            attr,
            format,
            bufferSize,
            AudioTrack.MODE_STREAM,
            AudioManager.AUDIO_SESSION_ID_GENERATE
        )
    }
    /** Queues one text segment (trailing punctuation stripped) and starts the worker. */
    fun enqueueSegment(seg: String) {
        if (ttsStopped.get()) {
            ttsStopped.set(false)
        }
        // Trailing sentence punctuation (ASCII and full-width) is not spoken.
        val cleanedSeg = seg.trimEnd('.', '。', '!', '!', '?', '?', ',', ',', ';', ';', ':', ':')
        ttsQueue.offer(TtsQueueItem.Segment(cleanedSeg))
        ensureTtsWorker()
    }
    /** Marks the end of the current turn. */
    fun enqueueEnd() {
        ttsQueue.offer(TtsQueueItem.End)
    }
    fun isPlaying(): Boolean = ttsPlaying.get()
    /** Clears pending segments and flags; nudges a stopped-but-running worker to exit. */
    fun reset() {
        val workerRunning = ttsWorkerRunning.get()
        val wasStopped = ttsStopped.get()
        ttsStopped.set(false)
        ttsPlaying.set(false)
        ttsQueue.clear()
        if (wasStopped && workerRunning) {
            ttsQueue.offer(TtsQueueItem.End)
        }
    }
    /** Hard-stops synthesis and playback; the End item unblocks the worker's take(). */
    fun stop() {
        ttsStopped.set(true)
        ttsPlaying.set(false)
        ttsQueue.clear()
        ttsQueue.offer(TtsQueueItem.End)
        try {
            synthesizer?.cancel()
            synthesizer = null
            audioTrack?.pause()
            audioTrack?.flush()
        } catch (_: Throwable) {
        }
    }
    /**
     * Interrupts any pending playback so a new user turn can start.
     *
     * @return true when there was playback to interrupt, false when idle or
     *   when another interrupt is already in progress.
     */
    fun interruptForNewTurn(waitTimeoutMs: Long = 300): Boolean {
        if (!interrupting.compareAndSet(false, true)) return false
        try {
            val hadPendingPlayback = ttsPlaying.get() || ttsWorkerRunning.get() || ttsQueue.isNotEmpty()
            if (!hadPendingPlayback) {
                ttsStopped.set(false)
                ttsPlaying.set(false)
                return false
            }
            ttsStopped.set(true)
            ttsPlaying.set(false)
            ttsQueue.clear()
            ttsQueue.offer(TtsQueueItem.End)
            try {
                synthesizer?.cancel()
                synthesizer = null
                audioTrack?.pause()
                audioTrack?.flush()
            } catch (_: Throwable) {
            }
            // Bounded wait for the worker to notice the stop flag and exit.
            val deadline = System.currentTimeMillis() + waitTimeoutMs
            while (ttsWorkerRunning.get() && System.currentTimeMillis() < deadline) {
                Thread.sleep(10)
            }
            if (ttsWorkerRunning.get()) {
                Log.w(TAG, "interruptForNewTurn timeout: worker still running")
            }
            ttsQueue.clear()
            ttsStopped.set(false)
            ttsPlaying.set(false)
            callback?.onSetSpeaking(false)
            return true
        } finally {
            interrupting.set(false)
        }
    }
    /** Frees the synthesizer and the AudioTrack. */
    fun release() {
        try {
            synthesizer?.cancel()
            synthesizer = null
        } catch (_: Throwable) {
        }
        try {
            audioTrack?.release()
            audioTrack = null
        } catch (_: Throwable) {
        }
    }
    // Starts the worker coroutine if none is running; restarts it when items
    // were enqueued while the previous worker was winding down.
    private fun ensureTtsWorker() {
        if (!ttsWorkerRunning.compareAndSet(false, true)) return
        ioScope.launch {
            try {
                runTtsWorker()
            } finally {
                ttsWorkerRunning.set(false)
                if (!ttsStopped.get() && ttsQueue.isNotEmpty()) {
                    ensureTtsWorker()
                }
            }
        }
    }
    // Drains the queue: synthesises each Segment, terminates the turn on End.
    // NOTE(review): assumes RealTimeSpeechSynthesizer.start() blocks until the
    // segment finishes; if the SDK call is asynchronous, consecutive segments
    // could overlap — confirm against the SDK documentation.
    private fun runTtsWorker() {
        val audioTrack = audioTrack ?: return
        while (true) {
            // Blocks until an item arrives; stop() pushes End to unblock this.
            val item = ttsQueue.take()
            if (ttsStopped.get()) break
            when (item) {
                is TtsQueueItem.Segment -> {
                    ttsPlaying.set(true)
                    callback?.onSetSpeaking(true)
                    Log.d(TAG, "QCloud TTS started: processing segment '${item.text}'")
                    callback?.onTtsStarted(item.text)
                    val startMs = System.currentTimeMillis()
                    try {
                        if (audioTrack.playState != AudioTrack.PLAYSTATE_PLAYING) {
                            audioTrack.play()
                        }
                        val credential = Credential(
                            AppConfig.QCloud.APP_ID,
                            AppConfig.QCloud.SECRET_ID,
                            AppConfig.QCloud.SECRET_KEY,
                            ""
                        )
                        val request = RealTimeSpeechSynthesizerRequest()
                        request.setVolume(0f) // volume, range [-10, 10]
                        request.setSpeed(0f) // speed, range [-2, 6]
                        request.setCodec("pcm") // output format: raw PCM
                        request.setSampleRate(SAMPLE_RATE) // audio sample rate
                        request.setVoiceType(601010) // voice/timbre id
                        request.setEnableSubtitle(true) // enable timestamp output
                        // Map the current mood onto the cloud emotion category.
                        val currentMood = MoodManager.getCurrentMood()
                        val emotionCategory = when (currentMood) {
                            "中性" -> "neutral"
                            "悲伤" -> "sad"
                            "高兴" -> "happy"
                            "生气" -> "angry"
                            "恐惧" -> "fear"
                            "撒娇" -> "sajiao"
                            "震惊" -> "amaze"
                            "厌恶" -> "disgusted"
                            "平静" -> "peaceful"
                            // Legacy mood names kept for compatibility.
                            "开心" -> "happy"
                            "伤心" -> "sad"
                            "平和" -> "peaceful"
                            "惊讶" -> "amaze"
                            "关心" -> "neutral"
                            "害羞" -> "sajiao"
                            else -> "neutral"
                        }
                        request.setEmotionCategory(emotionCategory) // emotion of the synthesised audio
                        request.setEmotionIntensity(100) // emotion strength
                        request.setSessionId(UUID.randomUUID().toString()) // sessionId
                        request.setText(item.text) // text to synthesise
                        val listener = object : RealTimeSpeechSynthesizerListener() {
                            override fun onSynthesisStart(response: SpeechSynthesizerResponse) {
                                Log.d(TAG, "onSynthesisStart: ${response.sessionId}")
                            }
                            override fun onSynthesisEnd(response: SpeechSynthesizerResponse) {
                                Log.d(TAG, "onSynthesisEnd: ${response.sessionId}")
                                val ttsMs = System.currentTimeMillis() - startMs
                                callback?.onTtsSegmentCompleted(ttsMs)
                            }
                            override fun onAudioResult(buffer: ByteBuffer) {
                                val data = ByteArray(buffer.remaining())
                                buffer.get(data)
                                // Play each PCM chunk as it arrives.
                                audioTrack.write(data, 0, data.size)
                            }
                            override fun onTextResult(response: SpeechSynthesizerResponse) {
                                Log.d(TAG, "onTextResult: ${response.sessionId}")
                            }
                            override fun onSynthesisCancel() {
                                Log.d(TAG, "onSynthesisCancel")
                            }
                            override fun onSynthesisFail(response: SpeechSynthesizerResponse) {
                                Log.e(TAG, "onSynthesisFail: ${response.sessionId}, error: ${response.message}")
                            }
                        }
                        synthesizer = RealTimeSpeechSynthesizer(proxy, credential, request, listener)
                        synthesizer?.start()
                    } catch (e: Exception) {
                        Log.e(TAG, "QCloud TTS error: ${e.message}", e)
                    }
                }
                TtsQueueItem.End -> {
                    callback?.onClearAsrQueue()
                    waitForPlaybackComplete(audioTrack)
                    callback?.onTtsCompleted()
                    ttsPlaying.set(false)
                    callback?.onSetSpeaking(false)
                    callback?.onEndTurn()
                    break
                }
            }
        }
    }
    // Fixed one-second wait for the tail of buffered playback.
    // NOTE(review): a blind sleep does not track the actual buffered audio;
    // AudioTrack's playback head position would be more accurate — confirm
    // whether 1 s is sufficient for the chosen buffer size.
    private fun waitForPlaybackComplete(audioTrack: AudioTrack) {
        Thread.sleep(1000)
    }
}

View File

@@ -0,0 +1,181 @@
package com.digitalperson.tts
import android.content.Context
import android.util.Log
/**
 * Facade that routes TTS calls to either the local engine ([TtsManager]) or
 * Tencent Cloud TTS ([QCloudTtsManager]) based on [setUseQCloudTts].
 *
 * Fix: a callback registered via [setCallback] before [init] used to be lost
 * silently, because the delegate engines did not exist yet when the forwarding
 * adapters were installed. The callback is now stored and re-wired after the
 * engines are created in [init].
 */
class TtsController(private val context: Context) {
    companion object {
        private const val TAG = "TtsController"
    }

    private var localTts: TtsManager? = null
    private var qcloudTts: QCloudTtsManager? = null
    // Routing flag: true -> cloud TTS, false -> local TTS.
    private var useQCloudTts = false

    /** Unified callback surface forwarded to whichever engine is active. */
    interface TtsCallback {
        fun onTtsStarted(text: String)
        fun onTtsCompleted()
        fun onTtsSegmentCompleted(durationMs: Long)
        fun isTtsStopped(): Boolean
        fun onClearAsrQueue()
        fun onSetSpeaking(speaking: Boolean)
        fun onEndTurn()
    }

    private var callback: TtsCallback? = null

    /**
     * Registers the conversation callback. Safe to call before or after
     * [init]; the forwarding adapters are (re)installed once engines exist.
     */
    fun setCallback(callback: TtsCallback) {
        this.callback = callback
        wireCallbacks(callback)
    }

    // Installs forwarding adapters on whichever delegate engines currently exist.
    private fun wireCallbacks(callback: TtsCallback) {
        localTts?.setCallback(object : TtsManager.TtsCallback {
            override fun onTtsStarted(text: String) {
                callback.onTtsStarted(text)
            }
            override fun onTtsCompleted() {
                callback.onTtsCompleted()
            }
            override fun onTtsSegmentCompleted(durationMs: Long) {
                callback.onTtsSegmentCompleted(durationMs)
            }
            override fun isTtsStopped(): Boolean {
                return callback.isTtsStopped()
            }
            override fun onClearAsrQueue() {
                callback.onClearAsrQueue()
            }
            override fun onSetSpeaking(speaking: Boolean) {
                callback.onSetSpeaking(speaking)
            }
            // Trace hooks are intentionally unused by this facade.
            override fun getCurrentTrace() = null
            override fun onTraceMarkTtsRequestEnqueued() {
            }
            override fun onTraceMarkTtsSynthesisStart() {
            }
            override fun onTraceMarkTtsFirstPcmReady() {
            }
            override fun onTraceMarkTtsFirstAudioPlay() {
            }
            override fun onTraceMarkTtsDone() {
            }
            override fun onTraceAddDuration(name: String, value: Long) {
            }
            override fun onEndTurn() {
                callback.onEndTurn()
            }
        })
        qcloudTts?.setCallback(object : QCloudTtsManager.TtsCallback {
            override fun onTtsStarted(text: String) {
                callback.onTtsStarted(text)
            }
            override fun onTtsCompleted() {
                callback.onTtsCompleted()
            }
            override fun onTtsSegmentCompleted(durationMs: Long) {
                callback.onTtsSegmentCompleted(durationMs)
            }
            override fun isTtsStopped(): Boolean {
                return callback.isTtsStopped()
            }
            override fun onClearAsrQueue() {
                callback.onClearAsrQueue()
            }
            override fun onSetSpeaking(speaking: Boolean) {
                callback.onSetSpeaking(speaking)
            }
            override fun onEndTurn() {
                callback.onEndTurn()
            }
        })
    }

    /**
     * Creates and initialises both engines.
     * @return true when at least one engine initialised successfully.
     */
    fun init(): Boolean {
        // Local TTS engine.
        localTts = TtsManager(context)
        val localInit = localTts?.initTtsAndAudioTrack() ?: false
        Log.d(TAG, "Local TTS init: $localInit")
        // Tencent Cloud TTS engine.
        qcloudTts = QCloudTtsManager(context)
        val qcloudInit = qcloudTts?.init() ?: false
        Log.d(TAG, "QCloud TTS init: $qcloudInit")
        // Re-apply a callback that was registered before the engines existed;
        // without this the adapters above were never installed.
        callback?.let { wireCallbacks(it) }
        return localInit || qcloudInit
    }

    /**
     * Switches the active engine.
     * NOTE(review): switching while the other engine is mid-playback does not
     * stop it; call [stop] first if a hard switch is required.
     */
    fun setUseQCloudTts(useQCloud: Boolean) {
        this.useQCloudTts = useQCloud
        Log.d(TAG, "TTS mode changed: ${if (useQCloud) "QCloud" else "Local"}")
    }

    /** Queues one text segment on the active engine. */
    fun enqueueSegment(seg: String) {
        if (useQCloudTts) {
            qcloudTts?.enqueueSegment(seg)
        } else {
            localTts?.enqueueSegment(seg)
        }
    }

    /** Marks end-of-turn on the active engine. */
    fun enqueueEnd() {
        if (useQCloudTts) {
            qcloudTts?.enqueueEnd()
        } else {
            localTts?.enqueueEnd()
        }
    }

    /** True while the active engine is playing audio. */
    fun isPlaying(): Boolean {
        return if (useQCloudTts) {
            qcloudTts?.isPlaying() ?: false
        } else {
            localTts?.isPlaying() ?: false
        }
    }

    /** Resets the active engine's queue and flags. */
    fun reset() {
        if (useQCloudTts) {
            qcloudTts?.reset()
        } else {
            localTts?.reset()
        }
    }

    /** Hard-stops the active engine. */
    fun stop() {
        if (useQCloudTts) {
            qcloudTts?.stop()
        } else {
            localTts?.stop()
        }
    }

    /** Interrupts ongoing playback on the active engine for a new user turn. */
    fun interruptForNewTurn(waitTimeoutMs: Long = 300): Boolean {
        return if (useQCloudTts) {
            qcloudTts?.interruptForNewTurn(waitTimeoutMs) ?: false
        } else {
            localTts?.interruptForNewTurn(waitTimeoutMs) ?: false
        }
    }

    /** Releases both engines regardless of which one is active. */
    fun release() {
        localTts?.release()
        qcloudTts?.release()
    }
}

View File

@@ -1,12 +1,14 @@
package com.digitalperson.ui
import android.app.Activity
import android.app.ProgressDialog
import android.opengl.GLSurfaceView
import android.text.method.ScrollingMovementMethod
import android.view.MotionEvent
import android.widget.Button
import android.widget.LinearLayout
import android.widget.ScrollView
import android.widget.Switch
import android.widget.TextView
import android.widget.Toast
import com.digitalperson.live2d.Live2DAvatarManager
@@ -18,7 +20,10 @@ class Live2DUiManager(private val activity: Activity) {
private var stopButton: Button? = null
private var recordButton: Button? = null
private var traditionalButtons: LinearLayout? = null
private var llmModeSwitch: Switch? = null
private var llmModeSwitchRow: LinearLayout? = null
private var avatarManager: Live2DAvatarManager? = null
private var downloadProgressDialog: ProgressDialog? = null
private var lastUiText: String = ""
@@ -29,6 +34,8 @@ class Live2DUiManager(private val activity: Activity) {
stopButtonId: Int = -1,
recordButtonId: Int = -1,
traditionalButtonsId: Int = -1,
llmModeSwitchId: Int = -1,
llmModeSwitchRowId: Int = -1,
silentPlayerViewId: Int,
speakingPlayerViewId: Int,
live2dViewId: Int
@@ -39,12 +46,17 @@ class Live2DUiManager(private val activity: Activity) {
if (stopButtonId != -1) stopButton = activity.findViewById(stopButtonId)
if (recordButtonId != -1) recordButton = activity.findViewById(recordButtonId)
if (traditionalButtonsId != -1) traditionalButtons = activity.findViewById(traditionalButtonsId)
if (llmModeSwitchId != -1) llmModeSwitch = activity.findViewById(llmModeSwitchId)
if (llmModeSwitchRowId != -1) llmModeSwitchRow = activity.findViewById(llmModeSwitchRowId)
textView?.movementMethod = ScrollingMovementMethod()
val glView = activity.findViewById<GLSurfaceView>(live2dViewId)
avatarManager = Live2DAvatarManager(glView)
avatarManager?.setSpeaking(false)
// 默认隐藏本地 LLM 开关
llmModeSwitchRow?.visibility = LinearLayout.GONE
}
fun setStartButtonListener(listener: () -> Unit) {
@@ -131,6 +143,72 @@ class Live2DUiManager(private val activity: Activity) {
}
}
/**
 * Shows or hides the local-LLM switch row. Safe to call from any thread;
 * the visibility change is posted to the UI thread.
 */
fun showLLMSwitch(show: Boolean) {
    activity.runOnUiThread {
        val visibility = if (show) LinearLayout.VISIBLE else LinearLayout.GONE
        llmModeSwitchRow?.visibility = visibility
    }
}
/**
 * Registers a callback invoked with the new checked state whenever the
 * LLM mode switch is toggled. No-op when the switch view was not resolved.
 */
fun setLLMSwitchListener(listener: (Boolean) -> Unit) {
    llmModeSwitch?.setOnCheckedChangeListener { _, checked -> listener(checked) }
}
/**
 * Programmatically sets the checked state of the LLM mode switch on the UI thread.
 * NOTE(review): this fires any registered change listener — confirm callers expect that.
 */
fun setLLMSwitchChecked(checked: Boolean) {
    activity.runOnUiThread { llmModeSwitch?.isChecked = checked }
}
/**
 * Shows a modal, non-cancelable horizontal progress dialog for the LLM
 * model download. NOTE(review): ProgressDialog is deprecated on modern
 * Android; consider migrating to a DialogFragment with a ProgressBar.
 */
fun showDownloadProgressDialog() {
    activity.runOnUiThread {
        val dialog = ProgressDialog(activity)
        dialog.setTitle("下载模型")
        dialog.setMessage("正在下载 LLM 模型文件,请稍候...")
        dialog.setProgressStyle(ProgressDialog.STYLE_HORIZONTAL)
        dialog.isIndeterminate = false
        dialog.setCancelable(false)
        dialog.setCanceledOnTouchOutside(false)
        downloadProgressDialog = dialog
        dialog.show()
    }
}
/**
 * Updates the download dialog's message and progress bar.
 * No-op when the dialog is not currently showing.
 *
 * @param fileName     file currently being downloaded
 * @param downloadedMB megabytes received so far for this file
 * @param totalMB      total megabytes expected for this file
 * @param progress     overall percentage (0-100) across all files
 */
fun updateDownloadProgress(fileName: String, downloadedMB: Long, totalMB: Long, progress: Int) {
    activity.runOnUiThread {
        val dialog = downloadProgressDialog ?: return@runOnUiThread
        dialog.setMessage("正在下载: $fileName\n$downloadedMB MB / $totalMB MB")
        dialog.progress = progress
    }
}
/**
 * Dismisses the download progress dialog (if any) and drops the reference
 * so a fresh dialog is created on the next download.
 */
fun dismissDownloadProgressDialog() {
    activity.runOnUiThread {
        downloadProgressDialog?.let { dialog -> dialog.dismiss() }
        downloadProgressDialog = null
    }
}
// Forwards the host Activity's onResume to the avatar manager — presumably
// resumes GL rendering; confirm in Live2DAvatarManager.
fun onResume() {
    avatarManager?.onResume()
}

View File

@@ -1,6 +1,8 @@
package com.digitalperson.util
import android.content.ContentUris
import android.content.Context
import android.provider.MediaStore
import android.util.Log
import com.digitalperson.config.AppConfig
import java.io.File
@@ -48,13 +50,270 @@ object FileHelper {
)
return copyAssetsToInternal(context, AppConfig.Asr.MODEL_DIR, outDir, files)
}
/** Copies the RetinaFace model from assets into internal storage; returns the target directory. */
@JvmStatic
fun copyRetinaFaceAssets(context: Context): File {
    val targetDir = File(context.filesDir, AppConfig.Face.MODEL_DIR)
    val assetFiles = arrayOf(AppConfig.Face.MODEL_NAME)
    return copyAssetsToInternal(context, AppConfig.Face.MODEL_DIR, targetDir, assetFiles)
}
/** Copies the InsightFace recognition model from assets into internal storage; returns the target directory. */
@JvmStatic
fun copyInsightFaceAssets(context: Context): File {
    val targetDir = File(context.filesDir, AppConfig.FaceRecognition.MODEL_DIR)
    val assetFiles = arrayOf(AppConfig.FaceRecognition.MODEL_NAME)
    return copyAssetsToInternal(context, AppConfig.FaceRecognition.MODEL_DIR, targetDir, assetFiles)
}
/**
 * Ensures [dir] exists, creating it (and missing parents) when needed.
 *
 * Fixes over the previous revision: the first unconditional `mkdirs()` made
 * the second existence check dead code, and the failure fallback fabricated a
 * "/data/data/<dirname>/files/llm" path from a directory *name* rather than
 * the package name — a path that was wrong for non-LLM callers and was never
 * actually created. On failure we now log and return [dir] unchanged; callers
 * already guard with exists() checks on the files inside.
 *
 * @return the same [dir] instance, whether or not creation succeeded
 */
fun ensureDir(dir: File): File {
    // The trailing exists() re-check tolerates a concurrent mkdirs() racing us.
    if (!dir.exists() && !dir.mkdirs() && !dir.exists()) {
        Log.e(TAG, "Failed to create directory: ${dir.absolutePath}")
    }
    return dir
}
/** Returns the app-internal directory used for captured ASR audio, creating it if absent. */
fun getAsrAudioDir(context: Context): File = ensureDir(File(context.filesDir, "asr_audio"))
// @JvmStatic
// Name of the RKLLM model file currently in use.
private const val MODEL_FILE_NAME = "Qwen3-0.6B-rk3588-w8a8.rkllm"
/**
 * Resolves the absolute path of the RKLLM model file under the app-internal
 * "llm" directory. The path is returned even when the file does not yet
 * exist; presence is only logged here, not enforced.
 */
fun getLLMModelPath(context: Context): String {
    Log.d(TAG, "=== getLLMModelPath START ===")
    val modelDir = ensureDir(File(context.filesDir, "llm"))
    Log.d(TAG, "Loading models from: ${modelDir.absolutePath}")
    val modelFile = File(modelDir, MODEL_FILE_NAME)
    if (modelFile.exists()) {
        Log.i(TAG, "RKLLM model exists, size: ${modelFile.length() / (1024*1024)} MB")
    } else {
        Log.e(TAG, "RKLLM model not found: ${modelFile.absolutePath}")
    }
    val path = modelFile.absolutePath
    Log.i(TAG, "Using RKLLM model path: $path")
    Log.d(TAG, "=== getLLMModelPath END ===")
    return path
}
/**
 * Downloads any missing LLM model files into the app-internal "llm" directory
 * on a background thread, reporting per-chunk progress and a final result.
 *
 * Fixes over the previous revision:
 *  - a stale comment claimed the DeepSeek-R1-Distill-Qwen-1.5B model while
 *    MODEL_FILE_NAME is the Qwen3-0.6B build,
 *  - the fallback-size log printed the cumulative total instead of the
 *    per-file estimate,
 *  - the download server base URL was duplicated in two string literals.
 *
 * @param context    used to resolve the internal files directory
 * @param onProgress (fileName, downloadedBytes, totalBytesForFile, overallPercent 0-100)
 * @param onComplete (success, user-facing message)
 */
@JvmStatic
fun downloadModelFilesWithProgress(
    context: Context,
    onProgress: (String, Long, Long, Int) -> Unit,
    onComplete: (Boolean, String) -> Unit
) {
    Log.d(TAG, "=== downloadModelFilesWithProgress START ===")
    val llmDir = ensureDir(File(context.filesDir, "llm"))
    // NOTE(review): hard-coded LAN address — should come from configuration.
    val baseUrl = "http://192.168.1.19:5000/download"
    val modelFiles = listOf(MODEL_FILE_NAME)
    // Rough per-file estimate (~1 GB) used when the server reports no size.
    val fallbackSizeBytes = 1L * 1024 * 1024 * 1024
    // All network work happens off the calling thread.
    Thread {
        try {
            var allSuccess = true
            var totalDownloaded = 0L
            var totalSize = 0L
            // Pass 1: sum the sizes of every file that still needs downloading,
            // so the progress percentage can span all files.
            for (fileName in modelFiles) {
                val modelFile = File(llmDir, fileName)
                if (!modelFile.exists() || modelFile.length() == 0L) {
                    val size = getFileSizeFromServer("$baseUrl/$fileName")
                    if (size > 0) {
                        totalSize += size
                    } else {
                        totalSize += fallbackSizeBytes
                        Log.i(TAG, "Using estimated size for $fileName: ${fallbackSizeBytes / (1024*1024)} MB")
                    }
                }
            }
            // Pass 2: download whatever is missing or empty.
            for (fileName in modelFiles) {
                val modelFile = File(llmDir, fileName)
                if (!modelFile.exists() || modelFile.length() == 0L) {
                    Log.i(TAG, "Downloading model file: $fileName")
                    try {
                        downloadFileWithProgress("$baseUrl/$fileName", modelFile) { downloaded, total ->
                            val progress = if (totalSize > 0) {
                                ((totalDownloaded + downloaded) * 100 / totalSize).toInt()
                            } else 0
                            onProgress(fileName, downloaded, total, progress)
                        }
                        totalDownloaded += modelFile.length()
                        Log.i(TAG, "Downloaded model file: $fileName, size: ${modelFile.length() / (1024*1024)} MB")
                    } catch (e: Exception) {
                        // Best-effort: record the failure and continue with the rest.
                        Log.e(TAG, "Failed to download model file $fileName: ${e.message}")
                        allSuccess = false
                    }
                } else {
                    totalDownloaded += modelFile.length()
                    Log.i(TAG, "Model file exists: $fileName, size: ${modelFile.length() / (1024*1024)} MB")
                }
            }
            Log.d(TAG, "=== downloadModelFilesWithProgress END ===")
            if (allSuccess) {
                onComplete(true, "模型下载完成")
            } else {
                onComplete(false, "部分模型下载失败")
            }
        } catch (e: Exception) {
            Log.e(TAG, "Download failed: ${e.message}")
            onComplete(false, "下载失败: ${e.message}")
        }
    }.start()
}
/**
 * Performs an HTTP HEAD request against [url] and returns the reported
 * Content-Length in bytes, or 0 when it is missing, malformed, negative,
 * or the request fails entirely.
 */
private fun getFileSizeFromServer(url: String): Long {
    return try {
        val conn = java.net.URL(url).openConnection() as java.net.HttpURLConnection
        conn.requestMethod = "HEAD"
        conn.connectTimeout = 15000
        conn.readTimeout = 15000
        // Parse the raw header ourselves so sizes over 2 GB do not overflow int.
        val header = conn.getHeaderField("Content-Length")
        val size: Long
        if (header == null) {
            val fallback = conn.contentLength
            size = if (fallback > 0) {
                fallback.toLong()
            } else {
                Log.w(TAG, "Content-Length not available or invalid: $fallback")
                0L
            }
        } else {
            val parsed = header.toLongOrNull()
            size = when {
                parsed == null -> {
                    Log.w(TAG, "Invalid Content-Length format: $header")
                    0L
                }
                parsed < 0 -> {
                    Log.w(TAG, "Invalid Content-Length value: $parsed")
                    0L
                }
                else -> parsed
            }
        }
        conn.disconnect()
        Log.i(TAG, "File size for $url: $size bytes")
        size
    } catch (e: Exception) {
        Log.w(TAG, "Failed to get file size: ${e.message}")
        0
    }
}
/**
 * Streams [url] into [destination], invoking [onProgress] with
 * (bytesDownloaded, totalBytes) after every chunk; totalBytes is 0 when the
 * server does not report a usable Content-Length.
 *
 * Fix: the previous revision never checked the HTTP status, so an error body
 * (e.g. a 404 page) was silently written to [destination] and later passed
 * the exists()/length() checks as a valid model file. Non-2xx responses now
 * fail fast before anything is written.
 *
 * @throws java.io.IOException on a non-2xx response or stream failure
 */
private fun downloadFileWithProgress(
    url: String,
    destination: File,
    onProgress: (Long, Long) -> Unit
) {
    val connection = java.net.URL(url).openConnection() as java.net.HttpURLConnection
    connection.connectTimeout = 30000
    connection.readTimeout = 6000000 // model files are huge; tolerate very slow links
    try {
        val code = connection.responseCode
        if (code !in 200..299) {
            throw java.io.IOException("HTTP $code while downloading $url")
        }
        // Read Content-Length from the raw header so >2 GB files don't overflow int.
        val header = connection.getHeaderField("Content-Length")
        val totalSize = if (header != null) {
            header.toLongOrNull() ?: run {
                Log.w(TAG, "Invalid Content-Length format: $header")
                0L
            }
        } else {
            connection.contentLength.toLong()
        }
        Log.i(TAG, "Downloading file $url, size: $totalSize bytes")
        connection.inputStream.use { input ->
            FileOutputStream(destination).use { output ->
                val buffer = ByteArray(8192)
                var downloaded = 0L
                while (true) {
                    val read = input.read(buffer)
                    if (read == -1) break
                    output.write(buffer, 0, read)
                    downloaded += read
                    onProgress(downloaded, totalSize)
                }
            }
        }
    } finally {
        connection.disconnect()
    }
}
/**
 * Reports whether the RKLLM model file is present and non-empty in the
 * app-internal "llm" directory.
 */
@JvmStatic
fun isLocalLLMAvailable(context: Context): Boolean {
    val modelFile = File(File(context.filesDir, "llm"), MODEL_FILE_NAME)
    val available = modelFile.exists() && modelFile.length() > 0
    Log.i(TAG, "LLM model check: rkllm=$available")
    val sizeMb = if (modelFile.exists()) modelFile.length() / (1024*1024) else 0
    Log.i(TAG, "RKLLM file: ${modelFile.absolutePath}, size: $sizeMb MB")
    return available
}
/**
 * Downloads [url] into [destination] in one shot, without progress reporting.
 *
 * Fix: non-2xx responses now throw instead of silently writing the error
 * body to [destination].
 *
 * @throws java.io.IOException on a non-2xx response or stream failure
 */
private fun downloadFile(url: String, destination: File) {
    val connection = java.net.URL(url).openConnection() as java.net.HttpURLConnection
    connection.connectTimeout = 30000 // 30 s connect timeout
    connection.readTimeout = 60000    // 60 s read timeout
    try {
        val code = connection.responseCode
        if (code !in 200..299) {
            throw java.io.IOException("HTTP $code while downloading $url")
        }
        connection.inputStream.use { input ->
            FileOutputStream(destination).use { output ->
                input.copyTo(output)
            }
        }
    } finally {
        connection.disconnect()
    }
}
}