live2d model

This commit is contained in:
gcw_4spBpAfv
2026-03-02 09:25:50 +08:00
parent d63d4b03cf
commit 2f6166ab6c
179 changed files with 100625 additions and 2018 deletions

View File

@@ -0,0 +1,418 @@
package com.digitalperson
import android.Manifest
import android.content.pm.PackageManager
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import com.digitalperson.cloud.CloudApiManager
import com.digitalperson.audio.AudioProcessor
import com.digitalperson.vad.VadManager
import com.digitalperson.asr.AsrManager
import com.digitalperson.tts.TtsManager
import com.digitalperson.ui.Live2DUiManager
import com.digitalperson.config.AppConfig
import com.digitalperson.metrics.TraceManager
import com.digitalperson.metrics.TraceSession
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.cancel
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
/**
 * Voice-chat activity driving a Live2D avatar.
 *
 * Pipeline per turn: microphone -> VAD segmentation -> ASR (SenseVoice/RKNN) ->
 * cloud LLM (optionally streaming) -> sentence segmenter -> local TTS playback.
 * Heavy native model init and the audio read loop run on [ioScope]; all UI
 * mutation is funneled through [uiManager] on the main thread.
 */
class Live2DChatActivity : AppCompatActivity() {
    // Pipeline managers. All of them are constructed synchronously in onCreate
    // BEFORE any coroutine that reads them is launched (see onCreate ordering note).
    private lateinit var uiManager: Live2DUiManager
    private lateinit var vadManager: VadManager
    private lateinit var asrManager: AsrManager
    private lateinit var ttsManager: TtsManager
    private lateinit var audioProcessor: AudioProcessor
    private lateinit var cloudApiManager: CloudApiManager

    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    // Written on the main thread, read from the IO recording loop.
    @Volatile
    private var isRecording: Boolean = false

    // SupervisorJob so one failed child (e.g. the ASR worker) does not cancel siblings.
    private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
    private var recordingJob: Job? = null

    // Guards native VAD/ASR init and release, which must not run concurrently.
    private val nativeLock = Any()

    // Buffers streamed LLM text into sentence-sized TTS segments.
    private val segmenter = StreamingTextSegmenter(
        maxLen = AppConfig.Tts.MAX_LEN,
        maxWaitMs = AppConfig.Tts.MAX_WAIT_MS
    )

    // Per-turn latency trace; null between turns.
    private var currentTrace: TraceSession? = null

    // True from the moment ASR text is sent to the LLM until a response/error arrives.
    @Volatile private var llmInFlight: Boolean = false

    private var enableStreaming = false

    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val granted = requestCode == AppConfig.REQUEST_RECORD_AUDIO_PERMISSION &&
            grantResults.isNotEmpty() &&
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        if (!granted) {
            // Without RECORD_AUDIO the whole pipeline is useless; bail out.
            Log.e(AppConfig.TAG, "Audio record is disallowed")
            finish()
        }
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_live2d_chat)

        uiManager = Live2DUiManager(this)
        uiManager.initViews(
            textViewId = R.id.my_text,
            scrollViewId = R.id.scroll_view,
            startButtonId = R.id.start_button,
            stopButtonId = R.id.stop_button,
            silentPlayerViewId = 0,
            speakingPlayerViewId = 0,
            live2dViewId = R.id.live2d_view
        )
        uiManager.setStartButtonListener { onStartClicked() }
        uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }

        ActivityCompat.requestPermissions(this, permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)

        // Construct ALL pipeline managers before launching the background init
        // coroutine. The previous ordering assigned ttsManager/cloudApiManager
        // only AFTER ioScope.launch, racing the coroutine against lateinit
        // initialization (potential UninitializedPropertyAccessException).
        audioProcessor = AudioProcessor(this)

        asrManager = AsrManager(this)
        asrManager.setAudioProcessor(audioProcessor)
        asrManager.setCallback(createAsrCallback())

        vadManager = VadManager(this)
        vadManager.setCallback(createVadCallback())

        ttsManager = TtsManager(this)
        ttsManager.setCallback(createTtsCallback())

        cloudApiManager = CloudApiManager(createCloudApiListener(), applicationContext)
        cloudApiManager.setEnableStreaming(enableStreaming)

        // The streaming toggle is optional in the layout; its listener touches
        // cloudApiManager, which is guaranteed initialized at this point.
        try {
            val streamingSwitch = findViewById<android.widget.Switch>(R.id.streaming_switch)
            streamingSwitch.isChecked = enableStreaming
            streamingSwitch.setOnCheckedChangeListener { _, isChecked ->
                enableStreaming = isChecked
                cloudApiManager.setEnableStreaming(isChecked)
                uiManager.showToast("流式输出已${if (isChecked) "启用" else "禁用"}")
            }
        } catch (e: Exception) {
            Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
        }

        uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
        uiManager.setText("初始化中…")

        // Heavy native model loading happens off the main thread; buttons are
        // re-enabled only once every model is ready.
        ioScope.launch {
            try {
                Log.i(AppConfig.TAG, "Init VAD + SenseVoice(RKNN) + TTS (background)")
                synchronized(nativeLock) {
                    vadManager.initVadModel()
                    asrManager.initSenseVoiceModel()
                }
                val ttsOk = ttsManager.initTtsAndAudioTrack()
                withContext(Dispatchers.Main) {
                    if (!ttsOk) {
                        uiManager.showToast(
                            "TTS 初始化失败:请确认 assets/${AppConfig.Tts.MODEL_DIR}/ 下有 model.onnx、tokens.txt、lexicon.txt 以及 phone/date/number/new_heteronym.fst",
                            Toast.LENGTH_LONG
                        )
                    }
                    uiManager.setText(getString(R.string.hint))
                    uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
                }
            } catch (t: Throwable) {
                Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
                withContext(Dispatchers.Main) {
                    uiManager.setText("初始化失败:${t.javaClass.simpleName}: ${t.message}")
                    uiManager.showToast("初始化失败(请看 Logcat: ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
                    uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
                }
            }
        }

        // The ASR worker drains the segment queue for the whole activity lifetime.
        Log.d(AppConfig.TAG, "Pre-starting ASR worker")
        ioScope.launch {
            asrManager.runAsrWorker()
        }
    }

    /** Bridges ASR events to trace marks, the UI, and the LLM call. */
    private fun createAsrCallback() = object : AsrManager.AsrCallback {
        override fun onAsrStarted() {
            currentTrace?.markASRStart()
            runOnUiThread {
                uiManager.appendToUi("\n[ASR] 开始识别...\n")
            }
        }

        override fun onAsrResult(text: String) {
            currentTrace?.markASREnd()
            runOnUiThread {
                uiManager.appendToUi("\n\n[ASR] ${text}\n")
            }
            currentTrace?.markRecordingDone()
            // NOTE(review): marking "LLM response received" here, before the LLM
            // is even called (onLlmCalled), looks wrong for latency accounting —
            // confirm against TraceSession's intended semantics.
            currentTrace?.markLlmResponseReceived()
        }

        override fun onAsrSkipped(reason: String) {
            Log.d(AppConfig.TAG, "ASR segment skipped: $reason")
        }

        // Suppress recognition of our own TTS playback (echo).
        override fun shouldSkipAsr(): Boolean = ttsManager.isPlaying()

        override fun isLlmInFlight(): Boolean = llmInFlight

        override fun onLlmCalled(text: String) {
            llmInFlight = true
            Log.d(AppConfig.TAG, "Calling LLM with text: $text")
            cloudApiManager.callLLM(text)
        }
    }

    /** Forwards finished VAD speech segments into the ASR queue. */
    private fun createVadCallback() = object : VadManager.VadCallback {
        override fun onSpeechSegmentReady(originalAudio: FloatArray, processedAudio: FloatArray) {
            Log.d(AppConfig.TAG, "Sending audio segment to ASR queue, size: ${processedAudio.size}")
            asrManager.enqueueAudioSegment(originalAudio, processedAudio)
        }

        // Ignore mic input while we are speaking or waiting on the LLM.
        override fun shouldSkipProcessing(): Boolean = ttsManager.isPlaying() || llmInFlight
    }

    /** Routes LLM results (streaming or whole-response) into the UI and TTS queue. */
    private fun createCloudApiListener() = object : CloudApiManager.CloudApiListener {
        // Per-turn flag: has the first streamed chunk been trace-marked yet?
        private var llmFirstChunkMarked = false

        override fun onLLMResponseReceived(response: String) {
            currentTrace?.markLlmDone()
            llmInFlight = false
            // Re-arm the first-chunk mark for the next turn (previously it was
            // never reset, so only the first turn ever recorded it).
            llmFirstChunkMarked = false
            if (enableStreaming) {
                // Streaming already rendered the text; just flush leftover segments.
                for (seg in segmenter.flush()) {
                    ttsManager.enqueueSegment(seg)
                }
                ttsManager.enqueueEnd()
            } else {
                runOnUiThread {
                    uiManager.appendToUi("${response}\n")
                }
                ttsManager.enqueueSegment(response)
                ttsManager.enqueueEnd()
            }
        }

        override fun onLLMStreamingChunkReceived(chunk: String) {
            if (enableStreaming) {
                if (!llmFirstChunkMarked) {
                    llmFirstChunkMarked = true
                    currentTrace?.markLlmFirstChunk()
                }
                // Consistent with the other callbacks: UI mutation on the main
                // thread (this listener is invoked from a network thread).
                runOnUiThread {
                    uiManager.appendToUi(chunk)
                }
                val segments = segmenter.processChunk(chunk)
                for (seg in segments) {
                    ttsManager.enqueueSegment(seg)
                }
            }
        }

        override fun onTTSAudioReceived(audioFilePath: String) {}

        override fun onError(errorMessage: String) {
            llmInFlight = false
            uiManager.showToast(errorMessage, Toast.LENGTH_LONG)
            onStopClicked(userInitiated = false)
        }
    }

    /** Mirrors TTS lifecycle events onto the UI, the avatar, and the trace. */
    private fun createTtsCallback() = object : TtsManager.TtsCallback {
        override fun onTtsStarted(text: String) {
            runOnUiThread {
                uiManager.appendToUi("\n[TTS] 开始合成...\n")
            }
        }

        override fun onTtsCompleted() {
            runOnUiThread {
                uiManager.appendToUi("\n[LOG] TTS completed at: ${System.currentTimeMillis()}\n")
            }
        }

        override fun onTtsSegmentCompleted(durationMs: Long) {}

        // TTS should abort as soon as the user stops the session.
        override fun isTtsStopped(): Boolean = !isRecording

        override fun onClearAsrQueue() {
            asrManager.clearQueue()
        }

        override fun onSetSpeaking(speaking: Boolean) {
            uiManager.setSpeaking(speaking)
        }

        override fun getCurrentTrace(): TraceSession? = currentTrace

        override fun onTraceMarkTtsRequestEnqueued() {
            currentTrace?.markTtsRequestEnqueued()
        }

        override fun onTraceMarkTtsSynthesisStart() {
            currentTrace?.markTtsSynthesisStart()
        }

        override fun onTraceMarkTtsFirstPcmReady() {
            currentTrace?.markTtsFirstPcmReady()
        }

        override fun onTraceMarkTtsFirstAudioPlay() {
            currentTrace?.markTtsFirstAudioPlay()
        }

        override fun onTraceMarkTtsDone() {
            currentTrace?.markTtsDone()
        }

        override fun onTraceAddDuration(name: String, value: Long) {
            currentTrace?.addDuration(name, value)
        }

        override fun onEndTurn() {
            TraceManager.getInstance().endTurn()
            currentTrace = null
        }
    }

    override fun onDestroy() {
        super.onDestroy()
        onStopClicked(userInitiated = false)
        ioScope.cancel()
        // Native handles must be released under the same lock they were created under.
        synchronized(nativeLock) {
            try { vadManager.release() } catch (_: Throwable) {}
            try { asrManager.release() } catch (_: Throwable) {}
        }
        try { ttsManager.release() } catch (_: Throwable) {}
        try { uiManager.release() } catch (_: Throwable) {}
        try { audioProcessor.release() } catch (_: Throwable) {}
    }

    override fun onResume() {
        super.onResume()
        uiManager.onResume()
    }

    override fun onPause() {
        uiManager.onPause()
        super.onPause()
    }

    /** Starts a new conversation turn: mic, VAD, trace, and the sample loop. */
    private fun onStartClicked() {
        Log.d(AppConfig.TAG, "onStartClicked called")
        if (isRecording) {
            Log.d(AppConfig.TAG, "Already recording, returning")
            return
        }
        if (!audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
            uiManager.showToast("麦克风初始化失败/无权限")
            return
        }
        currentTrace = TraceManager.getInstance().startNewTurn()
        currentTrace?.mark("turn_start")
        llmInFlight = false
        uiManager.clearText()
        ttsManager.reset()
        ttsManager.setCurrentTrace(currentTrace)
        segmenter.reset()
        vadManager.reset()
        audioProcessor.startRecording()
        isRecording = true
        uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = true)
        Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
        recordingJob?.cancel()
        recordingJob = ioScope.launch {
            processSamplesLoop()
        }
        Log.d(AppConfig.TAG, "onStartClicked completed")
    }

    /**
     * Stops the current turn. [userInitiated] distinguishes a user stop (ends
     * the trace turn) from an internal abort (trace is ended elsewhere).
     */
    private fun onStopClicked(userInitiated: Boolean) {
        isRecording = false
        audioProcessor.stopRecording()
        recordingJob?.cancel()
        recordingJob = null
        ttsManager.stop()
        uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
        if (userInitiated) {
            TraceManager.getInstance().endTurn()
            currentTrace = null
        }
    }

    /**
     * Recording loop: reads fixed-size windows from the mic and feeds them to
     * VAD. While TTS is playing, audio is drained but discarded so the device
     * does not recognize its own speech. Cooperative: exits when [isRecording]
     * clears or the scope is cancelled.
     */
    private suspend fun processSamplesLoop() {
        Log.d(AppConfig.TAG, "processSamplesLoop started")
        val windowSize = AppConfig.WINDOW_SIZE
        val buffer = ShortArray(windowSize)
        var loopCount = 0
        while (isRecording && ioScope.coroutineContext.isActive) {
            loopCount++
            if (loopCount % 100 == 0) {
                Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
            }
            if (ttsManager.isPlaying()) {
                if (vadManager.isInSpeech()) {
                    Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
                    vadManager.clearState()
                }
                // Drain the mic so the buffer doesn't back up, then discard.
                // (The original had a redundant `if (ret <= 0) continue` before
                // an unconditional continue — both paths continued anyway.)
                audioProcessor.readAudio(buffer)
                continue
            }
            val ret = audioProcessor.readAudio(buffer)
            if (ret <= 0) continue
            if (ret != windowSize) continue
            val chunk = audioProcessor.convertShortToFloat(buffer)
            val processedChunk = audioProcessor.applyGain(chunk)
            val result = vadManager.processAudioChunk(chunk, processedChunk)
            if (vadManager.vadComputeCount % 100 == 0) {
                Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
            }
            if (loopCount % 1000 == 0) {
                Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
            }
            // Time-based forced segmentation so long pauses still produce TTS.
            val forced = segmenter.maybeForceByTime()
            for (seg in forced) ttsManager.enqueueSegment(seg)
        }
        // Emit any trailing partial speech segment when the loop exits.
        vadManager.forceFinalize()
    }
}