live2d model
This commit is contained in:
418
app/src/main/java/com/digitalperson/Live2DChatActivity.kt
Normal file
418
app/src/main/java/com/digitalperson/Live2DChatActivity.kt
Normal file
@@ -0,0 +1,418 @@
|
||||
package com.digitalperson

import android.Manifest
import android.content.pm.PackageManager
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import com.digitalperson.asr.AsrManager
import com.digitalperson.audio.AudioProcessor
import com.digitalperson.cloud.CloudApiManager
import com.digitalperson.config.AppConfig
import com.digitalperson.metrics.TraceManager
import com.digitalperson.metrics.TraceSession
import com.digitalperson.tts.TtsManager
import com.digitalperson.ui.Live2DUiManager
import com.digitalperson.vad.VadManager
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.cancel
import kotlinx.coroutines.currentCoroutineContext
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
|
||||
/**
 * Voice-chat screen driving a Live2D avatar: mic -> VAD -> ASR -> cloud LLM -> TTS,
 * with per-turn latency tracing via [TraceSession].
 */
class Live2DChatActivity : AppCompatActivity() {

    // Pipeline collaborators; assigned in onCreate() (lateinit because they need the Activity context).
    private lateinit var uiManager: Live2DUiManager
    private lateinit var vadManager: VadManager
    private lateinit var asrManager: AsrManager
    private lateinit var ttsManager: TtsManager
    private lateinit var audioProcessor: AudioProcessor

    // The only runtime permission this screen requests.
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    // Written on the main thread (start/stop buttons), read from the IO capture loop — hence @Volatile.
    @Volatile
    private var isRecording: Boolean = false

    // Long-lived background scope; SupervisorJob so one failed child doesn't cancel siblings.
    private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
    private var recordingJob: Job? = null
    // Guards init/release of the native VAD + ASR models (used in onCreate and onDestroy).
    private val nativeLock = Any()

    private lateinit var cloudApiManager: CloudApiManager
    // Splits streaming LLM text into TTS-sized segments (length- and time-bounded).
    private val segmenter = StreamingTextSegmenter(
        maxLen = AppConfig.Tts.MAX_LEN,
        maxWaitMs = AppConfig.Tts.MAX_WAIT_MS
    )

    // Metrics session for the current turn; null between turns.
    private var currentTrace: TraceSession? = null
    // True from the moment the LLM is invoked until its response (or an error) arrives.
    @Volatile private var llmInFlight: Boolean = false
    // Whether LLM output is consumed chunk-by-chunk (toggled by the optional UI switch).
    private var enableStreaming = false
override fun onRequestPermissionsResult(
|
||||
requestCode: Int,
|
||||
permissions: Array<String>,
|
||||
grantResults: IntArray
|
||||
) {
|
||||
super.onRequestPermissionsResult(requestCode, permissions, grantResults)
|
||||
val ok = requestCode == AppConfig.REQUEST_RECORD_AUDIO_PERMISSION &&
|
||||
grantResults.isNotEmpty() &&
|
||||
grantResults[0] == PackageManager.PERMISSION_GRANTED
|
||||
if (!ok) {
|
||||
Log.e(AppConfig.TAG, "Audio record is disallowed")
|
||||
finish()
|
||||
}
|
||||
}
|
||||
|
||||
override fun onCreate(savedInstanceState: Bundle?) {
|
||||
super.onCreate(savedInstanceState)
|
||||
setContentView(R.layout.activity_live2d_chat)
|
||||
|
||||
uiManager = Live2DUiManager(this)
|
||||
uiManager.initViews(
|
||||
textViewId = R.id.my_text,
|
||||
scrollViewId = R.id.scroll_view,
|
||||
startButtonId = R.id.start_button,
|
||||
stopButtonId = R.id.stop_button,
|
||||
silentPlayerViewId = 0,
|
||||
speakingPlayerViewId = 0,
|
||||
live2dViewId = R.id.live2d_view
|
||||
)
|
||||
|
||||
uiManager.setStartButtonListener { onStartClicked() }
|
||||
uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
|
||||
|
||||
ActivityCompat.requestPermissions(this, permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)
|
||||
|
||||
try {
|
||||
val streamingSwitch = findViewById<android.widget.Switch>(R.id.streaming_switch)
|
||||
streamingSwitch.isChecked = enableStreaming
|
||||
streamingSwitch.setOnCheckedChangeListener { _, isChecked ->
|
||||
enableStreaming = isChecked
|
||||
cloudApiManager.setEnableStreaming(isChecked)
|
||||
uiManager.showToast("流式输出已${if (isChecked) "启用" else "禁用"}")
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
|
||||
}
|
||||
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
uiManager.setText("初始化中…")
|
||||
|
||||
audioProcessor = AudioProcessor(this)
|
||||
|
||||
asrManager = AsrManager(this)
|
||||
asrManager.setAudioProcessor(audioProcessor)
|
||||
asrManager.setCallback(createAsrCallback())
|
||||
|
||||
vadManager = VadManager(this)
|
||||
vadManager.setCallback(createVadCallback())
|
||||
|
||||
ioScope.launch {
|
||||
try {
|
||||
Log.i(AppConfig.TAG, "Init VAD + SenseVoice(RKNN) + TTS (background)")
|
||||
synchronized(nativeLock) {
|
||||
vadManager.initVadModel()
|
||||
asrManager.initSenseVoiceModel()
|
||||
}
|
||||
val ttsOk = ttsManager.initTtsAndAudioTrack()
|
||||
withContext(Dispatchers.Main) {
|
||||
if (!ttsOk) {
|
||||
uiManager.showToast(
|
||||
"TTS 初始化失败:请确认 assets/${AppConfig.Tts.MODEL_DIR}/ 下有 model.onnx、tokens.txt、lexicon.txt 以及 phone/date/number/new_heteronym.fst",
|
||||
Toast.LENGTH_LONG
|
||||
)
|
||||
}
|
||||
uiManager.setText(getString(R.string.hint))
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
}
|
||||
} catch (t: Throwable) {
|
||||
Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
|
||||
withContext(Dispatchers.Main) {
|
||||
uiManager.setText("初始化失败:${t.javaClass.simpleName}: ${t.message}")
|
||||
uiManager.showToast("初始化失败(请看 Logcat): ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cloudApiManager = CloudApiManager(createCloudApiListener(), applicationContext)
|
||||
cloudApiManager.setEnableStreaming(enableStreaming)
|
||||
|
||||
ttsManager = TtsManager(this)
|
||||
ttsManager.setCallback(createTtsCallback())
|
||||
|
||||
Log.d(AppConfig.TAG, "Pre-starting ASR worker")
|
||||
ioScope.launch {
|
||||
asrManager.runAsrWorker()
|
||||
}
|
||||
}
|
||||
|
||||
    /** Builds the ASR callback: posts recognition status to the UI and forwards final text to the LLM. */
    private fun createAsrCallback() = object : AsrManager.AsrCallback {
        // A queued audio segment has started decoding.
        override fun onAsrStarted() {
            currentTrace?.markASRStart()
            runOnUiThread {
                uiManager.appendToUi("\n[ASR] 开始识别...\n")
            }
        }

        // Final recognition text for one speech segment.
        override fun onAsrResult(text: String) {
            currentTrace?.markASREnd()
            runOnUiThread {
                uiManager.appendToUi("\n\n[ASR] ${text}\n")
            }
            currentTrace?.markRecordingDone()
            // NOTE(review): markLlmResponseReceived() fires here, BEFORE the LLM is even
            // called (onLlmCalled runs later and the real completion is traced in
            // createCloudApiListener) — looks like a misplaced or mislabeled trace
            // marker; confirm against TraceSession's intended timeline.
            currentTrace?.markLlmResponseReceived()
        }

        override fun onAsrSkipped(reason: String) {
            Log.d(AppConfig.TAG, "ASR segment skipped: $reason")
        }

        // Drop segments captured while our own TTS audio is playing
        // (presumably echo suppression — TODO confirm).
        override fun shouldSkipAsr(): Boolean = ttsManager.isPlaying()

        // Lets the ASR worker avoid firing a second LLM request mid-turn.
        override fun isLlmInFlight(): Boolean = llmInFlight

        // ASR worker decided this text should go to the LLM.
        override fun onLlmCalled(text: String) {
            llmInFlight = true
            Log.d(AppConfig.TAG, "Calling LLM with text: $text")
            cloudApiManager.callLLM(text)
        }
    }
|
||||
|
||||
private fun createVadCallback() = object : VadManager.VadCallback {
|
||||
override fun onSpeechSegmentReady(originalAudio: FloatArray, processedAudio: FloatArray) {
|
||||
Log.d(AppConfig.TAG, "Sending audio segment to ASR queue, size: ${processedAudio.size}")
|
||||
asrManager.enqueueAudioSegment(originalAudio, processedAudio)
|
||||
}
|
||||
|
||||
override fun shouldSkipProcessing(): Boolean = ttsManager.isPlaying() || llmInFlight
|
||||
}
|
||||
|
||||
private fun createCloudApiListener() = object : CloudApiManager.CloudApiListener {
|
||||
private var llmFirstChunkMarked = false
|
||||
|
||||
override fun onLLMResponseReceived(response: String) {
|
||||
currentTrace?.markLlmDone()
|
||||
llmInFlight = false
|
||||
|
||||
if (enableStreaming) {
|
||||
for (seg in segmenter.flush()) {
|
||||
ttsManager.enqueueSegment(seg)
|
||||
}
|
||||
ttsManager.enqueueEnd()
|
||||
} else {
|
||||
runOnUiThread {
|
||||
uiManager.appendToUi("${response}\n")
|
||||
}
|
||||
ttsManager.enqueueSegment(response)
|
||||
ttsManager.enqueueEnd()
|
||||
}
|
||||
}
|
||||
|
||||
override fun onLLMStreamingChunkReceived(chunk: String) {
|
||||
if (enableStreaming) {
|
||||
if (!llmFirstChunkMarked) {
|
||||
llmFirstChunkMarked = true
|
||||
currentTrace?.markLlmFirstChunk()
|
||||
}
|
||||
uiManager.appendToUi(chunk)
|
||||
|
||||
val segments = segmenter.processChunk(chunk)
|
||||
for (seg in segments) {
|
||||
ttsManager.enqueueSegment(seg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
override fun onTTSAudioReceived(audioFilePath: String) {}
|
||||
|
||||
override fun onError(errorMessage: String) {
|
||||
llmInFlight = false
|
||||
uiManager.showToast(errorMessage, Toast.LENGTH_LONG)
|
||||
onStopClicked(userInitiated = false)
|
||||
}
|
||||
}
|
||||
|
||||
private fun createTtsCallback() = object : TtsManager.TtsCallback {
|
||||
override fun onTtsStarted(text: String) {
|
||||
runOnUiThread {
|
||||
uiManager.appendToUi("\n[TTS] 开始合成...\n")
|
||||
}
|
||||
}
|
||||
|
||||
override fun onTtsCompleted() {
|
||||
runOnUiThread {
|
||||
uiManager.appendToUi("\n[LOG] TTS completed at: ${System.currentTimeMillis()}\n")
|
||||
}
|
||||
}
|
||||
|
||||
override fun onTtsSegmentCompleted(durationMs: Long) {}
|
||||
|
||||
override fun isTtsStopped(): Boolean = !isRecording
|
||||
|
||||
override fun onClearAsrQueue() {
|
||||
asrManager.clearQueue()
|
||||
}
|
||||
|
||||
override fun onSetSpeaking(speaking: Boolean) {
|
||||
uiManager.setSpeaking(speaking)
|
||||
}
|
||||
|
||||
override fun getCurrentTrace(): TraceSession? = currentTrace
|
||||
|
||||
override fun onTraceMarkTtsRequestEnqueued() {
|
||||
currentTrace?.markTtsRequestEnqueued()
|
||||
}
|
||||
|
||||
override fun onTraceMarkTtsSynthesisStart() {
|
||||
currentTrace?.markTtsSynthesisStart()
|
||||
}
|
||||
|
||||
override fun onTraceMarkTtsFirstPcmReady() {
|
||||
currentTrace?.markTtsFirstPcmReady()
|
||||
}
|
||||
|
||||
override fun onTraceMarkTtsFirstAudioPlay() {
|
||||
currentTrace?.markTtsFirstAudioPlay()
|
||||
}
|
||||
|
||||
override fun onTraceMarkTtsDone() {
|
||||
currentTrace?.markTtsDone()
|
||||
}
|
||||
|
||||
override fun onTraceAddDuration(name: String, value: Long) {
|
||||
currentTrace?.addDuration(name, value)
|
||||
}
|
||||
|
||||
override fun onEndTurn() {
|
||||
TraceManager.getInstance().endTurn()
|
||||
currentTrace = null
|
||||
}
|
||||
}
|
||||
|
||||
override fun onDestroy() {
|
||||
super.onDestroy()
|
||||
onStopClicked(userInitiated = false)
|
||||
ioScope.cancel()
|
||||
synchronized(nativeLock) {
|
||||
try { vadManager.release() } catch (_: Throwable) {}
|
||||
try { asrManager.release() } catch (_: Throwable) {}
|
||||
}
|
||||
try { ttsManager.release() } catch (_: Throwable) {}
|
||||
try { uiManager.release() } catch (_: Throwable) {}
|
||||
try { audioProcessor.release() } catch (_: Throwable) {}
|
||||
}
|
||||
|
||||
    override fun onResume() {
        super.onResume()
        // Resume the UI manager with the activity (presumably the Live2D render surface — confirm).
        uiManager.onResume()
    }
|
||||
|
||||
    override fun onPause() {
        // Pause the UI manager before the activity itself — mirror image of onResume's order.
        uiManager.onPause()
        super.onPause()
    }
|
||||
|
||||
private fun onStartClicked() {
|
||||
Log.d(AppConfig.TAG, "onStartClicked called")
|
||||
if (isRecording) {
|
||||
Log.d(AppConfig.TAG, "Already recording, returning")
|
||||
return
|
||||
}
|
||||
|
||||
if (!audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
|
||||
uiManager.showToast("麦克风初始化失败/无权限")
|
||||
return
|
||||
}
|
||||
|
||||
currentTrace = TraceManager.getInstance().startNewTurn()
|
||||
currentTrace?.mark("turn_start")
|
||||
llmInFlight = false
|
||||
|
||||
uiManager.clearText()
|
||||
|
||||
ttsManager.reset()
|
||||
ttsManager.setCurrentTrace(currentTrace)
|
||||
segmenter.reset()
|
||||
|
||||
vadManager.reset()
|
||||
audioProcessor.startRecording()
|
||||
isRecording = true
|
||||
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = true)
|
||||
|
||||
Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
|
||||
recordingJob?.cancel()
|
||||
recordingJob = ioScope.launch {
|
||||
processSamplesLoop()
|
||||
}
|
||||
Log.d(AppConfig.TAG, "onStartClicked completed")
|
||||
}
|
||||
|
||||
private fun onStopClicked(userInitiated: Boolean) {
|
||||
isRecording = false
|
||||
audioProcessor.stopRecording()
|
||||
|
||||
recordingJob?.cancel()
|
||||
recordingJob = null
|
||||
|
||||
ttsManager.stop()
|
||||
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
|
||||
if (userInitiated) {
|
||||
TraceManager.getInstance().endTurn()
|
||||
currentTrace = null
|
||||
}
|
||||
}
|
||||
|
||||
private suspend fun processSamplesLoop() {
|
||||
Log.d(AppConfig.TAG, "processSamplesLoop started")
|
||||
val windowSize = AppConfig.WINDOW_SIZE
|
||||
val buffer = ShortArray(windowSize)
|
||||
var loopCount = 0
|
||||
|
||||
while (isRecording && ioScope.coroutineContext.isActive) {
|
||||
loopCount++
|
||||
if (loopCount % 100 == 0) {
|
||||
Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
|
||||
}
|
||||
|
||||
if (ttsManager.isPlaying()) {
|
||||
if (vadManager.isInSpeech()) {
|
||||
Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
|
||||
vadManager.clearState()
|
||||
}
|
||||
val ret = audioProcessor.readAudio(buffer)
|
||||
if (ret <= 0) continue
|
||||
continue
|
||||
}
|
||||
|
||||
val ret = audioProcessor.readAudio(buffer)
|
||||
if (ret <= 0) continue
|
||||
if (ret != windowSize) continue
|
||||
|
||||
val chunk = audioProcessor.convertShortToFloat(buffer)
|
||||
val processedChunk = audioProcessor.applyGain(chunk)
|
||||
|
||||
val result = vadManager.processAudioChunk(chunk, processedChunk)
|
||||
|
||||
if (vadManager.vadComputeCount % 100 == 0) {
|
||||
Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
|
||||
}
|
||||
|
||||
if (loopCount % 1000 == 0) {
|
||||
Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
|
||||
}
|
||||
|
||||
val forced = segmenter.maybeForceByTime()
|
||||
for (seg in forced) ttsManager.enqueueSegment(seg)
|
||||
}
|
||||
|
||||
vadManager.forceFinalize()
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user