push2talk
This commit is contained in:
@@ -37,6 +37,9 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
|
||||
@Volatile
|
||||
private var isRecording: Boolean = false
|
||||
|
||||
private val holdToSpeakAudioBuffer = mutableListOf<Float>()
|
||||
private val HOLD_TO_SPEAK_MIN_SAMPLES = 16000 // 1秒的音频数据
|
||||
|
||||
private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
|
||||
private var recordingJob: Job? = null
|
||||
@@ -77,13 +80,30 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
scrollViewId = R.id.scroll_view,
|
||||
startButtonId = R.id.start_button,
|
||||
stopButtonId = R.id.stop_button,
|
||||
recordButtonId = R.id.record_button,
|
||||
traditionalButtonsId = R.id.traditional_buttons,
|
||||
silentPlayerViewId = 0,
|
||||
speakingPlayerViewId = 0,
|
||||
live2dViewId = R.id.live2d_view
|
||||
)
|
||||
|
||||
uiManager.setStartButtonListener { onStartClicked() }
|
||||
uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
|
||||
// 根据配置选择交互方式
|
||||
uiManager.setUseHoldToSpeak(AppConfig.USE_HOLD_TO_SPEAK)
|
||||
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
uiManager.setRecordButtonTouchListener { isDown ->
|
||||
if (isDown) {
|
||||
// 按住按钮,开始录音
|
||||
onRecordButtonDown()
|
||||
} else {
|
||||
// 松开按钮,停止录音
|
||||
onRecordButtonUp()
|
||||
}
|
||||
}
|
||||
} else {
|
||||
uiManager.setStartButtonListener { onStartClicked() }
|
||||
uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
|
||||
}
|
||||
|
||||
ActivityCompat.requestPermissions(this, permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)
|
||||
|
||||
@@ -99,10 +119,16 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
|
||||
}
|
||||
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
uiManager.setButtonsEnabled(recordEnabled = false)
|
||||
} else {
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
}
|
||||
uiManager.setText("初始化中…")
|
||||
|
||||
audioProcessor = AudioProcessor(this)
|
||||
ttsManager = TtsManager(this)
|
||||
ttsManager.setCallback(createTtsCallback())
|
||||
|
||||
asrManager = AsrManager(this)
|
||||
asrManager.setAudioProcessor(audioProcessor)
|
||||
@@ -127,23 +153,28 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
)
|
||||
}
|
||||
uiManager.setText(getString(R.string.hint))
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
uiManager.setButtonsEnabled(recordEnabled = true)
|
||||
} else {
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
}
|
||||
}
|
||||
} catch (t: Throwable) {
|
||||
Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
|
||||
withContext(Dispatchers.Main) {
|
||||
uiManager.setText("初始化失败:${t.javaClass.simpleName}: ${t.message}")
|
||||
uiManager.showToast("初始化失败(请看 Logcat): ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
uiManager.setButtonsEnabled(recordEnabled = false)
|
||||
} else {
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cloudApiManager = CloudApiManager(createCloudApiListener(), applicationContext)
|
||||
cloudApiManager.setEnableStreaming(enableStreaming)
|
||||
|
||||
ttsManager = TtsManager(this)
|
||||
ttsManager.setCallback(createTtsCallback())
|
||||
|
||||
Log.d(AppConfig.TAG, "Pre-starting ASR worker")
|
||||
ioScope.launch {
|
||||
@@ -205,10 +236,18 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
}
|
||||
ttsManager.enqueueEnd()
|
||||
} else {
|
||||
runOnUiThread {
|
||||
uiManager.appendToUi("${response}\n")
|
||||
val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
|
||||
val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(response)
|
||||
android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Final mood: $mood, filtered text: $filteredText")
|
||||
|
||||
if (mood != previousMood) {
|
||||
uiManager.setMood(mood)
|
||||
}
|
||||
ttsManager.enqueueSegment(response)
|
||||
|
||||
runOnUiThread {
|
||||
uiManager.appendToUi("${filteredText}\n")
|
||||
}
|
||||
ttsManager.enqueueSegment(filteredText)
|
||||
ttsManager.enqueueEnd()
|
||||
}
|
||||
}
|
||||
@@ -219,9 +258,18 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
llmFirstChunkMarked = true
|
||||
currentTrace?.markLlmFirstChunk()
|
||||
}
|
||||
uiManager.appendToUi(chunk)
|
||||
|
||||
val segments = segmenter.processChunk(chunk)
|
||||
val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
|
||||
val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(chunk)
|
||||
if (mood != previousMood) {
|
||||
android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Mood changed to: $mood")
|
||||
// 设置Live2D人物的心情
|
||||
uiManager.setMood(mood)
|
||||
}
|
||||
|
||||
uiManager.appendToUi(filteredText)
|
||||
|
||||
val segments = segmenter.processChunk(filteredText)
|
||||
for (seg in segments) {
|
||||
ttsManager.enqueueSegment(seg)
|
||||
}
|
||||
@@ -353,6 +401,77 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
Log.d(AppConfig.TAG, "onStartClicked completed")
|
||||
}
|
||||
|
||||
/**
 * Handles the press of the hold-to-speak button.
 *
 * Does nothing if a recording is already in progress. Otherwise it interrupts
 * any ongoing TTS playback, initialises the microphone, opens a new trace turn,
 * resets the per-turn UI/segmenter state, starts audio capture and launches
 * [processSamplesLoop] on [ioScope].
 */
private fun onRecordButtonDown() {
    Log.d(AppConfig.TAG, "onRecordButtonDown called")
    if (isRecording) {
        Log.d(AppConfig.TAG, "Already recording, returning")
        return
    }

    // If TTS is currently playing, interrupt it and note that in the transcript.
    if (ttsManager.interruptForNewTurn()) {
        uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
    }

    val microphoneReady = audioProcessor.initMicrophone(
        permissions,
        AppConfig.REQUEST_RECORD_AUDIO_PERMISSION
    )
    if (!microphoneReady) {
        uiManager.showToast("麦克风初始化失败/无权限")
        return
    }

    // Open a fresh trace for this conversational turn.
    currentTrace = TraceManager.getInstance().startNewTurn()
    currentTrace?.mark("turn_start")
    llmInFlight = false

    uiManager.clearText()

    // interruptForNewTurn() already prepared TTS state for next turn.
    // Keep reset() only for non-interrupt entry points.
    ttsManager.setCurrentTrace(currentTrace)
    segmenter.reset()

    // Play the avatar motion associated with hold-to-speak.
    uiManager.startSpecificMotion("hold_to_speak")

    // Start from an empty buffer, then begin capturing.
    holdToSpeakAudioBuffer.clear()
    audioProcessor.startRecording()
    isRecording = true

    Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
    // Replace whatever job was tracked before with the new capture loop.
    recordingJob?.cancel()
    recordingJob = ioScope.launch { processSamplesLoop() }
    Log.d(AppConfig.TAG, "onRecordButtonDown completed")
}
|
||||
|
||||
/**
 * Handles the release of the hold-to-speak button.
 *
 * Does nothing if no recording is in progress. Otherwise it stops audio capture,
 * cancels the capture loop and launches a finalization coroutine on [ioScope]
 * that appends the remaining recorded data to [holdToSpeakAudioBuffer] and either
 * forwards the whole utterance to ASR (when at least [HOLD_TO_SPEAK_MIN_SAMPLES]
 * samples were collected) or tells the user the recording was too short.
 */
private fun onRecordButtonUp() {
    Log.d(AppConfig.TAG, "onRecordButtonUp called")
    if (!isRecording) {
        Log.d(AppConfig.TAG, "Not recording, returning")
        return
    }

    isRecording = false
    audioProcessor.stopRecording()

    val captureJob = recordingJob
    captureJob?.cancel()
    recordingJob = ioScope.launch {
        // cancel() only requests cancellation. Wait until the capture loop has
        // really finished so it cannot append to holdToSpeakAudioBuffer (a plain,
        // non-thread-safe list) while it is being read and cleared here.
        captureJob?.join()

        // Collect whatever audio is still pending.
        val audioData = audioProcessor.getRecordedData()
        holdToSpeakAudioBuffer.addAll(audioData.toList())

        // Snapshot and clear first so the buffer is reset even if a later step
        // in this coroutine is cancelled or fails.
        val finalAudio =
            if (holdToSpeakAudioBuffer.size >= HOLD_TO_SPEAK_MIN_SAMPLES) {
                holdToSpeakAudioBuffer.toFloatArray()
            } else {
                null
            }
        holdToSpeakAudioBuffer.clear()

        if (finalAudio != null) {
            asrManager.enqueueAudioSegment(finalAudio, finalAudio)
        } else {
            // This coroutine runs on Dispatchers.IO; switch to the main
            // dispatcher before touching the UI, as the initialization
            // error path in this class does.
            withContext(Dispatchers.Main) {
                uiManager.showToast("录音时间太短,请长按至少1秒")
            }
        }
    }
    Log.d(AppConfig.TAG, "onRecordButtonUp completed")
}
|
||||
|
||||
private fun onStopClicked(userInitiated: Boolean) {
|
||||
isRecording = false
|
||||
audioProcessor.stopRecording()
|
||||
@@ -362,7 +481,11 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
|
||||
ttsManager.stop()
|
||||
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
uiManager.setButtonsEnabled(recordEnabled = true)
|
||||
} else {
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
}
|
||||
|
||||
if (userInitiated) {
|
||||
TraceManager.getInstance().endTurn()
|
||||
@@ -372,47 +495,62 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
|
||||
/**
 * Capture loop that runs while [isRecording] is true and the calling coroutine
 * is active.
 *
 * In hold-to-speak mode ([AppConfig.USE_HOLD_TO_SPEAK]) it keeps appending the
 * data returned by `audioProcessor.getAudioData()` to [holdToSpeakAudioBuffer].
 * Otherwise it reads fixed-size windows from the microphone, feeds them to the
 * VAD, and flushes time-forced text segments to TTS; when that loop ends the
 * VAD is force-finalized.
 *
 * NOTE(review): reconstructed from a diff view in which the previous and the new
 * version of this function were interleaved — confirm against the real file.
 */
private suspend fun processSamplesLoop() {
    Log.d(AppConfig.TAG, "processSamplesLoop started")

    if (AppConfig.USE_HOLD_TO_SPEAK) {
        // Hold-to-speak mode: accumulate audio; it is handed to ASR elsewhere
        // once the button is released.
        // The liveness check uses the *current* coroutine's context so that
        // cancelling this job (not only the whole scope) ends the loop.
        while (isRecording && kotlinx.coroutines.currentCoroutineContext().isActive) {
            val audioData = audioProcessor.getAudioData()
            if (audioData.isNotEmpty()) {
                holdToSpeakAudioBuffer.addAll(audioData.toList())
            }
            // Short pause to avoid spinning the CPU.
            kotlinx.coroutines.delay(10)
        }
    } else {
        // Traditional mode: VAD-driven segmentation.
        val windowSize = AppConfig.WINDOW_SIZE
        val buffer = ShortArray(windowSize)
        var loopCount = 0

        while (isRecording && kotlinx.coroutines.currentCoroutineContext().isActive) {
            loopCount++
            if (loopCount % 100 == 0) {
                Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
            }

            if (ttsManager.isPlaying()) {
                if (vadManager.isInSpeech()) {
                    Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
                    vadManager.clearState()
                }
                // Read and discard one window while TTS is playing
                // (presumably so playback is not fed to the VAD — confirm).
                audioProcessor.readAudio(buffer)
                continue
            }

            val ret = audioProcessor.readAudio(buffer)
            if (ret <= 0) continue
            // Only full windows are processed.
            if (ret != windowSize) continue

            val chunk = audioProcessor.convertShortToFloat(buffer)
            val processedChunk = audioProcessor.applyGain(chunk)

            val result = vadManager.processAudioChunk(chunk, processedChunk)

            if (vadManager.vadComputeCount % 100 == 0) {
                Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
            }

            if (loopCount % 1000 == 0) {
                Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
            }

            // Flush any text segments the segmenter decides to force out by time.
            val forced = segmenter.maybeForceByTime()
            for (seg in forced) ttsManager.enqueueSegment(seg)
        }

        vadManager.forceFinalize()
    }

    Log.d(AppConfig.TAG, "processSamplesLoop stopped")
}
|
||||
}
|
||||
Reference in New Issue
Block a user