push2talk
This commit is contained in:
@@ -55,3 +55,50 @@ TTS Sherpa-ONNX VITS .onnx ❌ 否 CPU ONNX Runtime
|
||||
│ SenseVoice │
|
||||
│ (RKNN推理) │
|
||||
└─────────────────┘
|
||||
|
||||
5. Live2D:Haru 的动作映射
|
||||
由于没有官方文档,以下映射是根据 Live2D 模型的常见设计模式给出的建议:
|
||||
|
||||
### 🎭 开心类情绪
|
||||
- haru_g_m22 眯眼微笑
|
||||
- haru_g_m21 跳动微笑
|
||||
- haru_g_m18 手收背后微笑
|
||||
- haru_g_m09 微微鞠躬
|
||||
- haru_g_m08 深深鞠躬
|
||||
|
||||
|
||||
### 😢 伤心类情绪
|
||||
- haru_g_m25 扁嘴
|
||||
- haru_g_m24 低头斜看地板,收手到背后
|
||||
- haru_g_m05 扁嘴,张开双手
|
||||
|
||||
### 😠 愤怒类情绪
|
||||
- haru_g_m11 双手交叉,摇头,扁嘴
|
||||
- haru_g_m04 双手交叉,点头
|
||||
- haru_g_m03 双手交叉,点头
|
||||
|
||||
### 😌 平和类情绪
|
||||
- haru_g_m15 双手交叉在胸前
|
||||
- haru_g_m07 举起左手
|
||||
- haru_g_m06 举起右手
|
||||
- haru_g_m02 双手放到背后
|
||||
- haru_g_m01 点头
|
||||
|
||||
|
||||
### 😲 惊讶类情绪
|
||||
- haru_g_m26 后退一步(适合:惊讶、吃惊)
|
||||
- haru_g_m12 摆手,摇头
|
||||
|
||||
### 😕 困惑类情绪
|
||||
- haru_g_m20 手指点腮,思考,皱眉
|
||||
- haru_g_m16 双手捧腮,思考
|
||||
- haru_g_m14 身体前倾,皱眉
|
||||
- haru_g_m13 身体前倾,双手分开
|
||||
|
||||
### 😳 害羞类情绪
|
||||
- haru_g_m19 脸红微笑
|
||||
|
||||
|
||||
### ❤️ 关心类情绪
|
||||
- haru_g_m17 靠近侧脸
|
||||
|
||||
|
||||
@@ -38,6 +38,9 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
@Volatile
|
||||
private var isRecording: Boolean = false
|
||||
|
||||
private val holdToSpeakAudioBuffer = mutableListOf<Float>()
|
||||
private val HOLD_TO_SPEAK_MIN_SAMPLES = 16000 // 1秒的音频数据
|
||||
|
||||
private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
|
||||
private var recordingJob: Job? = null
|
||||
private val nativeLock = Any()
|
||||
@@ -77,13 +80,30 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
scrollViewId = R.id.scroll_view,
|
||||
startButtonId = R.id.start_button,
|
||||
stopButtonId = R.id.stop_button,
|
||||
recordButtonId = R.id.record_button,
|
||||
traditionalButtonsId = R.id.traditional_buttons,
|
||||
silentPlayerViewId = 0,
|
||||
speakingPlayerViewId = 0,
|
||||
live2dViewId = R.id.live2d_view
|
||||
)
|
||||
|
||||
uiManager.setStartButtonListener { onStartClicked() }
|
||||
uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
|
||||
// 根据配置选择交互方式
|
||||
uiManager.setUseHoldToSpeak(AppConfig.USE_HOLD_TO_SPEAK)
|
||||
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
uiManager.setRecordButtonTouchListener { isDown ->
|
||||
if (isDown) {
|
||||
// 按住按钮,开始录音
|
||||
onRecordButtonDown()
|
||||
} else {
|
||||
// 松开按钮,停止录音
|
||||
onRecordButtonUp()
|
||||
}
|
||||
}
|
||||
} else {
|
||||
uiManager.setStartButtonListener { onStartClicked() }
|
||||
uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
|
||||
}
|
||||
|
||||
ActivityCompat.requestPermissions(this, permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)
|
||||
|
||||
@@ -99,10 +119,16 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
|
||||
}
|
||||
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
uiManager.setButtonsEnabled(recordEnabled = false)
|
||||
} else {
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
}
|
||||
uiManager.setText("初始化中…")
|
||||
|
||||
audioProcessor = AudioProcessor(this)
|
||||
ttsManager = TtsManager(this)
|
||||
ttsManager.setCallback(createTtsCallback())
|
||||
|
||||
asrManager = AsrManager(this)
|
||||
asrManager.setAudioProcessor(audioProcessor)
|
||||
@@ -127,14 +153,22 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
)
|
||||
}
|
||||
uiManager.setText(getString(R.string.hint))
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
uiManager.setButtonsEnabled(recordEnabled = true)
|
||||
} else {
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
}
|
||||
}
|
||||
} catch (t: Throwable) {
|
||||
Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
|
||||
withContext(Dispatchers.Main) {
|
||||
uiManager.setText("初始化失败:${t.javaClass.simpleName}: ${t.message}")
|
||||
uiManager.showToast("初始化失败(请看 Logcat): ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
uiManager.setButtonsEnabled(recordEnabled = false)
|
||||
} else {
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -142,9 +176,6 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
cloudApiManager = CloudApiManager(createCloudApiListener(), applicationContext)
|
||||
cloudApiManager.setEnableStreaming(enableStreaming)
|
||||
|
||||
ttsManager = TtsManager(this)
|
||||
ttsManager.setCallback(createTtsCallback())
|
||||
|
||||
Log.d(AppConfig.TAG, "Pre-starting ASR worker")
|
||||
ioScope.launch {
|
||||
asrManager.runAsrWorker()
|
||||
@@ -205,10 +236,18 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
}
|
||||
ttsManager.enqueueEnd()
|
||||
} else {
|
||||
runOnUiThread {
|
||||
uiManager.appendToUi("${response}\n")
|
||||
val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
|
||||
val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(response)
|
||||
android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Final mood: $mood, filtered text: $filteredText")
|
||||
|
||||
if (mood != previousMood) {
|
||||
uiManager.setMood(mood)
|
||||
}
|
||||
ttsManager.enqueueSegment(response)
|
||||
|
||||
runOnUiThread {
|
||||
uiManager.appendToUi("${filteredText}\n")
|
||||
}
|
||||
ttsManager.enqueueSegment(filteredText)
|
||||
ttsManager.enqueueEnd()
|
||||
}
|
||||
}
|
||||
@@ -219,9 +258,18 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
llmFirstChunkMarked = true
|
||||
currentTrace?.markLlmFirstChunk()
|
||||
}
|
||||
uiManager.appendToUi(chunk)
|
||||
|
||||
val segments = segmenter.processChunk(chunk)
|
||||
val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
|
||||
val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(chunk)
|
||||
if (mood != previousMood) {
|
||||
android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Mood changed to: $mood")
|
||||
// 设置Live2D人物的心情
|
||||
uiManager.setMood(mood)
|
||||
}
|
||||
|
||||
uiManager.appendToUi(filteredText)
|
||||
|
||||
val segments = segmenter.processChunk(filteredText)
|
||||
for (seg in segments) {
|
||||
ttsManager.enqueueSegment(seg)
|
||||
}
|
||||
@@ -353,6 +401,77 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
Log.d(AppConfig.TAG, "onStartClicked completed")
|
||||
}
|
||||
|
||||
/**
 * Press handler for the hold-to-speak button.
 *
 * Does nothing if a recording is already in progress. Otherwise it interrupts
 * TTS, initialises the microphone, opens a new trace turn, resets per-turn
 * state, plays the "hold_to_speak" motion and launches the capture loop on
 * [ioScope].
 */
private fun onRecordButtonDown() {
    Log.d(AppConfig.TAG, "onRecordButtonDown called")
    if (isRecording) {
        Log.d(AppConfig.TAG, "Already recording, returning")
        return
    }

    // Interrupt any TTS playback that is still running before a new turn starts.
    if (ttsManager.interruptForNewTurn()) {
        uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
    }

    val micReady = audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)
    if (!micReady) {
        uiManager.showToast("麦克风初始化失败/无权限")
        return
    }

    // Open a new trace turn and reset the per-turn flags.
    currentTrace = TraceManager.getInstance().startNewTurn()
    currentTrace?.mark("turn_start")
    llmInFlight = false

    uiManager.clearText()

    // interruptForNewTurn() already prepared TTS state for the next turn,
    // so only the trace and the text segmenter need to be re-armed here.
    ttsManager.setCurrentTrace(currentTrace)
    segmenter.reset()

    // Play the dedicated hold-to-speak motion.
    uiManager.startSpecificMotion("hold_to_speak")

    // Start from an empty accumulation buffer, then begin capturing.
    holdToSpeakAudioBuffer.clear()
    audioProcessor.startRecording()
    isRecording = true

    Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
    recordingJob?.cancel()
    recordingJob = ioScope.launch { processSamplesLoop() }
    Log.d(AppConfig.TAG, "onRecordButtonDown completed")
}
|
||||
|
||||
/**
 * Release handler for the hold-to-speak button.
 *
 * Stops the recorder, cancels the capture loop, and then, on [ioScope],
 * hands the accumulated audio to ASR if it holds at least
 * [HOLD_TO_SPEAK_MIN_SAMPLES] samples; otherwise it shows a "too short" toast.
 * Does nothing if no recording is in progress.
 */
private fun onRecordButtonUp() {
    Log.d(AppConfig.TAG, "onRecordButtonUp called")
    if (!isRecording) {
        Log.d(AppConfig.TAG, "Not recording, returning")
        return
    }

    isRecording = false
    audioProcessor.stopRecording()

    // cancel() only requests cancellation; the capture loop may still be in
    // the middle of appending to holdToSpeakAudioBuffer. Keep a handle so the
    // finalizer below can wait for it before reading the (non-thread-safe) list.
    val captureJob = recordingJob
    captureJob?.cancel()
    recordingJob = ioScope.launch {
        // Wait for the capture loop to actually finish so the buffer is not
        // read and cleared while it is still being written.
        captureJob?.join()

        // Pick up any remaining audio the processor still holds.
        val audioData = audioProcessor.getRecordedData()
        holdToSpeakAudioBuffer.addAll(audioData.toList())

        if (holdToSpeakAudioBuffer.size >= HOLD_TO_SPEAK_MIN_SAMPLES) {
            val finalAudio = holdToSpeakAudioBuffer.toFloatArray()
            asrManager.enqueueAudioSegment(finalAudio, finalAudio)
        } else {
            // NOTE(review): this runs on an IO dispatcher thread; confirm that
            // uiManager.showToast posts to the main thread internally.
            uiManager.showToast("录音时间太短,请长按至少1秒")
        }
        holdToSpeakAudioBuffer.clear()
    }
    Log.d(AppConfig.TAG, "onRecordButtonUp completed")
}
|
||||
|
||||
private fun onStopClicked(userInitiated: Boolean) {
|
||||
isRecording = false
|
||||
audioProcessor.stopRecording()
|
||||
@@ -362,7 +481,11 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
|
||||
ttsManager.stop()
|
||||
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
uiManager.setButtonsEnabled(recordEnabled = true)
|
||||
} else {
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
}
|
||||
|
||||
if (userInitiated) {
|
||||
TraceManager.getInstance().endTurn()
|
||||
@@ -372,47 +495,62 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
|
||||
private suspend fun processSamplesLoop() {
|
||||
Log.d(AppConfig.TAG, "processSamplesLoop started")
|
||||
val windowSize = AppConfig.WINDOW_SIZE
|
||||
val buffer = ShortArray(windowSize)
|
||||
var loopCount = 0
|
||||
|
||||
while (isRecording && ioScope.coroutineContext.isActive) {
|
||||
loopCount++
|
||||
if (loopCount % 100 == 0) {
|
||||
Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
|
||||
}
|
||||
|
||||
if (ttsManager.isPlaying()) {
|
||||
if (vadManager.isInSpeech()) {
|
||||
Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
|
||||
vadManager.clearState()
|
||||
if (AppConfig.USE_HOLD_TO_SPEAK) {
|
||||
// 按住说话模式:累积音频数据到一定长度后再发送给ASR
|
||||
while (isRecording && ioScope.coroutineContext.isActive) {
|
||||
val audioData = audioProcessor.getAudioData()
|
||||
if (audioData.isNotEmpty()) {
|
||||
holdToSpeakAudioBuffer.addAll(audioData.toList())
|
||||
}
|
||||
// 避免CPU占用过高
|
||||
kotlinx.coroutines.delay(10)
|
||||
}
|
||||
} else {
|
||||
// 传统模式:使用VAD
|
||||
val windowSize = AppConfig.WINDOW_SIZE
|
||||
val buffer = ShortArray(windowSize)
|
||||
var loopCount = 0
|
||||
|
||||
while (isRecording && ioScope.coroutineContext.isActive) {
|
||||
loopCount++
|
||||
if (loopCount % 100 == 0) {
|
||||
Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
|
||||
}
|
||||
|
||||
if (ttsManager.isPlaying()) {
|
||||
if (vadManager.isInSpeech()) {
|
||||
Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
|
||||
vadManager.clearState()
|
||||
}
|
||||
val ret = audioProcessor.readAudio(buffer)
|
||||
if (ret <= 0) continue
|
||||
continue
|
||||
}
|
||||
|
||||
val ret = audioProcessor.readAudio(buffer)
|
||||
if (ret <= 0) continue
|
||||
continue
|
||||
if (ret != windowSize) continue
|
||||
|
||||
val chunk = audioProcessor.convertShortToFloat(buffer)
|
||||
val processedChunk = audioProcessor.applyGain(chunk)
|
||||
|
||||
val result = vadManager.processAudioChunk(chunk, processedChunk)
|
||||
|
||||
if (vadManager.vadComputeCount % 100 == 0) {
|
||||
Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
|
||||
}
|
||||
|
||||
if (loopCount % 1000 == 0) {
|
||||
Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
|
||||
}
|
||||
|
||||
val forced = segmenter.maybeForceByTime()
|
||||
for (seg in forced) ttsManager.enqueueSegment(seg)
|
||||
}
|
||||
|
||||
val ret = audioProcessor.readAudio(buffer)
|
||||
if (ret <= 0) continue
|
||||
if (ret != windowSize) continue
|
||||
|
||||
val chunk = audioProcessor.convertShortToFloat(buffer)
|
||||
val processedChunk = audioProcessor.applyGain(chunk)
|
||||
|
||||
val result = vadManager.processAudioChunk(chunk, processedChunk)
|
||||
|
||||
if (vadManager.vadComputeCount % 100 == 0) {
|
||||
Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
|
||||
}
|
||||
|
||||
if (loopCount % 1000 == 0) {
|
||||
Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
|
||||
}
|
||||
|
||||
val forced = segmenter.maybeForceByTime()
|
||||
for (seg in forced) ttsManager.enqueueSegment(seg)
|
||||
vadManager.forceFinalize()
|
||||
}
|
||||
|
||||
vadManager.forceFinalize()
|
||||
Log.d(AppConfig.TAG, "processSamplesLoop stopped")
|
||||
}
|
||||
}
|
||||
@@ -75,14 +75,20 @@ class MainActivity : AppCompatActivity() {
|
||||
uiManager.initViews(
|
||||
textViewId = R.id.my_text,
|
||||
scrollViewId = R.id.scroll_view,
|
||||
startButtonId = R.id.start_button,
|
||||
stopButtonId = R.id.stop_button,
|
||||
recordButtonId = R.id.record_button,
|
||||
silentPlayerViewId = R.id.player_view_silent,
|
||||
speakingPlayerViewId = R.id.player_view_speaking
|
||||
)
|
||||
|
||||
uiManager.setStartButtonListener { onStartClicked() }
|
||||
uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
|
||||
uiManager.setRecordButtonTouchListener { isDown ->
|
||||
if (isDown) {
|
||||
// 按住按钮,开始录音
|
||||
onRecordButtonDown()
|
||||
} else {
|
||||
// 松开按钮,停止录音
|
||||
onRecordButtonUp()
|
||||
}
|
||||
}
|
||||
|
||||
ActivityCompat.requestPermissions(this, permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)
|
||||
|
||||
@@ -98,7 +104,7 @@ class MainActivity : AppCompatActivity() {
|
||||
Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
|
||||
}
|
||||
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
uiManager.setButtonsEnabled(recordEnabled = false)
|
||||
uiManager.setText("初始化中…")
|
||||
|
||||
audioProcessor = AudioProcessor(this)
|
||||
@@ -126,14 +132,14 @@ class MainActivity : AppCompatActivity() {
|
||||
)
|
||||
}
|
||||
uiManager.setText(getString(R.string.hint))
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
uiManager.setButtonsEnabled(recordEnabled = true)
|
||||
}
|
||||
} catch (t: Throwable) {
|
||||
Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
|
||||
withContext(Dispatchers.Main) {
|
||||
uiManager.setText("初始化失败:${t.javaClass.simpleName}: ${t.message}")
|
||||
uiManager.showToast("初始化失败(请看 Logcat): ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
|
||||
uiManager.setButtonsEnabled(recordEnabled = false)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -204,10 +210,13 @@ class MainActivity : AppCompatActivity() {
|
||||
}
|
||||
ttsManager.enqueueEnd()
|
||||
} else {
|
||||
val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(response)
|
||||
android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Final mood: $mood, filtered text: $filteredText")
|
||||
|
||||
runOnUiThread {
|
||||
uiManager.appendToUi("${response}\n")
|
||||
uiManager.appendToUi("${filteredText}\n")
|
||||
}
|
||||
ttsManager.enqueueSegment(response)
|
||||
ttsManager.enqueueSegment(filteredText)
|
||||
ttsManager.enqueueEnd()
|
||||
}
|
||||
}
|
||||
@@ -218,9 +227,15 @@ class MainActivity : AppCompatActivity() {
|
||||
llmFirstChunkMarked = true
|
||||
currentTrace?.markLlmFirstChunk()
|
||||
}
|
||||
uiManager.appendToUi(chunk)
|
||||
|
||||
val segments = segmenter.processChunk(chunk)
|
||||
val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(chunk)
|
||||
if (mood != com.digitalperson.mood.MoodManager.getCurrentMood()) {
|
||||
android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Mood changed to: $mood")
|
||||
}
|
||||
|
||||
uiManager.appendToUi(filteredText)
|
||||
|
||||
val segments = segmenter.processChunk(filteredText)
|
||||
for (seg in segments) {
|
||||
ttsManager.enqueueSegment(seg)
|
||||
}
|
||||
@@ -306,13 +321,19 @@ class MainActivity : AppCompatActivity() {
|
||||
try { audioProcessor.release() } catch (_: Throwable) {}
|
||||
}
|
||||
|
||||
private fun onStartClicked() {
|
||||
Log.d(AppConfig.TAG, "onStartClicked called")
|
||||
private fun onRecordButtonDown() {
|
||||
Log.d(AppConfig.TAG, "onRecordButtonDown called")
|
||||
if (isRecording) {
|
||||
Log.d(AppConfig.TAG, "Already recording, returning")
|
||||
return
|
||||
}
|
||||
|
||||
// 如果TTS正在播放,打断它
|
||||
if (ttsManager.isPlaying()) {
|
||||
ttsManager.stop()
|
||||
uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
|
||||
}
|
||||
|
||||
if (!audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
|
||||
uiManager.showToast("麦克风初始化失败/无权限")
|
||||
return
|
||||
@@ -328,18 +349,37 @@ class MainActivity : AppCompatActivity() {
|
||||
ttsManager.setCurrentTrace(currentTrace)
|
||||
segmenter.reset()
|
||||
|
||||
vadManager.reset()
|
||||
audioProcessor.startRecording()
|
||||
isRecording = true
|
||||
|
||||
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = true)
|
||||
|
||||
Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
|
||||
recordingJob?.cancel()
|
||||
recordingJob = ioScope.launch {
|
||||
processSamplesLoop()
|
||||
}
|
||||
Log.d(AppConfig.TAG, "onStartClicked completed")
|
||||
Log.d(AppConfig.TAG, "onRecordButtonDown completed")
|
||||
}
|
||||
|
||||
/**
 * Release handler for the record button.
 *
 * If a recording is in progress it stops the recorder, cancels the capture
 * job, and launches a job on [ioScope] that sends whatever
 * `audioProcessor.getRecordedData()` returns straight to ASR, bypassing VAD.
 */
private fun onRecordButtonUp() {
    Log.d(AppConfig.TAG, "onRecordButtonUp called")
    if (isRecording) {
        isRecording = false
        audioProcessor.stopRecording()

        recordingJob?.cancel()
        recordingJob = ioScope.launch {
            // Flush the remaining audio directly to ASR without going through VAD.
            val tail = audioProcessor.getRecordedData()
            if (tail.isNotEmpty()) {
                asrManager.enqueueAudioSegment(tail, tail)
            }
        }
        Log.d(AppConfig.TAG, "onRecordButtonUp completed")
    } else {
        Log.d(AppConfig.TAG, "Not recording, returning")
    }
}
|
||||
|
||||
private fun onStopClicked(userInitiated: Boolean) {
|
||||
@@ -351,7 +391,7 @@ class MainActivity : AppCompatActivity() {
|
||||
|
||||
ttsManager.stop()
|
||||
|
||||
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
|
||||
uiManager.setButtonsEnabled(recordEnabled = true)
|
||||
|
||||
if (userInitiated) {
|
||||
TraceManager.getInstance().endTurn()
|
||||
@@ -361,47 +401,16 @@ class MainActivity : AppCompatActivity() {
|
||||
|
||||
private suspend fun processSamplesLoop() {
|
||||
Log.d(AppConfig.TAG, "processSamplesLoop started")
|
||||
val windowSize = AppConfig.WINDOW_SIZE
|
||||
val buffer = ShortArray(windowSize)
|
||||
var loopCount = 0
|
||||
|
||||
while (isRecording && ioScope.coroutineContext.isActive) {
|
||||
loopCount++
|
||||
if (loopCount % 100 == 0) {
|
||||
Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
|
||||
val audioData = audioProcessor.getAudioData()
|
||||
if (audioData.isNotEmpty()) {
|
||||
// 直接发送到ASR,不经过VAD
|
||||
asrManager.enqueueAudioSegment(audioData, audioData)
|
||||
}
|
||||
|
||||
if (ttsManager.isPlaying()) {
|
||||
if (vadManager.isInSpeech()) {
|
||||
Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
|
||||
vadManager.clearState()
|
||||
}
|
||||
val ret = audioProcessor.readAudio(buffer)
|
||||
if (ret <= 0) continue
|
||||
continue
|
||||
}
|
||||
|
||||
val ret = audioProcessor.readAudio(buffer)
|
||||
if (ret <= 0) continue
|
||||
if (ret != windowSize) continue
|
||||
|
||||
val chunk = audioProcessor.convertShortToFloat(buffer)
|
||||
val processedChunk = audioProcessor.applyGain(chunk)
|
||||
|
||||
val result = vadManager.processAudioChunk(chunk, processedChunk)
|
||||
|
||||
if (vadManager.vadComputeCount % 100 == 0) {
|
||||
Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
|
||||
}
|
||||
|
||||
if (loopCount % 1000 == 0) {
|
||||
Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
|
||||
}
|
||||
|
||||
val forced = segmenter.maybeForceByTime()
|
||||
for (seg in forced) ttsManager.enqueueSegment(seg)
|
||||
// 避免CPU占用过高
|
||||
kotlinx.coroutines.delay(10)
|
||||
}
|
||||
|
||||
vadManager.forceFinalize()
|
||||
Log.d(AppConfig.TAG, "processSamplesLoop stopped")
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,7 @@
|
||||
package com.digitalperson
|
||||
|
||||
import com.digitalperson.mood.MoodManager
|
||||
|
||||
/**
|
||||
* 将大模型流式 chunk 做“伪流式 TTS”的分段器:
|
||||
* - 优先按中文/英文标点断句,尽早产出第一段,缩短首包时间
|
||||
@@ -11,17 +13,28 @@ class StreamingTextSegmenter(
|
||||
) {
|
||||
private val buf = StringBuilder()
|
||||
private var lastEmitAtMs: Long = 0
|
||||
private var moodExtracted = false
|
||||
|
||||
/**
 * Clears the pending text buffer, re-arms mood extraction and restarts the
 * emit timer at [nowMs].
 */
@Synchronized
fun reset(nowMs: Long = System.currentTimeMillis()) {
    moodExtracted = false
    lastEmitAtMs = nowMs
    buf.clear()
}
|
||||
|
||||
/**
 * Feeds one streamed LLM [chunk] into the segmenter and returns any segments
 * that are ready to be spoken.
 *
 * While [moodExtracted] is false the chunk is first passed through
 * [MoodManager.extractAndFilterMood] and only the filtered text is buffered;
 * the flag is then set so later chunks are buffered unchanged until the flag
 * is cleared again.
 *
 * @param chunk text fragment from the model; an empty chunk yields no segments.
 * @param nowMs current time in milliseconds, forwarded to the drain step.
 * @return the segments produced by draining the buffer (possibly empty).
 */
@Synchronized
fun processChunk(chunk: String, nowMs: Long = System.currentTimeMillis()): List<String> {
    if (chunk.isEmpty()) return emptyList()

    var processedChunk = chunk

    if (!moodExtracted) {
        val (filteredText, _) = MoodManager.extractAndFilterMood(chunk)
        processedChunk = filteredText
        moodExtracted = true
    }

    // Buffer the chunk exactly once. Appending the raw chunk as well would
    // duplicate the text and leak the unfiltered mood tag into the output.
    buf.append(processedChunk)
    return drain(nowMs, forceByTime = false)
}
|
||||
|
||||
@@ -32,6 +45,7 @@ class StreamingTextSegmenter(
|
||||
buf.setLength(0)
|
||||
if (remaining.isNotEmpty()) out.add(remaining)
|
||||
lastEmitAtMs = nowMs
|
||||
moodExtracted = false
|
||||
return out
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ class AudioProcessor(
|
||||
|
||||
private var smoothedRms = 0f
|
||||
private val alpha = 0.8f
|
||||
private val recordedData = mutableListOf<Float>()
|
||||
|
||||
fun initMicrophone(permissions: Array<String>, requestCode: Int): Boolean {
|
||||
if (ActivityCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO)
|
||||
@@ -84,6 +85,29 @@ class AudioProcessor(
|
||||
Log.d(TAG, "Audio recording stopped")
|
||||
}
|
||||
|
||||
/**
 * Reads up to 1024 samples from the recorder and returns them converted via
 * [convertShortToFloat].
 *
 * The samples are not stored in `recordedData`; the caller decides what to do
 * with them.
 *
 * @return the samples read, or an empty array when there is no recorder or the
 *   read returned zero or a negative value.
 */
fun getAudioData(): FloatArray {
    val capacity = 1024
    val pcm = ShortArray(capacity)
    val samplesRead = audioRecord?.read(pcm, 0, capacity) ?: 0

    return if (samplesRead <= 0) {
        FloatArray(0)
    } else {
        convertShortToFloat(pcm.copyOf(samplesRead))
    }
}
|
||||
|
||||
/**
 * Returns a snapshot of `recordedData` as a [FloatArray] and empties the list.
 *
 * NOTE(review): nothing in the visible code appends to `recordedData`, so this
 * may always return an empty array — confirm against the rest of the class.
 */
fun getRecordedData(): FloatArray =
    recordedData.toFloatArray().also { recordedData.clear() }
|
||||
|
||||
/** Discards everything currently held in `recordedData`. */
fun clearRecordedData() = recordedData.clear()
|
||||
|
||||
fun release() {
|
||||
try {
|
||||
audioRecord?.stop()
|
||||
@@ -105,6 +129,7 @@ class AudioProcessor(
|
||||
}
|
||||
aec = null
|
||||
ns = null
|
||||
recordedData.clear()
|
||||
|
||||
Log.d(TAG, "AudioProcessor released")
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ object AppConfig {
|
||||
const val WINDOW_SIZE = 512
|
||||
|
||||
const val SHOW_DEBUG_TEXT = true
|
||||
const val USE_HOLD_TO_SPEAK = true // true: 按住说话, false: 传统按钮
|
||||
|
||||
object Tts {
|
||||
const val MODEL_DIR = "tts_model/sherpa-onnx-vits-zh-ll"
|
||||
|
||||
@@ -15,6 +15,14 @@ class Live2DAvatarManager(private val glSurfaceView: GLSurfaceView) {
|
||||
renderer.setSpeaking(speaking)
|
||||
}
|
||||
|
||||
/** Forwards the requested [mood] to the renderer. */
fun setMood(mood: String) = renderer.setMood(mood)
|
||||
|
||||
/** Forwards a request to play the motion registered under [motionName] to the renderer. */
fun startSpecificMotion(motionName: String) = renderer.startSpecificMotion(motionName)
|
||||
|
||||
fun onResume() {
|
||||
glSurfaceView.onResume()
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import android.content.res.AssetManager
|
||||
import android.graphics.BitmapFactory
|
||||
import android.opengl.GLES20
|
||||
import android.opengl.GLUtils
|
||||
import android.util.Log
|
||||
import com.live2d.sdk.cubism.framework.CubismFramework
|
||||
import com.live2d.sdk.cubism.framework.CubismModelSettingJson
|
||||
import com.live2d.sdk.cubism.framework.id.CubismId
|
||||
@@ -12,14 +13,22 @@ import com.live2d.sdk.cubism.framework.model.CubismUserModel
|
||||
import com.live2d.sdk.cubism.framework.motion.CubismMotion
|
||||
import com.live2d.sdk.cubism.framework.rendering.android.CubismRendererAndroid
|
||||
import kotlin.math.sin
|
||||
import java.util.Random
|
||||
|
||||
class Live2DCharacter : CubismUserModel() {
|
||||
private lateinit var setting: CubismModelSettingJson
|
||||
private val lipSyncParams = mutableListOf<CubismId>()
|
||||
private val idleMotions = mutableListOf<CubismMotion>()
|
||||
private val moodMotions = mutableMapOf<String, List<CubismMotion>>()
|
||||
private val specificMotions = mutableMapOf<String, CubismMotion>()
|
||||
private var idleMotionIndex = 0
|
||||
private var lastElapsedSec = 0f
|
||||
private val textureIds = mutableListOf<Int>()
|
||||
private val random = Random()
|
||||
private var currentMood: String = "平和"
|
||||
|
||||
// 添加文件名映射
|
||||
private val motionFileMap = mutableMapOf<CubismMotion, String>()
|
||||
|
||||
fun loadFromAssets(assets: AssetManager, modelDir: String, modelJsonName: String) {
|
||||
val settingBytes = readAssetBytes(assets, "$modelDir/$modelJsonName")
|
||||
@@ -41,6 +50,8 @@ class Live2DCharacter : CubismUserModel() {
|
||||
|
||||
initLipSyncParams()
|
||||
loadIdleMotions(assets, modelDir)
|
||||
loadMoodMotions(assets, modelDir)
|
||||
loadSpecificMotions(assets, modelDir)
|
||||
startNextIdleMotion()
|
||||
}
|
||||
|
||||
@@ -147,17 +158,34 @@ class Live2DCharacter : CubismUserModel() {
|
||||
|
||||
private fun loadIdleMotions(assets: AssetManager, modelDir: String) {
|
||||
idleMotions.clear()
|
||||
val groupName = findIdleGroupName()
|
||||
if (groupName.isEmpty()) return
|
||||
// val groupName = findIdleGroupName()
|
||||
// if (groupName.isNotEmpty()) {
|
||||
// for (i in 0 until setting.getMotionCount(groupName)) {
|
||||
// val fileName = setting.getMotionFileName(groupName, i)
|
||||
// if (fileName.isBlank()) continue
|
||||
// runCatching {
|
||||
// // Motion path in model3.json can be either "motion/xxx.motion3.json" or "xxx.motion3.json".
|
||||
// val path = if (fileName.startsWith("motion/")) {
|
||||
// "$modelDir/$fileName"
|
||||
// } else {
|
||||
// "$modelDir/motion/$fileName"
|
||||
// }
|
||||
// val motion = loadMotion(readAssetBytes(assets, path))
|
||||
// motion?.setLoop(true)
|
||||
// motion?.setLoopFadeIn(true)
|
||||
// if (motion != null) idleMotions.add(motion)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
for (i in 0 until setting.getMotionCount(groupName)) {
|
||||
val fileName = setting.getMotionFileName(groupName, i)
|
||||
if (fileName.isBlank()) continue
|
||||
runCatching {
|
||||
val motion = loadMotion(readAssetBytes(assets, "$modelDir/$fileName"))
|
||||
motion?.setLoop(true)
|
||||
motion?.setLoopFadeIn(true)
|
||||
if (motion != null) idleMotions.add(motion)
|
||||
// Fallback for models without Idle group config.
|
||||
if (idleMotions.isEmpty()) {
|
||||
loadMotionByName(assets, modelDir, "haru_g_idle.motion3.json")?.let { motion ->
|
||||
motion.setLoop(true)
|
||||
motion.setLoopFadeIn(true)
|
||||
idleMotions.add(motion)
|
||||
// 也添加到映射表
|
||||
motionFileMap[motion] = "haru_g_idle.motion3.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -166,7 +194,12 @@ class Live2DCharacter : CubismUserModel() {
|
||||
if (idleMotions.isEmpty()) return
|
||||
val index = idleMotionIndex % idleMotions.size
|
||||
idleMotionIndex++
|
||||
motionManager.startMotionPriority(idleMotions[index], 1)
|
||||
|
||||
val motion = idleMotions[index]
|
||||
val motionName = motionFileMap[motion]
|
||||
Log.d("Live2DCharacter", "开始播放空闲动作: $motionName")
|
||||
|
||||
motionManager.startMotionPriority(motion, 1)
|
||||
}
|
||||
|
||||
private fun findIdleGroupName(): String {
|
||||
@@ -179,4 +212,122 @@ class Live2DCharacter : CubismUserModel() {
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
/**
 * Loads the motions associated with each mood and registers them in
 * [moodMotions], recording every loaded motion's file name in [motionFileMap].
 *
 * Motions that fail to load ([loadMotionByName] returns null) are left out, so
 * a mood may end up with fewer motions than listed, or with an empty list.
 *
 * @param assets asset manager used to read the motion files.
 * @param modelDir asset directory of the model.
 */
private fun loadMoodMotions(assets: AssetManager, modelDir: String) {
    // Mood key -> motion files, in the order they are loaded.
    val motionFilesByMood = listOf(
        // Happy
        "开心" to listOf(
            "haru_g_m22.motion3.json",
            "haru_g_m21.motion3.json",
            "haru_g_m18.motion3.json"
        ),
        // Sad
        "伤心" to listOf(
            "haru_g_m25.motion3.json",
            "haru_g_m24.motion3.json",
            "haru_g_m05.motion3.json"
        ),
        // Calm
        "平和" to listOf(
            "haru_g_m15.motion3.json",
            "haru_g_m07.motion3.json",
            "haru_g_m06.motion3.json",
            "haru_g_m02.motion3.json",
            "haru_g_m01.motion3.json"
        ),
        // Surprised
        "惊讶" to listOf(
            "haru_g_m26.motion3.json",
            "haru_g_m12.motion3.json"
        ),
        // Caring
        "关心" to listOf("haru_g_m17.motion3.json"),
        // Shy
        "害羞" to listOf("haru_g_m19.motion3.json")
    )

    for ((mood, fileNames) in motionFilesByMood) {
        moodMotions[mood] = fileNames.mapNotNull { fileName ->
            // Remember the file name of each loaded motion so it can be logged later.
            loadMotionByName(assets, modelDir, fileName)?.also { motionFileMap[it] = fileName }
        }
    }
}
|
||||
|
||||
/**
 * Loads the motions that are triggered by name rather than by mood.
 *
 * Currently registers a single entry, "hold_to_speak", which is played while
 * the record button is held. Nothing is registered if the file fails to load.
 */
private fun loadSpecificMotions(assets: AssetManager, modelDir: String) {
    val holdToSpeakFile = "haru_g_m17.motion3.json"
    val motion = loadMotionByName(assets, modelDir, holdToSpeakFile) ?: return
    motionFileMap[motion] = holdToSpeakFile
    specificMotions["hold_to_speak"] = motion
}
|
||||
|
||||
/**
 * Loads one motion from `<modelDir>/motion/<fileName>` and disables looping on it.
 *
 * @param assets asset manager used to read the motion file.
 * @param modelDir asset directory of the model.
 * @param fileName motion file name inside the `motion` sub-directory.
 * @return the loaded motion, or null when reading or parsing fails. The failure is
 *         logged instead of being dropped silently, so a missing or broken motion
 *         file can be diagnosed.
 */
private fun loadMotionByName(assets: AssetManager, modelDir: String, fileName: String): CubismMotion? {
    return runCatching {
        val motion = loadMotion(readAssetBytes(assets, "$modelDir/motion/$fileName"))
        motion?.setLoop(false)
        motion
    }.onFailure { error ->
        // Still best-effort: callers keep receiving null, but the cause is now visible.
        Log.w("Live2DCharacter", "加载动作失败: $modelDir/motion/$fileName", error)
    }.getOrNull()
}
|
||||
|
||||
/**
 * Stores [mood] as the current mood and starts one randomly chosen motion
 * registered for it. When the mood has no motions, only a log line is written.
 */
fun startMoodMotion(mood: String) {
    currentMood = mood

    val candidates = moodMotions[mood]?.takeIf { it.isNotEmpty() }
    if (candidates == null) {
        Log.d("Live2DCharacter", "心情 '$mood' 没有对应的动作")
        return
    }

    val picked = candidates[random.nextInt(candidates.size)]
    // The file name is looked up only for the log message.
    Log.d("Live2DCharacter", "开始播放心情动作: ${motionFileMap[picked]}, 心情: $mood")

    motionManager.startMotionPriority(picked, 10)
}
|
||||
|
||||
/**
 * Starts the motion registered under [motionName] in the specific-motion table.
 * Logs and does nothing else when no such motion is registered.
 */
fun startSpecificMotion(motionName: String) {
    val motion = specificMotions[motionName] ?: run {
        Log.d("Live2DCharacter", "特定动作 '$motionName' 不存在")
        return
    }

    Log.d("Live2DCharacter", "开始播放特定动作: ${motionFileMap[motion]}")
    motionManager.startMotionPriority(motion, 10)
}
|
||||
|
||||
/** Returns the mood most recently passed to [startMoodMotion]. */
fun getCurrentMood(): String = currentMood
|
||||
}
|
||||
@@ -62,6 +62,14 @@ class Live2DRenderer(
|
||||
this.speaking = speaking
|
||||
}
|
||||
|
||||
/** Forwards [mood] to the character's mood motion; does nothing when no character exists. */
fun setMood(mood: String) {
    val target = character ?: return
    target.startMoodMotion(mood)
}
|
||||
|
||||
/** Forwards [motionName] to the character; does nothing when no character exists. */
fun startSpecificMotion(motionName: String) {
    val target = character ?: return
    target.startSpecificMotion(motionName)
}
|
||||
|
||||
fun release() {
|
||||
character?.release()
|
||||
character = null
|
||||
|
||||
34
app/src/main/java/com/digitalperson/mood/MoodManager.kt
Normal file
34
app/src/main/java/com/digitalperson/mood/MoodManager.kt
Normal file
@@ -0,0 +1,34 @@
|
||||
package com.digitalperson.mood
|
||||
|
||||
import android.util.Log
|
||||
import com.digitalperson.config.AppConfig
|
||||
|
||||
/**
 * Keeps the most recently announced mood and strips the leading `[mood]` tag
 * from reply text.
 */
object MoodManager {
    private const val TAG = "MoodManager"
    private const val DEFAULT_MOOD = "平和"

    // Singleton state without locking; @Volatile keeps the latest write visible to
    // other threads. NOTE(review): callers are assumed to use several threads — confirm.
    @Volatile
    private var currentMood: String = DEFAULT_MOOD

    // A bracketed tag at the very start of the text, optionally preceded by whitespace
    // (replies may begin with a blank or newline before the tag).
    private val moodPattern = Regex("""^\s*\[([^\]]+)\]""")

    /**
     * Splits a leading `[mood]` tag off [text].
     *
     * When a non-blank tag is found, it becomes the current mood and the tag (together
     * with any whitespace in front of it) is removed from the returned text. Otherwise
     * [text] is returned unchanged along with the previously stored mood.
     *
     * @return a pair of (text without the tag, mood).
     */
    fun extractAndFilterMood(text: String): Pair<String, String> {
        val match = moodPattern.find(text) ?: return Pair(text, currentMood)

        // Trim so that "[ 开心 ]" maps to the same key as "[开心]".
        val mood = match.groupValues[1].trim()
        if (mood.isEmpty()) return Pair(text, currentMood)

        val filteredText = text.removeRange(match.range)
        currentMood = mood
        Log.d(TAG, "Extracted mood: $mood, filtered text: $filteredText")

        return Pair(filteredText, mood)
    }

    /** Returns the mood stored by the last successful extraction, or the default. */
    fun getCurrentMood(): String = currentMood

    /** Restores the default mood. */
    fun reset() {
        currentMood = DEFAULT_MOOD
    }
}
|
||||
@@ -36,6 +36,8 @@ class TtsManager(private val context: Context) {
|
||||
private val ttsStopped = AtomicBoolean(false)
|
||||
private val ttsWorkerRunning = AtomicBoolean(false)
|
||||
private val ttsPlaying = AtomicBoolean(false)
|
||||
private val interrupting = AtomicBoolean(false)
|
||||
private val needTrackReset = AtomicBoolean(true)
|
||||
@Volatile private var ttsTotalSamplesWritten: Long = 0
|
||||
|
||||
private var currentTrace: TraceSession? = null
|
||||
@@ -124,6 +126,10 @@ class TtsManager(private val context: Context) {
|
||||
}
|
||||
|
||||
fun enqueueSegment(seg: String) {
|
||||
if (ttsStopped.get()) {
|
||||
// Recover from interrupt state for next turn.
|
||||
ttsStopped.set(false)
|
||||
}
|
||||
val cleanedSeg = seg.trimEnd('.', '。', '!', '!', '?', '?', ',', ',', ';', ';', ':', ':')
|
||||
|
||||
callback?.onTraceMarkTtsRequestEnqueued()
|
||||
@@ -138,15 +144,26 @@ class TtsManager(private val context: Context) {
|
||||
fun isPlaying(): Boolean = ttsPlaying.get()
|
||||
|
||||
/**
 * Clears playback state and the pending queue so a fresh turn can start.
 */
fun reset() {
    // Snapshot both flags first; they are overwritten right below.
    val hadLiveWorker = ttsWorkerRunning.get()
    val hadStopRequest = ttsStopped.get()
    val staleWorkerNeedsWakeUp = hadStopRequest && hadLiveWorker

    ttsStopped.set(false)
    ttsPlaying.set(false)
    needTrackReset.set(true)
    ttsTotalSamplesWritten = 0
    ttsQueue.clear()

    // When reset() follows stop() closely, the previous worker can still be alive.
    // Queue another End token so that worker does not wait on take() forever.
    if (staleWorkerNeedsWakeUp) {
        ttsQueue.offer(TtsQueueItem.End)
    }
}
|
||||
|
||||
fun stop() {
|
||||
ttsStopped.set(true)
|
||||
ttsPlaying.set(false)
|
||||
needTrackReset.set(true)
|
||||
ttsTotalSamplesWritten = 0
|
||||
ttsQueue.clear()
|
||||
ttsQueue.offer(TtsQueueItem.End)
|
||||
@@ -158,6 +175,52 @@ class TtsManager(private val context: Context) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Interrupts any pending or ongoing TTS playback so a new turn can begin.
 *
 * Stops the worker via the stop flag and an End token, pauses and flushes the
 * audio track, then waits up to [waitTimeoutMs] for the worker to exit before
 * restoring the flags to their idle values.
 *
 * @param waitTimeoutMs maximum time to wait for the worker to finish, in milliseconds.
 * @return true when playback was pending and has been interrupted; false when another
 *         interrupt is already in progress or nothing was pending.
 */
@Synchronized
fun interruptForNewTurn(waitTimeoutMs: Long = 300): Boolean {
    if (!interrupting.compareAndSet(false, true)) return false
    try {
        val hadPendingPlayback = ttsPlaying.get() || ttsWorkerRunning.get() || ttsQueue.isNotEmpty()
        if (!hadPendingPlayback) {
            ttsStopped.set(false)
            ttsPlaying.set(false)
            ttsTotalSamplesWritten = 0
            return false
        }

        ttsStopped.set(true)
        ttsPlaying.set(false)
        needTrackReset.set(true)
        ttsTotalSamplesWritten = 0
        ttsQueue.clear()
        // Wake the worker if it is blocked waiting for the next queue item.
        ttsQueue.offer(TtsQueueItem.End)

        try {
            track?.pause()
            track?.flush()
        } catch (t: Throwable) {
            // Best-effort: keep going, but leave a trace instead of hiding the failure.
            Log.w(TAG, "interruptForNewTurn: failed to pause/flush track", t)
        }

        // Use the monotonic clock: wall-clock time may jump and distort the timeout.
        val waitNanos = waitTimeoutMs.coerceIn(0L, Long.MAX_VALUE / 1_000_000L) * 1_000_000L
        val deadlineNanos = System.nanoTime() + waitNanos
        while (ttsWorkerRunning.get() && System.nanoTime() - deadlineNanos < 0) {
            Thread.sleep(10)
        }

        if (ttsWorkerRunning.get()) {
            Log.w(TAG, "interruptForNewTurn timeout: worker still running")
        }

        // Drop anything queued during the wait and return to the idle state.
        ttsQueue.clear()
        ttsStopped.set(false)
        ttsPlaying.set(false)
        needTrackReset.set(true)
        ttsTotalSamplesWritten = 0
        callback?.onSetSpeaking(false)
        return true
    } finally {
        interrupting.set(false)
    }
}
|
||||
|
||||
fun release() {
|
||||
try {
|
||||
tts?.release()
|
||||
@@ -182,6 +245,10 @@ class TtsManager(private val context: Context) {
|
||||
runTtsWorker()
|
||||
} finally {
|
||||
ttsWorkerRunning.set(false)
|
||||
// Handle race: items may be enqueued while old worker is still exiting.
|
||||
if (!ttsStopped.get() && ttsQueue.isNotEmpty()) {
|
||||
ensureTtsWorker()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -191,7 +258,6 @@ class TtsManager(private val context: Context) {
|
||||
val audioTrack = track ?: return
|
||||
|
||||
var firstAudioMarked = false
|
||||
var isFirstSegment = true
|
||||
while (true) {
|
||||
val item = ttsQueue.take()
|
||||
if (ttsStopped.get()) break
|
||||
@@ -209,14 +275,18 @@ class TtsManager(private val context: Context) {
|
||||
val startMs = System.currentTimeMillis()
|
||||
var firstPcmMarked = false
|
||||
|
||||
if (isFirstSegment) {
|
||||
if (needTrackReset.compareAndSet(true, false)) {
|
||||
try {
|
||||
audioTrack.pause()
|
||||
audioTrack.flush()
|
||||
audioTrack.play()
|
||||
} catch (_: Throwable) {
|
||||
}
|
||||
isFirstSegment = false
|
||||
} else if (audioTrack.playState != AudioTrack.PLAYSTATE_PLAYING) {
|
||||
try {
|
||||
audioTrack.play()
|
||||
} catch (_: Throwable) {
|
||||
}
|
||||
}
|
||||
|
||||
t.generateWithCallback(
|
||||
|
||||
@@ -3,7 +3,9 @@ package com.digitalperson.ui
|
||||
import android.app.Activity
|
||||
import android.opengl.GLSurfaceView
|
||||
import android.text.method.ScrollingMovementMethod
|
||||
import android.view.MotionEvent
|
||||
import android.widget.Button
|
||||
import android.widget.LinearLayout
|
||||
import android.widget.ScrollView
|
||||
import android.widget.TextView
|
||||
import android.widget.Toast
|
||||
@@ -14,6 +16,8 @@ class Live2DUiManager(private val activity: Activity) {
|
||||
private var scrollView: ScrollView? = null
|
||||
private var startButton: Button? = null
|
||||
private var stopButton: Button? = null
|
||||
private var recordButton: Button? = null
|
||||
private var traditionalButtons: LinearLayout? = null
|
||||
private var avatarManager: Live2DAvatarManager? = null
|
||||
|
||||
private var lastUiText: String = ""
|
||||
@@ -21,16 +25,20 @@ class Live2DUiManager(private val activity: Activity) {
|
||||
fun initViews(
|
||||
textViewId: Int,
|
||||
scrollViewId: Int,
|
||||
startButtonId: Int,
|
||||
stopButtonId: Int,
|
||||
startButtonId: Int = -1,
|
||||
stopButtonId: Int = -1,
|
||||
recordButtonId: Int = -1,
|
||||
traditionalButtonsId: Int = -1,
|
||||
silentPlayerViewId: Int,
|
||||
speakingPlayerViewId: Int,
|
||||
live2dViewId: Int
|
||||
) {
|
||||
textView = activity.findViewById(textViewId)
|
||||
scrollView = activity.findViewById(scrollViewId)
|
||||
startButton = activity.findViewById(startButtonId)
|
||||
stopButton = activity.findViewById(stopButtonId)
|
||||
if (startButtonId != -1) startButton = activity.findViewById(startButtonId)
|
||||
if (stopButtonId != -1) stopButton = activity.findViewById(stopButtonId)
|
||||
if (recordButtonId != -1) recordButton = activity.findViewById(recordButtonId)
|
||||
if (traditionalButtonsId != -1) traditionalButtons = activity.findViewById(traditionalButtonsId)
|
||||
|
||||
textView?.movementMethod = ScrollingMovementMethod()
|
||||
|
||||
@@ -47,6 +55,36 @@ class Live2DUiManager(private val activity: Activity) {
|
||||
stopButton?.setOnClickListener { listener() }
|
||||
}
|
||||
|
||||
/**
 * Installs a press-and-hold handler on the record button.
 *
 * [listener] is called with true on ACTION_DOWN and with false on ACTION_UP or
 * ACTION_CANCEL; the button's pressed state is updated to match. Other events
 * are not consumed.
 */
fun setRecordButtonTouchListener(listener: (Boolean) -> Unit) {
    recordButton?.setOnTouchListener { _, event ->
        val pressed = when (event.action) {
            MotionEvent.ACTION_DOWN -> true
            MotionEvent.ACTION_UP, MotionEvent.ACTION_CANCEL -> false
            else -> return@setOnTouchListener false
        }
        recordButton?.isPressed = pressed
        listener(pressed)
        true
    }
}
|
||||
|
||||
/**
 * Switches between the hold-to-speak record button and the traditional
 * start/stop button row: exactly one of the two is made visible.
 */
fun setUseHoldToSpeak(useHoldToSpeak: Boolean) {
    traditionalButtons?.visibility = if (useHoldToSpeak) LinearLayout.GONE else LinearLayout.VISIBLE
    recordButton?.visibility = if (useHoldToSpeak) Button.VISIBLE else Button.GONE
}
|
||||
|
||||
fun appendToUi(s: String) {
|
||||
lastUiText += s
|
||||
textView?.text = lastUiText
|
||||
@@ -63,9 +101,10 @@ class Live2DUiManager(private val activity: Activity) {
|
||||
textView?.text = text
|
||||
}
|
||||
|
||||
fun setButtonsEnabled(startEnabled: Boolean, stopEnabled: Boolean) {
|
||||
fun setButtonsEnabled(startEnabled: Boolean = false, stopEnabled: Boolean = false, recordEnabled: Boolean = true) {
|
||||
startButton?.isEnabled = startEnabled
|
||||
stopButton?.isEnabled = stopEnabled
|
||||
recordButton?.isEnabled = recordEnabled
|
||||
}
|
||||
|
||||
fun setSpeaking(speaking: Boolean) {
|
||||
@@ -74,6 +113,18 @@ class Live2DUiManager(private val activity: Activity) {
|
||||
}
|
||||
}
|
||||
|
||||
/** Passes [mood] to the avatar manager on the UI thread; no-op when it is not set. */
fun setMood(mood: String) {
    activity.runOnUiThread {
        val manager = avatarManager ?: return@runOnUiThread
        manager.setMood(mood)
    }
}
|
||||
|
||||
/** Passes [motionName] to the avatar manager on the UI thread; no-op when it is not set. */
fun startSpecificMotion(motionName: String) {
    activity.runOnUiThread {
        val manager = avatarManager ?: return@runOnUiThread
        manager.startSpecificMotion(motionName)
    }
}
|
||||
|
||||
fun showToast(message: String, duration: Int = Toast.LENGTH_SHORT) {
|
||||
activity.runOnUiThread {
|
||||
Toast.makeText(activity, message, duration).show()
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.digitalperson.ui
|
||||
import android.app.Activity
|
||||
import android.text.method.ScrollingMovementMethod
|
||||
import android.util.Log
|
||||
import android.view.MotionEvent
|
||||
import android.widget.Button
|
||||
import android.widget.ScrollView
|
||||
import android.widget.TextView
|
||||
@@ -17,6 +18,7 @@ class UiManager(private val activity: Activity) {
|
||||
private var scrollView: ScrollView? = null
|
||||
private var startButton: Button? = null
|
||||
private var stopButton: Button? = null
|
||||
private var recordButton: Button? = null
|
||||
private var videoPlayerManager: VideoPlayerManager? = null
|
||||
|
||||
private var lastUiText: String = ""
|
||||
@@ -24,15 +26,17 @@ class UiManager(private val activity: Activity) {
|
||||
fun initViews(
|
||||
textViewId: Int,
|
||||
scrollViewId: Int,
|
||||
startButtonId: Int,
|
||||
stopButtonId: Int,
|
||||
startButtonId: Int = -1,
|
||||
stopButtonId: Int = -1,
|
||||
recordButtonId: Int = -1,
|
||||
silentPlayerViewId: Int,
|
||||
speakingPlayerViewId: Int
|
||||
) {
|
||||
textView = activity.findViewById(textViewId)
|
||||
scrollView = activity.findViewById(scrollViewId)
|
||||
startButton = activity.findViewById(startButtonId)
|
||||
stopButton = activity.findViewById(stopButtonId)
|
||||
if (startButtonId != -1) startButton = activity.findViewById(startButtonId)
|
||||
if (stopButtonId != -1) stopButton = activity.findViewById(stopButtonId)
|
||||
if (recordButtonId != -1) recordButton = activity.findViewById(recordButtonId)
|
||||
|
||||
textView?.movementMethod = ScrollingMovementMethod()
|
||||
|
||||
@@ -54,6 +58,24 @@ class UiManager(private val activity: Activity) {
|
||||
stopButton?.setOnClickListener { listener() }
|
||||
}
|
||||
|
||||
/**
 * Installs a press-and-hold handler on the record button.
 *
 * [listener] is called with true on ACTION_DOWN and with false on ACTION_UP or
 * ACTION_CANCEL. Other events are not consumed.
 */
fun setRecordButtonTouchListener(listener: (Boolean) -> Unit) {
    recordButton?.setOnTouchListener { view, event ->
        when (event.action) {
            MotionEvent.ACTION_DOWN -> {
                // The event is consumed here, so the pressed state is set by hand
                // to keep the button's pressed appearance working.
                view.isPressed = true
                listener(true)
                true
            }
            MotionEvent.ACTION_UP,
            MotionEvent.ACTION_CANCEL -> {
                view.isPressed = false
                listener(false)
                true
            }
            else -> false
        }
    }
}
|
||||
|
||||
fun appendToUi(s: String) {
|
||||
if (!AppConfig.SHOW_DEBUG_TEXT) return
|
||||
|
||||
@@ -74,9 +96,10 @@ class UiManager(private val activity: Activity) {
|
||||
textView?.text = text
|
||||
}
|
||||
|
||||
fun setButtonsEnabled(startEnabled: Boolean, stopEnabled: Boolean) {
|
||||
fun setButtonsEnabled(startEnabled: Boolean = false, stopEnabled: Boolean = false, recordEnabled: Boolean = true) {
|
||||
startButton?.isEnabled = startEnabled
|
||||
stopButton?.isEnabled = stopEnabled
|
||||
recordButton?.isEnabled = recordEnabled
|
||||
}
|
||||
|
||||
fun setSpeaking(speaking: Boolean) {
|
||||
|
||||
27
app/src/main/res/animator/button_elevation.xml
Normal file
27
app/src/main/res/animator/button_elevation.xml
Normal file
@@ -0,0 +1,27 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
<!-- State list animator: raises the button while it is pressed and lowers it otherwise. -->
<selector xmlns:android="http://schemas.android.com/apk/res/android">
    <item android:state_pressed="true">
        <!-- An item takes a single animator; <set> groups both so neither one is dropped. -->
        <set>
            <objectAnimator
                android:propertyName="translationZ"
                android:duration="@android:integer/config_shortAnimTime"
                android:valueTo="6dp"
                android:valueType="floatType" />
            <objectAnimator
                android:propertyName="elevation"
                android:duration="@android:integer/config_shortAnimTime"
                android:valueTo="8dp"
                android:valueType="floatType" />
        </set>
    </item>
    <!-- Default state (not pressed). -->
    <item>
        <set>
            <objectAnimator
                android:propertyName="translationZ"
                android:duration="@android:integer/config_shortAnimTime"
                android:valueTo="0dp"
                android:valueType="floatType" />
            <objectAnimator
                android:propertyName="elevation"
                android:duration="@android:integer/config_shortAnimTime"
                android:valueTo="2dp"
                android:valueType="floatType" />
        </set>
    </item>
</selector>
|
||||
14
app/src/main/res/drawable/record_button_background.xml
Normal file
14
app/src/main/res/drawable/record_button_background.xml
Normal file
@@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
<!-- Round green background for the record button with a white ripple. -->
<ripple
    xmlns:android="http://schemas.android.com/apk/res/android"
    android:color="@android:color/white">

    <!-- Oval mask for the ripple. -->
    <item android:id="@android:id/mask">
        <shape android:shape="oval">
            <solid android:color="@android:color/white" />
        </shape>
    </item>

    <!-- Visible content: solid green oval. -->
    <item>
        <shape android:shape="oval">
            <solid android:color="#4CAF50" />
        </shape>
    </item>
</ripple>
|
||||
@@ -69,20 +69,47 @@
|
||||
app:layout_constraintEnd_toEndOf="parent"
|
||||
app:layout_constraintStart_toStartOf="parent">
|
||||
|
||||
<Button
|
||||
android:id="@+id/start_button"
|
||||
android:layout_width="0dp"
|
||||
<!-- 传统按钮 -->
|
||||
<LinearLayout
|
||||
android:id="@+id/traditional_buttons"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_marginEnd="12dp"
|
||||
android:layout_weight="1"
|
||||
android:text="@string/start" />
|
||||
android:orientation="horizontal"
|
||||
android:gravity="center">
|
||||
|
||||
<Button
|
||||
android:id="@+id/stop_button"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_weight="1"
|
||||
android:text="@string/stop" />
|
||||
<Button
|
||||
android:id="@+id/start_button"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_marginEnd="12dp"
|
||||
android:layout_weight="1"
|
||||
android:text="@string/start" />
|
||||
|
||||
<Button
|
||||
android:id="@+id/stop_button"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_weight="1"
|
||||
android:text="@string/stop" />
|
||||
</LinearLayout>
|
||||
</LinearLayout>
|
||||
|
||||
<!-- 按住录音按钮 - 右下角 -->
|
||||
<Button
|
||||
android:id="@+id/record_button"
|
||||
android:layout_width="100dp"
|
||||
android:layout_height="100dp"
|
||||
android:layout_margin="24dp"
|
||||
android:layout_marginBottom="24dp"
|
||||
android:text="按住说话"
|
||||
android:textColor="@android:color/white"
|
||||
android:textSize="14sp"
|
||||
android:textAllCaps="false"
|
||||
android:background="@drawable/record_button_background"
|
||||
app:backgroundTint="#4CAF50"
|
||||
android:stateListAnimator="@animator/button_elevation"
|
||||
android:visibility="gone"
|
||||
app:layout_constraintBottom_toBottomOf="parent"
|
||||
app:layout_constraintEnd_toEndOf="parent" />
|
||||
|
||||
</androidx.constraintlayout.widget.ConstraintLayout>
|
||||
|
||||
@@ -91,19 +91,14 @@
|
||||
app:layout_constraintStart_toStartOf="parent">
|
||||
|
||||
<Button
|
||||
android:id="@+id/start_button"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_marginEnd="12dp"
|
||||
android:layout_weight="1"
|
||||
android:text="@string/start" />
|
||||
android:id="@+id/record_button"
|
||||
android:layout_width="200dp"
|
||||
android:layout_height="200dp"
|
||||
android:layout_gravity="center"
|
||||
android:text="按住录音"
|
||||
android:textSize="18sp"
|
||||
android:background="@android:drawable/ic_btn_speak_now" />
|
||||
|
||||
<Button
|
||||
android:id="@+id/stop_button"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_weight="1"
|
||||
android:text="@string/stop" />
|
||||
</LinearLayout>
|
||||
|
||||
<!-- 半透明遮罩层 -->
|
||||
|
||||
@@ -3,5 +3,5 @@
|
||||
<string name="start">开始</string>
|
||||
<string name="stop">结束</string>
|
||||
<string name="hint">点击“开始”说话;识别后会请求大模型并用 TTS 播放回复。</string>
|
||||
<string name="system_prompt">你是一名小学女老师,喜欢回答学生的各种问题,请简洁但温柔地回答,每个回答不超过30字。</string>
|
||||
<string name="system_prompt">你是一名小学女老师,喜欢回答学生的各种问题,请简洁但温柔地回答,每个回答不超过30字。在每次回复的最前面,用方括号标注你的心情,格式为[开心/伤心/愤怒/平和/惊讶/关心/害羞],例如:[开心]同学你好呀!请问有什么问题吗?</string>
|
||||
</resources>
|
||||
|
||||
@@ -24,5 +24,5 @@ android.nonTransitiveRClass=true
|
||||
|
||||
LLM_API_URL=https://ark.cn-beijing.volces.com/api/v3/chat/completions
|
||||
LLM_API_KEY=14ee3e0e-ec07-4678-8b92-64f3b1416592
|
||||
LLM_MODEL=doubao-1-5-pro-32k-character-250228
|
||||
LLM_MODEL=doubao-1-5-pro-32k-character-250715
|
||||
USE_LIVE2D=true
|
||||
Reference in New Issue
Block a user