push2talk

This commit is contained in:
gcw_4spBpAfv
2026-03-02 12:00:33 +08:00
parent 2f6166ab6c
commit 1701ecfb7f
19 changed files with 802 additions and 160 deletions

View File

@@ -55,3 +55,50 @@ TTS Sherpa-ONNX VITS .onnx ❌ 否 CPU ONNX Runtime
│ SenseVoice │
│ (RKNN推理) │
└─────────────────┘
5. live2d, Haru的动作
由于没有官方文档,基于 Live2D 模型的常见设计模式,我建议以下动作与情绪的映射:
### 🎭 开心类情绪
- haru_g_m22 眯眼微笑
- haru_g_m21 跳动微笑
- haru_g_m18 手收背后微笑
- haru_g_m09 微微鞠躬
- haru_g_m08 深深鞠躬
### 😢 伤心类情绪
- haru_g_m25 - 扁嘴
- haru_g_m24 - 低头斜看地板,收手到背后
- haru_g_m05 扁嘴,张开双手
### 😠 愤怒类情绪
- haru_g_m11 双手交叉,摇头,扁嘴
- haru_g_m04 双手交叉,点头
- haru_g_m03 双手交叉,点头
### 😌 平和类情绪
- haru_g_m15 双手交叉在胸前
- haru_g_m07 举起左手
- haru_g_m06 举起右手
- haru_g_m02 双手放到背后
- haru_g_m01 点头
### 😲 惊讶类情绪
- haru_g_m26 - 后退一步(适合:惊讶、吃惊)
- haru_g_m12 摆手,摇头
### 😕 困惑类情绪
- haru_g_m20 手指点腮,思考,皱眉
- haru_g_m16 双手捧腮,思考
- haru_g_m14 身体前倾,皱眉
- haru_g_m13 身体前倾,双手分开
### 害羞
- haru_g_m19 脸红微笑
### ❤️ 关心类情绪
- haru_g_m17 靠近侧脸

View File

@@ -38,6 +38,9 @@ class Live2DChatActivity : AppCompatActivity() {
@Volatile
private var isRecording: Boolean = false
private val holdToSpeakAudioBuffer = mutableListOf<Float>()
private val HOLD_TO_SPEAK_MIN_SAMPLES = 16000 // 1秒的音频数据
private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
private var recordingJob: Job? = null
private val nativeLock = Any()
@@ -77,13 +80,30 @@ class Live2DChatActivity : AppCompatActivity() {
scrollViewId = R.id.scroll_view,
startButtonId = R.id.start_button,
stopButtonId = R.id.stop_button,
recordButtonId = R.id.record_button,
traditionalButtonsId = R.id.traditional_buttons,
silentPlayerViewId = 0,
speakingPlayerViewId = 0,
live2dViewId = R.id.live2d_view
)
// 根据配置选择交互方式
uiManager.setUseHoldToSpeak(AppConfig.USE_HOLD_TO_SPEAK)
if (AppConfig.USE_HOLD_TO_SPEAK) {
uiManager.setRecordButtonTouchListener { isDown ->
if (isDown) {
// 按住按钮,开始录音
onRecordButtonDown()
} else {
// 松开按钮,停止录音
onRecordButtonUp()
}
}
} else {
uiManager.setStartButtonListener { onStartClicked() }
uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
}
ActivityCompat.requestPermissions(this, permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)
@@ -99,10 +119,16 @@ class Live2DChatActivity : AppCompatActivity() {
Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
}
if (AppConfig.USE_HOLD_TO_SPEAK) {
uiManager.setButtonsEnabled(recordEnabled = false)
} else {
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
}
uiManager.setText("初始化中…")
audioProcessor = AudioProcessor(this)
ttsManager = TtsManager(this)
ttsManager.setCallback(createTtsCallback())
asrManager = AsrManager(this)
asrManager.setAudioProcessor(audioProcessor)
@@ -127,24 +153,29 @@ class Live2DChatActivity : AppCompatActivity() {
)
}
uiManager.setText(getString(R.string.hint))
if (AppConfig.USE_HOLD_TO_SPEAK) {
uiManager.setButtonsEnabled(recordEnabled = true)
} else {
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
}
}
} catch (t: Throwable) {
Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
withContext(Dispatchers.Main) {
uiManager.setText("初始化失败:${t.javaClass.simpleName}: ${t.message}")
uiManager.showToast("初始化失败(请看 Logcat: ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
if (AppConfig.USE_HOLD_TO_SPEAK) {
uiManager.setButtonsEnabled(recordEnabled = false)
} else {
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
}
}
}
}
cloudApiManager = CloudApiManager(createCloudApiListener(), applicationContext)
cloudApiManager.setEnableStreaming(enableStreaming)
ttsManager = TtsManager(this)
ttsManager.setCallback(createTtsCallback())
Log.d(AppConfig.TAG, "Pre-starting ASR worker")
ioScope.launch {
asrManager.runAsrWorker()
@@ -205,10 +236,18 @@ class Live2DChatActivity : AppCompatActivity() {
}
ttsManager.enqueueEnd()
} else {
runOnUiThread {
uiManager.appendToUi("${response}\n")
val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(response)
android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Final mood: $mood, filtered text: $filteredText")
if (mood != previousMood) {
uiManager.setMood(mood)
}
ttsManager.enqueueSegment(response)
runOnUiThread {
uiManager.appendToUi("${filteredText}\n")
}
ttsManager.enqueueSegment(filteredText)
ttsManager.enqueueEnd()
}
}
@@ -219,9 +258,18 @@ class Live2DChatActivity : AppCompatActivity() {
llmFirstChunkMarked = true
currentTrace?.markLlmFirstChunk()
}
uiManager.appendToUi(chunk)
val segments = segmenter.processChunk(chunk)
val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(chunk)
if (mood != previousMood) {
android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Mood changed to: $mood")
// 设置Live2D人物的心情
uiManager.setMood(mood)
}
uiManager.appendToUi(filteredText)
val segments = segmenter.processChunk(filteredText)
for (seg in segments) {
ttsManager.enqueueSegment(seg)
}
@@ -353,6 +401,77 @@ class Live2DChatActivity : AppCompatActivity() {
Log.d(AppConfig.TAG, "onStartClicked completed")
}
/**
 * Hold-to-speak: button pressed. Interrupts any ongoing TTS playback, starts a new
 * trace turn, clears UI/segmenter state, and launches the sample-collection loop.
 * No-op if a recording is already in progress.
 */
private fun onRecordButtonDown() {
    Log.d(AppConfig.TAG, "onRecordButtonDown called")
    if (isRecording) {
        Log.d(AppConfig.TAG, "Already recording, returning")
        return
    }
    // If TTS is still speaking, cut it off so the new user turn starts cleanly.
    val interrupted = ttsManager.interruptForNewTurn()
    if (interrupted) {
        uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
    }
    if (!audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
        uiManager.showToast("麦克风初始化失败/无权限")
        return
    }
    currentTrace = TraceManager.getInstance().startNewTurn()
    currentTrace?.mark("turn_start")
    llmInFlight = false
    uiManager.clearText()
    // interruptForNewTurn() already prepared TTS state for next turn.
    // Keep reset() only for non-interrupt entry points.
    ttsManager.setCurrentTrace(currentTrace)
    segmenter.reset()
    // Play the dedicated "listening" motion while the button is held.
    uiManager.startSpecificMotion("hold_to_speak")
    holdToSpeakAudioBuffer.clear()
    audioProcessor.startRecording()
    // Set the flag before launching the loop; processSamplesLoop spins on isRecording.
    isRecording = true
    Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
    recordingJob?.cancel()
    recordingJob = ioScope.launch {
        processSamplesLoop()
    }
    Log.d(AppConfig.TAG, "onRecordButtonDown completed")
}
/**
 * Hold-to-speak: button released. Stops recording, drains the remaining audio,
 * and forwards the accumulated utterance to ASR if it is long enough
 * (HOLD_TO_SPEAK_MIN_SAMPLES, i.e. ~1 s at 16 kHz).
 */
private fun onRecordButtonUp() {
    Log.d(AppConfig.TAG, "onRecordButtonUp called")
    if (!isRecording) {
        Log.d(AppConfig.TAG, "Not recording, returning")
        return
    }
    isRecording = false
    audioProcessor.stopRecording()
    // Cancel the collection loop; a fresh job finalizes the utterance.
    // NOTE(review): holdToSpeakAudioBuffer is a plain MutableList touched by both
    // the (cancelled) loop job and this job — confirm the cancel/launch ordering
    // prevents concurrent access, or guard it with a lock.
    recordingJob?.cancel()
    recordingJob = ioScope.launch {
        // Flush whatever the recorder still holds into the buffer.
        val audioData = audioProcessor.getRecordedData()
        holdToSpeakAudioBuffer.addAll(audioData.toList())
        if (holdToSpeakAudioBuffer.size >= HOLD_TO_SPEAK_MIN_SAMPLES) {
            val finalAudio = holdToSpeakAudioBuffer.toFloatArray()
            // NOTE(review): raw and processed audio are the same array here —
            // confirm enqueueAudioSegment(raw, processed) is meant to bypass processing.
            asrManager.enqueueAudioSegment(finalAudio, finalAudio)
        } else {
            uiManager.showToast("录音时间太短请长按至少1秒")
        }
        holdToSpeakAudioBuffer.clear()
    }
    Log.d(AppConfig.TAG, "onRecordButtonUp completed")
}
private fun onStopClicked(userInitiated: Boolean) {
isRecording = false
audioProcessor.stopRecording()
@@ -362,7 +481,11 @@ class Live2DChatActivity : AppCompatActivity() {
ttsManager.stop()
if (AppConfig.USE_HOLD_TO_SPEAK) {
uiManager.setButtonsEnabled(recordEnabled = true)
} else {
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
}
if (userInitiated) {
TraceManager.getInstance().endTurn()
@@ -372,6 +495,19 @@ class Live2DChatActivity : AppCompatActivity() {
private suspend fun processSamplesLoop() {
Log.d(AppConfig.TAG, "processSamplesLoop started")
if (AppConfig.USE_HOLD_TO_SPEAK) {
// 按住说话模式累积音频数据到一定长度后再发送给ASR
while (isRecording && ioScope.coroutineContext.isActive) {
val audioData = audioProcessor.getAudioData()
if (audioData.isNotEmpty()) {
holdToSpeakAudioBuffer.addAll(audioData.toList())
}
// 避免CPU占用过高
kotlinx.coroutines.delay(10)
}
} else {
// 传统模式使用VAD
val windowSize = AppConfig.WINDOW_SIZE
val buffer = ShortArray(windowSize)
var loopCount = 0
@@ -415,4 +551,6 @@ class Live2DChatActivity : AppCompatActivity() {
vadManager.forceFinalize()
}
Log.d(AppConfig.TAG, "processSamplesLoop stopped")
}
}

View File

@@ -75,14 +75,20 @@ class MainActivity : AppCompatActivity() {
uiManager.initViews(
textViewId = R.id.my_text,
scrollViewId = R.id.scroll_view,
startButtonId = R.id.start_button,
stopButtonId = R.id.stop_button,
recordButtonId = R.id.record_button,
silentPlayerViewId = R.id.player_view_silent,
speakingPlayerViewId = R.id.player_view_speaking
)
uiManager.setStartButtonListener { onStartClicked() }
uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
uiManager.setRecordButtonTouchListener { isDown ->
if (isDown) {
// 按住按钮,开始录音
onRecordButtonDown()
} else {
// 松开按钮,停止录音
onRecordButtonUp()
}
}
ActivityCompat.requestPermissions(this, permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)
@@ -98,7 +104,7 @@ class MainActivity : AppCompatActivity() {
Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
}
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
uiManager.setButtonsEnabled(recordEnabled = false)
uiManager.setText("初始化中…")
audioProcessor = AudioProcessor(this)
@@ -126,14 +132,14 @@ class MainActivity : AppCompatActivity() {
)
}
uiManager.setText(getString(R.string.hint))
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
uiManager.setButtonsEnabled(recordEnabled = true)
}
} catch (t: Throwable) {
Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
withContext(Dispatchers.Main) {
uiManager.setText("初始化失败:${t.javaClass.simpleName}: ${t.message}")
uiManager.showToast("初始化失败(请看 Logcat: ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
uiManager.setButtonsEnabled(recordEnabled = false)
}
}
}
@@ -204,10 +210,13 @@ class MainActivity : AppCompatActivity() {
}
ttsManager.enqueueEnd()
} else {
val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(response)
android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Final mood: $mood, filtered text: $filteredText")
runOnUiThread {
uiManager.appendToUi("${response}\n")
uiManager.appendToUi("${filteredText}\n")
}
ttsManager.enqueueSegment(response)
ttsManager.enqueueSegment(filteredText)
ttsManager.enqueueEnd()
}
}
@@ -218,9 +227,15 @@ class MainActivity : AppCompatActivity() {
llmFirstChunkMarked = true
currentTrace?.markLlmFirstChunk()
}
uiManager.appendToUi(chunk)
val segments = segmenter.processChunk(chunk)
val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(chunk)
if (mood != com.digitalperson.mood.MoodManager.getCurrentMood()) {
android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Mood changed to: $mood")
}
uiManager.appendToUi(filteredText)
val segments = segmenter.processChunk(filteredText)
for (seg in segments) {
ttsManager.enqueueSegment(seg)
}
@@ -306,13 +321,19 @@ class MainActivity : AppCompatActivity() {
try { audioProcessor.release() } catch (_: Throwable) {}
}
private fun onStartClicked() {
Log.d(AppConfig.TAG, "onStartClicked called")
private fun onRecordButtonDown() {
Log.d(AppConfig.TAG, "onRecordButtonDown called")
if (isRecording) {
Log.d(AppConfig.TAG, "Already recording, returning")
return
}
// 如果TTS正在播放打断它
if (ttsManager.isPlaying()) {
ttsManager.stop()
uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
}
if (!audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
uiManager.showToast("麦克风初始化失败/无权限")
return
@@ -328,18 +349,37 @@ class MainActivity : AppCompatActivity() {
ttsManager.setCurrentTrace(currentTrace)
segmenter.reset()
vadManager.reset()
audioProcessor.startRecording()
isRecording = true
uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = true)
Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
recordingJob?.cancel()
recordingJob = ioScope.launch {
processSamplesLoop()
}
Log.d(AppConfig.TAG, "onStartClicked completed")
Log.d(AppConfig.TAG, "onRecordButtonDown completed")
}
/**
 * Hold-to-speak: button released. Stops the recorder, cancels the sample loop,
 * and ships the full recorded utterance straight to ASR (VAD is bypassed in this mode).
 * No-op if not currently recording.
 */
private fun onRecordButtonUp() {
    Log.d(AppConfig.TAG, "onRecordButtonUp called")
    if (!isRecording) {
        Log.d(AppConfig.TAG, "Not recording, returning")
        return
    }
    isRecording = false
    audioProcessor.stopRecording()
    recordingJob?.cancel()
    recordingJob = ioScope.launch {
        // Drain everything captured during the hold.
        val audioData = audioProcessor.getRecordedData()
        if (audioData.isNotEmpty()) {
            // NOTE(review): same array passed as both raw and processed audio —
            // confirm ASR does not expect gain/AEC-processed samples here.
            asrManager.enqueueAudioSegment(audioData, audioData)
        }
    }
    Log.d(AppConfig.TAG, "onRecordButtonUp completed")
}
private fun onStopClicked(userInitiated: Boolean) {
@@ -351,7 +391,7 @@ class MainActivity : AppCompatActivity() {
ttsManager.stop()
uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
uiManager.setButtonsEnabled(recordEnabled = true)
if (userInitiated) {
TraceManager.getInstance().endTurn()
@@ -361,47 +401,16 @@ class MainActivity : AppCompatActivity() {
private suspend fun processSamplesLoop() {
Log.d(AppConfig.TAG, "processSamplesLoop started")
val windowSize = AppConfig.WINDOW_SIZE
val buffer = ShortArray(windowSize)
var loopCount = 0
while (isRecording && ioScope.coroutineContext.isActive) {
loopCount++
if (loopCount % 100 == 0) {
Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
val audioData = audioProcessor.getAudioData()
if (audioData.isNotEmpty()) {
// 直接发送到ASR不经过VAD
asrManager.enqueueAudioSegment(audioData, audioData)
}
if (ttsManager.isPlaying()) {
if (vadManager.isInSpeech()) {
Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
vadManager.clearState()
// 避免CPU占用过高
kotlinx.coroutines.delay(10)
}
val ret = audioProcessor.readAudio(buffer)
if (ret <= 0) continue
continue
}
val ret = audioProcessor.readAudio(buffer)
if (ret <= 0) continue
if (ret != windowSize) continue
val chunk = audioProcessor.convertShortToFloat(buffer)
val processedChunk = audioProcessor.applyGain(chunk)
val result = vadManager.processAudioChunk(chunk, processedChunk)
if (vadManager.vadComputeCount % 100 == 0) {
Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
}
if (loopCount % 1000 == 0) {
Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
}
val forced = segmenter.maybeForceByTime()
for (seg in forced) ttsManager.enqueueSegment(seg)
}
vadManager.forceFinalize()
Log.d(AppConfig.TAG, "processSamplesLoop stopped")
}
}

View File

@@ -1,5 +1,7 @@
package com.digitalperson
import com.digitalperson.mood.MoodManager
/**
* 将大模型流式 chunk 做“伪流式 TTS”的分段器
* - 优先按中文/英文标点断句,尽早产出第一段,缩短首包时间
@@ -11,17 +13,28 @@ class StreamingTextSegmenter(
) {
private val buf = StringBuilder()
private var lastEmitAtMs: Long = 0
private var moodExtracted = false
@Synchronized
fun reset(nowMs: Long = System.currentTimeMillis()) {
    // Start a new turn: drop buffered text, restart the force-emit timer,
    // and re-arm the one-shot mood-tag extraction for the next reply.
    buf.setLength(0)
    lastEmitAtMs = nowMs
    moodExtracted = false
}
@Synchronized
fun processChunk(chunk: String, nowMs: Long = System.currentTimeMillis()): List<String> {
    /*
     * Feed one streaming LLM chunk into the segmenter and return any segments
     * that are ready for TTS. Empty chunks produce nothing.
     *
     * The leading "[mood]" tag is stripped from the first chunk of each reply
     * only (moodExtracted is re-armed by reset()/drain()); appending the raw
     * chunk as well would duplicate text — only the filtered form is buffered.
     */
    if (chunk.isEmpty()) return emptyList()
    var processedChunk = chunk
    if (!moodExtracted) {
        val (filteredText, _) = MoodManager.extractAndFilterMood(chunk)
        processedChunk = filteredText
        // NOTE(review): a mood tag split across two chunks (e.g. "[开" + "心]")
        // will not be filtered — confirm upstream always delivers the tag whole.
        moodExtracted = true
    }
    buf.append(processedChunk)
    return drain(nowMs, forceByTime = false)
}
@@ -32,6 +45,7 @@ class StreamingTextSegmenter(
buf.setLength(0)
if (remaining.isNotEmpty()) out.add(remaining)
lastEmitAtMs = nowMs
moodExtracted = false
return out
}

View File

@@ -27,6 +27,7 @@ class AudioProcessor(
private var smoothedRms = 0f
private val alpha = 0.8f
private val recordedData = mutableListOf<Float>()
fun initMicrophone(permissions: Array<String>, requestCode: Int): Boolean {
if (ActivityCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO)
@@ -84,6 +85,29 @@ class AudioProcessor(
Log.d(TAG, "Audio recording stopped")
}
/**
 * Reads up to 1024 samples from the microphone and returns them as floats.
 * Returns an empty array when no data (or an error code) comes back.
 * Samples are NOT appended to [recordedData]; accumulation is the caller's job.
 */
fun getAudioData(): FloatArray {
    val capacity = 1024
    val raw = ShortArray(capacity)
    val count = audioRecord?.read(raw, 0, capacity) ?: 0
    if (count <= 0) return FloatArray(0)
    return convertShortToFloat(raw.copyOf(count))
}
/** Drains the accumulated recording: returns a snapshot and empties the buffer. */
fun getRecordedData(): FloatArray {
    val snapshot = recordedData.toFloatArray()
    recordedData.clear()
    return snapshot
}
/** Discards any accumulated recording without returning it. */
fun clearRecordedData() = recordedData.clear()
fun release() {
try {
audioRecord?.stop()
@@ -105,6 +129,7 @@ class AudioProcessor(
}
aec = null
ns = null
recordedData.clear()
Log.d(TAG, "AudioProcessor released")
}

View File

@@ -10,6 +10,7 @@ object AppConfig {
const val WINDOW_SIZE = 512
const val SHOW_DEBUG_TEXT = true
const val USE_HOLD_TO_SPEAK = true // true: 按住说话, false: 传统按钮
object Tts {
const val MODEL_DIR = "tts_model/sherpa-onnx-vits-zh-ll"

View File

@@ -15,6 +15,14 @@ class Live2DAvatarManager(private val glSurfaceView: GLSurfaceView) {
renderer.setSpeaking(speaking)
}
/** Forwards the mood to the renderer, which triggers the matching motion. */
fun setMood(mood: String) = renderer.setMood(mood)
/** Forwards a named one-off motion request (e.g. "hold_to_speak") to the renderer. */
fun startSpecificMotion(motionName: String) = renderer.startSpecificMotion(motionName)
fun onResume() {
glSurfaceView.onResume()
}

View File

@@ -4,6 +4,7 @@ import android.content.res.AssetManager
import android.graphics.BitmapFactory
import android.opengl.GLES20
import android.opengl.GLUtils
import android.util.Log
import com.live2d.sdk.cubism.framework.CubismFramework
import com.live2d.sdk.cubism.framework.CubismModelSettingJson
import com.live2d.sdk.cubism.framework.id.CubismId
@@ -12,14 +13,22 @@ import com.live2d.sdk.cubism.framework.model.CubismUserModel
import com.live2d.sdk.cubism.framework.motion.CubismMotion
import com.live2d.sdk.cubism.framework.rendering.android.CubismRendererAndroid
import kotlin.math.sin
import java.util.Random
class Live2DCharacter : CubismUserModel() {
private lateinit var setting: CubismModelSettingJson
private val lipSyncParams = mutableListOf<CubismId>()
private val idleMotions = mutableListOf<CubismMotion>()
private val moodMotions = mutableMapOf<String, List<CubismMotion>>()
private val specificMotions = mutableMapOf<String, CubismMotion>()
private var idleMotionIndex = 0
private var lastElapsedSec = 0f
private val textureIds = mutableListOf<Int>()
private val random = Random()
private var currentMood: String = "平和"
// 添加文件名映射
private val motionFileMap = mutableMapOf<CubismMotion, String>()
fun loadFromAssets(assets: AssetManager, modelDir: String, modelJsonName: String) {
val settingBytes = readAssetBytes(assets, "$modelDir/$modelJsonName")
@@ -41,6 +50,8 @@ class Live2DCharacter : CubismUserModel() {
initLipSyncParams()
loadIdleMotions(assets, modelDir)
loadMoodMotions(assets, modelDir)
loadSpecificMotions(assets, modelDir)
startNextIdleMotion()
}
@@ -147,17 +158,34 @@ class Live2DCharacter : CubismUserModel() {
private fun loadIdleMotions(assets: AssetManager, modelDir: String) {
idleMotions.clear()
val groupName = findIdleGroupName()
if (groupName.isEmpty()) return
// val groupName = findIdleGroupName()
// if (groupName.isNotEmpty()) {
// for (i in 0 until setting.getMotionCount(groupName)) {
// val fileName = setting.getMotionFileName(groupName, i)
// if (fileName.isBlank()) continue
// runCatching {
// // Motion path in model3.json can be either "motion/xxx.motion3.json" or "xxx.motion3.json".
// val path = if (fileName.startsWith("motion/")) {
// "$modelDir/$fileName"
// } else {
// "$modelDir/motion/$fileName"
// }
// val motion = loadMotion(readAssetBytes(assets, path))
// motion?.setLoop(true)
// motion?.setLoopFadeIn(true)
// if (motion != null) idleMotions.add(motion)
// }
// }
// }
for (i in 0 until setting.getMotionCount(groupName)) {
val fileName = setting.getMotionFileName(groupName, i)
if (fileName.isBlank()) continue
runCatching {
val motion = loadMotion(readAssetBytes(assets, "$modelDir/$fileName"))
motion?.setLoop(true)
motion?.setLoopFadeIn(true)
if (motion != null) idleMotions.add(motion)
// Fallback for models without Idle group config.
if (idleMotions.isEmpty()) {
loadMotionByName(assets, modelDir, "haru_g_idle.motion3.json")?.let { motion ->
motion.setLoop(true)
motion.setLoopFadeIn(true)
idleMotions.add(motion)
// 也添加到映射表
motionFileMap[motion] = "haru_g_idle.motion3.json"
}
}
}
@@ -166,7 +194,12 @@ class Live2DCharacter : CubismUserModel() {
if (idleMotions.isEmpty()) return
val index = idleMotionIndex % idleMotions.size
idleMotionIndex++
motionManager.startMotionPriority(idleMotions[index], 1)
val motion = idleMotions[index]
val motionName = motionFileMap[motion]
Log.d("Live2DCharacter", "开始播放空闲动作: $motionName")
motionManager.startMotionPriority(motion, 1)
}
private fun findIdleGroupName(): String {
@@ -179,4 +212,122 @@ class Live2DCharacter : CubismUserModel() {
}
return ""
}
/**
 * Loads the mood → motion mapping (see the Haru motion design doc).
 * Each mood gets the subset of its candidate files that actually load;
 * every loaded motion is also registered in [motionFileMap] for logging.
 *
 * NOTE(review): "愤怒" and "困惑" appear in the design doc but are not loaded
 * here — confirm whether they were omitted intentionally.
 */
private fun loadMoodMotions(assets: AssetManager, modelDir: String) {
    // Mood name -> candidate motion files, in preference order.
    val moodFiles = mapOf(
        "开心" to listOf(
            "haru_g_m22.motion3.json",
            "haru_g_m21.motion3.json",
            "haru_g_m18.motion3.json",
        ),
        "伤心" to listOf(
            "haru_g_m25.motion3.json",
            "haru_g_m24.motion3.json",
            "haru_g_m05.motion3.json",
        ),
        "平和" to listOf(
            "haru_g_m15.motion3.json",
            "haru_g_m07.motion3.json",
            "haru_g_m06.motion3.json",
            "haru_g_m02.motion3.json",
            "haru_g_m01.motion3.json",
        ),
        "惊讶" to listOf(
            "haru_g_m26.motion3.json",
            "haru_g_m12.motion3.json",
        ),
        "关心" to listOf(
            "haru_g_m17.motion3.json",
        ),
        "害羞" to listOf(
            "haru_g_m19.motion3.json",
        ),
    )
    for ((mood, files) in moodFiles) {
        // Failed loads are dropped silently, matching the original mapNotNull behavior.
        moodMotions[mood] = files.mapNotNull { fileName ->
            loadMotionByName(assets, modelDir, fileName)?.also { motion ->
                motionFileMap[motion] = fileName
            }
        }
    }
}
/** Loads named one-off motions; currently only the hold-to-speak listening pose. */
private fun loadSpecificMotions(assets: AssetManager, modelDir: String) {
    val fileName = "haru_g_m17.motion3.json"
    val motion = loadMotionByName(assets, modelDir, fileName) ?: return
    motionFileMap[motion] = fileName
    specificMotions["hold_to_speak"] = motion
}
/**
 * Loads a single non-looping motion from "<modelDir>/motion/<fileName>".
 * Returns null when the asset is missing or fails to parse.
 */
private fun loadMotionByName(assets: AssetManager, modelDir: String, fileName: String): CubismMotion? =
    runCatching {
        val bytes = readAssetBytes(assets, "$modelDir/motion/$fileName")
        loadMotion(bytes)?.apply { setLoop(false) }
    }.getOrNull()
/**
 * Records [mood] as the current mood and plays a random motion from its group
 * at high priority (10, above idle's 1). Logs and bails if no motion is mapped.
 */
fun startMoodMotion(mood: String) {
    currentMood = mood
    val candidates = moodMotions[mood]
    if (candidates.isNullOrEmpty()) {
        Log.d("Live2DCharacter", "心情 '$mood' 没有对应的动作")
        return
    }
    val chosen = candidates[random.nextInt(candidates.size)]
    // File name resolved from the map purely for logging.
    Log.d("Live2DCharacter", "开始播放心情动作: ${motionFileMap[chosen]}, 心情: $mood")
    motionManager.startMotionPriority(chosen, 10)
}
/** Plays a registered one-off motion (see [loadSpecificMotions]) at priority 10. */
fun startSpecificMotion(motionName: String) {
    val motion = specificMotions[motionName]
    if (motion == null) {
        Log.d("Live2DCharacter", "特定动作 '$motionName' 不存在")
        return
    }
    Log.d("Live2DCharacter", "开始播放特定动作: ${motionFileMap[motion]}")
    motionManager.startMotionPriority(motion, 10)
}
/** The mood most recently set via [startMoodMotion]; defaults to "平和". */
fun getCurrentMood(): String = currentMood
}

View File

@@ -62,6 +62,14 @@ class Live2DRenderer(
this.speaking = speaking
}
// Triggers the character's mood motion; no-op until a character is loaded.
fun setMood(mood: String) {
    character?.startMoodMotion(mood)
}
// Plays a named one-off motion on the character; no-op until a character is loaded.
fun startSpecificMotion(motionName: String) {
    character?.startSpecificMotion(motionName)
}
fun release() {
character?.release()
character = null

View File

@@ -0,0 +1,34 @@
package com.digitalperson.mood
import android.util.Log
import com.digitalperson.config.AppConfig
/**
 * Extracts a leading "[mood]" tag from LLM output and tracks the last seen mood.
 *
 * Singleton state is read from the UI thread and written from IO coroutines
 * (see the Activity stream handlers), so [currentMood] is @Volatile for
 * cross-thread visibility. Individual extract calls are not mutually excluded;
 * the last writer wins, which matches the streaming use case.
 */
object MoodManager {
    private const val TAG = "MoodManager"

    // @Volatile: written on IO dispatcher, read on the main thread.
    @Volatile
    private var currentMood: String = "平和"

    // Matches a mood tag only at the very start of the text, e.g. "[开心]你好".
    private val moodPattern = Regex("""^\[([^\]]+)\]""")

    /**
     * Returns (text with the leading tag removed, extracted mood).
     * When no tag is present, returns the text unchanged paired with the
     * previously recorded mood. Side effect: a found tag updates [currentMood].
     */
    fun extractAndFilterMood(text: String): Pair<String, String> {
        val match = moodPattern.find(text) ?: return Pair(text, currentMood)
        val mood = match.groupValues[1]
        val filteredText = text.removeRange(match.range)
        currentMood = mood
        Log.d(TAG, "Extracted mood: $mood, filtered text: $filteredText")
        return Pair(filteredText, mood)
    }

    /** The most recently extracted mood; "平和" before any tag has been seen. */
    fun getCurrentMood(): String = currentMood

    /** Restores the default mood (e.g. at the start of a new session). */
    fun reset() {
        currentMood = "平和"
    }
}

View File

@@ -36,6 +36,8 @@ class TtsManager(private val context: Context) {
private val ttsStopped = AtomicBoolean(false)
private val ttsWorkerRunning = AtomicBoolean(false)
private val ttsPlaying = AtomicBoolean(false)
private val interrupting = AtomicBoolean(false)
private val needTrackReset = AtomicBoolean(true)
@Volatile private var ttsTotalSamplesWritten: Long = 0
private var currentTrace: TraceSession? = null
@@ -124,6 +126,10 @@ class TtsManager(private val context: Context) {
}
fun enqueueSegment(seg: String) {
if (ttsStopped.get()) {
// Recover from interrupt state for next turn.
ttsStopped.set(false)
}
val cleanedSeg = seg.trimEnd('.', '。', '!', '', '?', '', ',', '', ';', '', ':', '')
callback?.onTraceMarkTtsRequestEnqueued()
@@ -138,15 +144,26 @@ class TtsManager(private val context: Context) {
fun isPlaying(): Boolean = ttsPlaying.get()
/**
 * Prepares TTS state for a fresh turn: clears flags, counters and the queue.
 * Snapshot the flags BEFORE mutating them — the stale-worker recovery below
 * depends on the pre-reset values.
 */
fun reset() {
    val workerRunning = ttsWorkerRunning.get()
    val wasStopped = ttsStopped.get()
    ttsStopped.set(false)
    ttsPlaying.set(false)
    needTrackReset.set(true)
    ttsTotalSamplesWritten = 0
    ttsQueue.clear()
    // If reset is called right after stop(), old worker may still be alive.
    // Re-send an End token so the old worker won't block forever on take().
    if (wasStopped && workerRunning) {
        ttsQueue.offer(TtsQueueItem.End)
    }
}
fun stop() {
ttsStopped.set(true)
ttsPlaying.set(false)
needTrackReset.set(true)
ttsTotalSamplesWritten = 0
ttsQueue.clear()
ttsQueue.offer(TtsQueueItem.End)
@@ -158,6 +175,52 @@ class TtsManager(private val context: Context) {
}
}
/**
 * Barge-in support: aborts any in-flight TTS playback so a new user turn can
 * start, then restores a clean ready state.
 *
 * @param waitTimeoutMs max time to wait for the worker thread to exit.
 * @return true if there was playback/queued work to interrupt, false otherwise.
 */
@Synchronized
fun interruptForNewTurn(waitTimeoutMs: Long = 300): Boolean {
    // Serialize interrupts; a concurrent caller bails out immediately.
    if (!interrupting.compareAndSet(false, true)) return false
    try {
        val hadPendingPlayback = ttsPlaying.get() || ttsWorkerRunning.get() || ttsQueue.isNotEmpty()
        if (!hadPendingPlayback) {
            // Nothing to interrupt — just normalize flags for the next turn.
            ttsStopped.set(false)
            ttsPlaying.set(false)
            ttsTotalSamplesWritten = 0
            return false
        }
        // Signal the worker to stop, then unblock its queue.take() with an End token.
        ttsStopped.set(true)
        ttsPlaying.set(false)
        needTrackReset.set(true)
        ttsTotalSamplesWritten = 0
        ttsQueue.clear()
        ttsQueue.offer(TtsQueueItem.End)
        try {
            track?.pause()
            track?.flush()
        } catch (_: Throwable) {
        }
        // Poll (10 ms steps) for worker exit, up to waitTimeoutMs.
        // NOTE(review): this blocks the calling thread — confirm callers on the
        // UI thread can tolerate up to 300 ms here.
        val deadline = System.currentTimeMillis() + waitTimeoutMs
        while (ttsWorkerRunning.get() && System.currentTimeMillis() < deadline) {
            Thread.sleep(10)
        }
        if (ttsWorkerRunning.get()) {
            Log.w(TAG, "interruptForNewTurn timeout: worker still running")
        }
        // Reset everything so the next turn starts from a clean slate.
        ttsQueue.clear()
        ttsStopped.set(false)
        ttsPlaying.set(false)
        needTrackReset.set(true)
        ttsTotalSamplesWritten = 0
        callback?.onSetSpeaking(false)
        return true
    } finally {
        interrupting.set(false)
    }
}
fun release() {
try {
tts?.release()
@@ -182,6 +245,10 @@ class TtsManager(private val context: Context) {
runTtsWorker()
} finally {
ttsWorkerRunning.set(false)
// Handle race: items may be enqueued while old worker is still exiting.
if (!ttsStopped.get() && ttsQueue.isNotEmpty()) {
ensureTtsWorker()
}
}
}
}
@@ -191,7 +258,6 @@ class TtsManager(private val context: Context) {
val audioTrack = track ?: return
var firstAudioMarked = false
var isFirstSegment = true
while (true) {
val item = ttsQueue.take()
if (ttsStopped.get()) break
@@ -209,14 +275,18 @@ class TtsManager(private val context: Context) {
val startMs = System.currentTimeMillis()
var firstPcmMarked = false
if (isFirstSegment) {
if (needTrackReset.compareAndSet(true, false)) {
try {
audioTrack.pause()
audioTrack.flush()
audioTrack.play()
} catch (_: Throwable) {
}
isFirstSegment = false
} else if (audioTrack.playState != AudioTrack.PLAYSTATE_PLAYING) {
try {
audioTrack.play()
} catch (_: Throwable) {
}
}
t.generateWithCallback(

View File

@@ -3,7 +3,9 @@ package com.digitalperson.ui
import android.app.Activity
import android.opengl.GLSurfaceView
import android.text.method.ScrollingMovementMethod
import android.view.MotionEvent
import android.widget.Button
import android.widget.LinearLayout
import android.widget.ScrollView
import android.widget.TextView
import android.widget.Toast
@@ -14,6 +16,8 @@ class Live2DUiManager(private val activity: Activity) {
private var scrollView: ScrollView? = null
private var startButton: Button? = null
private var stopButton: Button? = null
private var recordButton: Button? = null
private var traditionalButtons: LinearLayout? = null
private var avatarManager: Live2DAvatarManager? = null
private var lastUiText: String = ""
@@ -21,16 +25,20 @@ class Live2DUiManager(private val activity: Activity) {
fun initViews(
textViewId: Int,
scrollViewId: Int,
startButtonId: Int,
stopButtonId: Int,
startButtonId: Int = -1,
stopButtonId: Int = -1,
recordButtonId: Int = -1,
traditionalButtonsId: Int = -1,
silentPlayerViewId: Int,
speakingPlayerViewId: Int,
live2dViewId: Int
) {
textView = activity.findViewById(textViewId)
scrollView = activity.findViewById(scrollViewId)
startButton = activity.findViewById(startButtonId)
stopButton = activity.findViewById(stopButtonId)
if (startButtonId != -1) startButton = activity.findViewById(startButtonId)
if (stopButtonId != -1) stopButton = activity.findViewById(stopButtonId)
if (recordButtonId != -1) recordButton = activity.findViewById(recordButtonId)
if (traditionalButtonsId != -1) traditionalButtons = activity.findViewById(traditionalButtonsId)
textView?.movementMethod = ScrollingMovementMethod()
@@ -47,6 +55,36 @@ class Live2DUiManager(private val activity: Activity) {
stopButton?.setOnClickListener { listener() }
}
/**
 * Wires the record button as a press-and-hold control: the callback receives
 * true on press and false on release/cancel. The pressed visual state is
 * driven manually since the touch events are consumed.
 */
fun setRecordButtonTouchListener(listener: (Boolean) -> Unit) {
    val button = recordButton ?: return
    button.setOnTouchListener { _, event ->
        when (event.action) {
            MotionEvent.ACTION_DOWN -> {
                button.isPressed = true
                listener(true)
                true
            }
            MotionEvent.ACTION_UP, MotionEvent.ACTION_CANCEL -> {
                button.isPressed = false
                listener(false)
                true
            }
            else -> false
        }
    }
}
/** Shows either the hold-to-speak button or the traditional start/stop buttons. */
fun setUseHoldToSpeak(useHoldToSpeak: Boolean) {
    traditionalButtons?.visibility = if (useHoldToSpeak) LinearLayout.GONE else LinearLayout.VISIBLE
    recordButton?.visibility = if (useHoldToSpeak) Button.VISIBLE else Button.GONE
}
fun appendToUi(s: String) {
lastUiText += s
textView?.text = lastUiText
@@ -63,9 +101,10 @@ class Live2DUiManager(private val activity: Activity) {
textView?.text = text
}
fun setButtonsEnabled(startEnabled: Boolean, stopEnabled: Boolean) {
fun setButtonsEnabled(startEnabled: Boolean = false, stopEnabled: Boolean = false, recordEnabled: Boolean = true) {
startButton?.isEnabled = startEnabled
stopButton?.isEnabled = stopEnabled
recordButton?.isEnabled = recordEnabled
}
fun setSpeaking(speaking: Boolean) {
@@ -74,6 +113,18 @@ class Live2DUiManager(private val activity: Activity) {
}
}
/** Applies a mood to the avatar; hops to the UI thread since Live2D/GL is UI-bound. */
fun setMood(mood: String) {
    activity.runOnUiThread { avatarManager?.setMood(mood) }
}
/** Plays a named one-off avatar motion; dispatched on the UI thread. */
fun startSpecificMotion(motionName: String) {
    activity.runOnUiThread { avatarManager?.startSpecificMotion(motionName) }
}
fun showToast(message: String, duration: Int = Toast.LENGTH_SHORT) {
activity.runOnUiThread {
Toast.makeText(activity, message, duration).show()

View File

@@ -3,6 +3,7 @@ package com.digitalperson.ui
import android.app.Activity
import android.text.method.ScrollingMovementMethod
import android.util.Log
import android.view.MotionEvent
import android.widget.Button
import android.widget.ScrollView
import android.widget.TextView
@@ -17,6 +18,7 @@ class UiManager(private val activity: Activity) {
private var scrollView: ScrollView? = null
private var startButton: Button? = null
private var stopButton: Button? = null
private var recordButton: Button? = null
private var videoPlayerManager: VideoPlayerManager? = null
private var lastUiText: String = ""
@@ -24,15 +26,17 @@ class UiManager(private val activity: Activity) {
fun initViews(
textViewId: Int,
scrollViewId: Int,
startButtonId: Int,
stopButtonId: Int,
startButtonId: Int = -1,
stopButtonId: Int = -1,
recordButtonId: Int = -1,
silentPlayerViewId: Int,
speakingPlayerViewId: Int
) {
textView = activity.findViewById(textViewId)
scrollView = activity.findViewById(scrollViewId)
startButton = activity.findViewById(startButtonId)
stopButton = activity.findViewById(stopButtonId)
if (startButtonId != -1) startButton = activity.findViewById(startButtonId)
if (stopButtonId != -1) stopButton = activity.findViewById(stopButtonId)
if (recordButtonId != -1) recordButton = activity.findViewById(recordButtonId)
textView?.movementMethod = ScrollingMovementMethod()
@@ -54,6 +58,24 @@ class UiManager(private val activity: Activity) {
stopButton?.setOnClickListener { listener() }
}
/**
 * Installs a press/release listener on the hold-to-speak record button.
 *
 * [listener] is invoked with `true` on ACTION_DOWN and `false` on
 * ACTION_UP / ACTION_CANCEL, so the caller can start/stop recording
 * while the button is held.
 */
fun setRecordButtonTouchListener(listener: (Boolean) -> Unit) {
    recordButton?.setOnTouchListener { v, event ->
        when (event.action) {
            MotionEvent.ACTION_DOWN -> {
                // Consuming the event suppresses the default pressed-state
                // handling, so mirror it manually to keep the pressed visuals
                // (consistent with the Live2DUiManager implementation).
                v.isPressed = true
                listener(true)
                true
            }
            MotionEvent.ACTION_UP,
            MotionEvent.ACTION_CANCEL -> {
                v.isPressed = false
                // Dispatch performClick() on a completed tap so accessibility
                // services still receive click events (ClickableViewAccessibility).
                if (event.action == MotionEvent.ACTION_UP) v.performClick()
                listener(false)
                true
            }
            else -> false
        }
    }
}
fun appendToUi(s: String) {
if (!AppConfig.SHOW_DEBUG_TEXT) return
@@ -74,9 +96,10 @@ class UiManager(private val activity: Activity) {
textView?.text = text
}
fun setButtonsEnabled(startEnabled: Boolean, stopEnabled: Boolean) {
fun setButtonsEnabled(startEnabled: Boolean = false, stopEnabled: Boolean = false, recordEnabled: Boolean = true) {
startButton?.isEnabled = startEnabled
stopButton?.isEnabled = stopEnabled
recordButton?.isEnabled = recordEnabled
}
fun setSpeaking(speaking: Boolean) {

View File

@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- State-list animator for the record button: lifts the button while it is
     pressed and settles it back on release, giving a tactile "raised" effect. -->
<selector xmlns:android="http://schemas.android.com/apk/res/android">
<!-- Pressed state: raise translationZ to 6dp and elevation to 8dp. -->
<item android:state_pressed="true">
<objectAnimator
android:propertyName="translationZ"
android:duration="@android:integer/config_shortAnimTime"
android:valueTo="6dp"
android:valueType="floatType" />
<objectAnimator
android:propertyName="elevation"
android:duration="@android:integer/config_shortAnimTime"
android:valueTo="8dp"
android:valueType="floatType" />
</item>
<!-- Default (released) state: drop back to resting translationZ/elevation. -->
<item>
<objectAnimator
android:propertyName="translationZ"
android:duration="@android:integer/config_shortAnimTime"
android:valueTo="0dp"
android:valueType="floatType" />
<objectAnimator
android:propertyName="elevation"
android:duration="@android:integer/config_shortAnimTime"
android:valueTo="2dp"
android:valueType="floatType" />
</item>
</selector>

View File

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Background for the circular record button: a solid green oval with a
     white ripple clipped to the same oval mask on touch feedback. -->
<ripple xmlns:android="http://schemas.android.com/apk/res/android"
android:color="@android:color/white">
<!-- Mask confines the ripple effect to the circular button shape. -->
<item android:id="@android:id/mask">
<shape android:shape="oval">
<solid android:color="@android:color/white" />
</shape>
</item>
<!-- Visible content: the green circle drawn beneath the ripple. -->
<item>
<shape android:shape="oval">
<solid android:color="#4CAF50" />
</shape>
</item>
</ripple>

View File

@@ -69,6 +69,14 @@
app:layout_constraintEnd_toEndOf="parent"
app:layout_constraintStart_toStartOf="parent">
<!-- 传统按钮 -->
<LinearLayout
android:id="@+id/traditional_buttons"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:orientation="horizontal"
android:gravity="center">
<Button
android:id="@+id/start_button"
android:layout_width="0dp"
@@ -84,5 +92,24 @@
android:layout_weight="1"
android:text="@string/stop" />
</LinearLayout>
</LinearLayout>
<!-- 按住录音按钮 - 右下角 -->
<Button
android:id="@+id/record_button"
android:layout_width="100dp"
android:layout_height="100dp"
android:layout_margin="24dp"
android:layout_marginBottom="24dp"
android:text="按住说话"
android:textColor="@android:color/white"
android:textSize="14sp"
android:textAllCaps="false"
android:background="@drawable/record_button_background"
app:backgroundTint="#4CAF50"
android:stateListAnimator="@animator/button_elevation"
android:visibility="gone"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintEnd_toEndOf="parent" />
</androidx.constraintlayout.widget.ConstraintLayout>

View File

@@ -91,19 +91,14 @@
app:layout_constraintStart_toStartOf="parent">
<Button
android:id="@+id/start_button"
android:layout_width="0dp"
android:layout_height="wrap_content"
android:layout_marginEnd="12dp"
android:layout_weight="1"
android:text="@string/start" />
android:id="@+id/record_button"
android:layout_width="200dp"
android:layout_height="200dp"
android:layout_gravity="center"
android:text="按住录音"
android:textSize="18sp"
android:background="@android:drawable/ic_btn_speak_now" />
<Button
android:id="@+id/stop_button"
android:layout_width="0dp"
android:layout_height="wrap_content"
android:layout_weight="1"
android:text="@string/stop" />
</LinearLayout>
<!-- 半透明遮罩层 -->

View File

@@ -3,5 +3,5 @@
<string name="start">开始</string>
<string name="stop">结束</string>
<string name="hint">点击“开始”说话;识别后会请求大模型并用 TTS 播放回复。</string>
<string name="system_prompt">你是一名小学女老师喜欢回答学生的各种问题请简洁但温柔地回答每个回答不超过30字。</string>
<string name="system_prompt">你是一名小学女老师喜欢回答学生的各种问题请简洁但温柔地回答每个回答不超过30字。在每次回复的最前面,用方括号标注你的心情,格式为[开心/伤心/愤怒/平和/惊讶/关心/害羞],例如:[开心]同学你好呀!请问有什么问题吗?</string>
</resources>

View File

@@ -24,5 +24,5 @@ android.nonTransitiveRClass=true
LLM_API_URL=https://ark.cn-beijing.volces.com/api/v3/chat/completions
LLM_API_KEY=14ee3e0e-ec07-4678-8b92-64f3b1416592
LLM_MODEL=doubao-1-5-pro-32k-character-250228
LLM_MODEL=doubao-1-5-pro-32k-character-250715
USE_LIVE2D=true