push2talk

2026-03-02 12:00:33 +08:00
parent 2f6166ab6c
commit 1701ecfb7f
19 changed files with 802 additions and 160 deletions
--- a/app/note/design_doc
+++ b/app/note/design_doc
@@ -55,3 +55,50 @@ TTS Sherpa-ONNX VITS .onnx ❌ 否 CPU ONNX Runtime
                        │  SenseVoice     │
                        │  (RKNN推理)     │
                        └─────────────────┘
+
+5. Live2D：Haru 的动作
+由于没有官方文档，以下映射是根据 Live2D 模型的常见设计模式整理的建议：
+
+### 🎭 开心类情绪
+- haru_g_m22 眯眼微笑
+- haru_g_m21 跳动微笑
+- haru_g_m18 手收背后微笑
+- haru_g_m09 微微鞠躬
+- haru_g_m08 深深鞠躬
+
+
+### 😢 伤心类情绪
+- haru_g_m25 扁嘴
+- haru_g_m24 低头斜看地板，收手到背后
+- haru_g_m05 扁嘴，张开双手
+
+### 😠 愤怒类情绪
+- haru_g_m11 双手交叉，摇头，扁嘴
+- haru_g_m04 双手交叉，点头
+- haru_g_m03 双手交叉，点头
+
+### 😌 平和类情绪
+- haru_g_m15 双手交叉在胸前
+- haru_g_m07 举起左手
+- haru_g_m06 举起右手
+- haru_g_m02 双手放到背后
+- haru_g_m01 点头
+
+
+### 😲 惊讶类情绪
+- haru_g_m26 后退一步（适合：惊讶、吃惊）
+- haru_g_m12 摆手，摇头
+
+### 😕 困惑类情绪
+- haru_g_m20 手指点腮，思考，皱眉
+- haru_g_m16 双手捧腮，思考
+- haru_g_m14 身体前倾，皱眉
+- haru_g_m13 身体前倾，双手分开
+
+### 😊 害羞类情绪
+- haru_g_m19 脸红微笑
+
+
+### ❤️ 关心类情绪
+- haru_g_m17 靠近侧脸
+
--- a/app/src/main/java/com/digitalperson/Live2DChatActivity.kt
+++ b/app/src/main/java/com/digitalperson/Live2DChatActivity.kt
@@ -38,6 +38,9 @@ class Live2DChatActivity : AppCompatActivity() {
    @Volatile
    private var isRecording: Boolean = false
    
+    private val holdToSpeakAudioBuffer = mutableListOf<Float>()
+    private val HOLD_TO_SPEAK_MIN_SAMPLES = 16000  // 1秒的音频数据
+
    private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
    private var recordingJob: Job? = null
    private val nativeLock = Any()
@@ -77,13 +80,30 @@ class Live2DChatActivity : AppCompatActivity() {
            scrollViewId = R.id.scroll_view,
            startButtonId = R.id.start_button,
            stopButtonId = R.id.stop_button,
+            recordButtonId = R.id.record_button,
+            traditionalButtonsId = R.id.traditional_buttons,
            silentPlayerViewId = 0,
            speakingPlayerViewId = 0,
            live2dViewId = R.id.live2d_view
        )
        
-        uiManager.setStartButtonListener { onStartClicked() }
-        uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
+        // 根据配置选择交互方式
+        uiManager.setUseHoldToSpeak(AppConfig.USE_HOLD_TO_SPEAK)
+        
+        if (AppConfig.USE_HOLD_TO_SPEAK) {
+            uiManager.setRecordButtonTouchListener { isDown ->
+                if (isDown) {
+                    // 按住按钮，开始录音
+                    onRecordButtonDown()
+                } else {
+                    // 松开按钮，停止录音
+                    onRecordButtonUp()
+                }
+            }
+        } else {
+            uiManager.setStartButtonListener { onStartClicked() }
+            uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
+        }

        ActivityCompat.requestPermissions(this, permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)

@@ -99,10 +119,16 @@ class Live2DChatActivity : AppCompatActivity() {
            Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
        }

-        uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
+        if (AppConfig.USE_HOLD_TO_SPEAK) {
+            uiManager.setButtonsEnabled(recordEnabled = false)
+        } else {
+            uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
+        }
        uiManager.setText("初始化中…")
        
        audioProcessor = AudioProcessor(this)
+        ttsManager = TtsManager(this)
+        ttsManager.setCallback(createTtsCallback())
        
        asrManager = AsrManager(this)
        asrManager.setAudioProcessor(audioProcessor)
@@ -127,14 +153,22 @@ class Live2DChatActivity : AppCompatActivity() {
                        )
                    }
                    uiManager.setText(getString(R.string.hint))
-                    uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
+                    if (AppConfig.USE_HOLD_TO_SPEAK) {
+                        uiManager.setButtonsEnabled(recordEnabled = true)
+                    } else {
+                        uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
+                    }
                }
            } catch (t: Throwable) {
                Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
                withContext(Dispatchers.Main) {
                    uiManager.setText("初始化失败：${t.javaClass.simpleName}: ${t.message}")
                    uiManager.showToast("初始化失败（请看 Logcat）: ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
-                    uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
+                    if (AppConfig.USE_HOLD_TO_SPEAK) {
+                        uiManager.setButtonsEnabled(recordEnabled = false)
+                    } else {
+                        uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
+                    }
                }
            }
        }
@@ -142,9 +176,6 @@ class Live2DChatActivity : AppCompatActivity() {
        cloudApiManager = CloudApiManager(createCloudApiListener(), applicationContext)
        cloudApiManager.setEnableStreaming(enableStreaming)
        
-        ttsManager = TtsManager(this)
-        ttsManager.setCallback(createTtsCallback())
-        
        Log.d(AppConfig.TAG, "Pre-starting ASR worker")
        ioScope.launch {
            asrManager.runAsrWorker()
@@ -205,10 +236,18 @@ class Live2DChatActivity : AppCompatActivity() {
                }
                ttsManager.enqueueEnd()
            } else {
-                runOnUiThread {
-                    uiManager.appendToUi("${response}\n")
+                val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
+                val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(response)
+                android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Final mood: $mood, filtered text: $filteredText")
+
+                if (mood != previousMood) {
+                    uiManager.setMood(mood)
                }
-                ttsManager.enqueueSegment(response)
+
+                runOnUiThread {
+                    uiManager.appendToUi("${filteredText}\n")
+                }
+                ttsManager.enqueueSegment(filteredText)
                ttsManager.enqueueEnd()
            }
        }
@@ -219,9 +258,18 @@ class Live2DChatActivity : AppCompatActivity() {
                    llmFirstChunkMarked = true
                    currentTrace?.markLlmFirstChunk()
                }
-                uiManager.appendToUi(chunk)

-                val segments = segmenter.processChunk(chunk)
+                val previousMood = com.digitalperson.mood.MoodManager.getCurrentMood()
+                val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(chunk)
+                if (mood != previousMood) {
+                    android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Mood changed to: $mood")
+                    // 设置Live2D人物的心情
+                    uiManager.setMood(mood)
+                }
+
+                uiManager.appendToUi(filteredText)
+
+                val segments = segmenter.processChunk(filteredText)
                for (seg in segments) {
                    ttsManager.enqueueSegment(seg)
                }
@@ -353,6 +401,77 @@ class Live2DChatActivity : AppCompatActivity() {
        Log.d(AppConfig.TAG, "onStartClicked completed")
    }

+    /**
+     * Handles the record button being pressed in hold-to-speak mode.
+     *
+     * Does nothing if a recording is already in progress. Otherwise, in order:
+     * asks the TTS manager to interrupt for a new turn (logging to the UI when it
+     * reports an interruption), initialises the microphone (showing a toast and
+     * returning on failure), starts a new trace turn, clears the text view, resets
+     * the segmenter, triggers the "hold_to_speak" motion, clears the hold-to-speak
+     * buffer, starts recording and launches [processSamplesLoop] on `ioScope`.
+     */
+    private fun onRecordButtonDown() {
+        Log.d(AppConfig.TAG, "onRecordButtonDown called")
+        if (isRecording) {
+            Log.d(AppConfig.TAG, "Already recording, returning")
+            return
+        }
+
+        // If TTS is currently playing, interrupt it
+        val interrupted = ttsManager.interruptForNewTurn()
+        if (interrupted) {
+            uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
+        }
+
+        if (!audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
+            uiManager.showToast("麦克风初始化失败/无权限")
+            return
+        }
+
+        currentTrace = TraceManager.getInstance().startNewTurn()
+        currentTrace?.mark("turn_start")
+        llmInFlight = false
+
+        uiManager.clearText()
+
+        // interruptForNewTurn() already prepared TTS state for next turn.
+        // Keep reset() only for non-interrupt entry points.
+        ttsManager.setCurrentTrace(currentTrace)
+        segmenter.reset()
+
+        // Start the hold-to-speak motion
+        uiManager.startSpecificMotion("hold_to_speak")
+
+        // Drop any samples left over from a previous press before capturing again.
+        holdToSpeakAudioBuffer.clear()
+        audioProcessor.startRecording()
+        isRecording = true
+
+        Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
+        // Replace whatever job was previously tracked with the new capture loop.
+        recordingJob?.cancel()
+        recordingJob = ioScope.launch {
+            processSamplesLoop()
+        }
+        Log.d(AppConfig.TAG, "onRecordButtonDown completed")
+    }
+    
+    /**
+     * Handles the record button being released in hold-to-speak mode.
+     *
+     * Does nothing if no recording is in progress. Otherwise it stops the recorder,
+     * cancels the capture loop and launches a finalizer on `ioScope` that:
+     *  1. waits for the capture loop to finish, so it no longer touches
+     *     `holdToSpeakAudioBuffer`;
+     *  2. appends whatever `audioProcessor.getRecordedData()` returns;
+     *  3. sends the whole buffer to ASR when it holds at least
+     *     `HOLD_TO_SPEAK_MIN_SAMPLES` samples, otherwise shows a "too short" toast.
+     */
+    private fun onRecordButtonUp() {
+        Log.d(AppConfig.TAG, "onRecordButtonUp called")
+        if (!isRecording) {
+            Log.d(AppConfig.TAG, "Not recording, returning")
+            return
+        }
+
+        isRecording = false
+        audioProcessor.stopRecording()
+
+        // Keep a handle on the capture loop so the finalizer can wait for it.
+        val captureJob = recordingJob
+        captureJob?.cancel()
+        recordingJob = ioScope.launch {
+            // cancel() only requests cancellation. The capture loop may still be
+            // appending to holdToSpeakAudioBuffer, which is a plain non-thread-safe
+            // list, so wait for it to finish before reading or clearing the buffer.
+            captureJob?.join()
+
+            // Collect any audio still held by the processor
+            val audioData = audioProcessor.getRecordedData()
+            holdToSpeakAudioBuffer.addAll(audioData.toList())
+
+            // Snapshot and clear first so the buffer is empty whichever branch runs.
+            val finalAudio = holdToSpeakAudioBuffer.toFloatArray()
+            holdToSpeakAudioBuffer.clear()
+
+            if (finalAudio.size >= HOLD_TO_SPEAK_MIN_SAMPLES) {
+                asrManager.enqueueAudioSegment(finalAudio, finalAudio)
+            } else {
+                // This coroutine runs on Dispatchers.IO; show UI feedback on the
+                // main thread, as the initialisation error path does.
+                withContext(Dispatchers.Main) {
+                    uiManager.showToast("录音时间太短，请长按至少1秒")
+                }
+            }
+        }
+        Log.d(AppConfig.TAG, "onRecordButtonUp completed")
+    }
+    
    private fun onStopClicked(userInitiated: Boolean) {
        isRecording = false
        audioProcessor.stopRecording()
@@ -362,7 +481,11 @@ class Live2DChatActivity : AppCompatActivity() {

        ttsManager.stop()

-        uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
+        if (AppConfig.USE_HOLD_TO_SPEAK) {
+            uiManager.setButtonsEnabled(recordEnabled = true)
+        } else {
+            uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
+        }

        if (userInitiated) {
            TraceManager.getInstance().endTurn()
@@ -372,47 +495,62 @@ class Live2DChatActivity : AppCompatActivity() {

    private suspend fun processSamplesLoop() {
        Log.d(AppConfig.TAG, "processSamplesLoop started")
-        val windowSize = AppConfig.WINDOW_SIZE
-        val buffer = ShortArray(windowSize)
-        var loopCount = 0

-        while (isRecording && ioScope.coroutineContext.isActive) {
-            loopCount++
-            if (loopCount % 100 == 0) {
-                Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
-            }
-            
-            if (ttsManager.isPlaying()) {
-                if (vadManager.isInSpeech()) {
-                    Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
-                    vadManager.clearState()
+        if (AppConfig.USE_HOLD_TO_SPEAK) {
+            // 按住说话模式：累积音频数据到一定长度后再发送给ASR
+            while (isRecording && ioScope.coroutineContext.isActive) {
+                val audioData = audioProcessor.getAudioData()
+                if (audioData.isNotEmpty()) {
+                    holdToSpeakAudioBuffer.addAll(audioData.toList())
                }
+                // 避免CPU占用过高
+                kotlinx.coroutines.delay(10)
+            }
+        } else {
+            // 传统模式：使用VAD
+            val windowSize = AppConfig.WINDOW_SIZE
+            val buffer = ShortArray(windowSize)
+            var loopCount = 0
+
+            while (isRecording && ioScope.coroutineContext.isActive) {
+                loopCount++
+                if (loopCount % 100 == 0) {
+                    Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
+                }
+                
+                if (ttsManager.isPlaying()) {
+                    if (vadManager.isInSpeech()) {
+                        Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
+                        vadManager.clearState()
+                    }
+                    val ret = audioProcessor.readAudio(buffer)
+                    if (ret <= 0) continue
+                    continue
+                }
+                
                val ret = audioProcessor.readAudio(buffer)
                if (ret <= 0) continue
-                continue
+                if (ret != windowSize) continue
+                
+                val chunk = audioProcessor.convertShortToFloat(buffer)
+                val processedChunk = audioProcessor.applyGain(chunk)
+
+                val result = vadManager.processAudioChunk(chunk, processedChunk)
+                
+                if (vadManager.vadComputeCount % 100 == 0) {
+                    Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
+                }
+
+                if (loopCount % 1000 == 0) {
+                    Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
+                }
+
+                val forced = segmenter.maybeForceByTime()
+                for (seg in forced) ttsManager.enqueueSegment(seg)
            }

-            val ret = audioProcessor.readAudio(buffer)
-            if (ret <= 0) continue
-            if (ret != windowSize) continue
-            
-            val chunk = audioProcessor.convertShortToFloat(buffer)
-            val processedChunk = audioProcessor.applyGain(chunk)
-
-            val result = vadManager.processAudioChunk(chunk, processedChunk)
-            
-            if (vadManager.vadComputeCount % 100 == 0) {
-                Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
-            }
-
-            if (loopCount % 1000 == 0) {
-                Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
-            }
-
-            val forced = segmenter.maybeForceByTime()
-            for (seg in forced) ttsManager.enqueueSegment(seg)
+            vadManager.forceFinalize()
        }
-
-        vadManager.forceFinalize()
+        Log.d(AppConfig.TAG, "processSamplesLoop stopped")
    }
 }
--- a/app/src/main/java/com/digitalperson/MainActivity.kt
+++ b/app/src/main/java/com/digitalperson/MainActivity.kt
@@ -75,14 +75,20 @@ class MainActivity : AppCompatActivity() {
        uiManager.initViews(
            textViewId = R.id.my_text,
            scrollViewId = R.id.scroll_view,
-            startButtonId = R.id.start_button,
-            stopButtonId = R.id.stop_button,
+            recordButtonId = R.id.record_button,
            silentPlayerViewId = R.id.player_view_silent,
            speakingPlayerViewId = R.id.player_view_speaking
        )
        
-        uiManager.setStartButtonListener { onStartClicked() }
-        uiManager.setStopButtonListener { onStopClicked(userInitiated = true) }
+        uiManager.setRecordButtonTouchListener { isDown ->
+            if (isDown) {
+                // 按住按钮，开始录音
+                onRecordButtonDown()
+            } else {
+                // 松开按钮，停止录音
+                onRecordButtonUp()
+            }
+        }

        ActivityCompat.requestPermissions(this, permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)

@@ -98,7 +104,7 @@ class MainActivity : AppCompatActivity() {
            Log.w(AppConfig.TAG, "Streaming switch not found in layout: ${e.message}")
        }

-        uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
+        uiManager.setButtonsEnabled(recordEnabled = false)
        uiManager.setText("初始化中…")
        
        audioProcessor = AudioProcessor(this)
@@ -126,14 +132,14 @@ class MainActivity : AppCompatActivity() {
                        )
                    }
                    uiManager.setText(getString(R.string.hint))
-                    uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
+                    uiManager.setButtonsEnabled(recordEnabled = true)
                }
            } catch (t: Throwable) {
                Log.e(AppConfig.TAG, "Initialization failed: ${t.message}", t)
                withContext(Dispatchers.Main) {
                    uiManager.setText("初始化失败：${t.javaClass.simpleName}: ${t.message}")
                    uiManager.showToast("初始化失败（请看 Logcat）: ${t.javaClass.simpleName}", Toast.LENGTH_LONG)
-                    uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = false)
+                    uiManager.setButtonsEnabled(recordEnabled = false)
                }
            }
        }
@@ -204,10 +210,13 @@ class MainActivity : AppCompatActivity() {
                }
                ttsManager.enqueueEnd()
            } else {
+                val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(response)
+                android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Final mood: $mood, filtered text: $filteredText")
+                
                runOnUiThread {
-                    uiManager.appendToUi("${response}\n")
+                    uiManager.appendToUi("${filteredText}\n")
                }
-                ttsManager.enqueueSegment(response)
+                ttsManager.enqueueSegment(filteredText)
                ttsManager.enqueueEnd()
            }
        }
@@ -218,9 +227,15 @@ class MainActivity : AppCompatActivity() {
                    llmFirstChunkMarked = true
                    currentTrace?.markLlmFirstChunk()
                }
-                uiManager.appendToUi(chunk)
                
-                val segments = segmenter.processChunk(chunk)
+                val (filteredText, mood) = com.digitalperson.mood.MoodManager.extractAndFilterMood(chunk)
+                if (mood != com.digitalperson.mood.MoodManager.getCurrentMood()) {
+                    android.util.Log.d(com.digitalperson.config.AppConfig.TAG, "Mood changed to: $mood")
+                }
+                
+                uiManager.appendToUi(filteredText)
+
+                val segments = segmenter.processChunk(filteredText)
                for (seg in segments) {
                    ttsManager.enqueueSegment(seg)
                }
@@ -306,13 +321,19 @@ class MainActivity : AppCompatActivity() {
        try { audioProcessor.release() } catch (_: Throwable) {}
    }

-    private fun onStartClicked() {
-        Log.d(AppConfig.TAG, "onStartClicked called")
+    private fun onRecordButtonDown() {
+        Log.d(AppConfig.TAG, "onRecordButtonDown called")
        if (isRecording) {
            Log.d(AppConfig.TAG, "Already recording, returning")
            return
        }

+        // 如果TTS正在播放，打断它
+        if (ttsManager.isPlaying()) {
+            ttsManager.stop()
+            uiManager.appendToUi("\n[LOG] 已打断TTS播放\n")
+        }
+
        if (!audioProcessor.initMicrophone(permissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
            uiManager.showToast("麦克风初始化失败/无权限")
            return
@@ -328,18 +349,37 @@ class MainActivity : AppCompatActivity() {
        ttsManager.setCurrentTrace(currentTrace)
        segmenter.reset()

-        vadManager.reset()
        audioProcessor.startRecording()
        isRecording = true

-        uiManager.setButtonsEnabled(startEnabled = false, stopEnabled = true)
-
        Log.d(AppConfig.TAG, "Starting processSamplesLoop coroutine")
        recordingJob?.cancel()
        recordingJob = ioScope.launch {
            processSamplesLoop()
        }
-        Log.d(AppConfig.TAG, "onStartClicked completed")
+        Log.d(AppConfig.TAG, "onRecordButtonDown completed")
+    }
+    
+    /**
+     * Handles the record button being released.
+     *
+     * Does nothing if no recording is in progress. Otherwise it clears the
+     * recording flag, stops the recorder, cancels the current recording job and
+     * launches a coroutine on `ioScope` that forwards any data returned by
+     * `audioProcessor.getRecordedData()` to ASR as a single segment.
+     */
+    private fun onRecordButtonUp() {
+        Log.d(AppConfig.TAG, "onRecordButtonUp called")
+        if (!isRecording) {
+            Log.d(AppConfig.TAG, "Not recording, returning")
+            return
+        }
+
+        isRecording = false
+        audioProcessor.stopRecording()
+
+        recordingJob?.cancel()
+        recordingJob = ioScope.launch {
+            // Handle the final audio data
+            // NOTE(review): nothing in the visible AudioProcessor changes appends to
+            // recordedData, so this may always be empty here — confirm.
+            val audioData = audioProcessor.getRecordedData()
+            if (audioData.isNotEmpty()) {
+                // Send straight to ASR, bypassing VAD
+                asrManager.enqueueAudioSegment(audioData, audioData)
+            }
+        }
+        Log.d(AppConfig.TAG, "onRecordButtonUp completed")
    }
    
    private fun onStopClicked(userInitiated: Boolean) {
@@ -351,7 +391,7 @@ class MainActivity : AppCompatActivity() {

        ttsManager.stop()

-        uiManager.setButtonsEnabled(startEnabled = true, stopEnabled = false)
+        uiManager.setButtonsEnabled(recordEnabled = true)

        if (userInitiated) {
            TraceManager.getInstance().endTurn()
@@ -361,47 +401,16 @@ class MainActivity : AppCompatActivity() {

    private suspend fun processSamplesLoop() {
        Log.d(AppConfig.TAG, "processSamplesLoop started")
-        val windowSize = AppConfig.WINDOW_SIZE
-        val buffer = ShortArray(windowSize)
-        var loopCount = 0

        while (isRecording && ioScope.coroutineContext.isActive) {
-            loopCount++
-            if (loopCount % 100 == 0) {
-                Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsManager.isPlaying()}")
+            val audioData = audioProcessor.getAudioData()
+            if (audioData.isNotEmpty()) {
+                // 直接发送到ASR，不经过VAD
+                asrManager.enqueueAudioSegment(audioData, audioData)
            }
-            
-            if (ttsManager.isPlaying()) {
-                if (vadManager.isInSpeech()) {
-                    Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
-                    vadManager.clearState()
-                }
-                val ret = audioProcessor.readAudio(buffer)
-                if (ret <= 0) continue
-                continue
-            }
-            
-            val ret = audioProcessor.readAudio(buffer)
-            if (ret <= 0) continue
-            if (ret != windowSize) continue
-            
-            val chunk = audioProcessor.convertShortToFloat(buffer)
-            val processedChunk = audioProcessor.applyGain(chunk)
-
-            val result = vadManager.processAudioChunk(chunk, processedChunk)
-            
-            if (vadManager.vadComputeCount % 100 == 0) {
-                Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
-            }
-
-            if (loopCount % 1000 == 0) {
-                Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
-            }
-
-            val forced = segmenter.maybeForceByTime()
-            for (seg in forced) ttsManager.enqueueSegment(seg)
+            // 避免CPU占用过高
+            kotlinx.coroutines.delay(10)
        }
-
-        vadManager.forceFinalize()
+        Log.d(AppConfig.TAG, "processSamplesLoop stopped")
    }
 }
--- a/app/src/main/java/com/digitalperson/StreamingTextSegmenter.kt
+++ b/app/src/main/java/com/digitalperson/StreamingTextSegmenter.kt
@@ -1,5 +1,7 @@
 package com.digitalperson

+import com.digitalperson.mood.MoodManager
+
 /**
 * 将大模型流式 chunk 做“伪流式 TTS”的分段器：
 * - 优先按中文/英文标点断句，尽早产出第一段，缩短首包时间
@@ -11,17 +13,28 @@ class StreamingTextSegmenter(
 ) {
    private val buf = StringBuilder()
    private var lastEmitAtMs: Long = 0
+    private var moodExtracted = false

    @Synchronized
    fun reset(nowMs: Long = System.currentTimeMillis()) {
        buf.setLength(0)
        lastEmitAtMs = nowMs
+        moodExtracted = false
    }

    @Synchronized
    fun processChunk(chunk: String, nowMs: Long = System.currentTimeMillis()): List<String> {
        if (chunk.isEmpty()) return emptyList()
-        buf.append(chunk)
+        
+        var processedChunk = chunk
+        
+        if (!moodExtracted) {
+            val (filteredText, _) = MoodManager.extractAndFilterMood(chunk)
+            processedChunk = filteredText
+            moodExtracted = true
+        }
+        
+        buf.append(processedChunk)
        return drain(nowMs, forceByTime = false)
    }

@@ -32,6 +45,7 @@ class StreamingTextSegmenter(
        buf.setLength(0)
        if (remaining.isNotEmpty()) out.add(remaining)
        lastEmitAtMs = nowMs
+        moodExtracted = false
        return out
    }

--- a/app/src/main/java/com/digitalperson/audio/AudioProcessor.kt
+++ b/app/src/main/java/com/digitalperson/audio/AudioProcessor.kt
@@ -27,6 +27,7 @@ class AudioProcessor(

    private var smoothedRms = 0f
    private val alpha = 0.8f
+    private val recordedData = mutableListOf<Float>()

    fun initMicrophone(permissions: Array<String>, requestCode: Int): Boolean {
        if (ActivityCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO)
@@ -84,6 +85,29 @@ class AudioProcessor(
        Log.d(TAG, "Audio recording stopped")
    }
    
+    /**
+     * Reads up to 1024 samples from the recorder and returns them as floats.
+     *
+     * The samples are NOT appended to `recordedData`; the caller decides what to
+     * keep. Returns an empty array when there is no recorder or the read yields
+     * no samples (zero or a negative result).
+     */
+    fun getAudioData(): FloatArray {
+        val pcm = ShortArray(1024)
+        val count = audioRecord?.read(pcm, 0, pcm.size) ?: 0
+        return if (count > 0) convertShortToFloat(pcm.copyOf(count)) else FloatArray(0)
+    }
+    
+    /** Returns a copy of everything in `recordedData` and then empties it. */
+    fun getRecordedData(): FloatArray =
+        recordedData.toFloatArray().also { recordedData.clear() }
+    
+    /** Discards everything currently held in `recordedData`. */
+    fun clearRecordedData() = recordedData.clear()
+
    fun release() {
        try {
            audioRecord?.stop()
@@ -105,6 +129,7 @@ class AudioProcessor(
        }
        aec = null
        ns = null
+        recordedData.clear()

        Log.d(TAG, "AudioProcessor released")
    }
--- a/app/src/main/java/com/digitalperson/config/AppConfig.kt
+++ b/app/src/main/java/com/digitalperson/config/AppConfig.kt
@@ -10,6 +10,7 @@ object AppConfig {
    const val WINDOW_SIZE = 512
    
    const val SHOW_DEBUG_TEXT = true
+    const val USE_HOLD_TO_SPEAK = true  // true: 按住说话, false: 传统按钮
    
    object Tts {
        const val MODEL_DIR = "tts_model/sherpa-onnx-vits-zh-ll"
--- a/app/src/main/java/com/digitalperson/live2d/Live2DAvatarManager.kt
+++ b/app/src/main/java/com/digitalperson/live2d/Live2DAvatarManager.kt
@@ -15,6 +15,14 @@ class Live2DAvatarManager(private val glSurfaceView: GLSurfaceView) {
        renderer.setSpeaking(speaking)
    }

+    /**
+     * Forwards [mood] to the renderer's `setMood`.
+     *
+     * @param mood mood label to pass through unchanged.
+     */
+    fun setMood(mood: String) {
+        renderer.setMood(mood)
+    }
+
+    /**
+     * Forwards [motionName] to the renderer's `startSpecificMotion`.
+     *
+     * @param motionName name of the motion to pass through unchanged.
+     */
+    fun startSpecificMotion(motionName: String) {
+        renderer.startSpecificMotion(motionName)
+    }
+
    fun onResume() {
        glSurfaceView.onResume()
    }
--- a/app/src/main/java/com/digitalperson/live2d/Live2DCharacter.kt
+++ b/app/src/main/java/com/digitalperson/live2d/Live2DCharacter.kt
@@ -4,6 +4,7 @@ import android.content.res.AssetManager
 import android.graphics.BitmapFactory
 import android.opengl.GLES20
 import android.opengl.GLUtils
+import android.util.Log
 import com.live2d.sdk.cubism.framework.CubismFramework
 import com.live2d.sdk.cubism.framework.CubismModelSettingJson
 import com.live2d.sdk.cubism.framework.id.CubismId
@@ -12,14 +13,22 @@ import com.live2d.sdk.cubism.framework.model.CubismUserModel
 import com.live2d.sdk.cubism.framework.motion.CubismMotion
 import com.live2d.sdk.cubism.framework.rendering.android.CubismRendererAndroid
 import kotlin.math.sin
+import java.util.Random

 class Live2DCharacter : CubismUserModel() {
    private lateinit var setting: CubismModelSettingJson
    private val lipSyncParams = mutableListOf<CubismId>()
    private val idleMotions = mutableListOf<CubismMotion>()
+    private val moodMotions = mutableMapOf<String, List<CubismMotion>>()
+    private val specificMotions = mutableMapOf<String, CubismMotion>()
    private var idleMotionIndex = 0
    private var lastElapsedSec = 0f
    private val textureIds = mutableListOf<Int>()
+    private val random = Random()
+    private var currentMood: String = "平和"
+    
+    // 添加文件名映射
+    private val motionFileMap = mutableMapOf<CubismMotion, String>()

    fun loadFromAssets(assets: AssetManager, modelDir: String, modelJsonName: String) {
        val settingBytes = readAssetBytes(assets, "$modelDir/$modelJsonName")
@@ -41,6 +50,8 @@ class Live2DCharacter : CubismUserModel() {

        initLipSyncParams()
        loadIdleMotions(assets, modelDir)
+        loadMoodMotions(assets, modelDir)
+        loadSpecificMotions(assets, modelDir)
        startNextIdleMotion()
    }

@@ -147,17 +158,34 @@ class Live2DCharacter : CubismUserModel() {

    private fun loadIdleMotions(assets: AssetManager, modelDir: String) {
        idleMotions.clear()
-        val groupName = findIdleGroupName()
-        if (groupName.isEmpty()) return
+//        val groupName = findIdleGroupName()
+//        if (groupName.isNotEmpty()) {
+//            for (i in 0 until setting.getMotionCount(groupName)) {
+//                val fileName = setting.getMotionFileName(groupName, i)
+//                if (fileName.isBlank()) continue
+//                runCatching {
+//                    // Motion path in model3.json can be either "motion/xxx.motion3.json" or "xxx.motion3.json".
+//                    val path = if (fileName.startsWith("motion/")) {
+//                        "$modelDir/$fileName"
+//                    } else {
+//                        "$modelDir/motion/$fileName"
+//                    }
+//                    val motion = loadMotion(readAssetBytes(assets, path))
+//                    motion?.setLoop(true)
+//                    motion?.setLoopFadeIn(true)
+//                    if (motion != null) idleMotions.add(motion)
+//                }
+//            }
+//        }

-        for (i in 0 until setting.getMotionCount(groupName)) {
-            val fileName = setting.getMotionFileName(groupName, i)
-            if (fileName.isBlank()) continue
-            runCatching {
-                val motion = loadMotion(readAssetBytes(assets, "$modelDir/$fileName"))
-                motion?.setLoop(true)
-                motion?.setLoopFadeIn(true)
-                if (motion != null) idleMotions.add(motion)
+        // Fallback for models without Idle group config.
+        if (idleMotions.isEmpty()) {
+            loadMotionByName(assets, modelDir, "haru_g_idle.motion3.json")?.let { motion ->
+                motion.setLoop(true)
+                motion.setLoopFadeIn(true)
+                idleMotions.add(motion)
+                // 也添加到映射表
+                motionFileMap[motion] = "haru_g_idle.motion3.json"
            }
        }
    }
@@ -166,7 +194,12 @@ class Live2DCharacter : CubismUserModel() {
        if (idleMotions.isEmpty()) return
        val index = idleMotionIndex % idleMotions.size
        idleMotionIndex++
-        motionManager.startMotionPriority(idleMotions[index], 1)
+        
+        val motion = idleMotions[index]
+        val motionName = motionFileMap[motion]
+        Log.d("Live2DCharacter", "开始播放空闲动作: $motionName")
+        
+        motionManager.startMotionPriority(motion, 1)
    }

    private fun findIdleGroupName(): String {
@@ -179,4 +212,122 @@ class Live2DCharacter : CubismUserModel() {
        }
        return ""
    }
+
+    /**
+     * Loads the motions associated with each mood and stores them in [moodMotions].
+     *
+     * Every motion that loads successfully is also recorded in [motionFileMap] so its
+     * file name can be logged when it is played. Files that cannot be loaded are
+     * skipped, so a mood may end up with fewer motions than listed here (or none).
+     *
+     * The mood keys cover every tag the LLM is asked to emit in the `system_prompt`
+     * string resource (`开心/伤心/愤怒/平和/惊讶/关心/害羞`).
+     *
+     * @param assets asset manager used to read the motion files
+     * @param modelDir asset directory of the model that owns the motions
+     */
+    private fun loadMoodMotions(assets: AssetManager, modelDir: String) {
+        // Mood tag -> motion files that may be played for it.
+        val motionFilesByMood = listOf(
+            "开心" to listOf(
+                "haru_g_m22.motion3.json",
+                "haru_g_m21.motion3.json",
+                "haru_g_m18.motion3.json"
+            ),
+            "伤心" to listOf(
+                "haru_g_m25.motion3.json",
+                "haru_g_m24.motion3.json",
+                "haru_g_m05.motion3.json"
+            ),
+            // Previously missing: the prompt offers this mood, so it needs motions too.
+            "愤怒" to listOf(
+                "haru_g_m11.motion3.json",
+                "haru_g_m04.motion3.json",
+                "haru_g_m03.motion3.json"
+            ),
+            "平和" to listOf(
+                "haru_g_m15.motion3.json",
+                "haru_g_m07.motion3.json",
+                "haru_g_m06.motion3.json",
+                "haru_g_m02.motion3.json",
+                "haru_g_m01.motion3.json"
+            ),
+            "惊讶" to listOf(
+                "haru_g_m26.motion3.json",
+                "haru_g_m12.motion3.json"
+            ),
+            "关心" to listOf("haru_g_m17.motion3.json"),
+            "害羞" to listOf("haru_g_m19.motion3.json")
+        )
+
+        for ((mood, fileNames) in motionFilesByMood) {
+            moodMotions[mood] = fileNames.mapNotNull { fileName ->
+                loadMotionByName(assets, modelDir, fileName)?.also { motion ->
+                    // Remember the source file so playback logs can name the motion.
+                    motionFileMap[motion] = fileName
+                }
+            }
+        }
+    }
+
+    /**
+     * Loads the motions that are triggered by name rather than by mood.
+     *
+     * Currently only the "hold_to_speak" motion is registered; nothing is
+     * registered when its file cannot be loaded.
+     */
+    private fun loadSpecificMotions(assets: AssetManager, modelDir: String) {
+        // Motion shown while the user holds the record button.
+        val holdToSpeakFile = "haru_g_m17.motion3.json"
+        val holdToSpeakMotion = loadMotionByName(assets, modelDir, holdToSpeakFile) ?: return
+        motionFileMap[holdToSpeakMotion] = holdToSpeakFile
+        specificMotions["hold_to_speak"] = holdToSpeakMotion
+    }
+
+    /**
+     * Loads a single motion from `<modelDir>/motion/<fileName>` with looping disabled.
+     *
+     * @param assets asset manager used to read the motion file
+     * @param modelDir asset directory of the model
+     * @param fileName motion file name inside the model's `motion` directory
+     * @return the loaded motion, or `null` when the file cannot be loaded
+     */
+    private fun loadMotionByName(assets: AssetManager, modelDir: String, fileName: String): CubismMotion? {
+        val path = "$modelDir/motion/$fileName"
+        return runCatching {
+            loadMotion(readAssetBytes(assets, path))?.apply { setLoop(false) }
+        }.onFailure { error ->
+            // Report missing or unreadable motion files instead of dropping them silently.
+            Log.w("Live2DCharacter", "Failed to load motion: $path", error)
+        }.getOrNull()
+    }
+
+    /**
+     * Records [mood] as the current mood and plays one of its motions, picked at random.
+     *
+     * The mood is stored even when no motion is registered for it; in that case
+     * only a log line is written.
+     */
+    fun startMoodMotion(mood: String) {
+        currentMood = mood
+
+        val candidates = moodMotions[mood]?.takeIf { it.isNotEmpty() }
+        if (candidates == null) {
+            Log.d("Live2DCharacter", "心情 '$mood' 没有对应的动作")
+            return
+        }
+
+        val picked = candidates[random.nextInt(candidates.size)]
+        // The file name is looked up only for logging.
+        Log.d("Live2DCharacter", "开始播放心情动作: ${motionFileMap[picked]}, 心情: $mood")
+        motionManager.startMotionPriority(picked, 10)
+    }
+
+    /**
+     * Plays the motion registered in [specificMotions] under [motionName].
+     *
+     * Writes a log line and does nothing else when no such motion is registered.
+     */
+    fun startSpecificMotion(motionName: String) {
+        val motion = specificMotions[motionName]
+        if (motion == null) {
+            Log.d("Live2DCharacter", "特定动作 '$motionName' 不存在")
+            return
+        }
+
+        Log.d("Live2DCharacter", "开始播放特定动作: ${motionFileMap[motion]}")
+        motionManager.startMotionPriority(motion, 10)
+    }
+
+    /** Returns the mood most recently passed to [startMoodMotion]. */
+    fun getCurrentMood(): String = currentMood
 }
--- a/app/src/main/java/com/digitalperson/live2d/Live2DRenderer.kt
+++ b/app/src/main/java/com/digitalperson/live2d/Live2DRenderer.kt
@@ -62,6 +62,14 @@ class Live2DRenderer(
        this.speaking = speaking
    }

+    /** Forwards [mood] to the character's mood motion; does nothing when no character exists. */
+    fun setMood(mood: String) {
+        val target = character ?: return
+        target.startMoodMotion(mood)
+    }
+
+    /** Asks the character to play the motion named [motionName]; does nothing when no character exists. */
+    fun startSpecificMotion(motionName: String) {
+        val target = character ?: return
+        target.startSpecificMotion(motionName)
+    }
+
    fun release() {
        character?.release()
        character = null
--- a/app/src/main/java/com/digitalperson/mood/MoodManager.kt
+++ b/app/src/main/java/com/digitalperson/mood/MoodManager.kt
@@ -0,0 +1,34 @@
+package com.digitalperson.mood
+
+import android.util.Log
+import com.digitalperson.config.AppConfig
+
+/**
+ * Extracts the leading `[mood]` tag from reply text and remembers the last mood seen.
+ *
+ * The state is a plain process-wide property with no synchronization.
+ */
+object MoodManager {
+    private const val TAG = "MoodManager"
+    private const val DEFAULT_MOOD = "平和"
+
+    private var currentMood: String = DEFAULT_MOOD
+
+    // Leading "[mood]" tag. Whitespace before the tag and padding inside the brackets
+    // are tolerated so that replies such as "\n[开心]..." or "[ 开心 ]..." are still
+    // recognised; the captured mood never has surrounding whitespace and is never blank.
+    private val moodPattern = Regex("""^\s*\[\s*([^\]\s][^\]]*?)\s*\]""")
+
+    /**
+     * Splits a leading mood tag off [text].
+     *
+     * @param text reply text that may start with a tag such as `[开心]`
+     * @return a pair of (text without the tag, mood). When [text] carries no tag it is
+     *   returned unchanged together with the last known mood.
+     */
+    fun extractAndFilterMood(text: String): Pair<String, String> {
+        val match = moodPattern.find(text) ?: return Pair(text, currentMood)
+
+        val mood = match.groupValues[1]
+        val filteredText = text.removeRange(match.range)
+
+        currentMood = mood
+        Log.d(TAG, "Extracted mood: $mood, filtered text: $filteredText")
+
+        return Pair(filteredText, mood)
+    }
+
+    /** Returns the last extracted mood, or the default mood when none was extracted since the last [reset]. */
+    fun getCurrentMood(): String = currentMood
+
+    /** Restores the default mood. */
+    fun reset() {
+        currentMood = DEFAULT_MOOD
+    }
+}
--- a/app/src/main/java/com/digitalperson/tts/TtsManager.kt
+++ b/app/src/main/java/com/digitalperson/tts/TtsManager.kt
@@ -36,6 +36,8 @@ class TtsManager(private val context: Context) {
    private val ttsStopped = AtomicBoolean(false)
    private val ttsWorkerRunning = AtomicBoolean(false)
    private val ttsPlaying = AtomicBoolean(false)
+    private val interrupting = AtomicBoolean(false)
+    private val needTrackReset = AtomicBoolean(true)
    @Volatile private var ttsTotalSamplesWritten: Long = 0
    
    private var currentTrace: TraceSession? = null
@@ -124,6 +126,10 @@ class TtsManager(private val context: Context) {
    }
    
    fun enqueueSegment(seg: String) {
+        if (ttsStopped.get()) {
+            // Recover from interrupt state for next turn.
+            ttsStopped.set(false)
+        }
        val cleanedSeg = seg.trimEnd('.', '。', '!', '！', '?', '？', ',', '，', ';', '；', ':', '：')
        
        callback?.onTraceMarkTtsRequestEnqueued()
@@ -138,15 +144,26 @@ class TtsManager(private val context: Context) {
    fun isPlaying(): Boolean = ttsPlaying.get()
    
+    /**
+     * Clears the playback flags, the written-sample counter and the pending queue.
+     *
+     * Also sets [needTrackReset], and re-posts an End token when a worker from a
+     * preceding stop is still running so that it can exit.
+     */
     fun reset() {
+        // Snapshot both flags before they are cleared below.
+        val workerRunning = ttsWorkerRunning.get()
+        val wasStopped = ttsStopped.get()
+
         ttsStopped.set(false)
         ttsPlaying.set(false)
+        needTrackReset.set(true)
         ttsTotalSamplesWritten = 0
         ttsQueue.clear()
+
+        // If reset is called right after stop(), old worker may still be alive.
+        // Re-send an End token so the old worker won't block forever on take().
+        if (wasStopped && workerRunning) {
+            ttsQueue.offer(TtsQueueItem.End)
+        }
     }
    
    fun stop() {
        ttsStopped.set(true)
        ttsPlaying.set(false)
+        needTrackReset.set(true)
        ttsTotalSamplesWritten = 0
        ttsQueue.clear()
        ttsQueue.offer(TtsQueueItem.End)
@@ -158,6 +175,52 @@ class TtsManager(private val context: Context) {
        }
    }

+    /**
+     * Aborts pending or in-progress TTS playback so that a new turn can start.
+     *
+     * Marks the manager as stopped, empties the queue, posts an End token, pauses and
+     * flushes the audio track, then waits up to [waitTimeoutMs] for the worker to exit
+     * before clearing the stopped flag again. The calling thread is blocked while waiting.
+     *
+     * @param waitTimeoutMs maximum time in milliseconds to wait for the worker to exit;
+     *   values of zero or less skip the wait
+     * @return `true` when playback was pending and has been interrupted; `false` when
+     *   nothing was pending or another interrupt is already in progress
+     */
+    @Synchronized
+    fun interruptForNewTurn(waitTimeoutMs: Long = 300): Boolean {
+        if (!interrupting.compareAndSet(false, true)) return false
+        try {
+            val hadPendingPlayback = ttsPlaying.get() || ttsWorkerRunning.get() || ttsQueue.isNotEmpty()
+            if (!hadPendingPlayback) {
+                ttsStopped.set(false)
+                ttsPlaying.set(false)
+                ttsTotalSamplesWritten = 0
+                return false
+            }
+
+            ttsStopped.set(true)
+            ttsPlaying.set(false)
+            needTrackReset.set(true)
+            ttsTotalSamplesWritten = 0
+            ttsQueue.clear()
+            // Wake a worker that may be blocked waiting for the next queue item.
+            ttsQueue.offer(TtsQueueItem.End)
+
+            try {
+                track?.pause()
+                track?.flush()
+            } catch (_: Throwable) {
+                // Best effort: the track may be in a state that rejects pause/flush.
+            }
+
+            // Measure the timeout on the monotonic clock so wall-clock adjustments
+            // can neither cut the wait short nor extend it.
+            val startNanos = System.nanoTime()
+            val timeoutNanos = waitTimeoutMs.coerceIn(0L, Long.MAX_VALUE / 1_000_000L) * 1_000_000L
+            while (ttsWorkerRunning.get() && System.nanoTime() - startNanos < timeoutNanos) {
+                try {
+                    Thread.sleep(10)
+                } catch (_: InterruptedException) {
+                    // Keep the interrupt visible to the caller, but still run the cleanup
+                    // below so the manager is not left in the stopped state.
+                    Thread.currentThread().interrupt()
+                    break
+                }
+            }
+
+            if (ttsWorkerRunning.get()) {
+                Log.w(TAG, "interruptForNewTurn timeout: worker still running")
+            }
+
+            ttsQueue.clear()
+            ttsStopped.set(false)
+            ttsPlaying.set(false)
+            needTrackReset.set(true)
+            ttsTotalSamplesWritten = 0
+            callback?.onSetSpeaking(false)
+            return true
+        } finally {
+            interrupting.set(false)
+        }
+    }
+    
    fun release() {
        try {
            tts?.release()
@@ -182,6 +245,10 @@ class TtsManager(private val context: Context) {
                runTtsWorker()
            } finally {
                ttsWorkerRunning.set(false)
+                // Handle race: items may be enqueued while old worker is still exiting.
+                if (!ttsStopped.get() && ttsQueue.isNotEmpty()) {
+                    ensureTtsWorker()
+                }
            }
        }
    }
@@ -191,7 +258,6 @@ class TtsManager(private val context: Context) {
        val audioTrack = track ?: return

        var firstAudioMarked = false
-        var isFirstSegment = true
        while (true) {
            val item = ttsQueue.take()
            if (ttsStopped.get()) break
@@ -209,14 +275,18 @@ class TtsManager(private val context: Context) {
                    val startMs = System.currentTimeMillis()
                    var firstPcmMarked = false

-                    if (isFirstSegment) {
+                    if (needTrackReset.compareAndSet(true, false)) {
                        try {
                            audioTrack.pause()
                            audioTrack.flush()
                            audioTrack.play()
                        } catch (_: Throwable) {
                        }
-                        isFirstSegment = false
+                    } else if (audioTrack.playState != AudioTrack.PLAYSTATE_PLAYING) {
+                        try {
+                            audioTrack.play()
+                        } catch (_: Throwable) {
+                        }
                    }

                    t.generateWithCallback(
--- a/app/src/main/java/com/digitalperson/ui/Live2DUiManager.kt
+++ b/app/src/main/java/com/digitalperson/ui/Live2DUiManager.kt
@@ -3,7 +3,9 @@ package com.digitalperson.ui
 import android.app.Activity
 import android.opengl.GLSurfaceView
 import android.text.method.ScrollingMovementMethod
+import android.view.MotionEvent
 import android.widget.Button
+import android.widget.LinearLayout
 import android.widget.ScrollView
 import android.widget.TextView
 import android.widget.Toast
@@ -14,6 +16,8 @@ class Live2DUiManager(private val activity: Activity) {
    private var scrollView: ScrollView? = null
    private var startButton: Button? = null
    private var stopButton: Button? = null
+    private var recordButton: Button? = null
+    private var traditionalButtons: LinearLayout? = null
    private var avatarManager: Live2DAvatarManager? = null

    private var lastUiText: String = ""
@@ -21,16 +25,20 @@ class Live2DUiManager(private val activity: Activity) {
    fun initViews(
        textViewId: Int,
        scrollViewId: Int,
-        startButtonId: Int,
-        stopButtonId: Int,
+        startButtonId: Int = -1,
+        stopButtonId: Int = -1,
+        recordButtonId: Int = -1,
+        traditionalButtonsId: Int = -1,
        silentPlayerViewId: Int,
        speakingPlayerViewId: Int,
        live2dViewId: Int
    ) {
        textView = activity.findViewById(textViewId)
        scrollView = activity.findViewById(scrollViewId)
-        startButton = activity.findViewById(startButtonId)
-        stopButton = activity.findViewById(stopButtonId)
+        if (startButtonId != -1) startButton = activity.findViewById(startButtonId)
+        if (stopButtonId != -1) stopButton = activity.findViewById(stopButtonId)
+        if (recordButtonId != -1) recordButton = activity.findViewById(recordButtonId)
+        if (traditionalButtonsId != -1) traditionalButtons = activity.findViewById(traditionalButtonsId)

        textView?.movementMethod = ScrollingMovementMethod()

@@ -47,6 +55,36 @@ class Live2DUiManager(private val activity: Activity) {
        stopButton?.setOnClickListener { listener() }
    }
    
+    /**
+     * Installs a hold-to-speak touch handler on the record button.
+     *
+     * [listener] receives `true` on ACTION_DOWN and `false` on ACTION_UP or
+     * ACTION_CANCEL; the button's pressed state is updated to match. Other
+     * touch actions are not consumed.
+     */
+    fun setRecordButtonTouchListener(listener: (Boolean) -> Unit) {
+        recordButton?.setOnTouchListener { _, event ->
+            val pressed = when (event.action) {
+                MotionEvent.ACTION_DOWN -> true
+                MotionEvent.ACTION_UP, MotionEvent.ACTION_CANCEL -> false
+                else -> return@setOnTouchListener false
+            }
+            recordButton?.isPressed = pressed
+            listener(pressed)
+            true
+        }
+    }
+    
+    /**
+     * Switches between the hold-to-speak record button and the traditional button row.
+     *
+     * @param useHoldToSpeak `true` shows the record button and hides the traditional
+     *   buttons; `false` does the opposite
+     */
+    fun setUseHoldToSpeak(useHoldToSpeak: Boolean) {
+        traditionalButtons?.visibility = if (useHoldToSpeak) LinearLayout.GONE else LinearLayout.VISIBLE
+        recordButton?.visibility = if (useHoldToSpeak) Button.VISIBLE else Button.GONE
+    }
+
    fun appendToUi(s: String) {
        lastUiText += s
        textView?.text = lastUiText
@@ -63,9 +101,10 @@ class Live2DUiManager(private val activity: Activity) {
        textView?.text = text
    }

-    fun setButtonsEnabled(startEnabled: Boolean, stopEnabled: Boolean) {
+    fun setButtonsEnabled(startEnabled: Boolean = false, stopEnabled: Boolean = false, recordEnabled: Boolean = true) {
        startButton?.isEnabled = startEnabled
        stopButton?.isEnabled = stopEnabled
+        recordButton?.isEnabled = recordEnabled
    }

    fun setSpeaking(speaking: Boolean) {
@@ -74,6 +113,18 @@ class Live2DUiManager(private val activity: Activity) {
        }
    }

+    /** Applies [mood] to the avatar on the UI thread; does nothing when no avatar manager is set. */
+    fun setMood(mood: String) {
+        activity.runOnUiThread { avatarManager?.setMood(mood) }
+    }
+
+    /** Starts the avatar motion named [motionName] on the UI thread; does nothing when no avatar manager is set. */
+    fun startSpecificMotion(motionName: String) {
+        activity.runOnUiThread { avatarManager?.startSpecificMotion(motionName) }
+    }
+
    fun showToast(message: String, duration: Int = Toast.LENGTH_SHORT) {
        activity.runOnUiThread {
            Toast.makeText(activity, message, duration).show()
--- a/app/src/main/java/com/digitalperson/ui/UiManager.kt
+++ b/app/src/main/java/com/digitalperson/ui/UiManager.kt
@@ -3,6 +3,7 @@ package com.digitalperson.ui
 import android.app.Activity
 import android.text.method.ScrollingMovementMethod
 import android.util.Log
+import android.view.MotionEvent
 import android.widget.Button
 import android.widget.ScrollView
 import android.widget.TextView
@@ -17,6 +18,7 @@ class UiManager(private val activity: Activity) {
    private var scrollView: ScrollView? = null
    private var startButton: Button? = null
    private var stopButton: Button? = null
+    private var recordButton: Button? = null
    private var videoPlayerManager: VideoPlayerManager? = null
    
    private var lastUiText: String = ""
@@ -24,15 +26,17 @@ class UiManager(private val activity: Activity) {
    fun initViews(
        textViewId: Int,
        scrollViewId: Int,  
-        startButtonId: Int,
-        stopButtonId: Int,
+        startButtonId: Int = -1,
+        stopButtonId: Int = -1,
+        recordButtonId: Int = -1,
        silentPlayerViewId: Int,
        speakingPlayerViewId: Int
    ) {
        textView = activity.findViewById(textViewId)
        scrollView = activity.findViewById(scrollViewId)
-        startButton = activity.findViewById(startButtonId)
-        stopButton = activity.findViewById(stopButtonId)
+        if (startButtonId != -1) startButton = activity.findViewById(startButtonId)
+        if (stopButtonId != -1) stopButton = activity.findViewById(stopButtonId)
+        if (recordButtonId != -1) recordButton = activity.findViewById(recordButtonId)
        
        textView?.movementMethod = ScrollingMovementMethod()
        
@@ -54,6 +58,24 @@ class UiManager(private val activity: Activity) {
        stopButton?.setOnClickListener { listener() }
    }
    
+    /**
+     * Installs a hold-to-speak touch handler on the record button.
+     *
+     * [listener] receives `true` on ACTION_DOWN and `false` on ACTION_UP or
+     * ACTION_CANCEL. Other touch actions are not consumed.
+     */
+    fun setRecordButtonTouchListener(listener: (Boolean) -> Unit) {
+        recordButton?.setOnTouchListener { _, event ->
+            // null means "not a press/release event": leave it unhandled.
+            val pressed: Boolean? = when (event.action) {
+                MotionEvent.ACTION_DOWN -> true
+                MotionEvent.ACTION_UP, MotionEvent.ACTION_CANCEL -> false
+                else -> null
+            }
+            if (pressed != null) listener(pressed)
+            pressed != null
+        }
+    }
+    
    fun appendToUi(s: String) {
        if (!AppConfig.SHOW_DEBUG_TEXT) return

@@ -74,9 +96,10 @@ class UiManager(private val activity: Activity) {
        textView?.text = text
    }
    
-    fun setButtonsEnabled(startEnabled: Boolean, stopEnabled: Boolean) {
+    fun setButtonsEnabled(startEnabled: Boolean = false, stopEnabled: Boolean = false, recordEnabled: Boolean = true) {
        startButton?.isEnabled = startEnabled
        stopButton?.isEnabled = stopEnabled
+        recordButton?.isEnabled = recordEnabled
    }
    
    fun setSpeaking(speaking: Boolean) {
--- a/app/src/main/res/animator/button_elevation.xml
+++ b/app/src/main/res/animator/button_elevation.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- State list animator: raises the button while it is pressed and lowers it again otherwise. -->
+<selector xmlns:android="http://schemas.android.com/apk/res/android">
+    <item android:state_pressed="true">
+        <!-- A state item takes a single animator, so the parallel animations are grouped in a set. -->
+        <set>
+            <objectAnimator
+                android:propertyName="translationZ"
+                android:duration="@android:integer/config_shortAnimTime"
+                android:valueTo="6dp"
+                android:valueType="floatType" />
+            <objectAnimator
+                android:propertyName="elevation"
+                android:duration="@android:integer/config_shortAnimTime"
+                android:valueTo="8dp"
+                android:valueType="floatType" />
+        </set>
+    </item>
+    <item>
+        <!-- Default state: same grouping so both properties animate back together. -->
+        <set>
+            <objectAnimator
+                android:propertyName="translationZ"
+                android:duration="@android:integer/config_shortAnimTime"
+                android:valueTo="0dp"
+                android:valueType="floatType" />
+            <objectAnimator
+                android:propertyName="elevation"
+                android:duration="@android:integer/config_shortAnimTime"
+                android:valueTo="2dp"
+                android:valueType="floatType" />
+        </set>
+    </item>
+</selector>
--- a/app/src/main/res/drawable/record_button_background.xml
+++ b/app/src/main/res/drawable/record_button_background.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="utf-8"?>
+<ripple xmlns:android="http://schemas.android.com/apk/res/android"
+    android:color="@android:color/white">
+    <item android:id="@android:id/mask">
+        <shape android:shape="oval">
+            <solid android:color="@android:color/white" />
+        </shape>
+    </item>
+    <item>
+        <shape android:shape="oval">
+            <solid android:color="#4CAF50" />
+        </shape>
+    </item>
+</ripple>
--- a/app/src/main/res/layout/activity_live2d_chat.xml
+++ b/app/src/main/res/layout/activity_live2d_chat.xml
@@ -69,20 +69,47 @@
        app:layout_constraintEnd_toEndOf="parent"
        app:layout_constraintStart_toStartOf="parent">

-        <Button
-            android:id="@+id/start_button"
-            android:layout_width="0dp"
+        <!-- 传统按钮 -->
+        <LinearLayout
+            android:id="@+id/traditional_buttons"
+            android:layout_width="match_parent"
            android:layout_height="wrap_content"
-            android:layout_marginEnd="12dp"
-            android:layout_weight="1"
-            android:text="@string/start" />
+            android:orientation="horizontal"
+            android:gravity="center">

-        <Button
-            android:id="@+id/stop_button"
-            android:layout_width="0dp"
-            android:layout_height="wrap_content"
-            android:layout_weight="1"
-            android:text="@string/stop" />
+            <Button
+                android:id="@+id/start_button"
+                android:layout_width="0dp"
+                android:layout_height="wrap_content"
+                android:layout_marginEnd="12dp"
+                android:layout_weight="1"
+                android:text="@string/start" />
+
+            <Button
+                android:id="@+id/stop_button"
+                android:layout_width="0dp"
+                android:layout_height="wrap_content"
+                android:layout_weight="1"
+                android:text="@string/stop" />
+        </LinearLayout>
    </LinearLayout>

+    <!-- 按住录音按钮 - 右下角 -->
+    <Button
+        android:id="@+id/record_button"
+        android:layout_width="100dp"
+        android:layout_height="100dp"
+        android:layout_margin="24dp"
+        android:layout_marginBottom="24dp"
+        android:text="按住说话"
+        android:textColor="@android:color/white"
+        android:textSize="14sp"
+        android:textAllCaps="false"
+        android:background="@drawable/record_button_background"
+        app:backgroundTint="#4CAF50"
+        android:stateListAnimator="@animator/button_elevation"
+        android:visibility="gone"
+        app:layout_constraintBottom_toBottomOf="parent"
+        app:layout_constraintEnd_toEndOf="parent" />
+
 </androidx.constraintlayout.widget.ConstraintLayout>
--- a/app/src/main/res/layout/activity_main.xml
+++ b/app/src/main/res/layout/activity_main.xml
@@ -91,19 +91,14 @@
        app:layout_constraintStart_toStartOf="parent">

        <Button
-            android:id="@+id/start_button"
-            android:layout_width="0dp"
-            android:layout_height="wrap_content"
-            android:layout_marginEnd="12dp"
-            android:layout_weight="1"
-            android:text="@string/start" />
+            android:id="@+id/record_button"
+            android:layout_width="200dp"
+            android:layout_height="200dp"
+            android:layout_gravity="center"
+            android:text="按住录音"
+            android:textSize="18sp"
+            android:background="@android:drawable/ic_btn_speak_now" />

-        <Button
-            android:id="@+id/stop_button"
-            android:layout_width="0dp"
-            android:layout_height="wrap_content"
-            android:layout_weight="1"
-            android:text="@string/stop" />
    </LinearLayout>

    <!-- 半透明遮罩层 -->
--- a/app/src/main/res/values/strings.xml
+++ b/app/src/main/res/values/strings.xml
@@ -3,5 +3,5 @@
    <string name="start">开始</string>
    <string name="stop">结束</string>
    <string name="hint">点击“开始”说话；识别后会请求大模型并用 TTS 播放回复。</string>
-    <string name="system_prompt">你是一名小学女老师，喜欢回答学生的各种问题，请简洁但温柔地回答，每个回答不超过30字。</string>
+    <string name="system_prompt">你是一名小学女老师，喜欢回答学生的各种问题，请简洁但温柔地回答，每个回答不超过30字。在每次回复的最前面，用方括号标注你的心情，格式为[开心/伤心/愤怒/平和/惊讶/关心/害羞]，例如：[开心]同学你好呀！请问有什么问题吗？</string>
 </resources>
--- a/gradle.properties
+++ b/gradle.properties
@@ -24,5 +24,5 @@ android.nonTransitiveRClass=true

 LLM_API_URL=https://ark.cn-beijing.volces.com/api/v3/chat/completions
 LLM_API_KEY=14ee3e0e-ec07-4678-8b92-64f3b1416592
-LLM_MODEL=doubao-1-5-pro-32k-character-250228
+LLM_MODEL=doubao-1-5-pro-32k-character-250715
 USE_LIVE2D=true