add unity person
This commit is contained in:
@@ -32,7 +32,7 @@ android {
|
||||
|
||||
defaultConfig {
|
||||
applicationId "com.digitalperson"
|
||||
minSdk 21
|
||||
minSdk 22
|
||||
targetSdk 33
|
||||
versionCode 1
|
||||
versionName "1.0"
|
||||
@@ -97,4 +97,7 @@ dependencies {
|
||||
implementation 'androidx.room:room-runtime:2.5.2'
|
||||
kapt 'androidx.room:room-compiler:2.5.2'
|
||||
implementation 'androidx.room:room-ktx:2.5.2'
|
||||
|
||||
implementation project(':tuanjieLibrary')
|
||||
implementation files('../tuanjieLibrary/libs/unity-classes.jar')
|
||||
}
|
||||
|
||||
@@ -254,3 +254,22 @@ https://tianchi.aliyun.com/dataset/93864
|
||||
.headers on # 显示列名
|
||||
.mode column # 列模式显示
|
||||
.quit # 退出
|
||||
|
||||
13. Unity 集成时遇到的问题:
|
||||
1. 问题描述:NDK的版本不对,导致编译错误
|
||||
解决方法:
|
||||
- 在 build.gradle 中指定 NDK 版本
|
||||
ndkVersion "23.1.7779620"
|
||||
2. 问题描述:Unity 编译时提示 NDK 路径错误
|
||||
解决方法:
|
||||
- 在 build.gradle 中指定 NDK 路径
|
||||
ndkPath "D:/software/2022.3.62t5/Editor/Data/PlaybackEngines/AndroidPlayer/NDK"
|
||||
|
||||
3. 问题描述:Build file 'D:\code\digital_person\tuanjieLibrary\build.gradle'
|
||||
Could not get unknown property 'tuanjieStreamingAssets' for object of type com.android.build.gradle.internal.dsl.LibraryAndroidResourcesImpl$AgpDecorated.
|
||||
解决方法:
|
||||
- 在项目顶层的 gradle.properties 中添加 tuanjieStreamingAssets 配置:
|
||||
tuanjieStreamingAssets=.unity3d, google-services-desktop.json, google-services.json, GoogleService-Info.plist
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -15,15 +15,17 @@
|
||||
android:usesCleartextTraffic="true">
|
||||
|
||||
<activity
|
||||
android:name="com.digitalperson.EntryActivity"
|
||||
android:name="com.digitalperson.DigitalPersonLauncherActivity"
|
||||
android:exported="true">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN" />
|
||||
<category android:name="android.intent.category.LAUNCHER" />
|
||||
</intent-filter>
|
||||
</activity>
|
||||
<activity android:name="com.digitalperson.EntryActivity" android:exported="false" />
|
||||
<activity android:name="com.digitalperson.MainActivity" android:exported="false" />
|
||||
<activity android:name="com.digitalperson.Live2DChatActivity" android:exported="false" />
|
||||
<activity android:name="com.digitalperson.UnityDigitalPersonActivity" android:exported="false" />
|
||||
</application>
|
||||
|
||||
</manifest>
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.digitalperson
|
||||
|
||||
import android.content.Intent
|
||||
import android.os.Bundle
|
||||
import androidx.appcompat.app.AppCompatActivity
|
||||
import com.digitalperson.config.AppConfig
|
||||
|
||||
/**
 * Launcher trampoline: forwards to the avatar screen selected by [AppConfig.Avatar]
 * and finishes itself right after starting it.
 */
class DigitalPersonLauncherActivity : AppCompatActivity() {

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)

        // Unity avatar when configured, otherwise the Live2D chat screen.
        val destination = when {
            AppConfig.Avatar.isUnity() -> Intent(this, UnityDigitalPersonActivity::class.java)
            else -> Intent(this, Live2DChatActivity::class.java)
        }

        startActivity(destination)
        finish()
    }
}
|
||||
@@ -14,12 +14,12 @@ class EntryActivity : AppCompatActivity() {
|
||||
override fun onCreate(savedInstanceState: Bundle?) {
|
||||
super.onCreate(savedInstanceState)
|
||||
|
||||
val target = if (AppConfig.Avatar.USE_LIVE2D) {
|
||||
val target = if (AppConfig.Avatar.isLive2D()) {
|
||||
Live2DChatActivity::class.java
|
||||
} else {
|
||||
MainActivity::class.java
|
||||
}
|
||||
Log.i(TAG, "USE_LIVE2D=${AppConfig.Avatar.USE_LIVE2D}, target=${target.simpleName}")
|
||||
Log.i(TAG, "DIGITAL_PERSON_TYPE=${AppConfig.Avatar.DIGITAL_PERSON_TYPE}, target=${target.simpleName}")
|
||||
startActivity(Intent(this, target))
|
||||
finish()
|
||||
}
|
||||
|
||||
@@ -638,6 +638,8 @@ class Live2DChatActivity : AppCompatActivity() {
|
||||
|
||||
override fun onTtsSegmentCompleted(durationMs: Long) {}
|
||||
|
||||
override fun onTtsAudioData(data: ByteArray) {}
|
||||
|
||||
override fun isTtsStopped(): Boolean = !isRecording
|
||||
|
||||
override fun onClearAsrQueue() {
|
||||
|
||||
@@ -266,6 +266,8 @@ class MainActivity : AppCompatActivity() {
|
||||
|
||||
override fun onTtsSegmentCompleted(durationMs: Long) {}
|
||||
|
||||
override fun onTtsAudioData(data: ByteArray) {}
|
||||
|
||||
override fun isTtsStopped(): Boolean = !isRecording
|
||||
|
||||
override fun onClearAsrQueue() {
|
||||
|
||||
@@ -0,0 +1,558 @@
|
||||
package com.digitalperson
|
||||
|
||||
import android.Manifest
|
||||
import android.content.pm.PackageManager
|
||||
import android.os.Bundle
|
||||
import android.os.Handler
|
||||
import android.os.Looper
|
||||
import android.util.Log
|
||||
import android.view.MotionEvent
|
||||
import android.view.ViewGroup
|
||||
import android.widget.Button
|
||||
import android.widget.EditText
|
||||
import android.widget.TextView
|
||||
import androidx.core.app.ActivityCompat
|
||||
import androidx.core.content.ContextCompat
|
||||
import android.util.Base64
|
||||
import android.view.View
|
||||
import com.unity3d.player.UnityPlayer
|
||||
import com.unity3d.player.UnityPlayerActivity
|
||||
import com.digitalperson.audio.AudioProcessor
|
||||
import com.digitalperson.asr.AsrManager
|
||||
import com.digitalperson.config.AppConfig
|
||||
import com.digitalperson.data.AppDatabase
|
||||
import com.digitalperson.face.FaceDetectionPipeline
|
||||
import com.digitalperson.interaction.ConversationBufferMemory
|
||||
import com.digitalperson.interaction.ConversationSummaryMemory
|
||||
import com.digitalperson.interaction.UserMemoryStore
|
||||
import com.digitalperson.llm.LLMManager
|
||||
import com.digitalperson.llm.LLMManagerCallback
|
||||
import com.digitalperson.tts.TtsController
|
||||
import com.digitalperson.util.FileHelper
|
||||
import com.digitalperson.vad.VadManager
|
||||
import kotlinx.coroutines.*
|
||||
|
||||
class UnityDigitalPersonActivity : UnityPlayerActivity() {
|
||||
|
||||
// ==================== 伴生对象(静态成员)====================
|
||||
companion object {
|
||||
private var instance: UnityDigitalPersonActivity? = null
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
// ==================== 核心模块 ====================
|
||||
private lateinit var conversationBufferMemory: ConversationBufferMemory
|
||||
private lateinit var conversationSummaryMemory: ConversationSummaryMemory
|
||||
private var llmManager: LLMManager? = null
|
||||
private lateinit var faceDetectionPipeline: FaceDetectionPipeline
|
||||
private lateinit var userMemoryStore: UserMemoryStore
|
||||
|
||||
private lateinit var chatHistoryText: TextView
|
||||
private lateinit var holdToSpeakButton: Button
|
||||
private lateinit var messageInput: EditText
|
||||
private lateinit var sendButton: Button
|
||||
|
||||
|
||||
|
||||
// 音频和AI模块
|
||||
private lateinit var asrManager: AsrManager
|
||||
private lateinit var ttsController: TtsController
|
||||
private lateinit var audioProcessor: AudioProcessor
|
||||
private lateinit var vadManager: VadManager
|
||||
|
||||
// ==================== 状态标志 ====================
|
||||
@Volatile
|
||||
private var isRecording: Boolean = false
|
||||
|
||||
@Volatile
|
||||
private var llmInFlight: Boolean = false
|
||||
|
||||
private var useLocalLLM = false // 默认使用云端 LLM
|
||||
|
||||
// ==================== TTS回调相关 ====================
|
||||
private var isTTSPlaying = false
|
||||
private val ttsHandler = Handler(Looper.getMainLooper())
|
||||
private var ttsStopRunnable: Runnable? = null
|
||||
private var ttsStartRunnable: Runnable? = null
|
||||
private var ttsCallback: Runnable? = null
|
||||
private var ttsStopCallback: Runnable? = null
|
||||
private var unityAudioTargetObject: String = "DigitalPerson"
|
||||
|
||||
// 非静态方法,供Unity调用
|
||||
/**
 * Selects the Unity GameObject that receives the TTS audio messages sent via
 * `UnityPlayer.UnitySendMessage`.
 *
 * @param gameObjectName name of the receiving GameObject.
 */
fun setUnityAudioTarget(gameObjectName: String) {
    Log.d("UnityDigitalPerson", "Unity audio target set: $gameObjectName")
    unityAudioTargetObject = gameObjectName
}
|
||||
|
||||
/**
 * Registers the callback that is run shortly after TTS playback starts.
 *
 * The callback is stored on this activity directly. Going through the nullable static
 * `instance` reference would silently drop it whenever that reference is not set.
 *
 * @param callback runnable executed on the main thread when TTS playback begins.
 */
fun setTTSCallback(callback: Runnable) {
    ttsCallback = callback
    Log.d("UnityDigitalPerson", "TTS callback registered")
}
|
||||
|
||||
|
||||
/**
 * Registers the callback that is run after TTS playback has stopped.
 *
 * The callback is stored on this activity directly. Going through the nullable static
 * `instance` reference would silently drop it whenever that reference is not set.
 *
 * @param callback runnable executed on the main thread when TTS playback ends.
 */
fun setTTSStopCallback(callback: Runnable) {
    ttsStopCallback = callback
    Log.d("UnityDigitalPerson", "TTS stop callback registered")
}
|
||||
|
||||
// ==================== 音频处理 ====================
|
||||
private val holdToSpeakAudioBuffer = mutableListOf<Float>()
|
||||
private val HOLD_TO_SPEAK_MIN_SAMPLES = 16000 // 1秒的音频数据
|
||||
|
||||
// ==================== 协程 ====================
|
||||
private val ioScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
|
||||
private var recordingJob: Job? = null
|
||||
private var asrWorkerJob: Job? = null
|
||||
|
||||
// ==================== 权限 ====================
|
||||
private val micPermissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
||||
private val cameraPermissions: Array<String> = arrayOf(Manifest.permission.CAMERA)
|
||||
|
||||
// ==================== 生命周期 ====================
|
||||
/**
 * Publishes this activity through the companion-object reference, overlays the chat UI
 * on the Unity view and initializes all components.
 */
override fun onCreate(savedInstanceState: Bundle?) {
    super.onCreate(savedInstanceState)

    // Store the singleton instance.
    instance = this

    Log.d("UnityDigitalPerson", "Initializing with config: ${AppConfig.Avatar.UNITY_MODEL_PATH}")

    // Add the chat UI.
    addChatUI()

    // Initialize all components.
    initComponents()
}
|
||||
|
||||
/**
 * Releases recording, ASR, TTS and LLM resources, then hands over to the Unity base class.
 *
 * Cleanup runs before `super.onDestroy()` so it is not skipped or run against an
 * already torn-down player.
 * NOTE(review): some Unity player versions are reported to end the process from the base
 * implementation; confirm for the Tuanjie version in use.
 */
override fun onDestroy() {
    // Drop pending delayed TTS start/stop notifications so they cannot fire after teardown.
    ttsHandler.removeCallbacksAndMessages(null)

    stopRecording()
    recordingJob?.cancel()
    asrWorkerJob?.cancel()
    ioScope.cancel()

    // These are lateinit: initComponents() may not have reached them if it failed early.
    if (::ttsController.isInitialized) {
        ttsController.stop()
    }
    if (::asrManager.isInitialized) {
        asrManager.release()
    }
    llmManager?.destroy()
    llmManager = null

    // Only clear the static reference if it still points at this activity.
    if (instance === this) {
        instance = null
    }

    super.onDestroy()
}
|
||||
|
||||
/**
 * Logs whether the microphone permission request was granted or denied.
 * Results for other request codes are only forwarded to the superclass.
 */
override fun onRequestPermissionsResult(
    requestCode: Int,
    permissions: Array<out String>,
    grantResults: IntArray
) {
    super.onRequestPermissionsResult(requestCode, permissions, grantResults)
    if (requestCode != AppConfig.REQUEST_RECORD_AUDIO_PERMISSION) return

    // An empty result array counts as "denied".
    val micGranted = grantResults.firstOrNull() == PackageManager.PERMISSION_GRANTED
    if (micGranted) {
        Log.d("UnityDigitalPerson", "麦克风权限已授予")
    } else {
        Log.e("UnityDigitalPerson", "麦克风权限被拒绝")
    }
}
|
||||
|
||||
// ==================== UI初始化 ====================
|
||||
/**
 * Inflates the chat overlay, wires the hold-to-speak button and adds the overlay on top
 * of the Unity view. Any failure is logged and swallowed.
 */
private fun addChatUI() {
    try {
        // Inflate the layout that holds the chat UI.
        val overlay = layoutInflater.inflate(R.layout.activity_unity_digital_person, null)

        chatHistoryText = overlay.findViewById(R.id.my_text)
        holdToSpeakButton = overlay.findViewById(R.id.record_button)

        // The button is only visible in hold-to-speak mode.
        holdToSpeakButton.visibility = if (AppConfig.USE_HOLD_TO_SPEAK) View.VISIBLE else View.GONE

        // Press starts a recording turn; release or cancel ends it.
        holdToSpeakButton.setOnTouchListener { _, event ->
            val action = event.action
            if (action == MotionEvent.ACTION_DOWN) {
                onRecordButtonDown()
            } else if (action == MotionEvent.ACTION_UP || action == MotionEvent.ACTION_CANCEL) {
                onRecordButtonUp()
            }
            true
        }

        // Place the overlay above the Unity view, filling the window.
        val fillParent = ViewGroup.LayoutParams(
            ViewGroup.LayoutParams.MATCH_PARENT,
            ViewGroup.LayoutParams.MATCH_PARENT
        )
        addContentView(overlay, fillParent)

        Log.d("UnityDigitalPerson", "Chat UI added successfully")
    } catch (e: Exception) {
        Log.e("UnityDigitalPerson", "Failed to add chat UI: ${e.message}", e)
    }
}
|
||||
|
||||
// ==================== 组件初始化 ====================
|
||||
/**
 * Creates and wires the conversation memory, face detection, audio processor, VAD, ASR,
 * TTS and LLM components, starts the ASR worker coroutine and finally checks the
 * microphone permission.
 */
private fun initComponents() {
    val database = AppDatabase.getInstance(this)

    // Memory modules.
    conversationBufferMemory = ConversationBufferMemory(database)
    userMemoryStore = UserMemoryStore(this)

    // Face detection; both callbacks currently only log.
    faceDetectionPipeline = FaceDetectionPipeline(
        context = this,
        onResult = { result ->
            Log.d("UnityDigitalPerson", "Face detection result: ${result.faces.size} faces")
        },
        onPresenceChanged = { present, faceIdentityId, recognizedName ->
            Log.d("UnityDigitalPerson", "Presence changed: present=$present, faceId=$faceIdentityId, name=$recognizedName")
        }
    )

    // Audio processor.
    audioProcessor = AudioProcessor(this)

    // VAD manager.
    vadManager = VadManager(this)

    // ASR manager.
    asrManager = AsrManager(this).apply {
        setCallback(object : AsrManager.AsrCallback {
            override fun onAsrStarted() {
                Log.d("UnityDigitalPerson", "ASR started")
            }

            // A non-empty transcript is shown in the chat and forwarded to the LLM.
            override fun onAsrResult(text: String) {
                Log.d("UnityDigitalPerson", "ASR result: $text")
                if (text.isNotEmpty()) {
                    appendChat("用户: $text")
                    processUserMessage(text)
                }
            }

            override fun onAsrSkipped(reason: String) {
                Log.d("UnityDigitalPerson", "ASR skipped: $reason")
            }

            override fun shouldSkipAsr(): Boolean = false

            override fun isLlmInFlight(): Boolean = llmInFlight

            override fun onLlmCalled(text: String) {
                Log.d("UnityDigitalPerson", "LLM called with: $text")
            }
        })
        setAudioProcessor(audioProcessor)
        initSenseVoiceModel()
    }
    // Restart the ASR worker on the IO scope.
    asrWorkerJob?.cancel()
    asrWorkerJob = ioScope.launch {
        asrManager.runAsrWorker()
    }

    // TTS controller.
    ttsController = TtsController(this).apply {
        setCallback(object : TtsController.TtsCallback {
            override fun onTtsStarted(text: String) {
                Log.d("UnityDigitalPerson", "TTS started: $text")
                startTTSPlayback()
            }

            override fun onTtsCompleted() {
                Log.d("UnityDigitalPerson", "TTS completed")
                stopTTSPlayback()
            }

            override fun onTtsSegmentCompleted(durationMs: Long) {
                Log.d("UnityDigitalPerson", "TTS segment completed: $durationMs ms")
            }

            // Every audio chunk is forwarded to Unity.
            override fun onTtsAudioData(data: ByteArray) {
                sendTTSAudioToUnity(data)
            }

            override fun isTtsStopped(): Boolean = false

            override fun onClearAsrQueue() {
                Log.d("UnityDigitalPerson", "Clear ASR queue")
            }

            override fun onSetSpeaking(speaking: Boolean) {
                Log.d("UnityDigitalPerson", "Set speaking: $speaking")
            }

            override fun onEndTurn() {
                Log.d("UnityDigitalPerson", "End turn")
            }
        })
        init()
    }

    // Initialize the LLM.
    initLLM()

    // Initialize face detection.
    faceDetectionPipeline.initialize()

    // Check (and if needed request) the microphone permission. Recording itself is
    // started from the hold-to-speak button, not here.
    checkPermissions()
}
|
||||
|
||||
// ==================== LLM初始化 ====================
|
||||
/**
 * (Re)creates the local LLM manager and the conversation summary memory.
 *
 * Streamed result chunks are accumulated on the UI thread. When the final callback
 * arrives, the accumulated text is shown in the chat, queued for TTS, and `llmInFlight`
 * is cleared. Any failure (including a missing model file) is logged and swallowed.
 */
private fun initLLM() {
    try {
        Log.i("UnityDigitalPerson", "initLLM called for memory-local model")
        llmManager?.destroy()
        llmManager = null

        val modelPath = FileHelper.getLLMModelPath(this)
        // check() raises IllegalStateException, which the catch below logs.
        check(java.io.File(modelPath).exists()) { "RKLLM model file missing: $modelPath" }

        Log.i("UnityDigitalPerson", "Initializing local memory LLM with model path: $modelPath")
        val pendingReply = StringBuilder()

        llmManager = LLMManager(modelPath, object : LLMManagerCallback {
            override fun onThinking(msg: String, finished: Boolean) {
                Log.d("UnityDigitalPerson", "LOCAL onThinking finished=$finished msg=${msg.take(60)}")
            }

            override fun onResult(msg: String, finished: Boolean) {
                Log.d("UnityDigitalPerson", "LOCAL onResult finished=$finished len=${msg.length}")
                runOnUiThread {
                    if (finished) {
                        val reply = pendingReply.toString().trim()
                        pendingReply.setLength(0)
                        if (reply.isNotEmpty()) {
                            appendChat("助手: $reply")
                            // Speak the reply through TTS.
                            ttsController.enqueueSegment(reply)
                            ttsController.enqueueEnd()
                        }
                        llmInFlight = false
                    } else {
                        pendingReply.append(msg)
                    }
                }
            }
        })

        // Summary memory shares the database and the LLM manager created above.
        conversationSummaryMemory = ConversationSummaryMemory(
            AppDatabase.getInstance(this),
            llmManager
        )
    } catch (e: Exception) {
        Log.e("UnityDigitalPerson", "Failed to initialize LLM: ${e.message}", e)
    }
}
|
||||
|
||||
// ==================== 权限检查 ====================
|
||||
/**
 * Requests the microphone permission if it has not been granted yet.
 */
private fun checkPermissions() {
    val micState = ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
    val micGranted = micState == PackageManager.PERMISSION_GRANTED
    if (!micGranted) {
        ActivityCompat.requestPermissions(
            this,
            micPermissions,
            AppConfig.REQUEST_RECORD_AUDIO_PERMISSION
        )
    }

    // Optional: camera permission check (currently disabled).
    // if (ContextCompat.checkSelfPermission(this, Manifest.permission.CAMERA)
    //     != PackageManager.PERMISSION_GRANTED) {
    //     ActivityCompat.requestPermissions(
    //         this,
    //         cameraPermissions,
    //         AppConfig.REQUEST_CAMERA_PERMISSION
    //     )
    // }
}
|
||||
|
||||
// ==================== 录音控制 ====================
|
||||
/**
 * Starts a recording turn: initializes the microphone, resets the LLM/TTS/VAD state,
 * starts capturing and launches the sample-processing loop on the IO scope.
 * Does nothing if a recording is already running or the microphone cannot be initialized.
 */
private fun startRecording() {
    if (isRecording) return

    if (!audioProcessor.initMicrophone(micPermissions, AppConfig.REQUEST_RECORD_AUDIO_PERMISSION)) {
        Log.e("UnityDigitalPerson", "麦克风初始化失败/无权限")
        return
    }

    // Reset per-turn state before capturing.
    llmInFlight = false
    ttsController.reset()
    vadManager.reset()
    audioProcessor.startRecording()
    isRecording = true

    Log.d("UnityDigitalPerson", "Starting processSamplesLoop coroutine")
    // Replace any previous loop job with a fresh one.
    recordingJob?.cancel()
    recordingJob = ioScope.launch {
        processSamplesLoop()
    }
    Log.d("UnityDigitalPerson", "startRecording completed")
}
|
||||
|
||||
/**
 * Hold-to-speak press: interrupts the current TTS turn, clears the accumulated audio
 * and starts recording. Ignored while a recording is already running.
 */
private fun onRecordButtonDown() {
    if (!isRecording) {
        ttsController.interruptForNewTurn()
        holdToSpeakAudioBuffer.clear()
        startRecording()
    }
}
|
||||
|
||||
/**
 * Hold-to-speak release: stops capturing and hands the accumulated audio to ASR if it
 * holds at least [HOLD_TO_SPEAK_MIN_SAMPLES] samples; otherwise shows a "too short" notice.
 * Ignored when no recording is running.
 *
 * The drain coroutine waits for the cancelled sample loop to finish first. The buffer is
 * a plain mutable list, so the loop must not append to it while it is read and cleared here.
 */
private fun onRecordButtonUp() {
    if (!isRecording) return
    isRecording = false
    audioProcessor.stopRecording()

    val samplingJob = recordingJob
    samplingJob?.cancel()
    recordingJob = ioScope.launch {
        // Cancellation is cooperative; join so the loop has really stopped touching the buffer.
        samplingJob?.join()

        val audioData = audioProcessor.getRecordedData()
        holdToSpeakAudioBuffer.addAll(audioData.toList())
        if (holdToSpeakAudioBuffer.size >= HOLD_TO_SPEAK_MIN_SAMPLES) {
            val finalAudio = holdToSpeakAudioBuffer.toFloatArray()
            asrManager.enqueueAudioSegment(finalAudio, finalAudio)
        } else {
            runOnUiThread { appendChat("[系统] 录音时间太短,请长按至少1秒") }
        }
        holdToSpeakAudioBuffer.clear()
    }
}
|
||||
|
||||
/**
 * Stops an ongoing recording: ends capturing, cancels the sample loop and stops TTS.
 * Does nothing when no recording is running.
 */
private fun stopRecording() {
    if (isRecording) {
        isRecording = false
        audioProcessor.stopRecording()

        recordingJob?.cancel()
        recordingJob = null

        ttsController.stop()
        Log.d("UnityDigitalPerson", "stopRecording completed")
    }
}
|
||||
|
||||
// ==================== 音频处理循环 ====================
|
||||
/**
 * Sample-processing loop that runs while a recording is active.
 *
 * In hold-to-speak mode it appends captured audio to `holdToSpeakAudioBuffer`. Otherwise
 * it reads fixed-size windows, feeds them to the VAD manager and finalizes the VAD once
 * the loop ends. While TTS is playing, microphone data is read and discarded.
 *
 * The loops check the active state of the calling coroutine rather than of `ioScope`,
 * so cancelling the recording job also stops the VAD branch, which never suspends.
 */
private suspend fun processSamplesLoop() {
    Log.d("UnityDigitalPerson", "processSamplesLoop started")

    if (AppConfig.USE_HOLD_TO_SPEAK) {
        // Hold-to-speak mode: accumulate audio; it is sent to ASR on button release.
        while (isRecording && currentCoroutineContext().isActive) {
            val audioData = audioProcessor.getAudioData()
            if (audioData.isNotEmpty()) {
                holdToSpeakAudioBuffer.addAll(audioData.toList())
            }
            // Avoid spinning the CPU.
            delay(10)
        }
        return
    }

    // Traditional mode: voice activity detection.
    val windowSize = AppConfig.WINDOW_SIZE
    val buffer = ShortArray(windowSize)
    var loopCount = 0

    while (isRecording && currentCoroutineContext().isActive) {
        loopCount++
        if (loopCount % 100 == 0) {
            Log.d(AppConfig.TAG, "processSamplesLoop running, loopCount=$loopCount, ttsPlaying=${ttsController.isPlaying()}")
        }

        if (ttsController.isPlaying()) {
            if (vadManager.isInSpeech()) {
                Log.d(AppConfig.TAG, "TTS playing, resetting VAD state")
                vadManager.clearState()
            }
            // Read and discard microphone data while TTS is playing.
            audioProcessor.readAudio(buffer)
            continue
        }

        // Only complete windows are processed.
        val ret = audioProcessor.readAudio(buffer)
        if (ret <= 0 || ret != windowSize) continue

        val chunk = audioProcessor.convertShortToFloat(buffer)
        val processedChunk = audioProcessor.applyGain(chunk)

        val result = vadManager.processAudioChunk(chunk, processedChunk)

        if (vadManager.vadComputeCount % 100 == 0) {
            Log.d(AppConfig.TAG, "VAD result: $result, inSpeech=${vadManager.isInSpeech()}")
        }

        if (loopCount % 1000 == 0) {
            Log.d(AppConfig.TAG, "VAD status: inSpeech=${vadManager.isInSpeech()}, speechLen=${vadManager.getSpeechLength()}")
        }
    }

    vadManager.forceFinalize()
}
|
||||
|
||||
// ==================== 消息处理 ====================
|
||||
/**
 * Stores the user message in the conversation buffer and asks the LLM for a reply.
 *
 * `llmInFlight` is only raised when a request is actually issued. With no LLM manager
 * (initialization failed) there is no result callback that would ever clear the flag.
 *
 * @param message recognized or typed user text.
 */
private fun processUserMessage(message: String) {
    conversationBufferMemory.addMessage(activeUserId, "user", message)

    val manager = llmManager
    if (manager == null) {
        Log.e("UnityDigitalPerson", "LLM is not initialized; message not sent")
        return
    }

    llmInFlight = true
    manager.generateResponseWithSystem(
        getSystemPrompt(),
        message
    )
}
|
||||
|
||||
/** Returns the fixed system prompt sent with every LLM request. */
private fun getSystemPrompt(): String = "你是一个友好的数字人助手。"
|
||||
|
||||
/**
 * Appends one line to the chat history view on the UI thread.
 *
 * The view is `lateinit` and stays unset when `addChatUI()` failed (it swallows its
 * exceptions), so the text is dropped with a warning instead of crashing.
 *
 * @param text line to append; a trailing newline is added.
 */
private fun appendChat(text: String) {
    runOnUiThread {
        if (::chatHistoryText.isInitialized) {
            chatHistoryText.append(text + "\n")
        } else {
            Log.w("UnityDigitalPerson", "Chat UI not available; dropping chat line")
        }
    }
}
|
||||
|
||||
// User id under which conversation messages are stored; currently always "face_1".
private val activeUserId: String
    get() = "face_1"
|
||||
|
||||
// ==================== TTS控制 ====================
|
||||
/**
 * Marks TTS as playing and, after a 100 ms delay, runs the registered start callback
 * on the main thread.
 *
 * A pending delayed "stop" notification is cancelled first. Otherwise a segment starting
 * within the 500 ms stop delay would still trigger the stop callback during playback.
 */
private fun startTTSPlayback() {
    // A new segment began: the previously scheduled stop no longer applies.
    ttsStopRunnable?.let { ttsHandler.removeCallbacks(it) }

    if (isTTSPlaying) return
    isTTSPlaying = true

    ttsStartRunnable?.let { ttsHandler.removeCallbacks(it) }
    // ttsHandler is bound to the main looper, so the callback can be run directly.
    val notifyStart = Runnable { ttsCallback?.run() }
    ttsStartRunnable = notifyStart
    ttsHandler.postDelayed(notifyStart, 100) // 100 ms delay to let audio output begin
}
|
||||
|
||||
/**
 * Sends one chunk of TTS audio to Unity as a Base64 string (no line wrapping), addressed
 * to the configured target GameObject's `OnTTSAudioDataBase64` method. Empty chunks are
 * ignored; failures are logged.
 *
 * @param data raw audio bytes from the TTS callback.
 */
private fun sendTTSAudioToUnity(data: ByteArray) {
    if (data.isNotEmpty()) {
        try {
            val encoded = Base64.encodeToString(data, Base64.NO_WRAP)
            UnityPlayer.UnitySendMessage(unityAudioTargetObject, "OnTTSAudioDataBase64", encoded)
        } catch (e: Exception) {
            Log.w("UnityDigitalPerson", "sendTTSAudioToUnity failed: ${e.message}")
        }
    }
}
|
||||
|
||||
/**
 * Schedules the end-of-playback notification: after 500 ms the playing flag is cleared
 * and the registered stop callback is run on the UI thread. Does nothing when TTS is
 * not marked as playing.
 */
private fun stopTTSPlayback() {
    if (isTTSPlaying) {
        // Replace any stop notification that is already pending.
        ttsStopRunnable?.let { pending -> ttsHandler.removeCallbacks(pending) }

        val notifyStop = Runnable {
            isTTSPlaying = false
            val onStopped = ttsStopCallback
            if (onStopped != null) {
                runOnUiThread(onStopped)
            }
        }
        ttsStopRunnable = notifyStop
        ttsHandler.postDelayed(notifyStop, 500) // 500 ms delay to ride out short gaps
    }
}
|
||||
}
|
||||
@@ -61,11 +61,30 @@ object AppConfig {
|
||||
|
||||
object Avatar {
|
||||
// Compile-time switch in gradle.properties/local.properties: USE_LIVE2D=true|false
|
||||
const val USE_LIVE2D = BuildConfig.USE_LIVE2D
|
||||
// const val USE_LIVE2D = BuildConfig.USE_LIVE2D
|
||||
// const val MODEL_DIR = "live2d_model/mao_pro_zh"
|
||||
// const val MODEL_JSON = "mao_pro.model3.json"
|
||||
const val MODEL_DIR = "live2d_model/Haru_pro_jp"
|
||||
const val MODEL_JSON = "haru_greeter_t05.model3.json"
|
||||
// const val MODEL_DIR = "live2d_model/Haru_pro_jp"
|
||||
// const val MODEL_JSON = "haru_greeter_t05.model3.json"
|
||||
// 数字人类型: "live2d" 或 "unity"
|
||||
const val DIGITAL_PERSON_TYPE = "unity"
|
||||
// Live2D 配置
|
||||
const val LIVE2D_MODEL_DIR = "live2d_model/Haru_pro_jp"
|
||||
const val LIVE2D_MODEL_JSON = "haru_greeter_t05.model3.json"
|
||||
const val LIVE2D_SCALE = 1.0f
|
||||
// Unity 配置
|
||||
const val UNITY_MODEL_PATH = "asobi_chan_b"
|
||||
const val UNITY_SCALE = 1.0f
|
||||
|
||||
// 检查是否使用Unity
|
||||
/** True when the configured digital-person type is "unity". */
fun isUnity(): Boolean = DIGITAL_PERSON_TYPE == "unity"
|
||||
|
||||
// 检查是否使用Live2D
|
||||
/** True when the configured digital-person type is "live2d". */
fun isLive2D(): Boolean = DIGITAL_PERSON_TYPE == "live2d"
|
||||
}
|
||||
|
||||
object QCloud {
|
||||
|
||||
@@ -34,10 +34,10 @@ class Live2DRenderer(
|
||||
val model = Live2DCharacter()
|
||||
model.loadFromAssets(
|
||||
assets = context.assets,
|
||||
modelDir = AppConfig.Avatar.MODEL_DIR,
|
||||
modelJsonName = AppConfig.Avatar.MODEL_JSON
|
||||
modelDir = AppConfig.Avatar.LIVE2D_MODEL_DIR,
|
||||
modelJsonName = AppConfig.Avatar.LIVE2D_MODEL_JSON
|
||||
)
|
||||
model.bindTextures(context.assets, AppConfig.Avatar.MODEL_DIR)
|
||||
model.bindTextures(context.assets, AppConfig.Avatar.LIVE2D_MODEL_DIR)
|
||||
character = model
|
||||
Log.i(TAG, "Live2D model loaded and textures bound")
|
||||
}.onFailure {
|
||||
|
||||
@@ -57,6 +57,7 @@ class QCloudTtsManager(private val context: Context) {
|
||||
fun onTtsStarted(text: String)
|
||||
fun onTtsCompleted()
|
||||
fun onTtsSegmentCompleted(durationMs: Long)
|
||||
fun onTtsAudioData(data: ByteArray)
|
||||
fun isTtsStopped(): Boolean
|
||||
fun onClearAsrQueue()
|
||||
fun onSetSpeaking(speaking: Boolean)
|
||||
@@ -314,6 +315,7 @@ class QCloudTtsManager(private val context: Context) {
|
||||
}
|
||||
val data = ByteArray(buffer.remaining())
|
||||
buffer.get(data)
|
||||
callback?.onTtsAudioData(data)
|
||||
writeAudioTrack(audioTrack, data)
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ class TtsController(private val context: Context) {
|
||||
fun onTtsStarted(text: String)
|
||||
fun onTtsCompleted()
|
||||
fun onTtsSegmentCompleted(durationMs: Long)
|
||||
fun onTtsAudioData(data: ByteArray)
|
||||
fun isTtsStopped(): Boolean
|
||||
fun onClearAsrQueue()
|
||||
fun onSetSpeaking(speaking: Boolean)
|
||||
@@ -45,6 +46,10 @@ class TtsController(private val context: Context) {
|
||||
cb.onTtsSegmentCompleted(durationMs)
|
||||
}
|
||||
|
||||
override fun onTtsAudioData(data: ByteArray) {
|
||||
cb.onTtsAudioData(data)
|
||||
}
|
||||
|
||||
override fun isTtsStopped(): Boolean {
|
||||
return cb.isTtsStopped()
|
||||
}
|
||||
@@ -94,6 +99,10 @@ class TtsController(private val context: Context) {
|
||||
cb.onTtsSegmentCompleted(durationMs)
|
||||
}
|
||||
|
||||
override fun onTtsAudioData(data: ByteArray) {
|
||||
cb.onTtsAudioData(data)
|
||||
}
|
||||
|
||||
override fun isTtsStopped(): Boolean {
|
||||
return cb.isTtsStopped()
|
||||
}
|
||||
|
||||
@@ -47,6 +47,7 @@ class TtsManager(private val context: Context) {
|
||||
fun onTtsStarted(text: String)
|
||||
fun onTtsCompleted()
|
||||
fun onTtsSegmentCompleted(durationMs: Long)
|
||||
fun onTtsAudioData(data: ByteArray)
|
||||
fun isTtsStopped(): Boolean
|
||||
fun onClearAsrQueue()
|
||||
fun onSetSpeaking(speaking: Boolean)
|
||||
@@ -305,6 +306,7 @@ class TtsManager(private val context: Context) {
|
||||
trace?.markTtsFirstAudioPlay()
|
||||
callback?.onTraceMarkTtsFirstAudioPlay()
|
||||
}
|
||||
callback?.onTtsAudioData(floatSamplesToPcm16(samples))
|
||||
audioTrack.write(samples, 0, samples.size, AudioTrack.WRITE_BLOCKING)
|
||||
ttsTotalSamplesWritten += samples.size
|
||||
1
|
||||
@@ -360,4 +362,15 @@ class TtsManager(private val context: Context) {
|
||||
}
|
||||
Thread.sleep(1000)
|
||||
}
|
||||
|
||||
/**
 * Converts float samples to 16-bit PCM bytes, low byte first.
 *
 * Each sample is clamped to [-1, 1], scaled by 32767 and truncated toward zero.
 *
 * @param samples input samples.
 * @return byte array with two bytes per input sample.
 */
private fun floatSamplesToPcm16(samples: FloatArray): ByteArray {
    val pcm = ByteArray(samples.size * 2)
    samples.forEachIndexed { index, sample ->
        // After clamping the value always fits in 16 bits.
        val quantized = (sample.coerceIn(-1f, 1f) * 32767f).toInt()
        pcm[2 * index] = quantized.toByte()
        pcm[2 * index + 1] = (quantized shr 8).toByte()
    }
    return pcm
}
|
||||
}
|
||||
|
||||
86
app/src/main/res/layout/activity_unity_digital_person.xml
Normal file
86
app/src/main/res/layout/activity_unity_digital_person.xml
Normal file
@@ -0,0 +1,86 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:app="http://schemas.android.com/apk/res-auto"
|
||||
xmlns:tools="http://schemas.android.com/tools"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="match_parent"
|
||||
android:background="@android:color/transparent"
|
||||
tools:context="com.digitalperson.UnityDigitalPersonActivity">
|
||||
|
||||
<ScrollView
|
||||
android:id="@+id/scroll_view"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="200dp"
|
||||
android:layout_margin="12dp"
|
||||
android:background="#55000000"
|
||||
android:fillViewport="true"
|
||||
app:layout_constraintBottom_toTopOf="@+id/button_row"
|
||||
app:layout_constraintEnd_toEndOf="parent"
|
||||
app:layout_constraintStart_toStartOf="parent"
|
||||
app:layout_constraintTop_toTopOf="parent">
|
||||
|
||||
<TextView
|
||||
android:id="@+id/my_text"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="wrap_content"
|
||||
android:background="@android:color/transparent"
|
||||
android:padding="16dp"
|
||||
android:scrollbars="vertical"
|
||||
android:text="@string/hint"
|
||||
android:textColor="@android:color/white"
|
||||
android:textIsSelectable="true" />
|
||||
</ScrollView>
|
||||
|
||||
<LinearLayout
|
||||
android:id="@+id/button_row"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:gravity="center"
|
||||
android:orientation="horizontal"
|
||||
android:padding="16dp"
|
||||
app:layout_constraintBottom_toBottomOf="parent"
|
||||
app:layout_constraintEnd_toEndOf="parent"
|
||||
app:layout_constraintStart_toStartOf="parent">
|
||||
|
||||
<LinearLayout
|
||||
android:id="@+id/traditional_buttons"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="wrap_content"
|
||||
android:gravity="center"
|
||||
android:orientation="horizontal"
|
||||
android:visibility="gone">
|
||||
|
||||
<Button
|
||||
android:id="@+id/start_button"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_marginEnd="12dp"
|
||||
android:layout_weight="1"
|
||||
android:text="@string/start" />
|
||||
|
||||
<Button
|
||||
android:id="@+id/stop_button"
|
||||
android:layout_width="0dp"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_weight="1"
|
||||
android:text="@string/stop" />
|
||||
</LinearLayout>
|
||||
</LinearLayout>
|
||||
|
||||
<Button
|
||||
android:id="@+id/record_button"
|
||||
android:layout_width="100dp"
|
||||
android:layout_height="100dp"
|
||||
android:layout_margin="24dp"
|
||||
android:text="按住说话"
|
||||
android:textAllCaps="false"
|
||||
android:textColor="@android:color/white"
|
||||
android:textSize="14sp"
|
||||
android:background="@drawable/record_button_background"
|
||||
android:stateListAnimator="@animator/button_elevation"
|
||||
android:visibility="visible"
|
||||
app:backgroundTint="#4CAF50"
|
||||
app:layout_constraintBottom_toBottomOf="parent"
|
||||
app:layout_constraintEnd_toEndOf="parent" />
|
||||
|
||||
</androidx.constraintlayout.widget.ConstraintLayout>
|
||||
@@ -4,4 +4,9 @@
|
||||
<string name="stop">结束</string>
|
||||
<string name="hint">点击“开始”说话;识别后会请求大模型并用 TTS 播放回复。</string>
|
||||
<string name="system_prompt">你是一个特殊学校一年级的数字人老师,你的名字叫,小鱼老师,你的任务是教这些特殊学校的学生一些基础的生活常识。和这些小学生说话要有耐心,一定要讲明白,尽量用简短的语句、活泼的语气来回复。你可以和他们日常对话和《教材》相关的话题。在生成回复后,请你先检查一下内容是否符合我们约定的主题。请使用口语对话的形式跟学生聊天。在每次回复的最前面,用方括号标注你的心情,格式为[中性、悲伤、高兴、生气、恐惧、撒娇、震惊、厌恶],例如:[高兴]同学你好呀!请问有什么问题吗?</string>
|
||||
|
||||
<!-- Unity 的字符串
|
||||
有人建议是: 将Launcher/src/main/res/values/strings.xml 文件拷贝进unityLibrary/src/main/res/values/里,这样做的确就避免了那个变量值为0 的问题。
|
||||
-->
|
||||
<string name="game_view_content_description">Game view</string>
|
||||
</resources>
|
||||
|
||||
@@ -20,9 +20,10 @@ kotlin.code.style=official
|
||||
# Enables namespacing of each library's R class so that its R class includes only the
|
||||
# resources declared in the library itself and none from the library's dependencies,
|
||||
# thereby reducing the size of the R class for that library
|
||||
android.nonTransitiveRClass=true
|
||||
#android.nonTransitiveRClass=true
|
||||
|
||||
LLM_API_URL=https://ark.cn-beijing.volces.com/api/v3/chat/completions
|
||||
LLM_API_KEY=14ee3e0e-ec07-4678-8b92-64f3b1416592
|
||||
LLM_MODEL=doubao-1-5-pro-32k-character-250715
|
||||
USE_LIVE2D=true
|
||||
tuanjieStreamingAssets=.unity3d, google-services-desktop.json, google-services.json, GoogleService-Info.plist
|
||||
|
||||
@@ -20,3 +20,4 @@ include ':app'
|
||||
include ':framework'
|
||||
project(':framework').projectDir = new File(settingsDir, 'Live2DFramework/framework')
|
||||
|
||||
include ':tuanjieLibrary'
|
||||
|
||||
Reference in New Issue
Block a user