live2d model

This commit is contained in:
gcw_4spBpAfv
2026-03-02 09:25:50 +08:00
parent d63d4b03cf
commit 2f6166ab6c
179 changed files with 100625 additions and 2018 deletions

View File

@@ -0,0 +1,223 @@
package com.digitalperson.asr
import android.content.Context
import android.os.SystemClock
import android.util.Log
import com.digitalperson.BuildConfig
import com.digitalperson.audio.AudioProcessor
import com.digitalperson.config.AppConfig
import com.digitalperson.engine.SenseVoiceEngineRKNN
import com.digitalperson.util.FileHelper
import kotlinx.coroutines.CancellationException
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.currentCoroutineContext
import kotlinx.coroutines.isActive
import kotlinx.coroutines.withContext
import java.io.File
/**
 * Owns a [SenseVoiceEngineRKNN] instance and an unbounded queue of audio segments.
 *
 * Producers hand segments over with [enqueueAudioSegment]; [runAsrWorker] drains the
 * queue, transcribes each segment and reports progress through [AsrCallback].
 * All calls into the engine made by the worker and by [release] are serialized
 * on a single lock so the engine is never torn down in the middle of a transcription.
 */
class AsrManager(private val context: Context) {

    /** Hooks through which the worker reports progress and asks whether to proceed. */
    interface AsrCallback {
        /** Invoked right before a segment is saved and transcribed. */
        fun onAsrStarted()

        /** Invoked with the cleaned transcription once it has passed the text filter. */
        fun onAsrResult(text: String)

        /** Invoked with a short reason when the cleaned transcription is rejected. */
        fun onAsrSkipped(reason: String)

        /** Polled once per segment; returning `true` drops the segment untranscribed. */
        fun shouldSkipAsr(): Boolean

        /** Polled once per segment; returning `true` drops the segment untranscribed. */
        fun isLlmInFlight(): Boolean

        /** Invoked after [onAsrResult] when `BuildConfig.LLM_API_KEY` is not blank. */
        fun onLlmCalled(text: String)
    }

    // Written by init/release and read by the worker and isInitialized(); volatile so
    // a reader that does not hold nativeLock still observes the latest reference.
    @Volatile
    private var senseVoice: SenseVoiceEngineRKNN? = null

    // Guards transcription and teardown of the native engine.
    private val nativeLock = Any()

    // Pairs of (original audio, processed audio) waiting to be transcribed.
    private val asrQueue = Channel<Pair<FloatArray, FloatArray>>(capacity = Channel.UNLIMITED)

    // NOTE(review): presumably set from a different thread than the worker — confirm.
    @Volatile
    private var audioProcessor: AudioProcessor? = null

    // NOTE(review): presumably set from a different thread than the worker — confirm.
    @Volatile
    private var callback: AsrCallback? = null

    /** Registers the callback used by [runAsrWorker]. */
    fun setCallback(callback: AsrCallback) {
        this.callback = callback
    }

    /** Registers the processor used to dump each segment as WAV files before transcription. */
    fun setAudioProcessor(audioProcessor: AudioProcessor) {
        this.audioProcessor = audioProcessor
    }

    /**
     * Copies the SenseVoice assets, creates the engine and loads the model.
     *
     * NOTE(review): an engine loaded by an earlier call is replaced without being
     * deinitialized; call [release] first when re-initializing.
     *
     * @return `true` when the model was loaded and the engine is ready; `false` on any
     * failure (the failure is logged, never thrown).
     */
    fun initSenseVoiceModel(): Boolean {
        return try {
            Log.i(TAG, "ASR: init SenseVoice RKNN (scheme A)")
            val modelDir = FileHelper.copySenseVoiceAssets(context)
            val modelPath = File(modelDir, "sense-voice-encoder.rknn").absolutePath
            val embeddingPath = File(modelDir, "embedding.npy").absolutePath
            val bpePath = File(modelDir, "chn_jpn_yue_eng_ko_spectok.bpe.model").absolutePath

            logNativeLibraryDir()
            Log.i(TAG, "SenseVoice model paths:")
            logModelFile("model", modelPath)
            logModelFile("embedding", embeddingPath)
            logModelFile("bpe", bpePath)

            val startMs = SystemClock.elapsedRealtime()
            val engine = try {
                SenseVoiceEngineRKNN(context)
            } catch (e: UnsatisfiedLinkError) {
                // An Error would bypass the outer catch; wrap it so init reports false.
                throw IllegalStateException("Load native libraries failed: ${e.message}", e)
            }
            val ok = try {
                engine.loadModelDirectly(modelPath, embeddingPath, bpePath)
            } catch (t: Throwable) {
                throw IllegalStateException("SenseVoice loadModelDirectly crashed: ${t.message}", t)
            }
            val elapsedMs = SystemClock.elapsedRealtime() - startMs
            Log.i(TAG, "SenseVoice loadModelDirectly ok=$ok costMs=$elapsedMs")
            check(ok) { "SenseVoiceEngineRKNN loadModelDirectly returned false" }

            senseVoice = engine
            true
        } catch (e: Exception) {
            Log.e(TAG, "Failed to initialize SenseVoice model: ${e.message}", e)
            false
        }
    }

    /**
     * Queues one audio segment for the worker.
     *
     * @param originalAudio samples as captured; only used for the debug WAV dump.
     * @param processedAudio samples that are passed to the engine.
     */
    fun enqueueAudioSegment(originalAudio: FloatArray, processedAudio: FloatArray) {
        // trySend never throws; a rejected element is reported through the result.
        val result = asrQueue.trySend(originalAudio to processedAudio)
        if (result.isFailure) {
            val cause = result.exceptionOrNull()?.message ?: "channel rejected the segment"
            Log.e(TAG, "Failed to enqueue audio segment: $cause")
        }
    }

    /** Discards every segment that is currently waiting in the queue. */
    fun clearQueue() {
        while (asrQueue.tryReceive().isSuccess) {
            // Drain only; the received element is intentionally dropped.
        }
    }

    /**
     * Processes queued segments until the calling coroutine is cancelled or receiving
     * from the queue fails.
     *
     * Cancellation is propagated to the caller (the `CancellationException` is rethrown);
     * any other failure is logged and ends the worker normally.
     */
    suspend fun runAsrWorker() {
        Log.d(TAG, "ASR worker started")
        try {
            while (currentCoroutineContext().isActive) {
                val (originalSeg, processedSeg) = try {
                    Log.d(TAG, "ASR worker waiting for audio segment")
                    asrQueue.receive()
                } catch (e: CancellationException) {
                    throw e
                } catch (e: Throwable) {
                    Log.e(TAG, "ASR worker receive failed: ${e.message}")
                    break
                }
                Log.d(TAG, "ASR worker received audio segment, size=${processedSeg.size}")
                processSegment(originalSeg, processedSeg)
            }
        } catch (e: CancellationException) {
            // Cancellation is not an error; let structured concurrency see it.
            Log.d(TAG, "ASR worker cancelled")
            throw e
        } catch (e: Throwable) {
            Log.e(TAG, "ASR worker error: ${e.message}", e)
        } finally {
            Log.d(TAG, "ASR worker exiting")
        }
    }

    /** Deinitializes the engine (waiting for a running transcription) and empties the queue. */
    fun release() {
        synchronized(nativeLock) {
            try {
                senseVoice?.deinitialize()
            } catch (e: Exception) {
                Log.e(TAG, "Error deinitializing SenseVoice: ${e.message}")
            }
            senseVoice = null
        }
        clearQueue()
    }

    /** @return `true` when an engine is present and reports itself initialized. */
    fun isInitialized(): Boolean = senseVoice?.isInitialized == true

    /** Runs one segment through skip check, WAV dump, transcription, filtering and callbacks. */
    private fun processSegment(originalSeg: FloatArray, processedSeg: FloatArray) {
        val listener = callback
        // Evaluate each predicate once so the logged values are the ones that decided.
        val shouldSkip = listener?.shouldSkipAsr() == true
        val llmInFlight = listener?.isLlmInFlight() == true
        if (shouldSkip || llmInFlight) {
            Log.d(TAG, "ASR worker skipping segment: shouldSkip=$shouldSkip, llmInFlight=$llmInFlight")
            return
        }

        listener?.onAsrStarted()
        Log.d(TAG, "ASR started: processing audio segment")
        saveAsrAudio(originalSeg, processedSeg)

        val raw = transcribe(processedSeg)
        Log.d(TAG, "ASR raw result: $raw")

        val text = removeTokens(raw)
        val skipReason = filterText(text)
        if (skipReason != null) {
            listener?.onAsrSkipped(skipReason)
            return
        }

        listener?.onAsrResult(text)
        if (BuildConfig.LLM_API_KEY.isBlank()) {
            Log.w(TAG, "LLM API Key is not configured")
            return
        }
        listener?.onLlmCalled(text)
    }

    /** Transcribes [samples] under the native lock; returns "" when the engine is missing or fails. */
    private fun transcribe(samples: FloatArray): String = synchronized(nativeLock) {
        val engine = senseVoice
        if (engine == null || !engine.isInitialized) {
            Log.e(TAG, "ASR failed: SenseVoice engine not initialized")
            ""
        } else {
            try {
                engine.transcribeBuffer(samples)
            } catch (t: Throwable) {
                Log.e(TAG, "ASR transcribe failed: ${t.message}", t)
                ""
            }
        }
    }

    /** Best-effort diagnostics: logs the native library directory and the files in it. */
    private fun logNativeLibraryDir() {
        try {
            val libDir = context.applicationInfo.nativeLibraryDir
            Log.i(TAG, "nativeLibraryDir=$libDir")
            val names = File(libDir).list()?.joinToString(", ") ?: "(empty)"
            Log.i(TAG, "nativeLibraryDir files: $names")
        } catch (t: Throwable) {
            Log.w(TAG, "Failed to list nativeLibraryDir: ${t.message}")
        }
    }

    /** Logs path, existence and size of one model file. */
    private fun logModelFile(label: String, path: String) {
        val file = File(path)
        Log.i(TAG, " $label=$path exists=${file.exists()} size=${file.length()}")
    }

    /** Writes both versions of a segment as timestamped WAV files; failures are only logged. */
    private fun saveAsrAudio(originalAudio: FloatArray, processedAudio: FloatArray) {
        try {
            val timestamp = System.currentTimeMillis()
            val asrAudioDir = FileHelper.getAsrAudioDir(context)
            val processor = audioProcessor ?: return
            processor.saveAudioAsWav(
                File(asrAudioDir, "asr_${timestamp}_original.wav"),
                originalAudio,
                AppConfig.SAMPLE_RATE,
            )
            processor.saveAudioAsWav(
                File(asrAudioDir, "asr_${timestamp}_processed.wav"),
                processedAudio,
                AppConfig.SAMPLE_RATE,
            )
        } catch (e: Exception) {
            Log.e(TAG, "Error saving ASR audio: ${e.message}")
        }
    }

    /** Strips `<|...|>` tokens and stray arrow-like characters, then normalizes whitespace. */
    private fun removeTokens(text: String): String =
        text.replace(SPECIAL_TOKEN_REGEX, "")
            .replace(ARROW_CHARS_REGEX, "")
            .trim()
            .replace(WHITESPACE_REGEX, " ")

    /**
     * Decides whether a cleaned transcription should be dropped.
     *
     * @return a human-readable reason when the text must be skipped, or `null` to keep it.
     */
    private fun filterText(text: String): String? = when {
        text.isBlank() -> "blank text"
        text.length == 1 && text[0].equals('i', ignoreCase = true) -> "single 'i'"
        text.length > AppConfig.Asr.MAX_TEXT_LENGTH -> "too long (${text.length} chars)"
        else -> null
    }

    companion object {
        private const val TAG = "AsrManager"

        // Compiled once instead of on every transcription result.
        private val SPECIAL_TOKEN_REGEX = Regex("<\\|[^>]+\\|>")

        // NOTE(review): '>' appears twice in this class; one of them may originally have
        // been a different (e.g. full-width) character — confirm against the upstream file.
        private val ARROW_CHARS_REGEX = Regex("[>>≥≫]")
        private val WHITESPACE_REGEX = Regex("\\s+")
    }
}