live2d model
This commit is contained in:
223
app/src/main/java/com/digitalperson/asr/AsrManager.kt
Normal file
223
app/src/main/java/com/digitalperson/asr/AsrManager.kt
Normal file
@@ -0,0 +1,223 @@
|
||||
package com.digitalperson.asr
|
||||
|
||||
import android.content.Context
import android.os.SystemClock
import android.util.Log
import com.digitalperson.BuildConfig
import com.digitalperson.audio.AudioProcessor
import com.digitalperson.config.AppConfig
import com.digitalperson.engine.SenseVoiceEngineRKNN
import com.digitalperson.util.FileHelper
import kotlinx.coroutines.CancellationException
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.currentCoroutineContext
import kotlinx.coroutines.isActive
import kotlinx.coroutines.withContext
import java.io.File
|
||||
|
||||
/**
 * Owns a [SenseVoiceEngineRKNN] instance and a queue of audio segments waiting to be transcribed.
 *
 * Typical flow, as far as this class is concerned:
 * 1. [initSenseVoiceModel] copies the model assets and loads the engine.
 * 2. Producers hand segments to [enqueueAudioSegment].
 * 3. A coroutine running [runAsrWorker] drains the queue, transcribes each segment and reports
 *    the outcome through the registered [AsrCallback].
 * 4. [release] tears the engine down and drops any segments still queued.
 *
 * Every call into the native engine made by the worker, and the teardown in [release], is
 * serialized on a single lock so the engine is never deinitialized in the middle of a
 * transcription.
 */
class AsrManager(private val context: Context) {

    /** Queries and notifications exchanged with the owner while [runAsrWorker] handles a segment. */
    interface AsrCallback {
        /** Called right before a dequeued segment is saved and transcribed. */
        fun onAsrStarted()

        /** Called with the cleaned transcription once it has passed the text filter. */
        fun onAsrResult(text: String)

        /** Called with a short reason when the cleaned transcription is rejected by the text filter. */
        fun onAsrSkipped(reason: String)

        /** When this returns `true` the dequeued segment is dropped without being transcribed. */
        fun shouldSkipAsr(): Boolean

        /** When this returns `true` the dequeued segment is dropped without being transcribed. */
        fun isLlmInFlight(): Boolean

        /** Called after [onAsrResult] when `BuildConfig.LLM_API_KEY` is not blank. */
        fun onLlmCalled(text: String)
    }

    // Volatile: assigned by init/release and read by the worker and isInitialized(), which may
    // run on different threads.
    @Volatile
    private var senseVoice: SenseVoiceEngineRKNN? = null

    // Guards transcription and engine teardown.
    private val nativeLock = Any()

    // Each element is (original audio, processed audio) for one segment.
    private val asrQueue = Channel<Pair<FloatArray, FloatArray>>(capacity = Channel.UNLIMITED)

    private var audioProcessor: AudioProcessor? = null

    private var callback: AsrCallback? = null

    /** Registers the callback that receives worker events. */
    fun setCallback(callback: AsrCallback) {
        this.callback = callback
    }

    /** Registers the processor used to write segments to WAV files before transcription. */
    fun setAudioProcessor(audioProcessor: AudioProcessor) {
        this.audioProcessor = audioProcessor
    }

    /**
     * Copies the SenseVoice assets, constructs the engine and loads the model files.
     *
     * @return `true` when the engine loaded successfully and is now in use; `false` when any step
     * failed (the failure is logged and not rethrown).
     */
    fun initSenseVoiceModel(): Boolean {
        return try {
            Log.i(TAG, "ASR: init SenseVoice RKNN (scheme A)")

            val modelDir = FileHelper.copySenseVoiceAssets(context)
            val modelPath = File(modelDir, "sense-voice-encoder.rknn").absolutePath
            val embeddingPath = File(modelDir, "embedding.npy").absolutePath
            val bpePath = File(modelDir, "chn_jpn_yue_eng_ko_spectok.bpe.model").absolutePath

            logNativeLibraryDir()

            Log.i(TAG, "SenseVoice model paths:")
            Log.i(TAG, describeFile("model", modelPath))
            Log.i(TAG, describeFile("embedding", embeddingPath))
            Log.i(TAG, describeFile("bpe", bpePath))

            val startMs = SystemClock.elapsedRealtime()
            val engine = try {
                SenseVoiceEngineRKNN(context)
            } catch (e: UnsatisfiedLinkError) {
                // Convert the Error into an Exception so the outer handler turns it into `false`.
                throw IllegalStateException("Load native libraries failed: ${e.message}", e)
            }

            val ok = try {
                engine.loadModelDirectly(modelPath, embeddingPath, bpePath)
            } catch (t: Throwable) {
                throw IllegalStateException("SenseVoice loadModelDirectly crashed: ${t.message}", t)
            }

            val elapsedMs = SystemClock.elapsedRealtime() - startMs
            Log.i(TAG, "SenseVoice loadModelDirectly ok=$ok costMs=$elapsedMs")
            if (!ok) throw IllegalStateException("SenseVoiceEngineRKNN loadModelDirectly returned false")

            senseVoice = engine
            true
        } catch (e: Exception) {
            Log.e(TAG, "Failed to initialize SenseVoice model: ${e.message}", e)
            false
        }
    }

    /**
     * Queues one segment for the worker.
     *
     * @param originalAudio samples as captured; only written to disk by the worker.
     * @param processedAudio samples that are passed to the engine for transcription.
     */
    fun enqueueAudioSegment(originalAudio: FloatArray, processedAudio: FloatArray) {
        // trySend reports failure through its result rather than by throwing, so inspect it.
        val result = asrQueue.trySend(Pair(originalAudio, processedAudio))
        if (result.isFailure) {
            Log.e(TAG, "Failed to enqueue audio segment: ${result.exceptionOrNull()?.message}")
        }
    }

    /** Discards every segment currently waiting in the queue. */
    fun clearQueue() {
        while (asrQueue.tryReceive().isSuccess) {
            // Drop the segment.
        }
    }

    /**
     * Receives segments from the queue and processes them one at a time until the calling
     * coroutine is cancelled or receiving from the queue fails.
     *
     * Cancellation is rethrown so the surrounding coroutine machinery observes it; any other
     * failure is logged and ends the worker.
     */
    suspend fun runAsrWorker() {
        Log.d(TAG, "ASR worker started")
        try {
            while (currentCoroutineContext().isActive) {
                val (originalSeg, processedSeg) = try {
                    Log.d(TAG, "ASR worker waiting for audio segment")
                    asrQueue.receive()
                } catch (e: CancellationException) {
                    throw e
                } catch (e: Exception) {
                    Log.e(TAG, "ASR worker receive failed: ${e.message}")
                    break
                }
                processSegment(originalSeg, processedSeg)
            }
        } catch (e: CancellationException) {
            throw e
        } catch (e: Throwable) {
            Log.e(TAG, "ASR worker error: ${e.message}", e)
        } finally {
            Log.d(TAG, "ASR worker exiting")
        }
    }

    /**
     * Deinitializes the engine and empties the queue.
     *
     * Takes the same lock as transcription, so it waits for an in-progress transcription to
     * finish instead of tearing the engine down underneath it.
     */
    fun release() {
        synchronized(nativeLock) {
            try {
                senseVoice?.deinitialize()
            } catch (e: Exception) {
                Log.e(TAG, "Error deinitializing SenseVoice: ${e.message}")
            }
            senseVoice = null
        }
        clearQueue()
    }

    /** Returns `true` when an engine is present and reports itself as initialized. */
    fun isInitialized(): Boolean = senseVoice?.isInitialized ?: false

    /** Handles one dequeued segment: skip check, save, transcribe, filter, notify. */
    private fun processSegment(originalSeg: FloatArray, processedSeg: FloatArray) {
        Log.d(TAG, "ASR worker received audio segment, size=${processedSeg.size}")

        // Evaluate each query once so the decision and the log line agree.
        val cb = callback
        val shouldSkip = cb?.shouldSkipAsr() == true
        val llmInFlight = cb?.isLlmInFlight() == true
        if (shouldSkip || llmInFlight) {
            Log.d(TAG, "ASR worker skipping segment: shouldSkip=$shouldSkip, llmInFlight=$llmInFlight")
            return
        }

        cb?.onAsrStarted()
        Log.d(TAG, "ASR started: processing audio segment")

        saveAsrAudio(originalSeg, processedSeg)

        val raw = transcribe(processedSeg)
        Log.d(TAG, "ASR raw result: $raw")
        val text = removeTokens(raw)

        val skipReason = filterText(text)
        if (skipReason != null) {
            cb?.onAsrSkipped(skipReason)
            return
        }

        cb?.onAsrResult(text)

        if (BuildConfig.LLM_API_KEY.isBlank()) {
            Log.w(TAG, "LLM API Key is not configured")
            return
        }

        cb?.onLlmCalled(text)
    }

    /** Runs the engine on [samples] under the native lock; returns `""` when it cannot transcribe. */
    private fun transcribe(samples: FloatArray): String = synchronized(nativeLock) {
        val engine = senseVoice
        if (engine == null || !engine.isInitialized) {
            Log.e(TAG, "ASR failed: SenseVoice engine not initialized")
            ""
        } else {
            try {
                engine.transcribeBuffer(samples)
            } catch (t: Throwable) {
                Log.e(TAG, "ASR transcribe failed: ${t.message}", t)
                ""
            }
        }
    }

    /** Logs the native library directory and its contents; diagnostic only, never fails init. */
    private fun logNativeLibraryDir() {
        try {
            val libDir = context.applicationInfo.nativeLibraryDir
            Log.i(TAG, "nativeLibraryDir=$libDir")
            val names = File(libDir).list()?.joinToString(", ") ?: "(empty)"
            Log.i(TAG, "nativeLibraryDir files: $names")
        } catch (e: Exception) {
            Log.w(TAG, "Failed to list nativeLibraryDir: ${e.message}")
        }
    }

    /** Formats one "label=path exists=… size=…" diagnostic line for a model file. */
    private fun describeFile(label: String, path: String): String {
        val file = File(path)
        return " $label=$path exists=${file.exists()} size=${file.length()}"
    }

    /**
     * Writes both versions of a segment as timestamped WAV files in the ASR audio directory.
     * Does nothing beyond resolving the directory when no [AudioProcessor] has been registered;
     * failures are logged and ignored.
     */
    private fun saveAsrAudio(originalAudio: FloatArray, processedAudio: FloatArray) {
        try {
            val timestamp = System.currentTimeMillis()
            val asrAudioDir = FileHelper.getAsrAudioDir(context)

            audioProcessor?.let { processor ->
                val originalFile = File(asrAudioDir, "asr_${timestamp}_original.wav")
                processor.saveAudioAsWav(originalFile, originalAudio, AppConfig.SAMPLE_RATE)
                val processedFile = File(asrAudioDir, "asr_${timestamp}_processed.wav")
                processor.saveAudioAsWav(processedFile, processedAudio, AppConfig.SAMPLE_RATE)
            }
        } catch (e: Exception) {
            Log.e(TAG, "Error saving ASR audio: ${e.message}")
        }
    }

    /**
     * Strips `<|...|>` markers and stray angle-like characters from [text], then trims it and
     * collapses runs of whitespace into single spaces.
     */
    private fun removeTokens(text: String): String =
        text.replace(SPECIAL_TOKEN_REGEX, "")
            .replace(ANGLE_CHARS_REGEX, "")
            .trim()
            .replace(WHITESPACE_REGEX, " ")

    /**
     * Decides whether a cleaned transcription should be discarded.
     *
     * @return a short reason when [text] is blank, is a lone `i`/`I`, or is longer than
     * `AppConfig.Asr.MAX_TEXT_LENGTH`; `null` when the text should be kept.
     */
    private fun filterText(text: String): String? = when {
        text.isBlank() -> "blank text"
        text.length == 1 && text[0].equals('i', ignoreCase = true) -> "single 'i'"
        text.length > AppConfig.Asr.MAX_TEXT_LENGTH -> "too long (${text.length} chars)"
        else -> null
    }

    companion object {
        private const val TAG = "AsrManager"

        // Compiled once instead of once per transcribed segment.
        private val SPECIAL_TOKEN_REGEX = Regex("<\\|[^>]+\\|>")

        // NOTE(review): '>' appears twice in this class; one of them may originally have been a
        // different look-alike character — confirm against the upstream file.
        private val ANGLE_CHARS_REGEX = Regex("[>>≥≫]")

        private val WHITESPACE_REGEX = Regex("\\s+")
    }
}
|
||||
Reference in New Issue
Block a user