package com.digitalperson.asr

import android.content.Context
import android.os.SystemClock
import android.util.Log
import com.digitalperson.BuildConfig
import com.digitalperson.audio.AudioProcessor
import com.digitalperson.config.AppConfig
import com.digitalperson.env.RuntimeEnv
import com.digitalperson.engine.SenseVoiceEngineRKNN
import com.digitalperson.util.FileHelper
import kotlinx.coroutines.CancellationException
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.currentCoroutineContext
import kotlinx.coroutines.isActive
import kotlinx.coroutines.withContext
import java.io.File

/**
 * Queues recorded audio segments and turns them into text.
 *
 * Segments are pushed with [enqueueAudioSegment] and consumed by [runAsrWorker], which first tries the
 * on-device SenseVoice RKNN engine and, when that yields nothing, may fall back to the Tencent cloud
 * one-sentence SDK. Results and skip reasons are reported through [AsrCallback].
 */
class AsrManager(private val context: Context) {

    companion object {
        private const val TAG = "AsrManager"

        // Compiled once; removeTokens() runs for every locally transcribed segment.
        private val SPECIAL_TOKEN_REGEX = Regex("<\\|[^>]+\\|>")
        private val ARROW_CHARS_REGEX = Regex("[>>≥≫]")
        private val WHITESPACE_REGEX = Regex("\\s+")
    }

    /**
     * Local engine instance, or null before a successful [initSenseVoiceModel] / after [release].
     * Volatile because [isInitialized] reads it without holding [nativeLock].
     */
    @Volatile
    private var senseVoice: SenseVoiceEngineRKNN? = null

    /** Guards every call into the native engine (transcription and teardown). */
    private val nativeLock = Any()

    /** Pending segments as (original audio, processed audio) pairs. */
    private val asrQueue = Channel<Pair<FloatArray, FloatArray>>(capacity = Channel.UNLIMITED)

    private var audioProcessor: AudioProcessor? = null

    /** Hooks invoked by [runAsrWorker] while it processes queued segments. */
    interface AsrCallback {
        /** Called right before a dequeued segment is transcribed. */
        fun onAsrStarted()

        /** Called with the recognized text once it has passed the worker's text filter. */
        fun onAsrResult(text: String)

        /** Called with a short reason when the recognized text is rejected by the text filter. */
        fun onAsrSkipped(reason: String)

        /** Checked for every dequeued segment; returning true drops the segment. */
        fun shouldSkipAsr(): Boolean

        /** Checked for every dequeued segment; returning true drops the segment. */
        fun isLlmInFlight(): Boolean

        /** Called after [onAsrResult] when `BuildConfig.LLM_API_KEY` is not blank. */
        fun onLlmCalled(text: String)
    }

    private var callback: AsrCallback? = null

    fun setCallback(callback: AsrCallback) {
        this.callback = callback
    }

    fun setAudioProcessor(audioProcessor: AudioProcessor) {
        this.audioProcessor = audioProcessor
    }

    /**
     * Copies the SenseVoice assets and loads the RKNN model.
     *
     * @return true when the engine was created and loaded; false on an emulator or on any failure
     *         (failures are logged, not thrown).
     */
    fun initSenseVoiceModel(): Boolean {
        if (RuntimeEnv.isEmulator()) {
            Log.w(TAG, "ASR: emulator detected; skip local RKNN init and use cloud ASR")
            return false
        }
        return try {
            Log.i(TAG, "ASR: init SenseVoice RKNN (scheme A)")
            val modelDir = FileHelper.copySenseVoiceAssets(context)
            val modelPath = File(modelDir, "sense-voice-encoder.rknn").absolutePath
            val embeddingPath = File(modelDir, "embedding.npy").absolutePath
            val bpePath = File(modelDir, "chn_jpn_yue_eng_ko_spectok.bpe.model").absolutePath

            logNativeLibraryDir()

            Log.i(TAG, "SenseVoice model paths:")
            logModelFile("model", modelPath)
            logModelFile("embedding", embeddingPath)
            logModelFile("bpe", bpePath)

            val t0 = SystemClock.elapsedRealtime()
            // UnsatisfiedLinkError is an Error, so convert it into an Exception the outer catch handles.
            val engine = try {
                SenseVoiceEngineRKNN(context)
            } catch (e: UnsatisfiedLinkError) {
                throw IllegalStateException("Load native libraries failed: ${e.message}", e)
            }
            val ok = try {
                engine.loadModelDirectly(modelPath, embeddingPath, bpePath)
            } catch (t: Throwable) {
                throw IllegalStateException("SenseVoice loadModelDirectly crashed: ${t.message}", t)
            }
            val dt = SystemClock.elapsedRealtime() - t0
            Log.i(TAG, "SenseVoice loadModelDirectly ok=$ok costMs=$dt")
            if (!ok) throw IllegalStateException("SenseVoiceEngineRKNN loadModelDirectly returned false")

            senseVoice = engine
            true
        } catch (e: Exception) {
            Log.e(TAG, "Failed to initialize SenseVoice model: ${e.message}", e)
            false
        }
    }

    /** Best-effort diagnostic dump of the native library directory; never throws. */
    private fun logNativeLibraryDir() {
        try {
            val libDir = context.applicationInfo.nativeLibraryDir
            Log.i(TAG, "nativeLibraryDir=$libDir")
            try {
                val names = File(libDir).list()?.joinToString(", ") ?: "(empty)"
                Log.i(TAG, "nativeLibraryDir files: $names")
            } catch (t: Throwable) {
                Log.w(TAG, "Failed to list nativeLibraryDir: ${t.message}")
            }
        } catch (t: Throwable) {
            // Diagnostics only: report instead of silently swallowing, but do not fail initialization.
            Log.w(TAG, "Failed to read nativeLibraryDir: ${t.message}")
        }
    }

    /** Logs path, existence and size of one model asset. */
    private fun logModelFile(label: String, path: String) {
        val file = File(path)
        Log.i(TAG, " $label=$path exists=${file.exists()} size=${file.length()}")
    }

    /**
     * Queues one segment for the worker.
     *
     * @param originalAudio the unprocessed recording (used for the cloud fallback and debugging dumps)
     * @param processedAudio the processed recording (fed to the local engine)
     */
    fun enqueueAudioSegment(originalAudio: FloatArray, processedAudio: FloatArray) {
        // trySend reports failure through its result rather than by throwing.
        val result = asrQueue.trySend(Pair(originalAudio, processedAudio))
        if (result.isFailure) {
            Log.e(TAG, "Failed to enqueue audio segment: ${result.exceptionOrNull()?.message}")
        }
    }

    /** Drops every segment that is currently waiting in the queue. */
    fun clearQueue() {
        while (asrQueue.tryReceive().isSuccess) {
            // Keep draining until the queue reports no more elements.
        }
    }

    /**
     * Consumes queued segments until the calling coroutine is cancelled or the queue fails.
     *
     * For each segment: skip it if the callback asks to, otherwise notify [AsrCallback.onAsrStarted],
     * dump the audio, transcribe (local engine first, cloud fallback second), filter the text and
     * report the outcome. Cancellation is propagated to the caller; any other failure is logged and
     * ends the worker.
     */
    suspend fun runAsrWorker() {
        Log.d(TAG, "ASR worker started")
        try {
            while (currentCoroutineContext().isActive) {
                val (originalSeg, processedSeg) = try {
                    Log.d(TAG, "ASR worker waiting for audio segment")
                    asrQueue.receive()
                } catch (e: CancellationException) {
                    throw e
                } catch (e: Throwable) {
                    Log.e(TAG, "ASR worker receive failed: ${e.message}")
                    break
                }
                Log.d(TAG, "ASR worker received audio segment, size=${processedSeg.size}")

                // Evaluate each flag once so the log line reflects the values the decision was based on.
                val shouldSkip = callback?.shouldSkipAsr() == true
                val llmInFlight = callback?.isLlmInFlight() == true
                if (shouldSkip || llmInFlight) {
                    Log.d(TAG, "ASR worker skipping segment: shouldSkip=$shouldSkip, llmInFlight=$llmInFlight")
                    continue
                }

                callback?.onAsrStarted()
                Log.d(TAG, "ASR started: processing audio segment")
                saveAsrAudio(originalSeg, processedSeg)

                val localText = transcribeLocally(processedSeg)
                val text = if (localText.isNotBlank()) localText else transcribeWithCloud(originalSeg)

                val filterResult = filterText(text)
                if (filterResult != null) {
                    callback?.onAsrSkipped(filterResult)
                    continue
                }

                callback?.onAsrResult(text)
                if (BuildConfig.LLM_API_KEY.isBlank()) {
                    Log.w(TAG, "LLM API Key is not configured")
                    continue
                }
                callback?.onLlmCalled(text)
            }
        } catch (e: CancellationException) {
            // Cooperative cancellation must reach the caller; it is not a worker error.
            throw e
        } catch (e: Throwable) {
            Log.e(TAG, "ASR worker error: ${e.message}", e)
        } finally {
            Log.d(TAG, "ASR worker exiting")
        }
    }

    /**
     * Runs the local engine on [processedSeg] while holding [nativeLock].
     *
     * @return the cleaned, trimmed transcript, or "" when the engine is missing, not initialized, or fails.
     */
    private fun transcribeLocally(processedSeg: FloatArray): String =
        synchronized(nativeLock) {
            val engine = senseVoice
            if (engine == null || !engine.isInitialized) {
                ""
            } else {
                try {
                    removeTokens(engine.transcribeBuffer(processedSeg))
                } catch (t: Throwable) {
                    Log.e(TAG, "ASR transcribe failed: ${t.message}")
                    ""
                }
            }
        }.trim()

    /**
     * Cloud fallback for when the local engine produced no text.
     *
     * Used only when the Tencent SDK is bundled and either we run on an emulator or the local engine is
     * not initialized: Tencent Cloud "one-sentence recognition" SDK (app/libs/asr-one-sentence-release.aar).
     *
     * @return the trimmed transcript, or "" when the fallback is unavailable or fails.
     */
    private suspend fun transcribeWithCloud(originalSeg: FloatArray): String {
        val shouldTryTencent =
            BuildConfig.HAS_TENCENT_ASR_SDK && (RuntimeEnv.isEmulator() || !isInitialized())
        if (!shouldTryTencent) {
            Log.e(
                TAG,
                "ASR failed: local RKNN not ready and Tencent SDK unavailable " +
                    "(add libs/asr-one-sentence-release.aar or fix SenseVoice init)"
            )
            return ""
        }
        return withContext(Dispatchers.IO) {
            try {
                // Cloud ASR uses the original recording (without AEC/NS):
                // AEC/NS is unavailable on the emulator, so processedSeg may end up close to silence.
                TencentOneSentenceAsr.transcribePcm16Mono(originalSeg)
            } catch (e: CancellationException) {
                throw e
            } catch (t: Throwable) {
                Log.e(TAG, "Tencent ASR failed: ${t.message}")
                ""
            }
        }.trim()
    }

    /**
     * Tears down the local engine and drops all queued segments.
     *
     * Teardown happens under [nativeLock] so it cannot overlap with a transcription in the worker.
     */
    fun release() {
        synchronized(nativeLock) {
            try {
                senseVoice?.deinitialize()
            } catch (e: Exception) {
                Log.e(TAG, "Error deinitializing SenseVoice: ${e.message}")
            }
            senseVoice = null
        }
        clearQueue()
    }

    /** True when a local engine exists and reports itself as initialized. */
    fun isInitialized(): Boolean = senseVoice?.isInitialized ?: false

    /**
     * Writes both variants of a segment as timestamped WAV files into the ASR audio directory.
     * Does nothing beyond resolving the directory when no [AudioProcessor] is set; failures are logged.
     */
    private fun saveAsrAudio(originalAudio: FloatArray, processedAudio: FloatArray) {
        try {
            val timestamp = System.currentTimeMillis()
            val asrAudioDir = FileHelper.getAsrAudioDir(context)
            audioProcessor?.let { processor ->
                val originalFile = File(asrAudioDir, "asr_${timestamp}_original.wav")
                processor.saveAudioAsWav(originalFile, originalAudio, AppConfig.SAMPLE_RATE)
                val processedFile = File(asrAudioDir, "asr_${timestamp}_processed.wav")
                processor.saveAudioAsWav(processedFile, processedAudio, AppConfig.SAMPLE_RATE)
            }
        } catch (e: Exception) {
            Log.e(TAG, "Error saving ASR audio: ${e.message}")
        }
    }

    /**
     * Strips `<|...|>` style tokens and the characters matched by [ARROW_CHARS_REGEX] from [text],
     * then trims it and collapses whitespace runs to a single space.
     */
    private fun removeTokens(text: String): String =
        text.replace(SPECIAL_TOKEN_REGEX, "")
            .replace(ARROW_CHARS_REGEX, "")
            .trim()
            .replace(WHITESPACE_REGEX, " ")

    /**
     * Decides whether recognized text should be discarded.
     *
     * @return a short rejection reason, or null when the text is acceptable.
     */
    private fun filterText(text: String): String? = when {
        text.isBlank() -> "blank text"
        text.length == 1 && text[0].equals('i', ignoreCase = true) -> "single 'i'"
        text.length > AppConfig.Asr.MAX_TEXT_LENGTH -> "too long (${text.length} chars)"
        else -> null
    }
}