// digital_person/app/src/main/java/com/digitalperson/face/FaceDetectionPipeline.kt

package com.digitalperson.face

import android.content.Context
import android.graphics.Bitmap
import android.util.Log
import com.digitalperson.config.AppConfig
import com.digitalperson.engine.RetinaFaceEngineRKNN
import java.util.concurrent.atomic.AtomicBoolean
import kotlin.coroutines.cancellation.CancellationException
import kotlin.math.abs
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.cancel
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext

/**
 * Axis-aligned bounding box of a single detected face plus the detector's score for it.
 *
 * NOTE(review): the pipeline compares these coordinates against pixel thresholds and
 * against the source bitmap's width/height, so they appear to be pixel coordinates in
 * the source frame — confirm against the detection engine's output contract.
 *
 * @property left x coordinate of the box's left edge.
 * @property top y coordinate of the box's top edge.
 * @property right x coordinate of the box's right edge.
 * @property bottom y coordinate of the box's bottom edge.
 * @property score value the detector reported for this box; presumably a confidence —
 *   TODO confirm its range.
 */
data class FaceBox(
    val left: Float,
    val top: Float,
    val right: Float,
    val bottom: Float,
    val score: Float,
)

/**
 * Outcome of running detection on one frame, as delivered to the pipeline's result callback.
 *
 * @property sourceWidth width of the bitmap the detection ran on.
 * @property sourceHeight height of the bitmap the detection ran on.
 * @property faces the faces kept for this frame (the pipeline drops boxes it considers
 *   too small before building this list); empty when nothing qualified.
 */
data class FaceDetectionResult(
    val sourceWidth: Int,
    val sourceHeight: Int,
    val faces: List<FaceBox>,
)

/**
 * Detects faces in submitted frames, tracks the largest one across frames and emits a
 * greeting once that face has been stable and roughly frontal for long enough.
 *
 * Frames are processed one at a time on [Dispatchers.Default]; a frame submitted while
 * another is still being processed is dropped. Both callbacks are invoked on
 * [Dispatchers.Main].
 *
 * The tracking fields below are only touched from the single in-flight processing
 * coroutine, which is serialised by [frameInFlight].
 *
 * @param context Android context handed to the detector and the recognizer.
 * @param onResult receives the filtered detection result for every processed frame.
 * @param onGreeting receives the greeting text when a tracked face qualifies for one.
 */
class FaceDetectionPipeline(
    private val context: Context,
    private val onResult: (FaceDetectionResult) -> Unit,
    private val onGreeting: (String) -> Unit,
) {
    private val engine = RetinaFaceEngineRKNN()
    private val recognizer = FaceRecognizer(context)
    private val scope = CoroutineScope(SupervisorJob() + Dispatchers.Default)

    // True while a frame is being processed; used to drop frames instead of queueing them.
    private val frameInFlight = AtomicBoolean(false)
    private val initialized = AtomicBoolean(false)

    // Box of the primary face seen in the previous processed frame, or null if there was none.
    private var trackFace: FaceBox? = null

    // Incremented every time the primary face is considered a different face than before.
    private var trackId: Long = 0

    // Wall-clock time (ms) at which the current track started.
    private var trackStableSinceMs: Long = 0

    // Track id that has already been greeted, or -1 if the current track has not been.
    private var greetedTrackId: Long = -1

    // Wall-clock time (ms) of the last greeting, used for the greeting cooldown.
    private var lastGreetMs: Long = 0

    /**
     * Initializes the detector and the recognizer.
     *
     * Both are always attempted, even if the first one fails, so the log line reports
     * the status of each.
     *
     * @return true only when both components initialized successfully.
     */
    fun initialize(): Boolean {
        val detectorOk = engine.initialize(context)
        val recognizerOk = recognizer.initialize()
        val ok = detectorOk && recognizerOk
        initialized.set(ok)
        Log.i(AppConfig.TAG, "Face pipeline initialize result=$ok detector=$detectorOk recognizer=$recognizerOk")
        return ok
    }

    /**
     * Hands a frame to the pipeline. Ownership of [bitmap] transfers to the pipeline:
     * it is recycled whether the frame is processed, dropped or fails.
     *
     * The frame is dropped when the pipeline is not initialized or when a previous
     * frame is still being processed.
     */
    fun submitFrame(bitmap: Bitmap) {
        // Short-circuit keeps the original order: the in-flight flag is only claimed
        // when the pipeline is initialized.
        if (!initialized.get() || !frameInFlight.compareAndSet(false, true)) {
            bitmap.recycle()
            return
        }

        scope.launch {
            try {
                val frameWidth = bitmap.width
                val frameHeight = bitmap.height
                val raw = engine.detect(bitmap)

                // The engine output is consumed as consecutive groups of
                // [left, top, right, bottom, score]; a trailing partial group is ignored.
                val faces = List(raw.size / FLOATS_PER_FACE) { index ->
                    val base = index * FLOATS_PER_FACE
                    FaceBox(
                        left = raw[base],
                        top = raw[base + 1],
                        right = raw[base + 2],
                        bottom = raw[base + 3],
                        score = raw[base + 4],
                    )
                }

                // Drop faces that are too small in either dimension.
                val filteredFaces = faces.filter { face ->
                    (face.right - face.left) > MIN_FACE_SIZE_PX &&
                        (face.bottom - face.top) > MIN_FACE_SIZE_PX
                }

                maybeRecognizeAndGreet(bitmap, filteredFaces)
                withContext(Dispatchers.Main) {
                    onResult(FaceDetectionResult(frameWidth, frameHeight, filteredFaces))
                }
            } catch (e: CancellationException) {
                // Cancellation is not a failure: let it propagate so the coroutine
                // completes as cancelled instead of being logged as an error.
                throw e
            } catch (t: Throwable) {
                Log.e(AppConfig.TAG, "Face detection pipeline failed: ${t.message}", t)
            } finally {
                bitmap.recycle()
                frameInFlight.set(false)
            }
        }
    }

    /**
     * Updates the track for the largest face in [faces] and, when that face has been
     * stable long enough, looks frontal, has not been greeted yet and the cooldown has
     * elapsed, runs recognition and emits a greeting via [onGreeting].
     *
     * When recognition finds no match, an embedding is extracted and offered to the
     * recognizer as a new face.
     */
    private suspend fun maybeRecognizeAndGreet(bitmap: Bitmap, faces: List<FaceBox>) {
        val now = System.currentTimeMillis()
        if (faces.isEmpty()) {
            trackFace = null
            trackStableSinceMs = 0
            return
        }

        // The primary face is the one with the largest box area.
        val primary = faces.maxByOrNull { (it.right - it.left) * (it.bottom - it.top) } ?: return
        val previous = trackFace
        if (previous == null || iou(previous, primary) < AppConfig.Face.TRACK_IOU_THRESHOLD) {
            // Too little overlap with the previous box: treat it as a new track.
            trackId += 1
            greetedTrackId = -1
            trackStableSinceMs = now
        }
        trackFace = primary

        val stableMs = now - trackStableSinceMs
        val frontal = isFrontal(primary, bitmap.width, bitmap.height)
        val coolingDown = (now - lastGreetMs) < AppConfig.FaceRecognition.GREETING_COOLDOWN_MS
        if (stableMs < AppConfig.Face.STABLE_MS || !frontal || greetedTrackId == trackId || coolingDown) {
            return
        }

        val match = recognizer.identify(bitmap, primary)
        val matchedName = match.matchedName
        val isKnownFace = !matchedName.isNullOrBlank()

        Log.d(AppConfig.TAG, "[Face] Recognition result: matchedName=$matchedName, similarity=${match.similarity}")

        if (isKnownFace) {
            Log.d(AppConfig.TAG, "[Face] Matched existing face: $matchedName")
        } else {
            // No match: try to store this face as a new one.
            Log.d(AppConfig.TAG, "[Face] No match found, attempting to add new face")
            val embedding = extractEmbedding(bitmap, primary)
            Log.d(AppConfig.TAG, "[Face] Extracted embedding size: ${embedding.size}")
            if (embedding.isNotEmpty()) {
                val added = recognizer.addNewFace(embedding)
                Log.d(AppConfig.TAG, "[Face] Add new face result: $added")
                if (added) {
                    Log.i(AppConfig.TAG, "[Face] New face added to database")
                } else {
                    Log.i(AppConfig.TAG, "[Face] Face already exists in database (similar face found)")
                }
            } else {
                Log.w(AppConfig.TAG, "[Face] Failed to extract embedding")
            }
        }

        val greeting = if (isKnownFace) {
            "你好，$matchedName！"
        } else {
            "你好，很高兴见到你。"
        }
        greetedTrackId = trackId
        lastGreetMs = now
        Log.i(
            AppConfig.TAG,
            "[Face] greeting track=$trackId stable=${stableMs}ms frontal=$frontal matched=$matchedName score=${match.similarity}"
        )
        withContext(Dispatchers.Main) {
            onGreeting(greeting)
        }
    }

    /** Delegates embedding extraction for [face] in [bitmap] to the recognizer. */
    private fun extractEmbedding(bitmap: Bitmap, face: FaceBox): FloatArray =
        recognizer.extractEmbedding(bitmap, face)

    /**
     * Heuristic "is this face looking at the camera" check based only on the box:
     * it must be large enough, close to square, and centred within the middle band
     * of the frame on both axes.
     */
    private fun isFrontal(face: FaceBox, frameW: Int, frameH: Int): Boolean {
        val boxW = face.right - face.left
        val boxH = face.bottom - face.top
        if (boxW < AppConfig.Face.FRONTAL_MIN_FACE_SIZE || boxH < AppConfig.Face.FRONTAL_MIN_FACE_SIZE) {
            return false
        }
        val aspectDiff = abs((boxW / boxH) - 1f)
        if (aspectDiff > AppConfig.Face.FRONTAL_MAX_ASPECT_DIFF) {
            return false
        }
        val centerX = (face.left + face.right) * 0.5f
        val centerY = (face.top + face.bottom) * 0.5f
        val xBand = (frameW * CENTER_BAND_MIN)..(frameW * CENTER_BAND_MAX)
        val yBand = (frameH * CENTER_BAND_MIN)..(frameH * CENTER_BAND_MAX)
        return centerX in xBand && centerY in yBand
    }

    /**
     * Intersection-over-union of two boxes.
     *
     * @return a value in 0..1, or 0 when the union has no area.
     */
    private fun iou(a: FaceBox, b: FaceBox): Float {
        val overlapW = maxOf(0f, minOf(a.right, b.right) - maxOf(a.left, b.left))
        val overlapH = maxOf(0f, minOf(a.bottom, b.bottom) - maxOf(a.top, b.top))
        val inter = overlapW * overlapH
        val areaA = maxOf(0f, a.right - a.left) * maxOf(0f, a.bottom - a.top)
        val areaB = maxOf(0f, b.right - b.left) * maxOf(0f, b.bottom - b.top)
        val union = areaA + areaB - inter
        return if (union <= 0f) 0f else inter / union
    }

    /**
     * Stops accepting frames, cancels pending work and releases the detector and recognizer.
     *
     * NOTE(review): cancellation is cooperative, so a detection call that is already
     * running is not interrupted before the engine is released — confirm the engine
     * tolerates that, or wait for the in-flight frame before releasing.
     */
    fun release() {
        // Close the gate first so submitFrame cannot launch onto the cancelled scope;
        // a coroutine launched there never runs, which would leave the bitmap
        // un-recycled and the in-flight flag stuck.
        initialized.set(false)
        scope.cancel()
        engine.release()
        recognizer.release()
    }

    private companion object {
        // Number of floats the detector emits per face: left, top, right, bottom, score.
        const val FLOATS_PER_FACE = 5

        // Minimum box width and height (pixels) for a face to be kept.
        const val MIN_FACE_SIZE_PX = 50f

        // Fractions of the frame bounding the band a face centre must fall in to count as frontal.
        const val CENTER_BAND_MIN = 0.15f
        const val CENTER_BAND_MAX = 0.85f
    }
}