# Import FastAPI Libs
from pydantic import BaseModel

# Import Qcloud ASR Service Libs
import sys
# sys.path.append("../qcloud")
from qcloud.common import credential
from qcloud.asr import flash_recognizer

# Common log part
import os
import logging
from common.script.logging_manager import LoggingManager

# Base name of this source file; passed as the second argument of every
# lm.log(...) call in this module.
curr_module = os.path.basename(__file__)
# Logging manager used by the functions below, obtained through the class's
# get_instance() accessor rather than by direct construction
# (presumably a shared singleton -- TODO confirm in LoggingManager).
lm = LoggingManager.get_instance()

# Import Other Common Libs
import json
import base64
import os
import random
import string

# ------------------------------------------------
# Define constants
# ------------------------------------------------
# Result codes stored in asr_res_basemodel.result.
c_ret_code_success = 0
c_ret_code_error = 1

# Entry name looked up under the "asr" section of the credential file.
user_name = "asr-program"
# Credential JSON location; a relative path, so it is resolved against the
# process working directory when opened.
credential_file = "credential/credential.json"


# ------------------------------------------------
# Define ASR Service Request and Response
# ------------------------------------------------
class asr_req_basemodel(BaseModel):
    """Request payload for a speech-recognition (ASR) call.

    All four fields are required strings. In the code visible in this module,
    ``get_detected_text`` reads only ``audio_b64_string``; the other fields are
    carried in the payload but not consumed here.
    """
    asr_tool: str # ASR backend selector -- W: Whisper; Q: Qcloud
    model: str  # model name; not read by the Qcloud path in this module
    audio_b64_string: str  # audio content encoded as a base64 string
    initial_prompt: str  # not read by the Qcloud path; presumably a hint for the Whisper backend -- TODO confirm

class asr_res_basemodel(BaseModel):
    """Response payload describing the outcome of a speech-recognition call."""
    result: int  # c_ret_code_success (0) or c_ret_code_error (1)
    result_message: str  # empty on success; error description on failure
    detected_text: str  # recognized text; empty string on failure


# ------------------------------------------------
# Get credentials
# ------------------------------------------------
def get_credentials(file: str, user_name: str) -> tuple:
    """Load the ASR credentials of one user from a JSON credential file.

    The file is expected to have the shape
    ``{"asr": {<user_name>: {"app_id": ..., "secret_id": ..., "secret_key": ...}}}``.

    Args:
        file: Path of the JSON credential file.
        user_name: Key of the entry to read under the top-level ``"asr"`` object.

    Returns:
        ``(app_id, secret_id, secret_key)`` on success, or
        ``(None, None, None)`` when the file cannot be read or parsed, or the
        requested entry is missing or incomplete. Failures are logged and never
        raised to the caller.
    """
    try:
        # Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
        # depending on the platform's locale default.
        with open(file, "r", encoding="utf-8") as fp:
            data = json.load(fp)

        entry = data['asr'][user_name]
        return entry['app_id'], entry['secret_id'], entry['secret_key']

    except Exception as e:
        # Deliberately broad: callers rely on the (None, None, None) contract
        # for every kind of failure. Logged at ERROR because a missing
        # credential makes every recognition request fail; repr() keeps the
        # exception type visible (str(KeyError) is only the quoted key).
        lm.log(logging.ERROR, curr_module,
               f"Failed to load credentials from {file}: {e!r}")
        return None, None, None


# ------------------------------------------------
# Send an ASR request to the Qcloud flash-recognition service
# ------------------------------------------------
def _asr_failure(message: str) -> asr_res_basemodel:
    """Build the error response shared by every failure path."""
    return asr_res_basemodel(
        result=c_ret_code_error,
        result_message=message,
        detected_text='',
    )


def get_detected_text(asr_req: asr_req_basemodel) -> asr_res_basemodel:
    """Transcribe base64-encoded audio with the Qcloud flash recognizer.

    Credentials are loaded from ``credential_file`` for ``user_name``. The
    audio in ``asr_req.audio_b64_string`` is decoded and submitted with the
    hard-coded engine ``"16k_en"`` and voice format ``"aac"``. Only the first
    channel of the recognition result is returned.

    Args:
        asr_req: Request payload; only ``audio_b64_string`` is read here.

    Returns:
        An ``asr_res_basemodel``. On success ``result`` is
        ``c_ret_code_success`` and ``detected_text`` holds the first channel's
        text (empty if the service returned no channel). On any failure
        ``result`` is ``c_ret_code_error`` and ``result_message`` describes
        the problem. No exception propagates to the caller.
    """
    lm.log(logging.INFO, curr_module, "qcloud get_detected_text start")

    engine_type = "16k_en"

    try:
        app_id, secret_id, secret_key = get_credentials(credential_file, user_name)

        # Validate before building any SDK object from the values.
        if app_id is None or secret_id is None or secret_key is None:
            lm.log(logging.ERROR, curr_module, 'Failed to get credential')
            return _asr_failure('Failed to get credential')

        cred = credential.Credential(secret_id, secret_key)

        # Create the FlashRecognizer; one recognizer can serve any number of
        # recognition requests.
        recognizer = flash_recognizer.FlashRecognizer(app_id, cred)

        # Build the recognition request.
        req = flash_recognizer.FlashRecognitionRequest(engine_type)
        req.set_filter_modal(0)
        req.set_filter_punc(0)
        req.set_filter_dirty(0)
        # "aac" currently decodes recordings made by macOS Safari/Chrome
        # (iOS Safari still to be confirmed); "wav" was the earlier setting.
        req.set_voice_format("aac")
        req.set_word_info(0)
        req.set_convert_num_mode(1)

        # recognize() takes the audio bytes themselves, so the decoded payload
        # is passed straight through; no temporary file is written, which
        # also means nothing is left behind in /tmp.
        audio_bytes = base64.b64decode(asr_req.audio_b64_string)

        # Run the recognition and parse the JSON reply.
        resp = json.loads(recognizer.recognize(req, audio_bytes))
        request_id = resp["request_id"]
        code = resp["code"]
        if code != 0:
            # A non-zero code is a service-side failure: log the details and
            # return a generic message to the caller.
            lm.log(
                logging.ERROR,
                curr_module,
                f"recognize faild! request_id: {request_id} code: {code}, "
                f"message: {resp.get('message')}",
            )
            return _asr_failure('Recognize faild! Please find detail in log.')

        lm.log(logging.INFO, curr_module, f"request_id: {request_id}")

        # Each entry of flash_result is the result for one audio channel.
        # Most audio is mono, so only the first channel is reported.
        detected_text = ''
        first_channel = next(iter(resp["flash_result"]), None)
        if first_channel is not None:
            lm.log(logging.INFO, curr_module,
                   "channel_id: " + str(first_channel["channel_id"]))
            lm.log(logging.INFO, curr_module, first_channel["text"])
            detected_text = first_channel["text"]

        lm.log(logging.INFO, curr_module, "qcloud get_detected_text complete")

        # Always return a response object, including when the service
        # reported success but sent back no channel.
        return asr_res_basemodel(
            result=c_ret_code_success,
            result_message='',
            detected_text=detected_text,
        )

    except Exception as e:
        # Service boundary: turn any unexpected failure (bad base64, SDK or
        # network error, malformed reply) into an error response.
        lm.log(logging.ERROR, curr_module, str(e))
        return _asr_failure(str(e))