# Import FastAPI Libs
from pydantic import BaseModel

# Import Qcloud ASR Service Libs
import sys
# sys.path.append("../qcloud")
from qcloud.common import credential
from qcloud.asr import flash_recognizer

# Common log part
import os
import logging
from common.script.logging_manager import LoggingManager
curr_module = os.path.basename(__file__)

# Initialize logging manager
lm = LoggingManager.get_instance()
# lm = LoggingManager()

# Import Other Common Libs
import json
import base64
import random
import string

# ------------------------------------------------
# Define constants
# ------------------------------------------------
c_ret_code_success = 0
c_ret_code_error = 1
user_name = 'asr-program'
credential_file = 'credential/credential.json'


# ------------------------------------------------
# Define ASR Service Request and Response
# ------------------------------------------------
class asr_req_basemodel(BaseModel):
    """Request payload for an ASR call."""

    asr_tool: str  # W: Whisper; Q: Qcloud
    model: str
    audio_b64_string: str  # audio content, Base64-encoded
    initial_prompt: str


class asr_res_basemodel(BaseModel):
    """Response payload for an ASR call."""

    result: int  # c_ret_code_success or c_ret_code_error
    result_message: str  # error description; empty on success
    detected_text: str  # recognized text; empty on failure


# ------------------------------------------------
# Get credentials
# ------------------------------------------------
def get_credentials(file: str, user_name: str) -> tuple:
    """Read the Qcloud ASR credentials of *user_name* from a JSON file.

    The file is expected to contain
    ``{"asr": {<user_name>: {"app_id": ..., "secret_id": ..., "secret_key": ...}}}``.

    Args:
        file: Path of the JSON credential file.
        user_name: Key under ``"asr"`` whose credentials are returned.

    Returns:
        ``(app_id, secret_id, secret_key)`` on success, or
        ``(None, None, None)`` when the file cannot be read or parsed or an
        expected key is missing (the error is logged).
    """
    try:
        # Open and read the JSON file
        with open(file, "r", encoding="utf-8") as fp:
            data = json.load(fp)

        # Access data from the JSON
        entry = data['asr'][user_name]
        return entry['app_id'], entry['secret_id'], entry['secret_key']
    except (OSError, ValueError, KeyError, TypeError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        lm.log(logging.ERROR, curr_module, str(e))
        return None, None, None


# ------------------------------------------------
# Function of ASR request to Qcloud flash recognizer
# ------------------------------------------------
def get_detected_text(asr_req: asr_req_basemodel) -> asr_res_basemodel:
    """Recognize the Base64-encoded audio in *asr_req* with Qcloud flash ASR.

    Args:
        asr_req: Request whose ``audio_b64_string`` holds the audio to
            recognize. The audio is submitted as ``aac`` with engine
            ``16k_en``.

    Returns:
        An ``asr_res_basemodel``. On success ``result`` is
        ``c_ret_code_success`` and ``detected_text`` holds the recognized
        text. On any failure (missing credentials, non-zero service code, or
        an exception) ``result`` is ``c_ret_code_error``, ``detected_text``
        is empty and ``result_message`` describes the error. This function
        does not raise; every exception is logged and reported in the
        response.
    """
    lm.log(logging.INFO, curr_module, "qcloud get_detected_text start")

    engine_type = "16k_en"

    # Initialize the response object
    asr_resp = asr_res_basemodel(
        result=c_ret_code_success,
        result_message='',
        detected_text='',
    )

    try:
        app_id, secret_id, secret_key = get_credentials(credential_file, user_name)
        # Validate before building the Credential object so a missing value
        # is reported as a credential problem.
        if app_id is None or secret_id is None or secret_key is None:
            lm.log(logging.ERROR, curr_module, 'Failed to get credential')
            asr_resp.result = c_ret_code_error
            asr_resp.detected_text = ''
            asr_resp.result_message = 'Failed to get credential'
            return asr_resp

        cred = credential.Credential(secret_id, secret_key)

        # Create a FlashRecognizer; one recognizer can serve N requests
        recognizer = flash_recognizer.FlashRecognizer(app_id, cred)

        # Create the recognition request
        req = flash_recognizer.FlashRecognitionRequest(engine_type)
        req.set_filter_modal(0)
        req.set_filter_punc(0)
        req.set_filter_dirty(0)
        # aac currently works for decoding recordings from MacOS
        # Safari/Chrome; iOS Safari is still to be confirmed
        # req.set_voice_format("wav")
        req.set_voice_format("aac")
        req.set_word_info(0)
        req.set_convert_num_mode(1)

        # Decode the Base64 payload and pass the bytes straight to the
        # recognizer; no temporary file is needed (and none is left behind).
        audio_bytes = base64.b64decode(asr_req.audio_b64_string)

        # Run the recognition
        result_data = recognizer.recognize(req, audio_bytes)
        resp = json.loads(result_data)
        request_id = resp["request_id"]
        code = resp["code"]
        if code != 0:
            lm.log(
                logging.ERROR,
                curr_module,
                "recognize faild! request_id: {} code: {}, message: {}".format(
                    request_id, code, resp.get("message")
                ),
            )
            # `resp` is the parsed JSON dict; the error must be reported on
            # the response model, not on the dict.
            asr_resp.result = c_ret_code_error
            asr_resp.detected_text = ''
            asr_resp.result_message = 'Recognize faild! Please find detail in log.'
            return asr_resp

        lm.log(logging.INFO, curr_module, "request_id: " + str(request_id))

        # One channel result corresponds to one audio channel; most audio is
        # mono, so there is normally exactly one entry.
        # NOTE(review): with several channels the last channel's text is
        # kept -- confirm this is the intended behavior.
        detected_text = ''
        for channel_result in resp["flash_result"]:
            lm.log(logging.INFO, curr_module,
                   "channel_id: " + str(channel_result["channel_id"]))
            lm.log(logging.INFO, curr_module, channel_result["text"])
            detected_text = channel_result["text"]

        asr_resp.result = c_ret_code_success
        asr_resp.detected_text = detected_text
        asr_resp.result_message = ''

        lm.log(logging.INFO, curr_module, "qcloud get_detected_text complete")
        return asr_resp

    except Exception as e:
        # Service boundary: report every failure in the response instead of
        # propagating it to the caller.
        lm.log(logging.ERROR, curr_module, str(e))
        asr_resp.result = c_ret_code_error
        asr_resp.detected_text = ''
        asr_resp.result_message = str(e)
        return asr_resp