# -*- coding: utf-8 -*-
# ------------------------------------------------------------------------------
# File Name: whisper_service.py
# Original Author: Clark Lin
# Email: clark_lin@outlook.com
#
# Change History
# Version       Date        By          Description
# 0.01          2024-05-09  Clark Lin   Initial version
#
# Main features summary:
#   - Implementation of Whisper automatic speech recognition (ASR)
#
# Copyright Information:
#   Copyright © 2024 Oasis
#   Licensed TBD
# ------------------------------------------------------------------------------

# Common fastapi part
import fastapi_security_util
from pydantic import BaseModel
from jose import JWTError, jwt
from fastapi import HTTPException, status

# Common log part
import os
import logging
from common.script.logging_manager import LoggingManager
curr_module = os.path.basename(__file__)

# Initialize logging manager
lm = LoggingManager.get_instance()
# lm = LoggingManager()

# Whisper part
import whisper
import ssl
import base64
import io
import random
import string
# Ignore SSL verification.
# NOTE(review): this swaps the default HTTPS context factory for the unverified
# one for the whole process, not just for this module. Presumably needed so the
# Whisper model download works in this environment -- confirm it is still required.
ssl._create_default_https_context = ssl._create_unverified_context

# ------------------------------------------------
# Model Definition
# ------------------------------------------------
# Request payload for read_audio.
class RawAudio(BaseModel):
    model: str              # passed to whisper.load_model()
    audio_b64_string: str   # base64-encoded audio content
    initial_prompt: str     # forwarded to model.transcribe(initial_prompt=...)


# Response payload returned by read_audio.
class RespAudio(BaseModel):
    text: str               # transcription text; '' when transcription failed
    error_message: str      # str() of the caught exception; '' on success


# ------------------------------------------------
# Sub Function - Verify Access Token
# ------------------------------------------------
def verify_token(token: str) -> bool:
    """Check whether *token* is a decodable JWT carrying a "sub" claim.

    The secret key is read through ``fastapi_security_util.get_credentials``
    and the token is decoded with ``fastapi_security_util.algorithm``.

    Args:
        token: Encoded JWT string.

    Returns:
        True when the token decodes and its "sub" claim is not None;
        False when the claim is missing or decoding raises ``JWTError``.
    """
    # get_credentials returns a pair; only the secret key is needed here.
    secret_key, _client_db = fastapi_security_util.get_credentials(
        fastapi_security_util.credential_file
    )

    try:
        payload = jwt.decode(
            token, secret_key, algorithms=[fastapi_security_util.algorithm]
        )
    except JWTError as e:
        # Log the caught exception instance (not the JWTError class) so the
        # log records the actual reason the token was rejected.
        lm.log(logging.ERROR, curr_module, 'JWTError: ', str(e))
        return False

    return payload.get("sub") is not None


# ------------------------------------------------
# Sub Function - Read Audio
# ------------------------------------------------
def read_audio(token: str, audio: RawAudio):
    """Transcribe base64-encoded audio with Whisper after checking the token.

    Args:
        token: Access token, validated with ``verify_token``.
        audio: Request carrying the Whisper model name, the base64 audio
            string and the initial prompt.

    Returns:
        A ``RespAudio``. On success ``text`` holds ``result["text"]`` from
        ``model.transcribe`` and ``error_message`` is ''. On any exception
        during loading/decoding/transcription ``text`` is '' and
        ``error_message`` is ``str(exception)``.

    Raises:
        HTTPException: 401 when ``verify_token`` rejects the token.
    """
    # Function-scope import: tempfile is not imported at module level.
    import tempfile

    # lm.log(logging.INFO, curr_module, "token: " + token)
    lm.log(logging.INFO, curr_module, "read_audio start")

    if not verify_token(token=token):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication Failed",
            headers={"WWW-Authenticate": "Bearer"},
        )

    resp = RespAudio(
        text='',
        error_message=''
    )

    tmp_file = None
    try:
        # Choose model
        model = whisper.load_model(audio.model)

        # Decode base64 string
        decoded_data = base64.b64decode(audio.audio_b64_string)

        # Write the decoded data to a file. mkstemp creates the file
        # atomically with an unpredictable name and owner-only permissions,
        # instead of a guessable path opened with a plain open().
        fd, tmp_file = tempfile.mkstemp(prefix='whisper_')
        with os.fdopen(fd, 'wb') as file:
            file.write(decoded_data)

        # Transcribe the audio stream
        result = model.transcribe(tmp_file, initial_prompt=audio.initial_prompt)
        text = result["text"]

    except Exception as e:
        lm.log(logging.ERROR, curr_module, 'Exception: ', str(e))
        resp.text = ''
        resp.error_message = str(e)
        return resp

    finally:
        # Always remove the temp file, including when transcription fails,
        # so failed requests do not accumulate audio files on disk.
        if tmp_file is not None:
            try:
                os.remove(tmp_file)
            except OSError as cleanup_error:
                # Best effort: a cleanup failure must not mask the real result.
                lm.log(logging.WARNING, curr_module,
                       'Failed to remove temp file: ', str(cleanup_error))

    lm.log(logging.INFO, curr_module, "read_audio complete with normal")
    resp.text = text
    resp.error_message = ''
    return resp