added whisper service

837bc14f · Administrator · 976b079b · 837bc14f · 837bc14f
Commit 837bc14f authored May 16, 2024 by Administrator
--- a/fastapi_service.py
+++ b/fastapi_service.py
@@ -7,6 +7,7 @@
 # Change History
 # Version   Date        By          Description
 # 0.01      2024-04-24  Clark Lin   Initial version
+# 0.02      2024-05-16  Clark Lin   Added whisper service
 # 
 # Main features summary:
 # - REST API for OAuth2 Authentication
@@ -26,7 +27,8 @@ import paddleocr_service
 from paddleocr_service import RawImage
 from typing import Annotated
 from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
+from whisper_service import RawAudio
+import whisper_service
 # ------------------------------------------------
 # Init Global Variables
@@ -57,3 +59,12 @@ def get_access_token(form_data: Annotated[OAuth2PasswordRequestForm, Depends()])
 def read_image(token: Annotated[str, Depends(oauth2_scheme)], image:RawImage):
    return paddleocr_service.read_image(token, image)
+# ------------------------------------------------
+# Call Whisper Service
+# ------------------------------------------------
+# POST /whisper/read-audio: hands the bearer token and the request body to
+# whisper_service.read_audio and returns whatever that call returns.
+# (Kept as '#' comments so the route's generated API description is unchanged.)
+@app.post("/whisper/read-audio")
+def read_audio(
+    token: Annotated[str, Depends(oauth2_scheme)],
+    audio: RawAudio,
+):
+    return whisper_service.read_audio(token, audio)
--- a/whisper_service.py
+++ b/whisper_service.py
+# -*- coding: utf-8 -*-
+# ------------------------------------------------------------------------------
+# File Name: whisper_service.py
+# Original Author: Clark Lin
+# Email: clark_lin@outlook.com
+#
+# Change History
+# Version   Date        By          Description
+# 0.01      2024-05-09  Clark Lin   Initial version
+# 
+# Main features summary:
+# - Implementation of Whisper automatic speech recognition (ASR)
+# 
+# Copyright Information:
+# Copyright © 2024 Oasis
+# Licensed TBD
+# ------------------------------------------------------------------------------
+# Common fastapi part
+import fastapi_security_util
+from pydantic import BaseModel
+from jose import JWTError, jwt
+from fastapi import HTTPException, status
+# Common log part
+import os
+import logging
+from common.script.logging_manager import LoggingManager
+# Base name of this source file; passed as the module argument to lm.log() below
+curr_module = os.path.basename(__file__)
+# Logging manager obtained through its get_instance() accessor
+lm = LoggingManager.get_instance()
+# Whisper part
+import whisper
+import ssl
+import base64
+import io
+import random
+import string
+# NOTE(review): this rebinds the ssl module's default HTTPS context factory at
+# import time, so it applies to the whole process, not only to this module.
+# Presumably added so Whisper model downloads succeed without certificate
+# checks - TODO confirm this is intended, as it disables verification broadly.
+ssl._create_default_https_context = ssl._create_unverified_context  # ignore SSL verification
+# ------------------------------------------------
+# Model Definition
+# ------------------------------------------------
+# Request body accepted by read_audio().
+# ('#' comments are used instead of a docstring so the generated schema
+# description stays unchanged.)
+class RawAudio(BaseModel):
+    # Model name handed to whisper.load_model()
+    model: str
+    # Audio content as a base64-encoded string
+    audio_b64_string: str
+    # Text forwarded to transcribe() as its initial_prompt argument
+    initial_prompt: str
+# Response body returned by read_audio().
+# ('#' comments are used instead of a docstring so the generated schema
+# description stays unchanged.)
+class RespAudio(BaseModel):
+    # Transcribed text; empty string when transcription failed
+    text: str
+    # str() of the caught exception; empty string on success
+    error_message: str
+# ------------------------------------------------
+# Sub Function - Verify Access Token
+# ------------------------------------------------
+def verify_token(token: str) -> bool:
+    """Return True when *token* decodes as a JWT that carries a "sub" claim.
+
+    The secret key is loaded through fastapi_security_util.get_credentials()
+    on every call, and the token is decoded with the algorithm named by
+    fastapi_security_util.algorithm.
+
+    Args:
+        token: Encoded JWT string supplied by the caller.
+
+    Returns:
+        True if decoding succeeds and the payload has a non-None "sub" value.
+        False if that claim is missing, or if decoding raises JWTError (the
+        error is logged before returning).
+    """
+    # Only the secret key is used here; the second returned value is ignored.
+    secret_key, _client_db = fastapi_security_util.get_credentials(fastapi_security_util.credential_file)
+    try:
+        payload = jwt.decode(token, secret_key, algorithms=[fastapi_security_util.algorithm])
+    except JWTError as err:
+        # Log the caught exception instance so the actual failure reason is
+        # recorded, rather than the string form of the JWTError class itself.
+        lm.log(logging.ERROR, curr_module, 'JWTError: ', str(err))
+        return False
+    return payload.get("sub") is not None
+# ------------------------------------------------
+# Sub Function - Read Audio
+# ------------------------------------------------
+def read_audio(token: str, audio: RawAudio):
+    """Transcribe base64-encoded audio with Whisper.
+
+    The token is verified first. The audio payload is then decoded, written
+    to a temporary file and passed to the requested Whisper model.
+
+    Args:
+        token: Access token checked with verify_token().
+        audio: Request data: model name, base64 audio string and the
+            initial prompt forwarded to transcribe().
+
+    Returns:
+        RespAudio with the transcribed text and an empty error_message on
+        success, or an empty text and the exception message on failure.
+
+    Raises:
+        HTTPException: 401 when verify_token() rejects the token.
+    """
+    # Local import keeps this block self-contained; tempfile is stdlib.
+    import tempfile
+
+    # lm.log(logging.INFO, curr_module, "token: " + token)
+    lm.log(logging.INFO, curr_module, "read_audio start")
+    if not verify_token(token = token):
+        raise HTTPException(
+            status_code = status.HTTP_401_UNAUTHORIZED,
+            detail = "Authentication Failed",
+            headers={"WWW-Authenticate": "Bearer"},
+        )
+    resp = RespAudio(
+        text = '',
+        error_message = ''
+    )
+    tmp_file = None
+    try:
+        # Choose model
+        # NOTE(review): the model is loaded on every request; consider
+        # caching loaded models per name if this proves slow.
+        model = whisper.load_model(audio.model)
+        # Decode base64 string
+        decoded_data = base64.b64decode(audio.audio_b64_string)
+        # Write the decoded data to a uniquely named temporary file.
+        # tempfile creates the file atomically with an unpredictable name,
+        # replacing the hand-built '/tmp/whisper_<random>' path.
+        with tempfile.NamedTemporaryFile(prefix='whisper_', delete=False) as file:
+            tmp_file = file.name
+            file.write(decoded_data)
+        # Transcribe the audio file (closed first so it can be reopened by path)
+        result = model.transcribe(tmp_file, initial_prompt=audio.initial_prompt)
+        resp.text = result["text"]
+    except Exception as e:
+        # Service boundary: report any failure in the response body
+        lm.log(logging.ERROR, curr_module, 'Exception: ', str(e))
+        resp.text = ''
+        resp.error_message = str(e)
+        return resp
+    finally:
+        # Always remove the temporary file, including when writing or
+        # transcription failed; previously it was left behind on errors.
+        if tmp_file is not None:
+            try:
+                os.remove(tmp_file)
+            except OSError as cleanup_err:
+                # A failed cleanup is logged but does not discard the result
+                lm.log(logging.ERROR, curr_module, 'Exception: ', str(cleanup_err))
+    lm.log(logging.INFO, curr_module, "read_audio complete with normal")
+    resp.error_message = ''
+    return resp