Commit 837bc14f authored by Administrator's avatar Administrator
Browse files

added whisper service

parent 976b079b
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
# Change History # Change History
# Version Date By Description # Version Date By Description
# 0.01 2024-04-24 Clark Lin Initial version # 0.01 2024-04-24 Clark Lin Initial version
# 0.02 2024-05-16 Clark Lin Added whisper service
# #
# Main features summary: # Main features summary:
# - REST API for OAuth2 Authentication # - REST API for OAuth2 Authentication
...@@ -26,7 +27,8 @@ import paddleocr_service ...@@ -26,7 +27,8 @@ import paddleocr_service
from paddleocr_service import RawImage from paddleocr_service import RawImage
from typing import Annotated from typing import Annotated
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
from whisper_service import RawAudio
import whisper_service
# ------------------------------------------------ # ------------------------------------------------
# Init Global Variables # Init Global Variables
...@@ -57,3 +59,12 @@ def get_access_token(form_data: Annotated[OAuth2PasswordRequestForm, Depends()]) ...@@ -57,3 +59,12 @@ def get_access_token(form_data: Annotated[OAuth2PasswordRequestForm, Depends()])
def read_image(token: Annotated[str, Depends(oauth2_scheme)], image:RawImage): def read_image(token: Annotated[str, Depends(oauth2_scheme)], image:RawImage):
return paddleocr_service.read_image(token, image) return paddleocr_service.read_image(token, image)
# ------------------------------------------------
# Call Whisper Service
# ------------------------------------------------
# POST endpoint for speech recognition: takes the bearer token resolved through
# oauth2_scheme plus a RawAudio request body and delegates to whisper_service.
@app.post("/whisper/read-audio")
def read_audio(token: Annotated[str, Depends(oauth2_scheme)], audio:RawAudio):
    # The token is not checked here; it is handed unchanged to
    # whisper_service.read_audio together with the request body.
    return whisper_service.read_audio(token, audio)
# -*- coding: utf-8 -*-
# ------------------------------------------------------------------------------
# File Name: whisper_service.py
# Original Author: Clark Lin
# Email: clark_lin@outlook.com
#
# Change History
# Version Date By Description
# 0.01 2024-05-09 Clark Lin Initial version
#
# Main features summary:
# - Implementation of Whisper automatic speech recognition (ASR)
#
# Copyright Information:
# Copyright © 2024 Oasis
# Licensed TBD
# ------------------------------------------------------------------------------
# Common fastapi part
import fastapi_security_util
from pydantic import BaseModel
from jose import JWTError, jwt
from fastapi import HTTPException, status
# Common log part
import os
import logging
from common.script.logging_manager import LoggingManager
# Base file name of this module; passed as the second argument of every
# lm.log call in this file.
curr_module = os.path.basename(__file__)
# Logging manager obtained through LoggingManager.get_instance()
# (presumably a shared singleton - confirm in common.script.logging_manager).
lm = LoggingManager.get_instance()
# Whisper part
import whisper
import ssl
import base64
import io
import random
import string
# Ignore SSL verification: replace the default HTTPS context factory with the
# unverified one, so certificate checks are skipped for code in this process
# that relies on ssl._create_default_https_context.
# NOTE(review): presumably needed so Whisper model downloads work behind an
# intercepting proxy - confirm it is still required, since it weakens TLS for
# the whole process rather than for a single request.
ssl._create_default_https_context = ssl._create_unverified_context
# ------------------------------------------------
# Model Definition
# ------------------------------------------------
# Request payload for the Whisper read-audio service.
class RawAudio(BaseModel):
    # Whisper model name handed to whisper.load_model.
    model: str
    # Base64-encoded audio bytes; decoded with base64.b64decode before
    # being written to a temporary file for transcription.
    audio_b64_string: str
    # Prompt text forwarded to model.transcribe as initial_prompt.
    initial_prompt: str
# Response payload returned by read_audio.
class RespAudio(BaseModel):
    # Transcribed text; empty string when processing failed.
    text: str
    # str() of the exception raised during processing; empty string on success.
    error_message: str
# ------------------------------------------------
# Sub Function - Verify Access Token
# ------------------------------------------------
def verify_token(token: str) -> bool:
    """Check whether *token* is a decodable JWT that carries a ``sub`` claim.

    The signing key comes from
    ``fastapi_security_util.get_credentials(fastapi_security_util.credential_file)``
    and the token is decoded with ``fastapi_security_util.algorithm``.

    Args:
        token: Encoded JWT bearer token.

    Returns:
        True when the token decodes and its ``sub`` claim is not None;
        False when the claim is missing or decoding raises ``JWTError``.
    """
    # get_credentials returns a pair; the second value (client_db) is not
    # needed for token verification.
    secret_key, _client_db = fastapi_security_util.get_credentials(
        fastapi_security_util.credential_file
    )
    try:
        payload = jwt.decode(
            token, secret_key, algorithms=[fastapi_security_util.algorithm]
        )
    except JWTError as err:
        # Log the caught exception instance. str(JWTError) would only render
        # the exception class and hide the actual failure reason.
        lm.log(logging.ERROR, curr_module, 'JWTError: ', str(err))
        return False
    return payload.get("sub") is not None
# ------------------------------------------------
# Sub Function - Read Audio
# ------------------------------------------------
def read_audio(token: str, audio: RawAudio) -> RespAudio:
    """Transcribe a base64-encoded audio payload with Whisper.

    Args:
        token: JWT bearer token; checked with verify_token before any work.
        audio: Request carrying the Whisper model name, the base64-encoded
            audio bytes and the initial prompt passed to transcription.

    Returns:
        RespAudio with ``text`` set to the transcription and an empty
        ``error_message`` on success, or an empty ``text`` and the exception
        message in ``error_message`` when decoding or transcription fails.

    Raises:
        HTTPException: 401 when the token fails verification.
    """
    # Local import: only this function needs tempfile.
    import tempfile

    lm.log(logging.INFO, curr_module, "read_audio start")
    if not verify_token(token=token):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication Failed",
            headers={"WWW-Authenticate": "Bearer"},
        )

    tmp_file = None
    try:
        # Choose model.
        # NOTE(review): the model is loaded on every request; consider caching
        # loaded models if latency becomes a problem.
        model = whisper.load_model(audio.model)
        # Decode base64 string
        decoded_data = base64.b64decode(audio.audio_b64_string)
        # mkstemp creates the file atomically with an unpredictable name that
        # only the current user can read/write, instead of a guessable path
        # opened with plain open().
        fd, tmp_file = tempfile.mkstemp(prefix='whisper_')
        with os.fdopen(fd, 'wb') as file:
            file.write(decoded_data)
        # Transcribe the audio file
        result = model.transcribe(tmp_file, initial_prompt=audio.initial_prompt)
        text = result["text"]
    except Exception as e:
        lm.log(logging.ERROR, curr_module, 'Exception: ', str(e))
        return RespAudio(text='', error_message=str(e))
    finally:
        # Always remove the temporary file, including when transcription
        # fails, so failed requests do not leave audio behind on disk.
        if tmp_file is not None:
            try:
                os.remove(tmp_file)
            except OSError as cleanup_err:
                lm.log(logging.ERROR, curr_module, 'Exception: ', str(cleanup_err))

    lm.log(logging.INFO, curr_module, "read_audio complete with normal")
    return RespAudio(text=text, error_message='')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment