Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Clark Lin
Fastapi Service
Commits
837bc14f
Commit
837bc14f
authored
May 16, 2024
by
Administrator
Browse files
added whisper service
parent
976b079b
Changes
2
Show whitespace changes
Inline
Side-by-side
fastapi_service.py
View file @
837bc14f
...
...
@@ -7,6 +7,7 @@
# Change History
# Version Date By Description
# 0.01 2024-04-24 Clark Lin Initial version
# 0.02 2024-05-16 Clark Lin Added whisper service
#
# Main features summary:
# - REST API for OAuth2 Authentication
...
...
@@ -26,7 +27,8 @@ import paddleocr_service
from
paddleocr_service
import
RawImage
from
typing
import
Annotated
from
fastapi.security
import
OAuth2PasswordBearer
,
OAuth2PasswordRequestForm
from
whisper_service
import
RawAudio
import
whisper_service
# ------------------------------------------------
# Init Global Variables
...
...
@@ -57,3 +59,12 @@ def get_access_token(form_data: Annotated[OAuth2PasswordRequestForm, Depends()])
def read_image(token: Annotated[str, Depends(oauth2_scheme)], image: RawImage):
    # The bearer token is resolved by the oauth2_scheme dependency and passed
    # on untouched; all further work is delegated to paddleocr_service.
    ocr_response = paddleocr_service.read_image(token, image)
    return ocr_response
# ------------------------------------------------
# Call Whisper Service
# ------------------------------------------------
@app.post("/whisper/read-audio")
def read_audio(token: Annotated[str, Depends(oauth2_scheme)], audio: RawAudio):
    # The bearer token is resolved by the oauth2_scheme dependency and passed
    # on untouched; token verification and transcription both happen inside
    # whisper_service.read_audio.
    transcription = whisper_service.read_audio(token, audio)
    return transcription
whisper_service.py
0 → 100644
View file @
837bc14f
# -*- coding: utf-8 -*-
# ------------------------------------------------------------------------------
# File Name: whisper_service.py
# Original Author: Clark Lin
# Email: clark_lin@outlook.com
#
# Change History
# Version Date By Description
# 0.01 2024-05-09 Clark Lin Initial version
#
# Main features summary:
# - Implementation of Whisper automatic speech recognition (ASR)
#
# Copyright Information:
# Copyright © 2024 Oasis
# Licensed TBD
# ------------------------------------------------------------------------------
# Common fastapi part
import
fastapi_security_util
from
pydantic
import
BaseModel
from
jose
import
JWTError
,
jwt
from
fastapi
import
HTTPException
,
status
# Common log part
import
os
import
logging
from
common.script.logging_manager
import
LoggingManager
curr_module
=
os
.
path
.
basename
(
__file__
)
# Initialize logging manager
lm
=
LoggingManager
.
get_instance
()
# lm = LoggingManager()
# Whisper part
import
whisper
import
ssl
import
base64
import
io
import
random
import
string
ssl
.
_create_default_https_context
=
ssl
.
_create_unverified_context
# ignore SSL verification
# ------------------------------------------------
# Model Definition
# ------------------------------------------------
class RawAudio(BaseModel):
    # Input payload consumed by read_audio().
    # Model name handed to whisper.load_model() to choose the model.
    model: str
    # Audio content as a base64 string; decoded with base64.b64decode().
    audio_b64_string: str
    # Forwarded unchanged as the initial_prompt argument of model.transcribe().
    initial_prompt: str
class RespAudio(BaseModel):
    # Result object returned by read_audio().
    # Transcribed text taken from result["text"]; empty when transcription failed.
    text: str
    # str() of the exception caught during transcription; empty on success.
    error_message: str
# ------------------------------------------------
# Sub Function - Verify Access Token
# ------------------------------------------------
def verify_token(token: str) -> bool:
    """Check whether *token* is a decodable JWT that carries a "sub" claim.

    The signing key is read through fastapi_security_util.get_credentials()
    and the token is decoded with the algorithm configured in
    fastapi_security_util.algorithm.

    Args:
        token: Encoded JWT string taken from the request.

    Returns:
        True when the token decodes successfully and its payload contains a
        non-None "sub" entry; False when decoding raises JWTError or the
        "sub" entry is missing.
    """
    # The second element of the credentials tuple is not needed here.
    secret_key, _client_db = fastapi_security_util.get_credentials(
        fastapi_security_util.credential_file
    )

    try:
        payload = jwt.decode(
            token, secret_key, algorithms=[fastapi_security_util.algorithm]
        )
    except JWTError as e:
        # Log the caught exception instance. Logging the JWTError class itself
        # would always print the same class repr and hide the real reason.
        lm.log(logging.ERROR, curr_module, 'JWTError: ', str(e))
        return False

    username = payload.get("sub")
    return username is not None
# ------------------------------------------------
# Sub Function - Read Audio
# ------------------------------------------------
def read_audio(token: str, audio: RawAudio):
    """Transcribe base64-encoded audio with Whisper.

    The audio bytes are written to a temporary file because
    model.transcribe() is called with a file path. The temporary file is
    always removed afterwards, including when transcription fails.

    Args:
        token: Encoded JWT; checked with verify_token() before any work.
        audio: Request data holding the model name, the base64 audio string
            and the initial prompt passed to model.transcribe().

    Returns:
        RespAudio with ``text`` set to the transcription and an empty
        ``error_message`` on success; on any exception during processing,
        ``text`` is empty and ``error_message`` holds str() of the exception.

    Raises:
        HTTPException: 401 when verify_token() rejects the token.
    """
    # Function-scope import keeps this block self-contained without touching
    # the module's import section.
    import tempfile

    # lm.log(logging.INFO, curr_module, "token: " + token)
    lm.log(logging.INFO, curr_module, "read_audio start")

    if not verify_token(token=token):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication Failed",
            headers={"WWW-Authenticate": "Bearer"},
        )

    text = ''
    resp = RespAudio(text='', error_message='')
    tmp_file = None

    try:
        # Choose model
        # NOTE(review): the model is loaded on every call; consider caching
        # loaded models if request latency matters.
        model = whisper.load_model(audio.model)

        # Decode base64 string
        decoded_data = base64.b64decode(audio.audio_b64_string)

        # Write the decoded data to a uniquely named temporary file.
        # mkstemp creates the file atomically with owner-only permissions,
        # avoiding the predictable-name race of a hand-built /tmp path.
        fd, tmp_file = tempfile.mkstemp(prefix='whisper_')
        with os.fdopen(fd, 'wb') as tmp_handle:
            tmp_handle.write(decoded_data)

        # Transcribe the audio file
        result = model.transcribe(tmp_file, initial_prompt=audio.initial_prompt)
        text = result["text"]

    except Exception as e:
        lm.log(logging.ERROR, curr_module, 'Exception: ', str(e))
        resp.text = ''
        resp.error_message = str(e)
        return resp

    finally:
        # Always clean up, on success and on failure alike, so failed
        # requests do not leave audio files behind.
        if tmp_file is not None:
            try:
                os.remove(tmp_file)
            except OSError as cleanup_error:
                # Best-effort cleanup: report the problem but do not mask the
                # transcription result or the original error.
                lm.log(logging.ERROR, curr_module,
                       'Temp file cleanup failed: ', str(cleanup_error))

    lm.log(logging.INFO, curr_module, "read_audio complete with normal")
    resp.text = text
    resp.error_message = ''
    return resp
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment