# Voice expense tracker: records audio in the browser, transcribes it with
# Whisper, then asks OpenAI to extract structured transaction details as JSON.
import json
import os
import tempfile

import gradio as gr
import numpy as np
import whisper
from openai import OpenAI
from scipy.io.wavfile import write
client = OpenAI()
def process_transaction_details(transcribed_text):
'''
Extract the transaction details from the given transcribed text and return them as a JSON
Input:
transcribed_text (str): The transcribed text to process
Output:
dict: A JSON object containing the transaction details
'''
prompt = f"Extract the transaction details from the following sentence and categorize the transaction based on the description. Format the response as JSON with fields for 'amount', 'description', and 'category'. Sentence: '{transcribed_text}'."
try:
response = client.chat.completions.create(
model="gpt-3.5-turbo-0125",
response_format={ "type": "json_object" },
messages=[
{"role": "system", "content": "You are a helpful assistant designed to output JSON."},
{"role": "user", "content": prompt}
]
)
# print(response.choices[0].message.content)
return response.choices[0].message.content
except Exception as e:
print(f"An error occurred: {e}")
return {}
def transcribe(audio):
if audio is None:
raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
sr, y = audio
y = y.astype(np.float32)
y /= np.max(np.abs(y))
model = whisper.load_model("base") # or "small", "medium", "large", depending on your requirement
temp_filename = "temp_audio.wav"
write(temp_filename, sr, (y * 32767).astype(np.int16))
result = model.transcribe(temp_filename)
return process_transaction_details(result['text'])
demo = gr.Interface(
transcribe,
gr.Audio(sources=["microphone"],max_length=10),
"json",
)
if __name__ == "__main__":
demo.launch() |