Create app.py
app.py
ADDED
@@ -0,0 +1,78 @@
import gradio as gr
import numpy as np
import whisper
from openai import OpenAI
from scipy.io.wavfile import write

client = OpenAI()


def process_transaction_details(transcribed_text):
    '''
    Extract the transaction details from the given transcribed text and return them as JSON.

    Input:
        transcribed_text (str): The transcribed text to process
    Output:
        str: A JSON-formatted string with the transaction details (or an empty dict on failure)
    '''
    prompt = (
        "Extract the transaction details from the following sentence and categorize "
        "the transaction based on the description. Format the response as JSON with "
        "fields for 'amount', 'description', and 'category'. "
        f"Sentence: '{transcribed_text}'."
    )

    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
                {"role": "user", "content": prompt},
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"An error occurred: {e}")
        return {}


def transcribe(audio, key):
    global client
    if audio is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    if key != "None":
        # Rebuild the client with the user-supplied key instead of the OPENAI_API_KEY environment variable.
        client = OpenAI(api_key=key)

    sr, y = audio
    # Normalize the raw samples to [-1, 1], guarding against all-silence input.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # whisper's transcribe() expects a file path, so write the normalized
    # audio back out to a temporary 16-bit WAV file first.
    temp_filename = "temp_audio.wav"
    write(temp_filename, sr, (y * 32767).astype(np.int16))

    # Load and transcribe the audio using Whisper
    model = whisper.load_model("base")  # or "small", "medium", "large", depending on your requirement
    result = model.transcribe(temp_filename)

    return process_transaction_details(result["text"])


demo = gr.Interface(
    transcribe,
    [
        gr.Audio(sources=["microphone"], max_length=10),
        gr.Textbox(label="Your OpenAI Key!!", value="None"),
    ],
    "json",
)

if __name__ == "__main__":
    demo.launch()
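Note: a Space like this also needs its dependencies declared in a requirements.txt next to app.py. The list below is a sketch inferred from the imports above (the `whisper` module comes from the openai-whisper package; versions are left unpinned as an assumption):

gradio
numpy
scipy
openai
openai-whisper

With this in place, a recorded sentence such as "I spent twelve dollars on coffee" should come back as a JSON object with 'amount', 'description', and 'category' fields, though the exact values depend on the model's output.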