boi-doingthings committed
Commit 28a765d · verified · 1 Parent(s): fcc3d7e

Create app.py

Files changed (1)
app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
import os
import json

import gradio as gr
import numpy as np
import whisper
from openai import OpenAI
from scipy.io.wavfile import write

# The client reads OPENAI_API_KEY from the environment by default.
client = OpenAI()


def process_transaction_details(transcribed_text):
    '''
    Extract the transaction details from the given transcribed text and return them as a dict.
    Input:
        transcribed_text (str): The transcribed text to process
    Output:
        dict: The transaction details parsed from the model's JSON response
    '''
    prompt = f"Extract the transaction details from the following sentence and categorize the transaction based on the description. Format the response as JSON with fields for 'amount', 'description', and 'category'. Sentence: '{transcribed_text}'."

    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
                {"role": "user", "content": prompt},
            ],
        )
        # The model returns a JSON string; parse it so the output matches the docstring.
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"An error occurred: {e}")
        return {}


def transcribe(audio, key):
    global client
    if audio is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    if key != "None":
        # Rebuild the client with the key supplied in the UI instead of OPENAI_API_KEY.
        client = OpenAI(api_key=key)

    # Gradio delivers microphone input as (sample_rate, samples); normalize to [-1, 1].
    sr, y = audio
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))

    # Whisper's transcribe() accepts a file path, so write the audio to a temporary WAV file.
    temp_filename = "temp_audio.wav"
    write(temp_filename, sr, (y * 32767).astype(np.int16))

    # Load and transcribe the audio using Whisper.
    model = whisper.load_model("base")  # or "small", "medium", "large", depending on your requirement
    result = model.transcribe(temp_filename)

    return process_transaction_details(result["text"])


demo = gr.Interface(
    transcribe,
    [
        gr.Audio(sources=["microphone"], max_length=10),
        gr.Textbox(label="Your OpenAI Key (leave as 'None' to use OPENAI_API_KEY)", value="None"),
    ],
    "json",
)

if __name__ == "__main__":
    demo.launch()
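
For a quick check of the extraction step without recording audio, a small script like the one below could be run next to app.py. This is a minimal sketch: the filename quick_test.py and the sample sentence are illustrative only, and it assumes OPENAI_API_KEY is set in the environment.

# quick_test.py (hypothetical helper, not part of this commit)
from app import process_transaction_details

# The sentence below is a made-up example input, standing in for a Whisper transcription.
details = process_transaction_details("I spent 12 dollars on coffee this morning")
print(details)  # expected keys: 'amount', 'description', 'category'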