Create app.py
app.py
ADDED
@@ -0,0 +1,78 @@
import gradio as gr
import numpy as np
import whisper
from openai import OpenAI
from scipy.io.wavfile import write

client = OpenAI()


def process_transaction_details(transcribed_text):
    '''
    Extract the transaction details from the given transcribed text and return them as JSON.

    Input:
        transcribed_text (str): The transcribed text to process
    Output:
        str: A JSON-formatted string with the transaction details (or an empty dict on failure)
    '''
    prompt = (
        "Extract the transaction details from the following sentence and categorize "
        "the transaction based on the description. Format the response as JSON with "
        "fields for 'amount', 'description', and 'category'. "
        f"Sentence: '{transcribed_text}'."
    )

    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
                {"role": "user", "content": prompt},
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"An error occurred: {e}")
        return {}


def transcribe(audio, key):
    global client
    if audio is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    if key != "None":
        # Rebuild the client with the user-supplied key instead of the OPENAI_API_KEY environment variable.
        client = OpenAI(api_key=key)

    sr, y = audio
    # Normalize the raw samples to [-1, 1], guarding against all-silence input.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # whisper's transcribe() expects a file path, so write the normalized
    # audio back out to a temporary 16-bit WAV file first.
    temp_filename = "temp_audio.wav"
    write(temp_filename, sr, (y * 32767).astype(np.int16))

    # Load and transcribe the audio using Whisper
    model = whisper.load_model("base")  # or "small", "medium", "large", depending on your requirement
    result = model.transcribe(temp_filename)

    return process_transaction_details(result["text"])


demo = gr.Interface(
    transcribe,
    [
        gr.Audio(sources=["microphone"], max_length=10),
        gr.Textbox(label="Your OpenAI Key!!", value="None"),
    ],
    "json",
)

if __name__ == "__main__":
    demo.launch()
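Note: a Space like this also needs its dependencies declared in a requirements.txt next to app.py. The list below is a sketch inferred from the imports above (the `whisper` module comes from the openai-whisper package; versions are left unpinned as an assumption):

gradio
numpy
scipy
openai
openai-whisper

With this in place, a recorded sentence such as "I spent twelve dollars on coffee" should come back as a JSON object with 'amount', 'description', and 'category' fields, though the exact values depend on the model's output.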