import json

import gradio as gr
import numpy as np
import whisper
from openai import OpenAI
from scipy.io.wavfile import write

# The OpenAI client reads the OPENAI_API_KEY environment variable.
client = OpenAI()

# Load the Whisper model once at startup instead of on every request.
# Swap in "small", "medium", or "large" depending on your accuracy/latency needs.
model = whisper.load_model("base")


def process_transaction_details(transcribed_text):
    """
    Extract the transaction details from the given transcribed text.

    Input:  transcribed_text (str): the transcribed sentence to process
    Output: dict: the transaction details, or an empty dict on failure
    """
    prompt = (
        "Extract the transaction details from the following sentence and "
        "categorize the transaction based on the description. Format the "
        "response as JSON with fields for 'amount', 'description', and "
        f"'category'. Sentence: '{transcribed_text}'."
    )
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            response_format={"type": "json_object"},
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant designed to output JSON.",
                },
                {"role": "user", "content": prompt},
            ],
        )
        # The API returns the JSON as a string; parse it so the function
        # actually returns the dict the docstring promises.
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"An error occurred: {e}")
        return {}


def transcribe(audio):
    if audio is None:
        raise gr.Error(
            "No audio file submitted! Please upload or record an audio file "
            "before submitting your request."
        )
    sr, y = audio
    # Normalize the raw samples to [-1, 1], guarding against silent input
    # (the original divided by the peak unconditionally, which can divide by zero).
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    # Whisper transcribes from a file here, so round-trip through a 16-bit WAV.
    temp_filename = "temp_audio.wav"
    write(temp_filename, sr, (y * 32767).astype(np.int16))
    result = model.transcribe(temp_filename)
    return process_transaction_details(result["text"])


demo = gr.Interface(
    transcribe,
    gr.Audio(sources=["microphone"], max_length=10),
    "json",
)

if __name__ == "__main__":
    demo.launch()
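
# A quick way to sanity-check the extraction step without the Gradio UI,
# e.g. from a REPL (assumes OPENAI_API_KEY is set; the sample sentence and
# the field values shown are illustrative, not guaranteed model output):
#
#     >>> process_transaction_details("I spent 12 dollars on coffee this morning")
#     {'amount': 12, 'description': 'coffee', 'category': 'Food & Drink'}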