File size: 1,932 Bytes
28a765d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c5bab1
28a765d
 
d16605a
28a765d
 
 
46b0abc
28a765d
 
 
 
 
 
 
 
3c5bab1
28a765d
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
import json

import gradio as gr
import numpy as np
import whisper
from openai import OpenAI
from scipy.io.wavfile import write
client = OpenAI()

def process_transaction_details(transcribed_text):
    '''
    Extract the transaction details from the given transcribed text and return them as a dict.

    Input:
        transcribed_text (str): The transcribed text to process
    Output:
        dict: Parsed transaction details with 'amount', 'description', and
              'category' fields, or an empty dict if the API call fails or
              the model returns malformed JSON.
    '''
    prompt = f"Extract the transaction details from the following sentence and categorize the transaction based on the description. Format the response as JSON with fields for 'amount', 'description', and 'category'. Sentence: '{transcribed_text}'."

    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            # json_object mode makes the model emit a single JSON object.
            response_format={ "type": "json_object" },
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
                {"role": "user", "content": prompt}
            ]
        )
        # Parse the JSON text so the return type is a dict on every path
        # (previously the success path returned a raw string while the
        # error path returned {}).
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        # Best-effort boundary: log and return an empty dict rather than crash the UI.
        print(f"An error occurred: {e}")
        return {}

def transcribe(audio):
    '''
    Transcribe a recorded audio clip with Whisper and extract transaction details.

    Input:
        audio: Gradio microphone payload, a (sample_rate, numpy_array) tuple,
               or None when nothing was recorded.
    Output:
        dict: Transaction details from process_transaction_details.
    Raises:
        gr.Error: if no audio was submitted.
    '''
    if audio is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    sr, y = audio
    y = y.astype(np.float32)
    # Normalize to [-1, 1], guarding against an all-zero (silent) clip
    # which would otherwise divide by zero and produce NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    model = whisper.load_model("base")  # or "small", "medium", "large", depending on your requirement
    temp_filename = "temp_audio.wav"
    write(temp_filename, sr, (y * 32767).astype(np.int16))
    try:
        result = model.transcribe(temp_filename)
    finally:
        # Remove the temp WAV even if transcription fails, so repeated
        # calls don't leave files behind.
        os.remove(temp_filename)

    return process_transaction_details(result['text'])

# Gradio UI: record up to 10s from the microphone, transcribe, and show the
# extracted transaction details as JSON.
demo = gr.Interface(
    transcribe,
    gr.Audio(sources=["microphone"],max_length=10),
    "json",
)

if __name__ == "__main__":
    demo.launch()