rahul-appu committed on
Commit
4c77e7e
·
verified ·
1 Parent(s): cc14a67

init commit

Browse files
Files changed (2) hide show
  1. app.py +60 -0
  2. utils.py +55 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from utils import rag_api, agent_api, base64_to_audio
4
+
5
+
6
+ # Helper functions
7
def assignment_part_1(input_text, language):
    """Query the RAG endpoint and build the values for the two UI outputs.

    Args:
        input_text: Query text entered by the user.
        language: Audio language code chosen in the dropdown.

    Returns:
        A 2-tuple ``(response, audio)``. ``response`` is whatever
        ``rag_api`` returned. ``audio`` is ``(sample_rate, samples)`` when
        the response carries decodable audio, otherwise ``None``.
    """
    response = rag_api(input_text, language)

    # Tolerate responses without an "audio" key (or non-dict payloads)
    # instead of raising KeyError/TypeError.
    encoded_audio = response.get("audio") if isinstance(response, dict) else None
    if not encoded_audio:
        # Exactly two values: the click handler is wired to two outputs.
        return response, None

    sample_rate, samples = base64_to_audio(encoded_audio)
    # base64_to_audio signals a decoding failure with (None, None); hand the
    # audio component a plain None rather than a tuple of Nones.
    if sample_rate is None or samples is None:
        return response, None

    return response, (sample_rate, samples)
14
+
15
+
16
def assignment_part_2(input_text, language):
    """Query the agent endpoint and build the values for the two UI outputs.

    Args:
        input_text: Query text entered by the user.
        language: Audio language code chosen in the dropdown.

    Returns:
        A 2-tuple ``(response, audio)``. ``response`` is whatever
        ``agent_api`` returned. ``audio`` is ``(sample_rate, samples)`` when
        the response carries decodable audio, otherwise ``None``.
    """
    response = agent_api(input_text, language)

    # Tolerate responses without an "audio" key (or non-dict payloads)
    # instead of raising KeyError/TypeError.
    encoded_audio = response.get("audio") if isinstance(response, dict) else None
    if not encoded_audio:
        # Exactly two values: the click handler is wired to two outputs.
        return response, None

    sample_rate, samples = base64_to_audio(encoded_audio)
    # base64_to_audio signals a decoding failure with (None, None); hand the
    # audio component a plain None rather than a tuple of Nones.
    if sample_rate is None or samples is None:
        return response, None

    return response, (sample_rate, samples)
23
+
24
+
25
+ # Create the Gradio interface
26
with gr.Blocks() as block:

    # Centered page title.
    gr.Markdown("# <p style='text-align:center;'>Sarvam Intern Assignment</p>")

    # Free-text query shared by both buttons.
    input_text = gr.Textbox(label="Enter your query:")

    # Language code passed to the handlers as their second argument.
    language = gr.Dropdown(
        label="Select audio_language_code",
        choices=[
            "hi-IN", "bn-IN", "kn-IN", "ml-IN",
            "mr-IN", "od-IN", "pa-IN", "ta-IN",
            "te-IN", "gu-IN", "en-IN",
        ],
    )

    # One button per backend, laid out side by side.
    with gr.Row():
        button_1 = gr.Button("RAG")
        button_2 = gr.Button("Agent + RAG")

    # Shared output area: JSON response plus the audio player.
    output_text = gr.JSON(label="Output")
    output_audio = gr.Audio(
        type="numpy",
        label="Output Audio",
        interactive=False,
    )

    # Both buttons read the same inputs and write to the same outputs;
    # only the handler differs.
    button_1.click(
        fn=assignment_part_1,
        inputs=[input_text, language],
        outputs=[output_text, output_audio],
    )
    button_2.click(
        fn=assignment_part_2,
        inputs=[input_text, language],
        outputs=[output_text, output_audio],
    )

# Start the Gradio app.
block.launch()
utils.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+
4
+ import io
5
+ import requests
6
+ import numpy as np
7
+ from scipy.io import wavfile
8
+ from dotenv import load_dotenv
9
+
10
+
11
+ # Load environment variables
12
# Load environment variables via python-dotenv before reading them below.
load_dotenv()


# Base address that the API helpers in this module prefix to their endpoint
# paths; None when the "URL" variable is not set.
URL = os.environ.get("URL")
16
+
17
+
18
+ # RAG API
19
def rag_api(query, audio_language_code, timeout=60):
    """POST a query to the ``/api/rag`` endpoint and return its JSON body.

    Args:
        query: Query text, sent as the ``query`` field.
        audio_language_code: Sent as the ``audio_language_code`` field.
        timeout: Seconds passed to ``requests.post`` as its timeout. Without
            one, a stalled backend would block the caller indefinitely.

    Returns:
        The response body decoded with ``Response.json()``.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout elapses.
    """
    payload = {
        'query': query,
        'audio_language_code': audio_language_code,
    }
    res = requests.post(f"{URL}/api/rag", json=payload, timeout=timeout)

    return res.json()
28
+
29
+
30
+ # Agent + RAG API
31
def agent_api(query, audio_language_code, timeout=60):
    """POST a query to the ``/api/agent`` endpoint and return its JSON body.

    Args:
        query: Query text, sent as the ``query`` field.
        audio_language_code: Sent as the ``audio_language_code`` field.
        timeout: Seconds passed to ``requests.post`` as its timeout. Without
            one, a stalled backend would block the caller indefinitely.

    Returns:
        The response body decoded with ``Response.json()``.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout elapses.
    """
    payload = {
        'query': query,
        'audio_language_code': audio_language_code,
    }
    res = requests.post(f"{URL}/api/agent", json=payload, timeout=timeout)

    return res.json()
40
+
41
+
42
def base64_to_audio(base64_string):
    """Decode a base64-encoded WAV payload into a sample rate and samples.

    Args:
        base64_string: Base64 text whose decoded bytes are read as a WAV file.

    Returns:
        ``(sample_rate, samples_array)`` on success, where ``samples_array``
        is a NumPy array. On any failure the exception is printed and
        ``(None, None)`` is returned instead.
    """
    try:
        # Wrap the decoded bytes in an in-memory file so scipy can parse them.
        wav_stream = io.BytesIO(base64.b64decode(base64_string))
        rate, pcm = wavfile.read(wav_stream)
        decoded = (rate, np.array(pcm))
    except Exception as e:
        # Best-effort: report the problem and signal failure to the caller.
        print(e)
        decoded = (None, None)

    return decoded