shukdevdatta123 committed
Commit 0a8d0e4 · verified · 1 Parent(s): b042d28

Update app.py

Files changed (1): app.py +50 -88
app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from openai import OpenAI
+import openai
 import base64
 from PIL import Image
 import io
@@ -27,7 +27,7 @@ def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
     if not openai_api_key:
         return "Error: No API key provided."
 
-    client = OpenAI(api_key=openai_api_key)
+    openai.api_key = openai_api_key
 
     # Limit content length to avoid token limits
     limited_content = pdf_content[:8000] if len(pdf_content) > 8000 else pdf_content
@@ -50,13 +50,9 @@ Document content:
         {"role": "user", "content": prompt}
     ]
 
-    # Use appropriate model based on choice
-    model_name = "gpt-4" if model_choice == "o1" else "gpt-3.5-turbo"
-
-    response = client.chat.completions.create(
-        model=model_name,
-        messages=messages,
-        max_tokens=2000
+    response = openai.ChatCompletion.create(
+        model=model_choice,
+        messages=messages
     )
 
     return response.choices[0].message.content
@@ -68,48 +64,42 @@ def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_
     if not openai_api_key:
         return "Error: No API key provided."
 
-    client = OpenAI(api_key=openai_api_key)
+    openai.api_key = openai_api_key
 
     # Process the input depending on whether it's text, image, or a PDF-related query
     if pdf_content and input_text:
         # For PDF queries, we combine the PDF content with the user's question
         prompt = f"Based on the following document content, please answer this question: '{input_text}'\n\nDocument content:\n{pdf_content}"
-        messages = [{"role": "user", "content": prompt}]
+        input_content = prompt
     elif image:
         # Convert the image to base64 string
-        image_base64 = get_base64_string_from_image(image)
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": input_text or "Please describe this image."},
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/png;base64,{image_base64}"
-                        }
-                    }
-                ]
-            }
-        ]
+        image_info = get_base64_string_from_image(image)
+        input_content = f"data:image/png;base64,{image_info}"
     else:
         # Plain text input
-        messages = [{"role": "user", "content": input_text}]
+        input_content = input_text
 
-    try:
-        # Use appropriate model based on choice
-        if model_choice == "o1" and image:
-            model_name = "gpt-4-vision-preview"
-        elif model_choice == "o1":
-            model_name = "gpt-4"
+    # Prepare the messages for OpenAI API
+    if model_choice == "o1":
+        if image and not pdf_content:
+            messages = [
+                {"role": "user", "content": [{"type": "image_url", "image_url": {"url": input_content}}]}
+            ]
         else:
-            model_name = "gpt-3.5-turbo"
-
+            messages = [
+                {"role": "user", "content": input_content}
+            ]
+    elif model_choice == "o3-mini":
+        messages = [
+            {"role": "user", "content": input_content}
+        ]
+
+    try:
         # Call OpenAI API with the selected model
-        response = client.chat.completions.create(
-            model=model_name,
+        response = openai.ChatCompletion.create(
+            model=model_choice,
             messages=messages,
-            max_tokens=2000
+            max_completion_tokens=2000
         )
 
         return response.choices[0].message.content
@@ -130,17 +120,20 @@ def transcribe_audio(audio, openai_api_key):
     if not openai_api_key:
         return "Error: No API key provided."
 
-    client = OpenAI(api_key=openai_api_key)
+    openai.api_key = openai_api_key
 
     try:
         # Open the audio file and pass it as a file object
         with open(audio, 'rb') as audio_file:
-            # Transcribe the audio to text using OpenAI's whisper model
-            transcript = client.audio.transcriptions.create(
-                model="whisper-1",
-                file=audio_file
-            )
-            return transcript.text
+            audio_file_content = audio_file.read()
+
+        # Use the correct transcription API call
+        audio_file_obj = io.BytesIO(audio_file_content)
+        audio_file_obj.name = 'audio.wav'  # Set a name for the file object (as OpenAI expects it)
+
+        # Transcribe the audio to text using OpenAI's whisper model
+        audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
+        return audio_file_transcription.text
     except Exception as e:
         return f"Error transcribing audio: {str(e)}"
 
@@ -195,50 +188,15 @@ def process_pdf(pdf_file):
 # Function to update visible components based on input type selection
 def update_input_type(choice):
     if choice == "Text":
-        return (
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            False
-        )
+        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
     elif choice == "Image":
-        return (
-            gr.update(visible=True),
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            False
-        )
+        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
     elif choice == "Voice":
-        return (
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            False
-        )
+        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
    elif choice == "PDF":
-        return (
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=True),
-            gr.update(visible=False),
-            False
-        )
+        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(value=False)
     elif choice == "PDF(QUIZ)":
-        return (
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=True),
-            gr.update(visible=True),
-            True
-        )
+        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(value=True)
 
 # Custom CSS styles with animations and button colors
 custom_css = """
@@ -486,9 +444,13 @@ def create_interface():
             label="Number of Quiz Questions",
             visible=False
        )
-
-        # State variable for quiz mode (not visible)
-        quiz_mode = gr.State(False)
+
+        # Hidden state for quiz mode
+        quiz_mode = gr.Checkbox(
+            label="Quiz Mode",
+            visible=False,
+            value=False
+        )
 
         with gr.Row():
             reasoning_effort = gr.Dropdown(
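The commit's core change is the SDK interface: every OpenAI call moves from the v1 client style (OpenAI(api_key=...) with client.chat.completions.create) to the legacy module-level interface (openai.api_key with openai.ChatCompletion.create), and model_choice ("o1" or "o3-mini") is forwarded verbatim instead of being remapped to GPT model names; the quiz-generation call also loses its max_tokens cap. A minimal sketch of the resulting call pattern, assuming the app pins the pre-1.0 SDK, since openai.ChatCompletion was removed in openai>=1.0:

import openai

openai.api_key = "sk-..."  # placeholder; the app takes the key from user input

# Legacy-style chat completion; the app forwards model_choice as-is.
response = openai.ChatCompletion.create(
    model="o3-mini",
    messages=[{"role": "user", "content": "Summarize this document."}],
)
print(response.choices[0].message.content)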
 
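In the rewritten generate_response, every input is first collapsed into a single input_content string, and messages is then built per model: for "o1" with an image, only an image_url part is sent, so the user's typed question no longer travels with the image (the previous version attached a text part alongside it); and if model_choice were ever anything other than "o1" or "o3-mini", messages would never be bound. A hypothetical variant that restores the text part, reusing the content-part shape the commit already uses (input_text and input_content are the same variables the new code builds):

# Hypothetical variant: keep the user's text next to the image, as the
# pre-commit version did.
messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": input_text or "Please describe this image."},
        {"type": "image_url", "image_url": {"url": input_content}},
    ],
}]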
 
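The transcription rewrite reads the upload into a named io.BytesIO before calling the legacy openai.Audio.transcribe. The .name attribute matters because the endpoint infers the audio format from the file extension, although the hard-coded 'audio.wav' assumes the recorder actually produced WAV audio, and the copy through BytesIO is arguably redundant since the file object opened from disk already carries a usable name. A minimal sketch under the same openai<1.0 assumption ('speech.wav' is a placeholder path):

import io
import openai

openai.api_key = "sk-..."  # placeholder

with open("speech.wav", "rb") as f:  # placeholder path
    buf = io.BytesIO(f.read())
buf.name = "audio.wav"  # the endpoint infers the format from this extension

print(openai.Audio.transcribe(file=buf, model="whisper-1").text)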
 
 
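Finally, update_input_type now returns six gr.update(...) objects instead of a tuple mixing visibility updates with a raw bool, and its sixth target changes from gr.State(False) to a hidden gr.Checkbox, presumably so it can be driven with gr.update(value=...) like the other five components. A sketch of the wiring this implies (the Radio and the single-output handler here are illustrative, not the app's full six-output hookup):

import gradio as gr

with gr.Blocks() as demo:
    input_type = gr.Radio(
        ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
        value="Text", label="Input Type",
    )
    quiz_mode = gr.Checkbox(label="Quiz Mode", visible=False, value=False)
    # Only the quiz-mode output is shown; the real handler drives six components.
    input_type.change(
        lambda c: gr.update(value=(c == "PDF(QUIZ)")),
        inputs=input_type,
        outputs=quiz_mode,
    )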