shukdevdatta123 committed
Commit 0a8d0e4 · verified · 1 Parent(s): b042d28

Update app.py

Files changed (1): app.py +50 -88
app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from openai import OpenAI
+import openai
 import base64
 from PIL import Image
 import io
@@ -27,7 +27,7 @@ def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
     if not openai_api_key:
         return "Error: No API key provided."
 
-    client = OpenAI(api_key=openai_api_key)
+    openai.api_key = openai_api_key
 
     # Limit content length to avoid token limits
     limited_content = pdf_content[:8000] if len(pdf_content) > 8000 else pdf_content
@@ -50,13 +50,9 @@ Document content:
         {"role": "user", "content": prompt}
     ]
 
-    # Use appropriate model based on choice
-    model_name = "gpt-4" if model_choice == "o1" else "gpt-3.5-turbo"
-
-    response = client.chat.completions.create(
-        model=model_name,
-        messages=messages,
-        max_tokens=2000
+    response = openai.ChatCompletion.create(
+        model=model_choice,
+        messages=messages
     )
 
     return response.choices[0].message.content
@@ -68,48 +64,42 @@ def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_
     if not openai_api_key:
         return "Error: No API key provided."
 
-    client = OpenAI(api_key=openai_api_key)
+    openai.api_key = openai_api_key
 
     # Process the input depending on whether it's text, image, or a PDF-related query
     if pdf_content and input_text:
         # For PDF queries, we combine the PDF content with the user's question
         prompt = f"Based on the following document content, please answer this question: '{input_text}'\n\nDocument content:\n{pdf_content}"
-        messages = [{"role": "user", "content": prompt}]
+        input_content = prompt
     elif image:
         # Convert the image to base64 string
-        image_base64 = get_base64_string_from_image(image)
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": input_text or "Please describe this image."},
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/png;base64,{image_base64}"
-                        }
-                    }
-                ]
-            }
-        ]
+        image_info = get_base64_string_from_image(image)
+        input_content = f"data:image/png;base64,{image_info}"
     else:
         # Plain text input
-        messages = [{"role": "user", "content": input_text}]
+        input_content = input_text
 
-    try:
-        # Use appropriate model based on choice
-        if model_choice == "o1" and image:
-            model_name = "gpt-4-vision-preview"
-        elif model_choice == "o1":
-            model_name = "gpt-4"
+    # Prepare the messages for OpenAI API
+    if model_choice == "o1":
+        if image and not pdf_content:
+            messages = [
+                {"role": "user", "content": [{"type": "image_url", "image_url": {"url": input_content}}]}
+            ]
         else:
-            model_name = "gpt-3.5-turbo"
-
+            messages = [
+                {"role": "user", "content": input_content}
+            ]
+    elif model_choice == "o3-mini":
+        messages = [
+            {"role": "user", "content": input_content}
+        ]
+
+    try:
         # Call OpenAI API with the selected model
-        response = client.chat.completions.create(
-            model=model_name,
+        response = openai.ChatCompletion.create(
+            model=model_choice,
             messages=messages,
-            max_tokens=2000
+            max_completion_tokens=2000
         )
 
         return response.choices[0].message.content
@@ -130,17 +120,20 @@ def transcribe_audio(audio, openai_api_key):
     if not openai_api_key:
         return "Error: No API key provided."
 
-    client = OpenAI(api_key=openai_api_key)
+    openai.api_key = openai_api_key
 
     try:
         # Open the audio file and pass it as a file object
         with open(audio, 'rb') as audio_file:
-            # Transcribe the audio to text using OpenAI's whisper model
-            transcript = client.audio.transcriptions.create(
-                model="whisper-1",
-                file=audio_file
-            )
-            return transcript.text
+            audio_file_content = audio_file.read()
+
+        # Use the correct transcription API call
+        audio_file_obj = io.BytesIO(audio_file_content)
+        audio_file_obj.name = 'audio.wav'  # Set a name for the file object (as OpenAI expects it)
+
+        # Transcribe the audio to text using OpenAI's whisper model
+        audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
+        return audio_file_transcription.text
     except Exception as e:
         return f"Error transcribing audio: {str(e)}"
 
@@ -195,50 +188,15 @@ def process_pdf(pdf_file):
 # Function to update visible components based on input type selection
 def update_input_type(choice):
     if choice == "Text":
-        return (
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            False
-        )
+        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
     elif choice == "Image":
-        return (
-            gr.update(visible=True),
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            False
-        )
+        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
     elif choice == "Voice":
-        return (
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            False
-        )
+        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
    elif choice == "PDF":
-        return (
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=True),
-            gr.update(visible=False),
-            False
-        )
+        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(value=False)
     elif choice == "PDF(QUIZ)":
-        return (
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=True),
-            gr.update(visible=True),
-            True
-        )
+        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(value=True)
 
 # Custom CSS styles with animations and button colors
 custom_css = """
@@ -486,9 +444,13 @@ def create_interface():
             label="Number of Quiz Questions",
             visible=False
        )
-
-        # State variable for quiz mode (not visible)
-        quiz_mode = gr.State(False)
+
+        # Hidden state for quiz mode
+        quiz_mode = gr.Checkbox(
+            label="Quiz Mode",
+            visible=False,
+            value=False
+        )
 
         with gr.Row():
             reasoning_effort = gr.Dropdown(
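The commit's core change is the SDK interface: every OpenAI call moves from the v1 client style (OpenAI(api_key=...) with client.chat.completions.create) to the legacy module-level interface (openai.api_key with openai.ChatCompletion.create), and model_choice ("o1" or "o3-mini") is forwarded verbatim instead of being remapped to GPT model names; the quiz-generation call also loses its max_tokens cap. A minimal sketch of the resulting call pattern, assuming the app pins the pre-1.0 SDK, since openai.ChatCompletion was removed in openai>=1.0:

import openai

openai.api_key = "sk-..."  # placeholder; the app takes the key from user input

# Legacy-style chat completion; the app forwards model_choice as-is.
response = openai.ChatCompletion.create(
    model="o3-mini",
    messages=[{"role": "user", "content": "Summarize this document."}],
)
print(response.choices[0].message.content)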
 
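In the rewritten generate_response, every input is first collapsed into a single input_content string, and messages is then built per model: for "o1" with an image, only an image_url part is sent, so the user's typed question no longer travels with the image (the previous version attached a text part alongside it); and if model_choice were ever anything other than "o1" or "o3-mini", messages would never be bound. A hypothetical variant that restores the text part, reusing the content-part shape the commit already uses (input_text and input_content are the same variables the new code builds):

# Hypothetical variant: keep the user's text next to the image, as the
# pre-commit version did.
messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": input_text or "Please describe this image."},
        {"type": "image_url", "image_url": {"url": input_content}},
    ],
}]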
 
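The transcription rewrite reads the upload into a named io.BytesIO before calling the legacy openai.Audio.transcribe. The .name attribute matters because the endpoint infers the audio format from the file extension, although the hard-coded 'audio.wav' assumes the recorder actually produced WAV audio, and the copy through BytesIO is arguably redundant since the file object opened from disk already carries a usable name. A minimal sketch under the same openai<1.0 assumption ('speech.wav' is a placeholder path):

import io
import openai

openai.api_key = "sk-..."  # placeholder

with open("speech.wav", "rb") as f:  # placeholder path
    buf = io.BytesIO(f.read())
buf.name = "audio.wav"  # the endpoint infers the format from this extension

print(openai.Audio.transcribe(file=buf, model="whisper-1").text)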
 
 
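Finally, update_input_type now returns six gr.update(...) objects instead of a tuple mixing visibility updates with a raw bool, and its sixth target changes from gr.State(False) to a hidden gr.Checkbox, presumably so it can be driven with gr.update(value=...) like the other five components. A sketch of the wiring this implies (the Radio and the single-output handler here are illustrative, not the app's full six-output hookup):

import gradio as gr

with gr.Blocks() as demo:
    input_type = gr.Radio(
        ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
        value="Text", label="Input Type",
    )
    quiz_mode = gr.Checkbox(label="Quiz Mode", visible=False, value=False)
    # Only the quiz-mode output is shown; the real handler drives six components.
    input_type.change(
        lambda c: gr.update(value=(c == "PDF(QUIZ)")),
        inputs=input_type,
        outputs=quiz_mode,
    )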