Spaces:

Ayush0804
/

mathChatBot

Sleeping

App Files Files Community

Ayush0804 committed on Dec 13, 2024

Commit

958f56e

verified ·

1 Parent(s): 079ef13

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -66

app.py CHANGED Viewed

@@ -6,85 +6,87 @@ import tempfile
 from pathlib import Path
 import secrets
-# Initialising huggingface pipelines
 image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
 math_reasoning = pipeline("text2text-generation", model="google/flan-t5-large")
-# Helper function to process images
 def process_image(image, should_convert=False):
-    '''
-    Saves an uploaded image and utilises image-to-text pipeline for math-related descriptions
-    :param image:
-    :param should_convert:
-    :return: pipeline's output
-    '''
-    # creating a temporary directory for saving images
-    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(Path(tempfile.gettempdir()) / "gradio")
     os.makedirs(uploaded_file_dir, exist_ok=True)
     # Save the uploaded image as a temporary file
     name = f"tmp{secrets.token_hex(8)}.jpg"
     filename = os.path.join(uploaded_file_dir, name)
     if should_convert:
-        # Converts image into RGB format
-        new_img = Image.new("RGB", size=(image.height, image.width), color=(255, 255, 255))
         new_img.paste(image, (0, 0), mask=image)
         image = new_img
     image.save(filename)
     # Generate text description of the image
     description = image_to_text(Image.open(filename))[0]['generated_text']
-    # Clean up file
     os.remove(filename)
     return description
 def get_math_response(image_description, user_question):
-    '''
-    Generates a math related response based upon image description and user's question
-    :param image_description:
-    :param user_question:
-    '''
     prompt = ""
     if image_description:
-        prompt += f"Image Description :{image_description}\n"
     if user_question:
-        prompt += f"User question :{user_question}\n"
     else:
-        return "Please provide a valid description."
-    # Generate the response using the math_reasoning pipeline
     response = math_reasoning(prompt, max_length=512)[0]['generated_text']
     return response
 # Combined chatbot logic
-def math_chatbot(image, sketchpad, question, state):
-    current_tab_index = state['tab_index']
     image_description = None
     # Handle image upload
     if current_tab_index == 0:
         if image is not None:
-            image_description = process_image(image, )
     # Handle sketchpad input
     elif current_tab_index == 1:
-        if sketchpad and sketchpad['composite']:
-            image_description = process_image(sketchpad['composite'], should_convert=True)
     return get_math_response(image_description, question)
-def tabs_select(e: gr.SelectData, _state):
-    _state["tab_index"] = e.index
 css = """
 #qwen-md .katex-display { display: inline; }
 #qwen-md .katex-display>.katex { display: inline; }
 #qwen-md .katex-display>.katex>.katex-html { display: inline; }
 """
 with gr.Blocks(css=css) as demo:
     gr.HTML("""\
 <p align="center"><img src="https://huggingface.co/front/assets/huggingface_logo.svg" style="height: 60px"/><p>"""
@@ -93,23 +95,24 @@ with gr.Blocks(css=css) as demo:
 <center><font size=3>This demo uses Hugging Face models for OCR and mathematical reasoning. You can input images or text-based questions.</center>"""
             )
     state = gr.State({"tab_index": 0})
     with gr.Row():
         with gr.Column():
             with gr.Tabs() as input_tabs:
                 with gr.Tab("Upload"):
-                    input_image = gr.Image(type="pil", label="Upload"),
                 with gr.Tab("Sketch"):
-                    input_sketchpad = gr.Sketchpad(type="pil", label="Sketch", layers=False)
             input_tabs.select(fn=tabs_select, inputs=[state])
-            input_text = gr.Textbox(label="input your question")
             with gr.Row():
                 with gr.Column():
-                    clear_btn = gr.ClearButton(
-                        [*input_image, input_sketchpad, input_text])
                 with gr.Column():
                     submit_btn = gr.Button("Submit", variant="primary")
         with gr.Column():
-            output_md = gr.Markdown(label="answer",
                                     latex_delimiters=[{
                                         "left": "\\(",
                                         "right": "\\)",
@@ -118,30 +121,18 @@ with gr.Blocks(css=css) as demo:
                                         "left": "\\begin\{equation\}",
                                         "right": "\\end\{equation\}",
                                         "display": True
-                                    }, {
-                                        "left": "\\begin\{align\}",
-                                        "right": "\\end\{align\}",
-                                        "display": True
-                                    }, {
-                                        "left": "\\begin\{alignat\}",
-                                        "right": "\\end\{alignat\}",
-                                        "display": True
-                                    }, {
-                                        "left": "\\begin\{gather\}",
-                                        "right": "\\end\{gather\}",
-                                        "display": True
-                                    }, {
-                                        "left": "\\begin\{CD\}",
-                                        "right": "\\end\{CD\}",
-                                        "display": True
                                     }, {
                                         "left": "\\[",
                                         "right": "\\]",
                                         "display": True
                                     }],
                                     elem_id="qwen-md")
-        submit_btn.click(
-            fn=math_chatbot,
-            inputs=[*input_image, input_sketchpad, input_text, state],
-            outputs=output_md)
-demo.launch()

 from pathlib import Path
 import secrets
+# Initialize Hugging Face pipelines
 image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
 math_reasoning = pipeline("text2text-generation", model="google/flan-t5-large")
# Helper function to process image
def process_image(image, should_convert=False):
    """
    Caption an image with the module-level ``image_to_text`` pipeline.

    The image is written to a uniquely named temporary JPEG, captioned from
    that file, and the file is removed again even when saving or captioning
    raises.

    :param image: a PIL image, or an array accepted by ``Image.fromarray``
    :param should_convert: when true, flatten the image onto a white RGB
        canvas (using its alpha channel as the mask) before saving
    :return: the ``generated_text`` entry of the pipeline's first result
    """
    # Gradio components hand over NumPy arrays unless type="pil" is set,
    # so normalise to a PIL image before touching PIL-only attributes.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    if should_convert:
        # Going through RGBA guarantees a valid alpha mask for paste(),
        # even when the incoming image has no alpha channel of its own.
        rgba = image.convert("RGBA")
        flattened = Image.new("RGB", size=rgba.size, color=(255, 255, 255))
        flattened.paste(rgba, (0, 0), mask=rgba)
        image = flattened
    elif image.mode in ("RGBA", "LA", "P"):
        # These modes cannot be written as JPEG; drop alpha/palette first.
        image = image.convert("RGB")

    # Create temporary directory for saving images
    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
        Path(tempfile.gettempdir()) / "gradio"
    )
    os.makedirs(uploaded_file_dir, exist_ok=True)

    # Random token keeps concurrent requests from clobbering each other's file
    filename = os.path.join(uploaded_file_dir, f"tmp{secrets.token_hex(8)}.jpg")
    try:
        image.save(filename)
        # Close the file handle before the file is deleted below
        with Image.open(filename) as saved_image:
            return image_to_text(saved_image)[0]['generated_text']
    finally:
        # Clean up temporary file on success and on failure alike
        if os.path.exists(filename):
            os.remove(filename)
# Function to handle math reasoning based on question and image description
def get_math_response(image_description, user_question):
    """
    Generate a math-related response from an image description and a question.

    :param image_description: caption of the supplied image; skipped in the
        prompt when falsy
    :param user_question: the question text entered by the user
    :return: the ``generated_text`` of the ``math_reasoning`` pipeline's first
        result, or a fixed message when no usable question was given
    """
    # Whitespace-only input is not a question; reject it before calling the model
    question = user_question.strip() if isinstance(user_question, str) else user_question
    if not question:
        return "Please provide a valid question."

    prompt = ""
    if image_description:
        prompt += f"Image description: {image_description}\n"
    prompt += f"User question: {question}\n"

    # Generate a math-related response using text2text generation
    response = math_reasoning(prompt, max_length=512)[0]['generated_text']
    return response
 # Combined chatbot logic
def math_chat_bot(image, sketchpad, question, state):
    """
    Answer a question, optionally using the image from the active input tab.

    :param image: value of the upload tab's image component, or None
    :param sketchpad: value of the sketch tab; a mapping whose "composite"
        entry holds the drawn image, or a falsy value when nothing was drawn
    :param question: the question text entered by the user
    :param state: session state mapping; "tab_index" selects the input tab
        (0 = upload, 1 = sketch)
    :return: the result of ``get_math_response`` for the derived description
    """
    current_tab_index = state["tab_index"]
    image_description = None

    # Handle image upload
    if current_tab_index == 0:
        if image is not None:
            image_description = process_image(image)
    # Handle sketchpad input
    elif current_tab_index == 1:
        composite = sketchpad.get("composite") if sketchpad else None
        # Compare against None rather than truth-testing: truth-testing a
        # NumPy array raises ValueError.
        if composite is not None:
            image_description = process_image(composite, should_convert=True)

    # Get the math reasoning response
    return get_math_response(image_description, question)
+# CSS for formatting LaTeX
 css = """
 #qwen-md .katex-display { display: inline; }
 #qwen-md .katex-display>.katex { display: inline; }
 #qwen-md .katex-display>.katex>.katex-html { display: inline; }
 """
# Tab selection callback
def tabs_select(e: gr.SelectData, _state):
    """
    Store the index carried by the select event in the state mapping.

    :param e: select event; only its ``index`` attribute is read
    :param _state: mutable mapping updated in place under "tab_index"
    """
    selected_index = e.index
    _state["tab_index"] = selected_index
+# Gradio interface
 with gr.Blocks(css=css) as demo:
     gr.HTML("""\
 <p align="center"><img src="https://huggingface.co/front/assets/huggingface_logo.svg" style="height: 60px"/><p>"""
 <center><font size=3>This demo uses Hugging Face models for OCR and mathematical reasoning. You can input images or text-based questions.</center>"""
             )
     state = gr.State({"tab_index": 0})
     with gr.Row():
         with gr.Column():
             with gr.Tabs() as input_tabs:
                 with gr.Tab("Upload"):
+                    input_image = gr.Image(type="pil", label="Upload")
                 with gr.Tab("Sketch"):
+                    input_sketchpad = gr.Sketchpad(label="Sketch", layers=False)
             input_tabs.select(fn=tabs_select, inputs=[state])
+            input_text = gr.Textbox(label="Input your question")
             with gr.Row():
                 with gr.Column():
+                    clear_btn = gr.ClearButton([input_image, input_sketchpad, input_text])
                 with gr.Column():
                     submit_btn = gr.Button("Submit", variant="primary")
         with gr.Column():
+            output_md = gr.Markdown(label="Answer",
                                     latex_delimiters=[{
                                         "left": "\\(",
                                         "right": "\\)",
                                         "left": "\\begin\{equation\}",
                                         "right": "\\end\{equation\}",
                                         "display": True
                                     }, {
                                         "left": "\\[",
                                         "right": "\\]",
                                         "display": True
                                     }],
                                     elem_id="qwen-md")
+    submit_btn.click(
+        fn=math_chat_bot,
+        inputs=[input_image, input_sketchpad, input_text, state],
+        outputs=output_md
+    )
+# Launch Gradio app
+demo.launch()