Spaces:
Sleeping
Sleeping
File size: 5,995 Bytes
74de6d6 3415b9b 74de6d6 3415b9b 74de6d6 3415b9b 74de6d6 3415b9b 74de6d6 3415b9b 74de6d6 3415b9b 74de6d6 3415b9b 74de6d6 3415b9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import gradio as gr
import os
from transformers import pipeline
from PIL import Image
import tempfile
from pathlib import Path
import secrets
# Hugging Face pipelines, created once at import time and shared by the handlers below
image_to_text = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
)
math_reasoning = pipeline(
    task="text2text-generation",
    model="google/flan-t5-large",
)
# Helper function to process images
def process_image(image, should_convert=False):
    '''
    Saves an image to a temporary JPEG file and runs the image-to-text pipeline on it.

    The file is written under GRADIO_TEMP_DIR (or "<system temp dir>/gradio" when that
    variable is unset or empty) and is removed again before returning, even when
    saving or captioning fails.
    :param image: PIL image to describe
    :param should_convert: when True, flatten the image onto a white RGB background
        before saving (the sketchpad path calls it this way)
    :return: the 'generated_text' of the pipeline's first result
    '''
    # creating a temporary directory for saving images
    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(Path(tempfile.gettempdir()) / "gradio")
    os.makedirs(uploaded_file_dir, exist_ok=True)
    # Random name so concurrent requests never write to the same file
    filename = os.path.join(uploaded_file_dir, f"tmp{secrets.token_hex(8)}.jpg")

    # JPEG cannot store an alpha channel or a palette, so such images are flattened
    # onto white as well, not only when the caller asks for it.
    if should_convert or image.mode in ("RGBA", "LA", "P"):
        # paste() needs a mask with an alpha band; converting first makes this
        # work for inputs that carry no alpha channel too.
        rgba = image.convert("RGBA")
        # Image.new expects size as (width, height), which is what .size returns
        flattened = Image.new("RGB", size=rgba.size, color=(255, 255, 255))
        flattened.paste(rgba, (0, 0), mask=rgba)
        image = flattened

    try:
        image.save(filename)
        # Generate text description of the image; the context manager closes the
        # reopened file before it is deleted below.
        with Image.open(filename) as saved_image:
            description = image_to_text(saved_image)[0]['generated_text']
    finally:
        # Clean up file. It may not exist if save() itself failed.
        try:
            os.remove(filename)
        except FileNotFoundError:
            pass
    return description
def get_math_response(image_description, user_question):
    '''
    Builds a prompt from the optional image description and the user's question,
    then answers it with the math_reasoning pipeline
    :param image_description: text describing the image; left out of the prompt when empty or None
    :param user_question: the question to answer; required
    :return: the generated answer, or a fixed hint message when no question was given
    '''
    # Nothing to ask the model without a question
    if not user_question:
        return "Please provide a valid description."

    sections = []
    if image_description:
        sections.append(f"Image Description :{image_description}\n")
    sections.append(f"User question :{user_question}\n")
    prompt = "".join(sections)

    # Only the first generated candidate is returned
    outputs = math_reasoning(prompt, max_length=512)
    return outputs[0]['generated_text']
# Combined chatbot logic
def math_chatbot(image, sketchpad, question, state):
    '''
    Describes the image from the currently selected input tab (if one was provided)
    and answers the question with get_math_response
    :param image: image from the upload tab, or None
    :param sketchpad: sketchpad value; its 'composite' entry is the image that gets described
    :param question: the user's question text
    :param state: dict whose 'tab_index' selects the input (0 = upload, 1 = sketch)
    :return: whatever get_math_response returns
    '''
    active_tab = state['tab_index']
    description = None
    if active_tab == 0 and image is not None:
        # Uploaded image
        description = process_image(image)
    elif active_tab == 1 and sketchpad and sketchpad['composite']:
        # Sketchpad drawing
        description = process_image(sketchpad['composite'], should_convert=True)
    return get_math_response(description, question)
def tabs_select(e: gr.SelectData, _state):
    '''
    Stores the index carried by the select event in the state dict
    :param e: select event data; its index is recorded
    :param _state: state dict, updated in place under 'tab_index'
    '''
    _state.update(tab_index=e.index)
# Custom CSS passed to gr.Blocks: forces KaTeX display-mode output inside the
# element with id "qwen-md" to be laid out inline.
css = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""
# Build the Gradio UI: one column with the inputs (upload / sketch tabs, question box,
# buttons) and one column with the rendered answer.
with gr.Blocks(css=css) as demo:
    gr.HTML("""\
<p align="center"><img src="https://huggingface.co/front/assets/huggingface_logo.svg" style="height: 60px"/><p>"""
            """<center><font size=8>📖 Math Reasoning Chatbot</center>"""
            """\
<center><font size=3>This demo uses Hugging Face models for OCR and mathematical reasoning. You can input images or text-based questions.</center>"""
            )
    # Tracks which input tab is active; tabs_select updates it, math_chatbot reads it
    state = gr.State({"tab_index": 0})
    with gr.Row():
        with gr.Column():
            with gr.Tabs() as input_tabs:
                with gr.Tab("Upload"):
                    input_image = gr.Image(type="pil", label="Upload")
                with gr.Tab("Sketch"):
                    input_sketchpad = gr.Sketchpad(type="pil", label="Sketch", layers=False)
            input_tabs.select(fn=tabs_select, inputs=[state])
            input_text = gr.Textbox(label="input your question")
            with gr.Row():
                with gr.Column():
                    clear_btn = gr.ClearButton(
                        [input_image, input_sketchpad, input_text])
                with gr.Column():
                    submit_btn = gr.Button("Submit", variant="primary")
        with gr.Column():
            # Braces are written as "\\{" / "\\}" so each delimiter keeps a literal
            # backslash before the brace without relying on invalid escape sequences.
            output_md = gr.Markdown(label="answer",
                                    latex_delimiters=[{
                                        "left": "\\(",
                                        "right": "\\)",
                                        "display": True
                                    }, {
                                        "left": "\\begin\\{equation\\}",
                                        "right": "\\end\\{equation\\}",
                                        "display": True
                                    }, {
                                        "left": "\\begin\\{align\\}",
                                        "right": "\\end\\{align\\}",
                                        "display": True
                                    }, {
                                        "left": "\\begin\\{alignat\\}",
                                        "right": "\\end\\{alignat\\}",
                                        "display": True
                                    }, {
                                        "left": "\\begin\\{gather\\}",
                                        "right": "\\end\\{gather\\}",
                                        "display": True
                                    }, {
                                        "left": "\\begin\\{CD\\}",
                                        "right": "\\end\\{CD\\}",
                                        "display": True
                                    }, {
                                        "left": "\\[",
                                        "right": "\\]",
                                        "display": True
                                    }],
                                    elem_id="qwen-md")
    # Argument order matches math_chatbot(image, sketchpad, question, state)
    submit_btn.click(
        fn=math_chatbot,
        inputs=[input_image, input_sketchpad, input_text, state],
        outputs=output_md)
demo.launch()