Spaces:
Sleeping
Sleeping
File size: 6,713 Bytes
592c01e fdb9c67 592c01e 74de6d6 592c01e 74de6d6 592c01e 958f56e 592c01e fdb9c67 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
import os
import secrets
import tempfile
from pathlib import Path

import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from PIL import Image
# Hugging Face Inference API endpoints used by the app.
# Both share the same credentials and sampling options; the API token is read
# from the HUGGING_FACE_API environment variable (expected to be set).
_HF_ENDPOINT_OPTIONS = {
    "huggingfacehub_api_token": os.environ.get("HUGGING_FACE_API"),
    "temperature": 0.7,
    "max_new_tokens": 1024,
}

# BLIP image-captioning model endpoint, called by process_image().
image_captioning_model = HuggingFaceEndpoint(
    endpoint_url="https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base",
    **_HF_ENDPOINT_OPTIONS,
)

# Llama 3.2 3B Instruct endpoint, called by get_math_response().
math_llm = HuggingFaceEndpoint(
    endpoint_url="https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-3B-Instruct",
    **_HF_ENDPOINT_OPTIONS,
)
# Instruction sent to the captioning model together with the image.
_IMAGE_PROMPT_TEMPLATE = """
You are a helpful AI assistant.
Please describe the math-related content in this image, ensuring that any LaTeX formulas are correctly transcribed.
Non-mathematical details do not need to be described.
Image Path: {image}
"""

# Image modes that carry transparency or a palette; JPEG cannot store them.
_MODES_NEEDING_FLATTEN = ("RGBA", "LA", "P", "PA")


def _flatten_to_rgb(image):
    """Return *image* composited onto a white background as an RGB image."""
    # Converting to RGBA first guarantees a valid alpha band for the paste
    # mask, whatever mode the incoming image has.
    rgba = image.convert("RGBA")
    canvas = Image.new("RGB", size=rgba.size, color=(255, 255, 255))
    canvas.paste(rgba, (0, 0), mask=rgba)
    return canvas


def process_image(image, shouldConvert=False):
    """Save *image* as a temporary JPEG and send it to the captioning model.

    Args:
        image: PIL image to describe.
        shouldConvert: When true, the image is always flattened onto a white
            background before saving. Images whose mode JPEG cannot store
            (alpha or palette modes) are flattened regardless of this flag.

    Returns:
        Whatever ``image_captioning_model`` returns for the request.
    """
    # Write into Gradio's temp directory, creating it if necessary.
    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
        Path(tempfile.gettempdir()) / "gradio"
    )
    os.makedirs(uploaded_file_dir, exist_ok=True)

    # Random file name so concurrent requests never collide.
    filename = os.path.join(uploaded_file_dir, f"tmp{secrets.token_hex(20)}.jpg")

    if shouldConvert or image.mode in _MODES_NEEDING_FLATTEN:
        image = _flatten_to_rgb(image)

    # Render the text instruction, embedding the saved image's path.
    prompt_template = PromptTemplate(
        input_variables=["image"],
        template=_IMAGE_PROMPT_TEMPLATE,
    )
    prompt = prompt_template.format(image=f"file://{filename}")

    try:
        image.save(filename)
        with open(filename, "rb") as img_file:
            # NOTE(review): HuggingFaceEndpoint is a text-generation LLM
            # wrapper; confirm it accepts this image/text mapping — an
            # image-to-text client may be required instead.
            response = image_captioning_model({
                "inputs": {
                    "image": img_file,
                    "text": prompt,
                }
            })
    finally:
        # Best-effort cleanup: the file is only needed for the request above,
        # and a failed delete must not mask the model's result or error.
        try:
            os.remove(filename)
        except OSError:
            pass

    return response
# Prompt sent to the math model; the template itself appends the "?".
_MATH_PROMPT_TEMPLATE = """
You are a helpful AI assistant specialized in solving math reasoning problems.
Analyze the following question carefully and provide a step-by-step explanation along with the answer.
Image description : {image_description}
Question: {user_question}?
"""


def get_math_response(image_description, user_question):
    """Ask the math model to answer *user_question* and yield its reply.

    Args:
        image_description: Text describing the accompanying image, or ``None``
            when no image was supplied.
        user_question: The user's question; ``None`` is treated as empty.

    Yields:
        The model's complete response, exactly once.
    """
    # Avoid rendering the literal text "None" into the prompt.
    if image_description is None:
        image_description = "No image was provided."

    # The template adds its own "?", so drop any the user already typed.
    question = (user_question or "").strip().rstrip("?").rstrip()

    prompt_template = PromptTemplate(
        input_variables=["user_question", "image_description"],
        template=_MATH_PROMPT_TEMPLATE,
    )
    formatted_prompt = prompt_template.format(
        user_question=question,
        image_description=image_description,
    )

    # ``invoke`` is the supported entry point; calling the LLM object directly
    # is deprecated in LangChain.
    response = math_llm.invoke(formatted_prompt)
    yield response
def math_chat_bot(image, sketchpad, question, state):
    """Describe the image from the active input tab, then answer *question*.

    Args:
        image: Image from the "Upload" tab, or ``None``.
        sketchpad: Value of the "Sketch" tab; its ``"composite"`` entry is
            the image that gets processed.
        question: The user's question text.
        state: Mapping whose ``"tab_index"`` selects the source
            (0 = upload, 1 = sketch).

    Yields:
        Everything yielded by ``get_math_response``.
    """
    active_tab = state["tab_index"]

    # Pick the picture to describe and whether it must be flattened first.
    picture, needs_flatten = None, False
    if active_tab == 0:
        # Upload tab
        picture = image
    elif active_tab == 1:
        # Sketch tab
        print(sketchpad)
        if sketchpad and sketchpad["composite"]:
            picture, needs_flatten = sketchpad["composite"], True

    description = None
    if picture is not None:
        description = process_image(picture, needs_flatten)

    yield from get_math_response(description, question)
# Custom stylesheet passed to gr.Blocks: forces KaTeX display-mode math inside
# the element with id "qwen-md" to be laid out inline.
css = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""
def tabs_select(e: gr.SelectData, _state):
    """Store the index of the selected tab in ``_state``.

    Args:
        e: Selection event; only its ``index`` attribute is read.
        _state: Mutable mapping, updated in place under ``"tab_index"``.
    """
    selected_index = e.index
    _state["tab_index"] = selected_index
# Delimiter pairs the answer panel should treat as LaTeX. Every pair is
# registered with display=True. Backslashes are fully escaped so the strings
# contain no invalid escape sequences such as "\{".
LATEX_DELIMITERS = [
    {"left": "\\(", "right": "\\)", "display": True},
    {"left": "\\begin\\{equation\\}", "right": "\\end\\{equation\\}", "display": True},
    {"left": "\\begin\\{align\\}", "right": "\\end\\{align\\}", "display": True},
    {"left": "\\begin\\{alignat\\}", "right": "\\end\\{alignat\\}", "display": True},
    {"left": "\\begin\\{gather\\}", "right": "\\end\\{gather\\}", "display": True},
    {"left": "\\begin\\{CD\\}", "right": "\\end\\{CD\\}", "display": True},
    {"left": "\\[", "right": "\\]", "display": True},
]

# Build the UI: inputs (upload/sketch tabs, question box, buttons) on the
# left, the rendered answer on the right.
with gr.Blocks(css=css) as demo:
    # Tracks which input tab is active; updated by tabs_select().
    state = gr.State({"tab_index": 0})

    with gr.Row():
        with gr.Column():
            with gr.Tabs() as input_tabs:
                with gr.Tab("Upload"):
                    # No trailing comma here: the component itself is bound,
                    # not a 1-tuple wrapping it.
                    input_image = gr.Image(type="pil", label="Upload")
                with gr.Tab("Sketch"):
                    input_sketchpad = gr.Sketchpad(
                        type="pil", label="Sketch", layers=False
                    )
            input_tabs.select(fn=tabs_select, inputs=[state])

            input_text = gr.Textbox(label="input your question")

            with gr.Row():
                with gr.Column():
                    clear_btn = gr.ClearButton(
                        [input_image, input_sketchpad, input_text]
                    )
                with gr.Column():
                    submit_btn = gr.Button("Submit", variant="primary")

        with gr.Column():
            output_md = gr.Markdown(
                label="answer",
                latex_delimiters=LATEX_DELIMITERS,
                elem_id="qwen-md",
            )

    # Submitting sends the inputs and tab state to math_chat_bot and renders
    # its output in the answer panel.
    submit_btn.click(
        fn=math_chat_bot,
        inputs=[input_image, input_sketchpad, input_text, state],
        outputs=output_md,
    )

demo.launch()