Spaces:
Runtime error
Runtime error
File size: 4,014 Bytes
639e3fa 795b781 639e3fa 96c0816 639e3fa 795b781 639e3fa 795b781 32e3531 795b781 96c0816 32e3531 96c0816 32e3531 639e3fa 795b781 639e3fa 96c0816 639e3fa 96c0816 639e3fa 96c0816 639e3fa 795b781 639e3fa 795b781 639e3fa 795b781 639e3fa 795b781 639e3fa 795b781 639e3fa 795b781 639e3fa 795b781 639e3fa 795b781 639e3fa 795b781 639e3fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import os

import streamlit as st
import torch
from PIL import Image
from colpali_engine.models import ColPali, ColPaliProcessor
from huggingface_hub import login
from transformers import (
    AutoProcessor,
    PaliGemmaForConditionalGeneration,
    Qwen2VLForConditionalGeneration,
)

# Backward-compatible alias: the original code imported
# "AutoModelForImageToText", which is not a class transformers provides
# (the app died with an ImportError before the UI ever rendered).
# PaliGemma's dedicated conditional-generation class is the correct
# loader for google/paligemma-3b-mix-448.
AutoModelForImageToText = PaliGemmaForConditionalGeneration

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Authenticate against the Hugging Face Hub only when a token is configured.
# login(token=None) falls back to an interactive prompt, which crashes a
# headless deployment; skipping login simply leaves gated repos inaccessible.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)
# Load the PaliGemma processor and model used for OCR / image-to-text.
# NOTE: the original referenced "AutoModelForImageToText", which does not
# exist in transformers; PaliGemma ships its own generation class, imported
# locally here so this block is self-contained.
try:
    from transformers import PaliGemmaForConditionalGeneration

    processor_img_to_text = AutoProcessor.from_pretrained(
        "google/paligemma-3b-mix-448"
    )
    model_img_to_text = PaliGemmaForConditionalGeneration.from_pretrained(
        "google/paligemma-3b-mix-448"
    ).to(device)
except Exception as e:
    # Surface the failure in the UI and halt the script run cleanly.
    st.error(f"Error loading image-to-text model: {e}")
    st.stop()
# Load the ColPali retrieval model used for document search.
try:
    model_colpali = ColPali.from_pretrained(
        "vidore/colpali-v1.2", torch_dtype=torch.bfloat16
    ).to(device)
    # Load the processor from the same repo as the weights so the query /
    # image prompt configuration matches the fine-tuned checkpoint; the base
    # PaliGemma repo is gated and is not the checkpoint these weights pair
    # with.
    processor_colpali = ColPaliProcessor.from_pretrained("vidore/colpali-v1.2")
except Exception as e:
    st.error(f"Error loading ColPali model or processor: {e}")
    st.stop()
# Load the Qwen2-VL chat model used to describe the uploaded image.
try:
    # torch_dtype="auto" honors the checkpoint's native precision instead of
    # upcasting a 7B model to fp32, roughly halving host/GPU memory use.
    model_qwen = Qwen2VLForConditionalGeneration.from_pretrained(
        "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto"
    ).to(device)
    processor_qwen = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
except Exception as e:
    st.error(f"Error loading Qwen model or processor: {e}")
    st.stop()
# ---- Streamlit UI ----
st.title("OCR and Document Search Web Application")
st.write(
    "Upload an image containing text in both Hindi and English for OCR "
    "processing and keyword search."
)

# Accept a single image upload in the common raster formats.
uploaded_file = st.file_uploader(
    "Choose an image...", type=["jpg", "jpeg", "png"]
)
if uploaded_file is not None:
    try:
        image = Image.open(uploaded_file)
        st.image(image, caption='Uploaded Image.', use_column_width=True)
        st.write("")

        # OCR pass. PaliGemma's processor requires a text prompt alongside
        # the image (calling it with images only raises); "ocr" is the task
        # prefix the -mix checkpoints are trained on.
        inputs_img_to_text = processor_img_to_text(
            images=image, text="ocr", return_tensors="pt"
        ).to(device)
        with torch.no_grad():
            generated_ids_img_to_text = model_img_to_text.generate(
                **inputs_img_to_text, max_new_tokens=128
            )
        # Strip the echoed prompt tokens so only newly generated text is
        # decoded and shown to the user.
        prompt_len = inputs_img_to_text["input_ids"].shape[1]
        output_text_img_to_text = processor_img_to_text.batch_decode(
            generated_ids_img_to_text[:, prompt_len:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        st.write("Extracted Text from Image:")
        st.write(output_text_img_to_text)

        # Ask Qwen2-VL for a free-form description of the same image.
        conversation = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": "Describe this image."},
                ],
            }
        ]
        text_prompt = processor_qwen.apply_chat_template(
            conversation, add_generation_prompt=True
        )
        inputs_qwen = processor_qwen(
            text=[text_prompt], images=[image], padding=True, return_tensors="pt"
        ).to(device)

        with torch.no_grad():
            output_ids_qwen = model_qwen.generate(
                **inputs_qwen, max_new_tokens=128
            )
        # Drop the prompt portion of each generated sequence. (The original
        # comprehension shadowed its own iterable with the loop variable,
        # which worked only by accident of comprehension scoping.)
        generated_ids_qwen = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs_qwen.input_ids, output_ids_qwen)
        ]
        output_text_qwen = processor_qwen.batch_decode(
            generated_ids_qwen,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        st.write("Qwen Model Description:")
        st.write(output_text_qwen)

        # Case-insensitive keyword search over the extracted OCR text.
        keyword = st.text_input("Enter a keyword to search in the extracted text:")
        if keyword:
            # Guard against an empty decode result before indexing [0].
            haystack = output_text_img_to_text[0] if output_text_img_to_text else ""
            if keyword.lower() in haystack.lower():
                st.write(f"Keyword '{keyword}' found in the text.")
            else:
                st.write(f"Keyword '{keyword}' not found in the text.")
    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing the app.
        st.error(f"An error occurred: {e}")
# Streamlit executes this module top-to-bottom on every rerun; this guard
# only fires when the file is launched directly with `python` rather than
# via `streamlit run`.
if __name__ == "__main__":
    st.write("Deploying the web application...")
|