adil9858 committed
Commit 76c5b28 · verified · 1 Parent(s): c6425bb

Update app.py

Files changed (1)
  1. app.py +69 -71
app.py CHANGED
@@ -1,71 +1,69 @@
- import streamlit as st
- from transformers import AutoProcessor, AutoModelForCausalLM
- from PIL import Image
- import torch
- import os
-
- # Load the Florence model and processor
- @st.cache_resource
- def load_model():
-     model_id = 'microsoft/Florence-2-large'
-     model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, torch_dtype='auto').eval().cuda()
-     processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-     return model, processor
-
- model, processor = load_model()
-
- # Function to run the model
- def run_example(task_prompt, image, text_input=None):
-     if text_input is None:
-         prompt = task_prompt
-     else:
-         prompt = task_prompt + text_input
-     inputs = processor(text=prompt, images=image, return_tensors="pt").to('cuda', torch.float16)
-     generated_ids = model.generate(
-         input_ids=inputs["input_ids"].cuda(),
-         pixel_values=inputs["pixel_values"].cuda(),
-         max_new_tokens=1024,
-         early_stopping=False,
-         do_sample=False,
-         num_beams=3,
-     )
-     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-     parsed_answer = processor.post_process_generation(
-         generated_text,
-         task=task_prompt,
-         image_size=(image.width, image.height)
-     )
-     return parsed_answer
-
- # Streamlit UI
- st.title("Microsoft Florence Image Captioning")
-
- # File uploader
- uploaded_file = st.file_uploader("Upload an image (PNG or JPG)", type=["png", "jpg", "jpeg"])
-
- if uploaded_file is not None:
-     # Convert and display the image
-     image = Image.open(uploaded_file).convert("RGB")
-     st.image(image, caption="Uploaded Image", use_column_width=True)
-
-     # Generate captions
-     st.subheader("Generated Captions")
-
-     with st.spinner("Generating caption..."):
-         caption = run_example('<CAPTION>', image)
-         detailed_caption = run_example('<DETAILED_CAPTION>', image)
-         more_detailed_caption = run_example('<MORE_DETAILED_CAPTION>', image)
-
-     st.write("**Caption:**", caption)
-     st.write("**Detailed Caption:**", detailed_caption)
-     st.write("**More Detailed Caption:**", more_detailed_caption)
-
-     # Option to save the output
-     if st.button("Save Captions"):
-         output_path = "captions.txt"
-         with open(output_path, "w") as file:
-             file.write(f"Caption: {caption}\n")
-             file.write(f"Detailed Caption: {detailed_caption}\n")
-             file.write(f"More Detailed Caption: {more_detailed_caption}\n")
-         st.success(f"Captions saved to {output_path}!")
-
 
+ import streamlit as st
+ from transformers import AutoProcessor, AutoModelForCausalLM
+ from PIL import Image
+ import torch
+
+ # Load the Florence model and processor
+ @st.cache_resource
+ def load_model():
+     model_id = 'microsoft/Florence-2-large'
+     model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, torch_dtype='auto').eval()
+     processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+     return model, processor
+
+ model, processor = load_model()
+
+ # Function to run the model
+ def run_example(task_prompt, image, text_input=None):
+     if text_input is None:
+         prompt = task_prompt
+     else:
+         prompt = task_prompt + text_input
+     inputs = processor(text=prompt, images=image, return_tensors="pt").to(torch.float32)  # Ensure CPU compatibility
+     generated_ids = model.generate(
+         input_ids=inputs["input_ids"],
+         pixel_values=inputs["pixel_values"],
+         max_new_tokens=1024,
+         early_stopping=False,
+         do_sample=False,
+         num_beams=3,
+     )
+     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+     parsed_answer = processor.post_process_generation(
+         generated_text,
+         task=task_prompt,
+         image_size=(image.width, image.height)
+     )
+     return parsed_answer
+
+ # Streamlit UI
+ st.title("Microsoft Florence Image Captioning (CPU)")
+
+ # File uploader
+ uploaded_file = st.file_uploader("Upload an image (PNG or JPG)", type=["png", "jpg", "jpeg"])
+
+ if uploaded_file is not None:
+     # Convert and display the image
+     image = Image.open(uploaded_file).convert("RGB")
+     st.image(image, caption="Uploaded Image", use_column_width=True)
+
+     # Generate captions
+     st.subheader("Generated Captions")
+
+     with st.spinner("Generating caption..."):
+         caption = run_example('<CAPTION>', image)
+         detailed_caption = run_example('<DETAILED_CAPTION>', image)
+         more_detailed_caption = run_example('<MORE_DETAILED_CAPTION>', image)
+
+     st.write("**Caption:**", caption)
+     st.write("**Detailed Caption:**", detailed_caption)
+     st.write("**More Detailed Caption:**", more_detailed_caption)
+
+     # Option to save the output
+     if st.button("Save Captions"):
+         output_path = "captions.txt"
+         with open(output_path, "w") as file:
+             file.write(f"Caption: {caption}\n")
+             file.write(f"Detailed Caption: {detailed_caption}\n")
+             file.write(f"More Detailed Caption: {more_detailed_caption}\n")
+         st.success(f"Captions saved to {output_path}!")
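
The change above drops the `.cuda()` calls and casts the processor output to float32 so the app can run on CPU-only hardware. For reference, here is a minimal device-agnostic sketch, assuming the same model and processor, that prefers a GPU when one is available and otherwise falls back to the CPU path used in this commit; the helper names `pick_device` and `load_florence` are illustrative and not part of app.py.

```python
# Sketch only: pick_device / load_florence are hypothetical names, not from app.py.
import torch
from transformers import AutoModelForCausalLM, AutoProcessor


def pick_device() -> torch.device:
    # Prefer a GPU when present; otherwise fall back to CPU, as this commit does.
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")


def load_florence(model_id: str = "microsoft/Florence-2-large"):
    device = pick_device()
    # float16 keeps GPU memory low; CPU inference generally needs float32.
    dtype = torch.float16 if device.type == "cuda" else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        model_id, trust_remote_code=True, torch_dtype=dtype
    ).to(device).eval()
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    return model, processor, device, dtype
```

With this, `run_example` could move its tensors with `processor(text=prompt, images=image, return_tensors="pt").to(device, dtype)` (the same pattern the previous version used for CUDA) and pass them to `model.generate` unchanged, so one code path would serve both GPU and CPU deployments.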