Update app.py
app.py CHANGED
@@ -1,97 +1,105 @@
 import streamlit as st
 import gc
 from PIL import Image
 import torch
 from transformers import (
     BlipProcessor,
     BlipForConditionalGeneration,
     DetrImageProcessor,
     DetrForObjectDetection,
 )
 import google.generativeai as genai
 
 # Configure Generative AI
 genai.configure(api_key='AIzaSyB1wxTDQcB2YT_6l2nm4MrhAmCVPzfkHNU')
 gemini_model = genai.GenerativeModel("gemini-1.5-flash")
 
 # Load BLIP model and processor
 @st.cache_resource
 def load_blip():
     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
     return processor, model
 
 # Load DETR model and processor
 @st.cache_resource
 def load_detr():
     processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
     model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
     return processor, model
 
 blip_processor, blip_model = load_blip()
 detr_processor, detr_model = load_detr()
 
 # Streamlit app
 st.title("Art Of Visual Storytelling")
 
 # Dropdown menu for genre selection
 genre = st.selectbox(
     "Select the genre of the story:",
     ["Fantasy", "Adventure", "Sci-Fi", "Romance", "Mystery", "Horror", "Comedy", "Drama"]
 )
 
 # Slider for specifying story length
 story_length = st.slider(
     "Select the desired story length (number of words):",
     min_value=50, max_value=1000, value=200, step=50
 )
 
 # Image upload and processing
 uploaded_image = st.file_uploader("Upload an Image", type=['png', 'jpg', 'jpeg'])
 
 if uploaded_image:
     # Display the uploaded image
     raw_image = Image.open(uploaded_image).convert('RGB')
     st.image(raw_image, caption='Uploaded Image', use_column_width=True)
 
     # Detect objects using DETR model
     detr_inputs = detr_processor(images=raw_image, return_tensors="pt")
     detr_model.eval()
     with torch.no_grad():
         detr_outputs = detr_model(**detr_inputs)
 
     target_sizes = torch.tensor([raw_image.size[::-1]])  # (height, width)
     results = detr_processor.post_process_object_detection(detr_outputs, target_sizes=target_sizes, threshold=0.9)[0]
 
     # Extract object names
     detected_objects = []
     for score, label in zip(results["scores"], results["labels"]):
         label_name = detr_model.config.id2label[label.item()]
         detected_objects.append(label_name)
 
     # Display the detected objects before generating the caption
     st.subheader("Detected Objects:")
     st.text(", ".join(set(detected_objects)))  # Show unique objects
 
     # Generate caption using BLIP model
     blip_inputs = blip_processor(raw_image, return_tensors="pt")
     blip_outputs = blip_model.generate(**blip_inputs)
     caption = blip_processor.decode(blip_outputs[0], skip_special_tokens=True)
 
     # Display the generated caption after detected objects
     st.subheader("Generated Caption:")
     st.text(caption)
 
-#
-
-
-
-
-
-
-
-
-
-
-
-
+    # Language selection box appears after the caption
+    st.subheader("Select Story Language:")
+    language = st.selectbox(
+        "Select the language of the story:",
+        ["English", "Hindi", "Bengali", "Tamil"]
+    )
+
+    # Submit button to generate the story
+    if st.button("Generate Story"):
+        prompt = (
+            f"I have an image. The caption is '{caption}', and the detected objects are {', '.join(set(detected_objects))}. "
+            f"Write a {genre.lower()} story in {language.lower()} with approximately {story_length} words using elements of this caption and the detected objects."
+        )
+        response = gemini_model.generate_content(prompt)
+        st.subheader("Generated Story:")
+        st.text_area("Story Output", value=response.text, height=300)
+
+    # Cleanup memory
+    del raw_image, detr_inputs, detr_outputs, blip_inputs, blip_outputs, results
+    gc.collect()
+
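The updated file still configures google.generativeai with an API key embedded directly in the source. A minimal sketch of an alternative setup, assuming the deployment supplies the key through an environment variable (the name GEMINI_API_KEY is illustrative and not part of this commit; Streamlit secrets would work similarly):

import os

import streamlit as st
import google.generativeai as genai

# Hypothetical key lookup: read from the environment instead of the source file.
# "GEMINI_API_KEY" is an assumed variable name, not something defined by this commit.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    st.error("GEMINI_API_KEY is not set; story generation is unavailable.")
    st.stop()

genai.configure(api_key=api_key)
gemini_model = genai.GenerativeModel("gemini-1.5-flash")

In a hosted deployment the value would typically be provided as a platform secret or environment variable rather than committed alongside app.py.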