nursulu committed
Commit · ceec8fc
1 Parent(s): 6c696fb

add functions
Files changed:
- app.py +85 -8
- utils/__init__.py +0 -0
- utils/__pycache__/__init__.cpython-311.pyc +0 -0
- utils/__pycache__/image_utils.cpython-311.pyc +0 -0
- utils/__pycache__/model_utils.cpython-311.pyc +0 -0
- utils/image_utils.py +148 -0
- utils/model_utils.py +79 -0
app.py
CHANGED
@@ -1,17 +1,94 @@
 import streamlit as st
 from PIL import Image
+import base64
+import requests
+import json
+import os
+import re
+import torch
+from peft import PeftModel, PeftConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import argparse
 import io
 
-
-
+from utils.model_utils import get_model_caption
+from utils.image_utils import overlay_caption
 
+@st.cache_resource
+def load_models():
+    base_model = AutoModelForCausalLM.from_pretrained("google/gemma-2b")
+    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
+    model_angry = PeftModel.from_pretrained(base_model, "NursNurs/outputs_gemma2b_angry")
+    model_happy = PeftModel.from_pretrained(base_model, "NursNurs/outputs_gemma2b_happy")
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    base_model.to(device)
+    model_happy.to(device)
+    model_angry.to(device)
+
+    # Load the adapters for specific moods
+    base_model.load_adapter("NursNurs/outputs_gemma2b_happy", "happy")
+    base_model.load_adapter("NursNurs/outputs_gemma2b_angry", "angry")
+
+    return base_model, tokenizer, model_happy, model_angry, device
+
+# x = st.slider('Select a value')
+# st.write(x, 'squared is', x * x)
+def generate_meme_from_image(img_path, base_model, tokenizer, hf_token, output_dir, device='cuda'):
+    caption = get_model_caption(img_path, base_model, tokenizer, hf_token)
+    image = overlay_caption(caption, img_path, output_dir)
+    return image, caption
 
 st.title("Image Upload and Processing App")
 
-# Upload the image
-uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "png", "jpeg"])
 
-
-
-
-
+def main():
+    st.title("Meme Generator with Mood")
+
+    base_model, tokenizer, model_happy, model_angry, device = load_models()
+
+    # Input widget to upload an image
+    uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "png", "jpeg"])
+
+    # Input widget to add Hugging Face token
+    hf_token = st.text_input("Enter your Hugging Face Token", type="password")
+
+    # Dropdown to select mood
+    # mood = st.selectbox("Select Mood", options=["happy", "angry"])
+
+    # Directory for saving the meme (optional, but you can let users set this if needed)
+    output_dir = "results"
+
+    if uploaded_image is not None and hf_token:
+        # The downstream helpers expect a file path, so save the upload to disk first
+        os.makedirs(output_dir, exist_ok=True)
+        img_path = os.path.join(output_dir, uploaded_image.name)
+        Image.open(uploaded_image).save(img_path)
+
+        # Generate meme when button is pressed
+        if st.button("Generate Meme"):
+            with st.spinner('Generating meme...'):
+                image, caption = generate_meme_from_image(img_path, base_model, tokenizer, hf_token, output_dir, device)
+
+            # Display the output
+            st.image(image, caption=f"Generated Meme: {caption}")
+
+            # Optionally allow downloading the meme
+            buf = io.BytesIO()
+            image.save(buf, format="PNG")
+            byte_im = buf.getvalue()
+
+            st.download_button(
+                label="Download Meme",
+                data=byte_im,
+                file_name="generated_meme.png",
+                mime="image/png"
+            )
+
+if __name__ == '__main__':
+    main()
+# # Upload the image
+# uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "png", "jpeg"])
+
+# # Process and display if image is uploaded
+# if uploaded_image is not None:
+#     image = Image.open(uploaded_image)
+#     st.image(image, caption="Uploaded Image", use_column_width=True)
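For context, here is a minimal sketch of exercising the same pipeline outside the Streamlit UI. This is not part of the commit: "demo.jpg" is a hypothetical test image, HF_TOKEN is assumed to hold a valid Hugging Face API token, and calling a Streamlit-cached function as plain Python may emit harmless runtime warnings:

    # Hypothetical smoke test for the meme pipeline (not in the commit).
    import os
    from app import load_models, generate_meme_from_image

    os.makedirs("results", exist_ok=True)
    base_model, tokenizer, model_happy, model_angry, device = load_models()
    image, caption = generate_meme_from_image(
        "demo.jpg",              # hypothetical local test image
        base_model,
        tokenizer,
        os.environ["HF_TOKEN"],  # your Hugging Face API token
        "results",               # output_dir, matching the app default
        device,
    )
    print("Caption:", caption)

Inside the Space the same flow is driven by streamlit run app.py.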
utils/__init__.py
ADDED
File without changes
utils/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (236 Bytes)

utils/__pycache__/image_utils.cpython-311.pyc
ADDED
Binary file (6.98 kB)

utils/__pycache__/model_utils.cpython-311.pyc
ADDED
Binary file (5.05 kB)
utils/image_utils.py
ADDED
@@ -0,0 +1,148 @@
+import os
+import re
+from PIL import Image, ImageDraw, ImageFont
+import textwrap
+
+
+def get_unique_filename(filename):
+    """
+    Generate a unique filename by appending a number if a file with the same name already exists.
+    """
+    if not os.path.exists(filename):
+        return filename
+
+    base, ext = os.path.splitext(filename)
+    counter = 1
+    new_filename = f"{base}_{counter}{ext}"
+
+    while os.path.exists(new_filename):
+        counter += 1
+        new_filename = f"{base}_{counter}{ext}"
+
+    return new_filename
+
+
+def save_image_with_unique_name(image, path):
+    unique_path = get_unique_filename(path)
+    image.save(unique_path)
+    print(f"Image saved as: {unique_path}")
+
+def find_text_in_answer(text):
+    print("Full caption:", text)
+    text = text.split("Caption:")[1]
+    text = text.replace("\n", "")
+    text = text.replace("model", "")
+    # Remove everything that looks like <>
+    text = re.sub(r'<[^>]*>', '', text)
+
+    # Remove non-alphanumeric characters (keeping spaces)
+    text = re.sub(r'[^a-zA-Z0-9\?\!\s]', '', text)
+    print("Filtered caption:", text)
+    if text:
+        return text
+    else:
+        return "Me when I couldn't parse the model's answer but I still want you to smile :)"
+
+
+def draw_text(draw, text, position, font, max_width, outline_color="black", text_color="white", outline_width=2):
+    """
+    Draw text on the image with an outline, splitting it into lines if necessary and returning the total height used by the text.
+    The text is horizontally centered in the specified max_width.
+    """
+    print("Adding the caption on the image...")
+
+    # Split the text into multiple lines based on the max width
+    lines = []
+    words = text.split()
+    line = ''
+    for word in words:
+        test_line = f'{line} {word}'.strip()
+        bbox = draw.textbbox((0, 0), test_line, font=font)
+        width = bbox[2] - bbox[0]  # Width of the text
+        if width <= max_width:
+            line = test_line
+        else:
+            if line:  # Avoid appending empty lines
+                lines.append(line)
+            line = word
+    if line:
+        lines.append(line)
+
+    y = position[1]
+
+    # Draw the text with an outline (black) first, centered horizontally
+    for line in lines:
+        # Calculate the width of the line and adjust the x position to center it
+        bbox = draw.textbbox((0, 0), line, font=font)
+        line_width = bbox[2] - bbox[0]
+        x = (max_width - line_width) // 2 + position[0]
+
+        # Draw the outline by drawing the text multiple times around the original position
+        for offset_x in [-outline_width, 0, outline_width]:
+            for offset_y in [-outline_width, 0, outline_width]:
+                if offset_x != 0 or offset_y != 0:
+                    draw.text((x + offset_x, y + offset_y), line, font=font, fill=outline_color)
+
+        # Draw the main text (white) on top of the outline
+        draw.text((x, y), line, font=font, fill=text_color)
+        y += bbox[3] - bbox[1]  # Update y position based on line height
+
+    return y - position[1]  # Return the total height used by the text
+
+def calculate_text_height(caption, font, max_width):
+    """
+    Calculate the height of the text when drawn, given the caption, font, and maximum width.
+    """
+    image = Image.new('RGB', (max_width, 1))
+    draw = ImageDraw.Draw(image)
+    return draw_text(draw, caption, (0, 0), font, max_width)
+
+def add_caption(image_path, caption, output_path, top_margin=10, bottom_margin=10, max_caption_length=10, min_distance_from_bottom_mm=10):
+    image = Image.open(image_path)
+    draw = ImageDraw.Draw(image)
+    width, height = image.size
+
+    # Convert mm to pixels (assuming 96 DPI)
+    dpi = 96
+    min_distance_from_bottom_px = min_distance_from_bottom_mm * dpi / 25.4
+
+    # Split the caption into two parts if it is too long
+    if len(caption.split()) > max_caption_length:
+        font_size = 20
+        total_len = len(caption.split())
+        mid = int(total_len / 2)
+
+        top_caption = caption.split()[:mid]
+        bottom_caption = caption.split()[mid:]
+
+        top_caption = " ".join(top_caption)
+        bottom_caption = " ".join(bottom_caption)
+    else:
+        top_caption = ""
+        bottom_caption = caption
+        font_size = 30
+
+    # Load a font
+    font = ImageFont.truetype(r"fonts/Anton/Anton-Regular.ttf", font_size)
+
+    # Top caption
+    top_caption_position = (width // 10, top_margin)
+    draw_text(draw, top_caption, top_caption_position, font, width - 2 * (width // 10))
+
+    # Bottom caption
+    if bottom_caption:  # Draw bottom caption only if it's not empty
+        # Calculate the height of the bottom caption
+        bottom_caption_height = calculate_text_height(bottom_caption, font, width - 2 * (width // 10))
+        bottom_caption_position = (width // 10, height - min_distance_from_bottom_px - bottom_caption_height)
+        draw_text(draw, bottom_caption, bottom_caption_position, font, width - 2 * (width // 10))
+
+    save_image_with_unique_name(image, output_path)
+    return image
+
+
+def overlay_caption(text, img_path, output_dir):
+    img_name = img_path.split("/")[-1]
+    text = find_text_in_answer(text)
+    text = text.strip(".")
+    image = add_caption(img_path, text, output_dir + "/" + img_name)
+    return image
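Saving always goes through get_unique_filename, so repeated runs never overwrite earlier memes. A quick, self-contained illustration (a sketch; the files below are created only for demonstration):

    # Hypothetical demonstration of get_unique_filename (not in the commit).
    from utils.image_utils import get_unique_filename

    open("meme.png", "w").close()           # simulate an existing result
    print(get_unique_filename("meme.png"))  # -> meme_1.png
    open("meme_1.png", "w").close()
    print(get_unique_filename("meme.png"))  # -> meme_2.png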
utils/model_utils.py
ADDED
@@ -0,0 +1,79 @@
+import base64
+import requests
+import json
+import pandas as pd
+import os
+from tqdm import tqdm
+import re
+import torch
+
+
+
+def query_clip(data, hf_token):
+    API_URL = "https://api-inference.huggingface.co/models/openai/clip-vit-base-patch32"
+    headers = {"Authorization": f"Bearer {hf_token}"}
+    with open(data["image_path"], "rb") as f:
+        img = f.read()
+    payload = {
+        "parameters": data["parameters"],
+        "inputs": base64.b64encode(img).decode("utf-8")
+    }
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+
+def get_sentiment(img_path, hf_token):
+    print("Getting the sentiment of the image...")
+    output = query_clip({
+        "image_path": img_path,
+        "parameters": {"candidate_labels": ["angry", "happy"]},
+    }, hf_token)
+    try:
+        print("Sentiment:", output[0]['label'])
+        return output[0]['label']
+    except Exception:
+        print(output)
+        print("If the model is loading, try again in a minute. If you've reached a query limit (300 per hour), try again within the next hour.")
+
+
+def query_blip(filename, hf_token):
+    API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
+    headers = {"Authorization": f"Bearer {hf_token}"}
+    with open(filename, "rb") as f:
+        file = f.read()
+    response = requests.post(API_URL, headers=headers, data=file)
+    return response.json()
+
+
+def get_description(img_path, hf_token):
+    print("Getting the context of the image...")
+    output = query_blip(img_path, hf_token)
+
+    try:
+        print("Context:", output[0]['generated_text'])
+        return output[0]['generated_text']
+    except Exception:
+        print(output)
+        print("The model is not available right now due to query limits. Try running again now or within the next hour.")
+
+
+def get_model_caption(img_path, base_model, tokenizer, hf_token, device='cuda'):
+    sentiment = get_sentiment(img_path, hf_token)
+    description = get_description(img_path, hf_token)
+
+    prompt_template = """
+    Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n
+    You are given a topic. Your task is to generate a meme caption based on the topic. Only output the meme caption and nothing more.
+    Topic: {query}
+    <end_of_turn>\n<start_of_turn>model Caption:
+    """
+    prompt = prompt_template.format(query=description)
+
+    print("Generating captions...")
+    encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
+    model_inputs = encodeds.to(device)
+    base_model.set_adapter(sentiment)
+    base_model.to(device)
+    generated_ids = base_model.generate(**model_inputs, max_new_tokens=20, do_sample=True, pad_token_id=tokenizer.eos_token_id)
+    decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+    return decoded
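To make the contract between the two utils modules concrete, here is how a decoded Gemma output is expected to flow through find_text_in_answer from utils/image_utils.py (a sketch; the decoded string is invented for illustration, not real model output):

    # Hypothetical example of the caption-parsing step (not in the commit).
    from utils.image_utils import find_text_in_answer

    decoded = (
        "Topic: a cat sitting on a laptop\n"
        "<start_of_turn>model Caption: When you finish the report! <end_of_turn>"
    )
    caption = find_text_in_answer(decoded)
    print(caption)  # prints the cleaned caption, e.g. "When you finish the report!"

Everything before the "Caption:" marker is dropped, tag-like spans such as <end_of_turn> are removed, and only alphanumerics, whitespace, "?" and "!" survive, which is why overlay_caption can draw the result directly onto the image.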