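"""Anime & Friends Image Commentary.

A small Streamlit app: BLIP captions an uploaded image, then a chat model on
the Hugging Face Inference API rewrites that caption in the voice of a chosen
character (Goku, Elmo, Kirby, or Pikachu).

Run with ``streamlit run`` on this file after exporting HUGGINGFACE_TOKEN.
Requires streamlit, transformers (with a torch backend), pillow, and huggingface_hub.
"""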
import os

import streamlit as st
from huggingface_hub import InferenceClient
from PIL import Image
from transformers import pipeline


def setup_session():
    """Initialise per-session state: the HF token and a reusable InferenceClient."""
    if 'app_ready' not in st.session_state:
        print("Powering up the Dragon Radar...")
        st.session_state['app_ready'] = True
        # Read the API token from the environment; without it, Inference API
        # calls may fail or be heavily rate-limited.
        st.session_state['hf_token'] = os.getenv("HUGGINGFACE_TOKEN")
        st.session_state['client'] = InferenceClient(api_key=st.session_state['hf_token'])
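

# Loading BLIP is slow, so cache the captioning pipeline across Streamlit
# reruns instead of rebuilding it for every uploaded image. st.cache_resource
# requires Streamlit >= 1.18; on older versions, drop the decorator and build
# the pipeline inline inside main().
@st.cache_resource
def load_caption_model():
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

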
def main():
    setup_session()

    st.header("Anime & Friends Image Commentary")
    st.write("Let your favorite characters react to any image!")

    character = st.selectbox(
        "Select your commentator",
        ["goku", "elmo", "kirby", "pikachu"]
    )

    # Restrict uploads to formats PIL can open so Image.open doesn't fail
    # on arbitrary files.
    uploaded_img = st.file_uploader("Share your image!", type=["png", "jpg", "jpeg", "webp"])

    if uploaded_img is not None:
        image = Image.open(uploaded_img)
        st.image(image)
        # Generate a neutral caption for the image, then wrap it in a
        # character-specific prompt.
        caption_model = load_caption_model()
        base_caption = caption_model(image)[0]['generated_text']

        character_reactions = {
            "goku": f"Describe this image like you're Goku from Dragon Ball Z, mentioning power levels: {base_caption}",
            "elmo": f"Describe this image like you're Elmo from Sesame Street, speaking in third person: {base_caption}",
            "kirby": f"Describe this image like you're Kirby, being cute and mentioning food: {base_caption}",
            "pikachu": f"Describe this image like you're Pikachu, using 'pika' frequently: {base_caption}"
        }

        messages = [
            {
                "role": "user",
                "content": character_reactions[character]
            }
        ]
        # Stream the character's reaction from the chat model.
        response_stream = st.session_state['client'].chat.completions.create(
            model="meta-llama/Llama-3.2-3B-Instruct",
            messages=messages,
            max_tokens=500,
            stream=True
        )

        character_response = ''
        for chunk in response_stream:
            # Some stream chunks (e.g. the final one) carry no content.
            delta = chunk.choices[0].delta.content
            if delta:
                character_response += delta

        # st.write_stream could display tokens as they arrive (Streamlit >= 1.31);
        # here the full reply is shown once streaming finishes, as in the original.
        st.write(character_response)


if __name__ == '__main__':
    main()