import os

import streamlit as st
from huggingface_hub import InferenceClient
from PIL import Image
from transformers import pipeline


@st.cache_resource
def load_caption_model():
    # Cache the BLIP captioning pipeline so it loads once, not on every Streamlit rerun.
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")


def setup_session():
    if 'app_ready' not in st.session_state:
        print("Powering up the Dragon Radar...")
        st.session_state['app_ready'] = True
        st.session_state['hf_token'] = os.getenv("HUGGINGFACE_TOKEN")
        if st.session_state['hf_token'] is None:
            # os.getenv returns None when the variable is unset; warn early
            # since unauthenticated calls to the gated Llama model will fail.
            st.warning("HUGGINGFACE_TOKEN is not set; API calls will fail.")
        st.session_state['client'] = InferenceClient(api_key=st.session_state['hf_token'])


def main():
    setup_session()

    st.header("Anime & Friends Image Commentary")
    st.write("Let your favorite characters react to any image!")

    character = st.selectbox(
        "Select your commentator",
        ["goku", "elmo", "kirby", "pikachu"]
    )

    uploaded_img = st.file_uploader("Share your image!", type=["png", "jpg", "jpeg"])

    if uploaded_img is not None:
        # BLIP expects 3-channel input, so normalize RGBA/greyscale uploads to RGB.
        image = Image.open(uploaded_img).convert("RGB")
        st.image(image)

        caption_model = load_caption_model()
        base_caption = caption_model(image)[0]['generated_text']

        character_reactions = {
            "goku": f"Describe this image like you're Goku from Dragon Ball Z, mentioning power levels: {base_caption}",
            "elmo": f"Describe this image like you're Elmo from Sesame Street, speaking in third person: {base_caption}",
            "kirby": f"Describe this image like you're Kirby, being cute and mentioning food: {base_caption}",
            "pikachu": f"Describe this image like you're Pikachu, using 'pika' frequently: {base_caption}"
        }

        messages = [
            {
                "role": "user",
                "content": character_reactions[character]
            }
        ]

        # Generate the character response with Llama, streaming tokens as they arrive.
        response_stream = st.session_state['client'].chat.completions.create(
            model="meta-llama/Llama-3.2-3B-Instruct",
            messages=messages,
            max_tokens=500,
            stream=True
        )

        # Accumulate the streamed deltas into a single placeholder so the text
        # grows in place instead of being re-printed on every chunk.
        placeholder = st.empty()
        character_response = ''
        for chunk in response_stream:
            # Some chunks (e.g. the final one) carry no delta content; skip them
            # to avoid concatenating None onto the string.
            if chunk.choices[0].delta.content is not None:
                character_response += chunk.choices[0].delta.content
                placeholder.write(character_response)


if __name__ == '__main__':
    main()
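# To launch the app (assuming this file is saved as app.py):
#   export HUGGINGFACE_TOKEN="hf_..."   # the token read by os.getenv above
#   streamlit run app.py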