Spaces:

awacke1
/

SFT-Tiny-Titans-Agentic-RAG-Party

Running

App Files Files Community

SFT-Tiny-Titans-Agentic-RAG-Party / app.py

awacke1

Create app.py

d0aa8c4 verified 4 months ago

raw

history blame

20.4 kB

	#!/usr/bin/env python3
	import os
	import shutil
	import glob
	import base64
	import streamlit as st
	import pandas as pd
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from torch.utils.data import Dataset, DataLoader
	import csv
	import time
	from dataclasses import dataclass
	from typing import Optional, Tuple
	import zipfile
	import math
	from PIL import Image
	import random
	import logging
	from datetime import datetime
	import pytz
	from diffusers import StableDiffusionPipeline # For diffusion models
	from urllib.parse import quote

	# Set up logging: INFO-level root configuration plus a module-level logger.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Page Configuration: browser-tab title and icon, wide layout, sidebar
	# expanded on first load, and the links shown in the app menu.
	# NOTE(review): presumably this must stay ahead of every other st.* call --
	# confirm against the Streamlit version in use before reordering.
	st.set_page_config(
	    page_title="SFT Tiny Titans 🚀",
	    page_icon="🤖",
	    layout="wide",
	    initial_sidebar_state="expanded",
	    menu_items={
	        'Get Help': 'https://huggingface.co/awacke1',
	        'Report a bug': 'https://huggingface.co/spaces/awacke1',
	        'About': "Tiny Titans: Small models, big dreams, and a sprinkle of chaos! 🌌"
	    }
	)

	# Model Configuration Classes
	@dataclass
	class ModelConfig:
	    """Settings describing one causal language model handled by the app.

	    ``name`` doubles as the folder name used by :attr:`model_path`;
	    ``domain`` is optional and ``model_type`` defaults to ``"causal_lm"``.
	    """

	    name: str
	    base_model: str
	    size: str
	    domain: Optional[str] = None
	    model_type: str = "causal_lm"

	    @property
	    def model_path(self):
	        """Relative directory for this model: ``models/<name>``."""
	        return "models/{}".format(self.name)

	@dataclass
	class DiffusionConfig:
	    """Settings describing one diffusion model handled by the app.

	    ``name`` doubles as the folder name used by :attr:`model_path`.
	    """

	    name: str
	    base_model: str
	    size: str

	    @property
	    def model_path(self):
	        """Relative directory for this model: ``diffusion_models/<name>``."""
	        return "diffusion_models/{}".format(self.name)

	# Datasets
	class SFTDataset(Dataset):
	    """Prompt/response pairs tokenized for supervised fine-tuning.

	    Each item is a dict with ``input_ids``, ``attention_mask`` and ``labels``
	    tensors of length ``max_length``. Label positions that belong to the
	    prompt or to padding are set to ``-100`` so the loss is computed on the
	    response tokens only.

	    Args:
	        data: Sequence of mappings with ``"prompt"`` and ``"response"`` keys.
	        tokenizer: Callable tokenizer returning ``input_ids`` and
	            ``attention_mask`` tensors with a leading batch dimension.
	        max_length: Fixed sequence length used for padding and truncation.
	    """

	    def __init__(self, data, tokenizer, max_length=128):
	        self.data = data
	        self.tokenizer = tokenizer
	        self.max_length = max_length

	    def __len__(self):
	        return len(self.data)

	    def __getitem__(self, idx):
	        row = self.data[idx]
	        prompt = row["prompt"]
	        response = row["response"]
	        full_text = f"{prompt} {response}"
	        full_encoding = self.tokenizer(
	            full_text,
	            max_length=self.max_length,
	            padding="max_length",
	            truncation=True,
	            return_tensors="pt",
	        )
	        prompt_encoding = self.tokenizer(
	            prompt,
	            max_length=self.max_length,
	            padding=False,
	            truncation=True,
	            return_tensors="pt",
	        )
	        # squeeze(0) removes only the batch dimension; a bare squeeze() would
	        # also collapse the sequence dimension when max_length == 1.
	        input_ids = full_encoding["input_ids"].squeeze(0)
	        attention_mask = full_encoding["attention_mask"].squeeze(0)
	        labels = input_ids.clone()
	        prompt_len = prompt_encoding["input_ids"].shape[1]
	        # When the prompt alone fills the window the labels are left intact,
	        # otherwise every position would be ignored.
	        if prompt_len < self.max_length:
	            labels[:prompt_len] = -100
	        # Padding must not contribute to the loss. The pad token may share an
	        # id with a real token, so rely on the attention mask, not the id.
	        labels[attention_mask == 0] = -100
	        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

	class DiffusionDataset(Dataset):
	    """Pair each image with the caption stored at the same index.

	    The dataset length follows ``images``; ``texts`` is indexed in parallel.
	    """

	    def __init__(self, images, texts):
	        self.images, self.texts = images, texts

	    def __len__(self):
	        return len(self.images)

	    def __getitem__(self, idx):
	        picture = self.images[idx]
	        caption = self.texts[idx]
	        return dict(image=picture, text=caption)

	# Model Builder Classes
	class ModelBuilder:
	    """Load, fine-tune, save and query a causal language model.

	    Progress and results are reported through Streamlit widgets. The methods
	    that change state return ``self`` so calls can be chained.
	    """

	    def __init__(self):
	        self.config = None
	        self.model = None
	        self.tokenizer = None
	        self.sft_data = None
	        self.jokes = ["Why did the AI go to therapy? Too many layers to unpack! 😂", "Training complete! Time for a binary coffee break. ☕"]

	    def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
	        """Load model and tokenizer from ``model_path``.

	        Args:
	            model_path: Identifier or directory passed to ``from_pretrained``.
	            config: Optional configuration to remember on the builder.

	        Returns:
	            This builder.
	        """
	        with st.spinner(f"Loading {model_path}... ⏳"):
	            self.model = AutoModelForCausalLM.from_pretrained(model_path)
	            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
	            # Padding is required by the SFT dataset; fall back to EOS when
	            # the tokenizer defines no pad token.
	            if self.tokenizer.pad_token is None:
	                self.tokenizer.pad_token = self.tokenizer.eos_token
	            if config:
	                self.config = config
	        st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}")
	        return self

	    def fine_tune_sft(self, csv_path: str, epochs: int = 3, batch_size: int = 4):
	        """Fine-tune the loaded model on prompt/response rows from a CSV file.

	        Args:
	            csv_path: CSV file with ``prompt`` and ``response`` columns.
	            epochs: Number of passes over the data.
	            batch_size: Rows per optimisation step.

	        Returns:
	            This builder.

	        Raises:
	            ValueError: If a required column is missing or the file has no rows.
	        """
	        # newline="" is what the csv module expects; utf-8-sig transparently
	        # drops a byte-order mark that would otherwise corrupt the first
	        # header name.
	        with open(csv_path, "r", newline="", encoding="utf-8-sig") as f:
	            reader = csv.DictReader(f)
	            missing = {"prompt", "response"} - set(reader.fieldnames or [])
	            if missing:
	                raise ValueError(f"CSV is missing required column(s): {', '.join(sorted(missing))}")
	            self.sft_data = [{"prompt": row["prompt"], "response": row["response"]} for row in reader]
	        if not self.sft_data:
	            # Fail with a clear message instead of the sampler's generic error.
	            raise ValueError("CSV contains no training rows.")

	        dataset = SFTDataset(self.sft_data, self.tokenizer)
	        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
	        optimizer = torch.optim.AdamW(self.model.parameters(), lr=2e-5)

	        self.model.train()
	        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	        self.model.to(device)
	        for epoch in range(epochs):
	            with st.spinner(f"Training epoch {epoch + 1}/{epochs}... ⚙️"):
	                total_loss = 0
	                for batch in dataloader:
	                    optimizer.zero_grad()
	                    input_ids = batch["input_ids"].to(device)
	                    attention_mask = batch["attention_mask"].to(device)
	                    labels = batch["labels"].to(device)
	                    outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
	                    loss = outputs.loss
	                    loss.backward()
	                    optimizer.step()
	                    total_loss += loss.item()
	                st.write(f"Epoch {epoch + 1} completed. Average loss: {total_loss / len(dataloader):.4f}")
	        st.success(f"SFT Fine-tuning completed! 🎉 {random.choice(self.jokes)}")
	        return self

	    def save_model(self, path: str):
	        """Write the model and tokenizer to ``path``."""
	        with st.spinner("Saving model... 💾"):
	            parent = os.path.dirname(path)
	            # A bare directory name has no parent component, and
	            # os.makedirs("") would raise.
	            if parent:
	                os.makedirs(parent, exist_ok=True)
	            self.model.save_pretrained(path)
	            self.tokenizer.save_pretrained(path)
	        st.success(f"Model saved at {path}! ✅")

	    def evaluate(self, prompt: str, status_container=None):
	        """Generate a sampled continuation of ``prompt``.

	        Args:
	            prompt: Text to continue; truncated to 128 tokens.
	            status_container: Optional Streamlit container for status output.

	        Returns:
	            The decoded generation, or ``"Error: ..."`` if generation failed.
	        """
	        self.model.eval()
	        if status_container:
	            status_container.write("Preparing to evaluate... 🧠")
	        try:
	            with torch.no_grad():
	                inputs = self.tokenizer(prompt, return_tensors="pt", max_length=128, truncation=True).to(self.model.device)
	                outputs = self.model.generate(**inputs, max_new_tokens=50, do_sample=True, top_p=0.95, temperature=0.7)
	                return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
	        except Exception as e:
	            # Deliberate best effort: the failure is shown in the UI rather
	            # than aborting the Streamlit script.
	            if status_container:
	                status_container.error(f"Oops! Something broke: {str(e)} 💥")
	            return f"Error: {str(e)}"

	class DiffusionBuilder:
	    """Load, fine-tune and save a Stable Diffusion pipeline.

	    Progress and results are reported through Streamlit widgets. The methods
	    that change state return ``self`` so calls can be chained.
	    """

	    def __init__(self):
	        self.config = None
	        self.pipeline = None

	    def load_model(self, model_path: str, config: Optional[DiffusionConfig] = None):
	        """Load a pipeline from ``model_path`` and move it to GPU if available.

	        Args:
	            model_path: Identifier or directory passed to ``from_pretrained``.
	            config: Optional configuration to remember on the builder.

	        Returns:
	            This builder.
	        """
	        with st.spinner(f"Loading diffusion model {model_path}... ⏳"):
	            self.pipeline = StableDiffusionPipeline.from_pretrained(model_path)
	            self.pipeline.to("cuda" if torch.cuda.is_available() else "cpu")
	            if config:
	                self.config = config
	        st.success("Diffusion model loaded! 🎨")
	        return self

	    @staticmethod
	    def _image_to_tensor(image, image_size):
	        """Convert a PIL image to a ``(3, size, size)`` float tensor in [-1, 1]."""
	        if isinstance(image, torch.Tensor):
	            # Already a tensor: assume the caller prepared it.
	            return image
	        import numpy as np  # local import: the module does not import numpy

	        rgb = image.convert("RGB").resize((image_size, image_size))
	        pixels = torch.from_numpy(np.array(rgb, dtype=np.float32))
	        return pixels.permute(2, 0, 1) / 127.5 - 1.0

	    def fine_tune_sft(self, images, texts, epochs=3, image_size=512):
	        """Fine-tune the UNet with a noise-prediction loss on image/caption pairs.

	        Args:
	            images: PIL images (or prepared tensors), one per caption.
	            texts: Captions, indexed in parallel with ``images``.
	            epochs: Number of passes over the data.
	            image_size: Square side length images are resized to.

	        Returns:
	            This builder.

	        Raises:
	            ValueError: If ``images`` is empty.
	        """
	        # The default DataLoader collate cannot batch PIL images, so convert
	        # them to tensors up front.
	        tensors = [self._image_to_tensor(img, image_size) for img in images]
	        if not tensors:
	            raise ValueError("No images were provided for diffusion fine-tuning.")

	        pipe = self.pipeline
	        dataset = DiffusionDataset(tensors, texts)
	        dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
	        optimizer = torch.optim.AdamW(pipe.unet.parameters(), lr=1e-5)

	        device = pipe.device
	        # Scheduler hyper-parameters live on ``scheduler.config``.
	        num_train_timesteps = pipe.scheduler.config.num_train_timesteps
	        # Latents are scaled before being fed to the UNet; 0.18215 is the
	        # Stable Diffusion value, used if the VAE config does not carry one.
	        scaling_factor = getattr(pipe.vae.config, "scaling_factor", 0.18215)
	        # NOTE(review): the loss below targets the added noise. Checkpoints
	        # trained with v-prediction would need a velocity target -- confirm
	        # against scheduler.config.prediction_type.

	        pipe.unet.train()
	        for epoch in range(epochs):
	            with st.spinner(f"Training diffusion epoch {epoch + 1}/{epochs}... ⚙️"):
	                total_loss = 0
	                for batch in dataloader:
	                    optimizer.zero_grad()
	                    image = batch["image"].to(device=device, dtype=pipe.vae.dtype)
	                    text = batch["text"]
	                    # Only the UNet is optimised, so the frozen VAE and text
	                    # encoder run without autograd bookkeeping.
	                    with torch.no_grad():
	                        latents = pipe.vae.encode(image).latent_dist.sample() * scaling_factor
	                        token_ids = pipe.tokenizer(
	                            text,
	                            padding="max_length",
	                            max_length=pipe.tokenizer.model_max_length,
	                            truncation=True,
	                            return_tensors="pt",
	                        ).input_ids.to(device)
	                        text_embeddings = pipe.text_encoder(token_ids)[0]
	                    noise = torch.randn_like(latents)
	                    timesteps = torch.randint(0, num_train_timesteps, (latents.shape[0],), device=latents.device)
	                    noisy_latents = pipe.scheduler.add_noise(latents, noise, timesteps)
	                    pred_noise = pipe.unet(noisy_latents, timesteps, encoder_hidden_states=text_embeddings).sample
	                    loss = torch.nn.functional.mse_loss(pred_noise, noise)
	                    loss.backward()
	                    optimizer.step()
	                    total_loss += loss.item()
	                st.write(f"Epoch {epoch + 1} completed. Average loss: {total_loss / len(dataloader):.4f}")
	        st.success("Diffusion SFT Fine-tuning completed! 🎨")
	        return self

	    def save_model(self, path: str):
	        """Write the pipeline to ``path``."""
	        with st.spinner("Saving diffusion model... 💾"):
	            parent = os.path.dirname(path)
	            # A bare directory name has no parent component, and
	            # os.makedirs("") would raise.
	            if parent:
	                os.makedirs(parent, exist_ok=True)
	            self.pipeline.save_pretrained(path)
	        st.success(f"Diffusion model saved at {path}! ✅")

	# Utility Functions
	def get_download_link(file_path, mime_type="text/plain", label="Download"):
	    """Return an HTML anchor that downloads ``file_path`` as a data URI.

	    Args:
	        file_path: File whose bytes are embedded, base64-encoded, in the link.
	        mime_type: MIME type placed in the data URI.
	        label: Link text; inserted as-is, so it may contain markup.

	    Returns:
	        The ``<a ...>`` element as a string.
	    """
	    import html  # local import: only needed to escape the file name

	    with open(file_path, 'rb') as f:
	        data = f.read()
	    b64 = base64.b64encode(data).decode()
	    # The file name can come from user-chosen model names; escape it so a
	    # quote or angle bracket cannot break out of the attribute.
	    filename = html.escape(os.path.basename(file_path), quote=True)
	    return f'<a href="data:{mime_type};base64,{b64}" download="{filename}">{label} 📥</a>'

	def zip_directory(directory_path, zip_path):
	    """Write every file under ``directory_path`` into a deflated zip archive.

	    Archive member names are relative to the parent of ``directory_path``,
	    so the directory's own name is the top-level folder inside the archive.
	    """
	    parent_dir = os.path.dirname(directory_path)
	    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
	        for current_dir, _subdirs, filenames in os.walk(directory_path):
	            for filename in filenames:
	                source = os.path.join(current_dir, filename)
	                archive.write(source, os.path.relpath(source, parent_dir))

	def get_model_files(model_type="causal_lm"):
	    """List saved model directories for the given model type.

	    Args:
	        model_type: ``"causal_lm"`` searches ``models/``; any other value
	            searches ``diffusion_models/``.

	    Returns:
	        Sorted relative paths of the sub-directories found; empty if the
	        root directory does not exist.
	    """
	    path = "models/" if model_type == "causal_lm" else "diffusion_models/"
	    # Match the entries inside the root. Globbing the root itself would
	    # return only the root directory, never the saved models.
	    return sorted(d for d in glob.glob(f"{path}*") if os.path.isdir(d))

	def get_gallery_files(file_types):
	    """Return the sorted names of files in the working directory whose
	    extension is one of ``file_types`` (given without the leading dot)."""
	    patterns = (f"*.{ext}" for ext in file_types)
	    matches = [name for pattern in patterns for name in glob.glob(pattern)]
	    matches.sort()
	    return matches

	def generate_filename(text_line):
	    """Build a PNG file name from the current US/Central time and a caption.

	    The name is ``<timestamp>_<text>.png``: the timestamp uses a 12-hour
	    clock with an AM/PM suffix, and the text is the first 50 characters of
	    ``text_line`` with every non-alphanumeric character replaced by ``_``.
	    """
	    now = datetime.now(pytz.timezone('US/Central'))
	    stamp = now.strftime("%Y%m%d_%I%M%S_%p")
	    cleaned = [ch if ch.isalnum() else '_' for ch in text_line[:50]]
	    return "{}_{}.png".format(stamp, ''.join(cleaned))

	def display_search_links(query):
	    """Return space-separated Markdown links that search for ``query`` on
	    ArXiv, Wikipedia, Google and YouTube (query percent-encoded)."""
	    encoded = quote(query)
	    targets = (
	        ("ArXiv", "https://arxiv.org/search/?query="),
	        ("Wikipedia", "https://en.wikipedia.org/wiki/"),
	        ("Google", "https://www.google.com/search?q="),
	        ("YouTube", "https://www.youtube.com/results?search_query="),
	    )
	    return ' '.join(f"[{site}]({prefix}{encoded})" for site, prefix in targets)

	# Agent Class
	class PartyPlannerAgent:
	    """Demo agent that wraps a causal LM and produces a party-planning table."""

	    def __init__(self, model, tokenizer):
	        self.model = model
	        self.tokenizer = tokenizer
	        use_gpu = torch.cuda.is_available()
	        self.device = torch.device("cuda" if use_gpu else "cpu")
	        self.model.to(self.device)

	    def generate(self, prompt: str) -> str:
	        """Sample up to 100 new tokens continuing ``prompt`` (truncated to 128
	        tokens) and return the decoded text."""
	        self.model.eval()
	        with torch.no_grad():
	            encoded = self.tokenizer(prompt, return_tensors="pt", max_length=128, truncation=True)
	            encoded = encoded.to(self.device)
	            generated = self.model.generate(**encoded, max_new_tokens=100, do_sample=True, top_p=0.95, temperature=0.7)
	        return self.tokenizer.decode(generated[0], skip_special_tokens=True)

	    def plan_party(self, task: str) -> pd.DataFrame:
	        """Run the model on ``task``, show search links, and return a table of
	        locations with travel time to Wayne Manor and a luxury idea each."""
	        search_result = "Latest trends for 2025: Gold-plated Batman statues, VR superhero battles."
	        prompt = f"Given this context: '{search_result}'\n{task}"
	        # The generated text is not used below; the table is built from the
	        # fixed entries that follow.
	        plan_text = self.generate(prompt)
	        st.markdown(f"Search Links: {display_search_links('superhero party trends')}", unsafe_allow_html=True)

	        venue = "Wayne Manor"
	        coordinates = {
	            "Wayne Manor": (42.3601, -71.0589),
	            "New York": (40.7128, -74.0060),
	            "Los Angeles": (34.0522, -118.2437),
	            "London": (51.5074, -0.1278),
	        }
	        ideas = (
	            ("New York", "Gold-plated Batman statues"),
	            ("Los Angeles", "VR superhero battles"),
	            ("London", "Live stunt shows"),
	        )
	        rows = [
	            {
	                "Location": city,
	                "Travel Time (hrs)": calculate_cargo_travel_time(coordinates[city], coordinates[venue]),
	                "Luxury Idea": idea,
	            }
	            for city, idea in ideas
	        ]
	        # The venue itself needs no travel.
	        rows.append({"Location": venue, "Travel Time (hrs)": 0.0, "Luxury Idea": "Holographic displays"})
	        return pd.DataFrame(rows)

	def calculate_cargo_travel_time(origin_coords: Tuple[float, float], destination_coords: Tuple[float, float], cruising_speed_kmh: float = 750.0) -> float:
	    """Estimate cargo flight time in hours between two coordinates.

	    The great-circle (haversine) distance is increased by 10%, divided by
	    the cruising speed, and one hour is added; the result is rounded to two
	    decimals.

	    Args:
	        origin_coords: ``(latitude, longitude)`` in degrees.
	        destination_coords: ``(latitude, longitude)`` in degrees.
	        cruising_speed_kmh: Cruising speed in km/h; must be positive.

	    Returns:
	        Estimated travel time in hours.

	    Raises:
	        ValueError: If ``cruising_speed_kmh`` is not positive.
	    """
	    if cruising_speed_kmh <= 0:
	        raise ValueError("cruising_speed_kmh must be positive")

	    EARTH_RADIUS_KM = 6371.0
	    lat1, lon1 = map(math.radians, origin_coords)
	    lat2, lon2 = map(math.radians, destination_coords)
	    dlat = lat2 - lat1
	    dlon = lon2 - lon1
	    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
	    # Rounding can push ``a`` marginally above 1 for near-antipodal points,
	    # which would make asin raise a domain error.
	    c = 2 * math.asin(math.sqrt(min(1.0, a)))
	    distance = EARTH_RADIUS_KM * c
	    actual_distance = distance * 1.1
	    flight_time = (actual_distance / cruising_speed_kmh) + 1.0
	    return round(flight_time, 2)

	# Main App
	st.title("SFT Tiny Titans 🚀 (Small but Mighty!)")

	# Sidebar Galleries: one section per media type, showing at most two rows
	# of the matching files found in the working directory.
	st.sidebar.header("Galleries 🎨")
	for gallery_type, file_types in [
	    ("Image Gallery 📸", ["png", "jpg", "jpeg"]),
	    ("Video Gallery 🎥", ["mp4"]),
	    ("Audio Gallery 🎶", ["mp3"])
	]:
	    st.sidebar.subheader(gallery_type)
	    files = get_gallery_files(file_types)
	    if files:
	        cols_num = st.sidebar.slider(f"{gallery_type} Columns", 1, 5, 3, key=f"{gallery_type}_cols")
	        cols = st.sidebar.columns(cols_num)
	        for idx, file in enumerate(files[:cols_num * 2]):
	            with cols[idx % cols_num]:
	                if "Image" in gallery_type:
	                    st.image(Image.open(file), caption=file, use_column_width=True)
	                elif "Video" in gallery_type:
	                    st.video(file)
	                elif "Audio" in gallery_type:
	                    st.audio(file)

	# Model management: pick a saved model directory and load it into the session.
	st.sidebar.subheader("Model Management 🗂️")
	model_type = st.sidebar.selectbox("Model Type", ["Causal LM", "Diffusion"])
	model_dirs = get_model_files("causal_lm" if model_type == "Causal LM" else "diffusion")
	selected_model = st.sidebar.selectbox("Select Saved Model", ["None"] + model_dirs)
	if selected_model != "None" and st.sidebar.button("Load Model 📂"):
	    if model_type == "Causal LM":
	        builder_cls, config_cls = ModelBuilder, ModelConfig
	    else:
	        builder_cls, config_cls = DiffusionBuilder, DiffusionConfig
	    # Replace a builder left over from the other model type; reusing it
	    # would load the checkpoint with the wrong loader.
	    if not isinstance(st.session_state.get('builder'), builder_cls):
	        st.session_state['builder'] = builder_cls()
	    # normpath drops a trailing separator so basename is never empty.
	    config = config_cls(name=os.path.basename(os.path.normpath(selected_model)), base_model="unknown", size="small")
	    st.session_state['builder'].load_model(selected_model, config)
	    st.session_state['model_loaded'] = True
	    st.rerun()

	# Tabs
	tab1, tab2, tab3, tab4, tab5 = st.tabs(["Build Tiny Titan 🌱", "Fine-Tune Titan 🔧", "Test Titan 🧪", "Agentic RAG Party 🌐", "Diffusion SFT 🎨"])

	# Tab 1: download a base model, save it under a chosen name and keep the
	# builder in the session for the other tabs.
	with tab1:
	    st.header("Build Tiny Titan 🌱")
	    model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
	    if model_type == "Causal LM":
	        base_model = st.selectbox("Select Tiny Model", ["HuggingFaceTB/SmolLM-135M", "HuggingFaceTB/SmolLM-360M", "Qwen/Qwen1.5-0.5B-Chat"])
	    else:
	        base_model = st.selectbox("Select Tiny Diffusion Model", ["stabilityai/stable-diffusion-2-1", "runwayml/stable-diffusion-v1-5", "CompVis/stable-diffusion-v1-4"])
	    # Keep the suggested name stable across reruns. A default rebuilt from
	    # the clock changes the widget's parameters on every rerun, which resets
	    # the input and discards whatever name the user typed.
	    if 'default_model_name' not in st.session_state:
	        st.session_state['default_model_name'] = f"tiny-titan-{int(time.time())}"
	    model_name = st.text_input("Model Name", st.session_state['default_model_name'])
	    if st.button("Download Model ⬇️"):
	        config = (ModelConfig if model_type == "Causal LM" else DiffusionConfig)(name=model_name, base_model=base_model, size="small")
	        builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
	        builder.load_model(base_model, config)
	        builder.save_model(config.model_path)
	        st.session_state['builder'] = builder
	        st.session_state['model_loaded'] = True
	        # Suggest a fresh name for the next build.
	        st.session_state.pop('default_model_name', None)
	        st.rerun()

	# Tab 2: fine-tune the loaded causal LM on an uploaded CSV, save the result
	# under a new name and offer it as a zip download.
	with tab2:
	    st.header("Fine-Tune Titan 🔧")
	    if 'builder' not in st.session_state or not st.session_state.get('model_loaded', False):
	        st.warning("Please build or load a Titan first! ⚠️")
	    elif not isinstance(st.session_state['builder'], ModelBuilder):
	        # Explain the otherwise empty tab when a diffusion model is loaded.
	        st.info("CSV fine-tuning applies to Causal LM Titans; use the Diffusion SFT tab for diffusion models.")
	    else:
	        titan = st.session_state['builder']
	        uploaded_csv = st.file_uploader("Upload CSV for SFT", type="csv")
	        if uploaded_csv and st.button("Fine-Tune with Uploaded CSV 🔄"):
	            csv_path = f"uploaded_sft_data_{int(time.time())}.csv"
	            with open(csv_path, "wb") as f:
	                f.write(uploaded_csv.read())
	            new_model_name = f"{titan.config.name}-sft-{int(time.time())}"
	            new_config = ModelConfig(name=new_model_name, base_model=titan.config.base_model, size="small")
	            titan.fine_tune_sft(csv_path)
	            # Rename only after training succeeded, so a failed run does not
	            # leave the builder pointing at a model that was never saved.
	            titan.config = new_config
	            titan.save_model(new_config.model_path)
	            zip_path = f"{new_config.model_path}.zip"
	            zip_directory(new_config.model_path, zip_path)
	            st.markdown(get_download_link(zip_path, "application/zip", "Download Fine-Tuned Titan"), unsafe_allow_html=True)

	# Tab 3: send a free-form prompt to the loaded causal LM and show its reply.
	with tab3:
	    st.header("Test Titan 🧪")
	    titan_ready = 'builder' in st.session_state and st.session_state.get('model_loaded', False)
	    if not titan_ready:
	        st.warning("Please build or load a Titan first! ⚠️")
	    elif isinstance(st.session_state['builder'], ModelBuilder):
	        test_prompt = st.text_area("Enter Test Prompt", "What is AI?")
	        if st.button("Run Test ▶️"):
	            result = st.session_state['builder'].evaluate(test_prompt)
	            st.write(f"Generated Response: {result}")

	# Tab 4: run the party-planner demo agent on the loaded causal LM and show
	# the resulting table.
	with tab4:
	    st.header("Agentic RAG Party 🌐")
	    causal_titan_ready = (
	        'builder' in st.session_state
	        and st.session_state.get('model_loaded', False)
	        and isinstance(st.session_state['builder'], ModelBuilder)
	    )
	    if not causal_titan_ready:
	        st.warning("Please build or load a Causal LM Titan first! ⚠️")
	    elif st.button("Run Agentic RAG Demo 🎉"):
	        loaded = st.session_state['builder']
	        agent = PartyPlannerAgent(model=loaded.model, tokenizer=loaded.tokenizer)
	        task = "Plan a luxury superhero-themed party at Wayne Manor."
	        plan_df = agent.plan_party(task)
	        st.dataframe(plan_df)

	# Tab 5: fine-tune the loaded diffusion pipeline on uploaded images paired
	# line-by-line with captions, then save, preview and offer a zip download.
	with tab5:
	    st.header("Diffusion SFT 🎨")
	    if 'builder' not in st.session_state or not st.session_state.get('model_loaded', False) or not isinstance(st.session_state['builder'], DiffusionBuilder):
	        st.warning("Please build or load a Diffusion Titan first! ⚠️")
	    else:
	        uploaded_files = st.file_uploader("Upload Images/Videos", type=["png", "jpg", "jpeg", "mp4", "mp3"], accept_multiple_files=True)
	        text_input = st.text_area("Enter Text (one line per image)", "Line 1\nLine 2\nLine 3")
	        if uploaded_files and st.button("Fine-Tune Diffusion Model 🔄"):
	            # Only image uploads take part in training; video and audio are skipped.
	            images = [Image.open(f) for f in uploaded_files if f.type.startswith("image")]
	            if not images:
	                # Without this guard an upload of only video/audio files
	                # reaches training with an empty dataset and crashes.
	                st.warning("No image files found in the upload; add at least one PNG or JPEG to fine-tune. ⚠️")
	            else:
	                # Pad with empty captions or drop extras so there is exactly
	                # one caption per image.
	                texts = text_input.splitlines()
	                texts = (texts + [""] * len(images))[:len(images)]

	                st.session_state['builder'].fine_tune_sft(images, texts)
	                new_model_name = f"{st.session_state['builder'].config.name}-sft-{int(time.time())}"
	                new_config = DiffusionConfig(name=new_model_name, base_model=st.session_state['builder'].config.base_model, size="small")
	                st.session_state['builder'].config = new_config
	                st.session_state['builder'].save_model(new_config.model_path)

	                for idx, (img, text) in enumerate(zip(images, texts)):
	                    # The index prefix keeps names unique when two captions
	                    # are equal and saved within the same second.
	                    filename = f"{idx:03d}_{generate_filename(text)}"
	                    img.save(filename)
	                    st.image(img, caption=filename)
	                zip_path = f"{new_config.model_path}.zip"
	                zip_directory(new_config.model_path, zip_path)
	                st.markdown(get_download_link(zip_path, "application/zip", "Download Fine-Tuned Diffusion Model"), unsafe_allow_html=True)