Spaces:

abdullafahem
/

trip_planner

Sleeping

trip_planner / app.py

Abdulla Fahem

Add application file

7bdf2e1 2 months ago

15.7 kB

	import os
	import sys
	import torch
	import pandas as pd
	import streamlit as st
	from datetime import datetime
	from transformers import (
	T5ForConditionalGeneration,
	T5Tokenizer,
	Trainer,
	TrainingArguments,
	DataCollatorForSeq2Seq
	)
	from torch.utils.data import Dataset
	import random

	# Seed Python's and PyTorch's random number generators with the same fixed
	# value so repeated runs draw the same random sequences
	random.seed(42)
	torch.manual_seed(42)

	# Process-wide environment flag, set at import time
	# (presumably to tolerate a duplicate OpenMP runtime being loaded - confirm)
	os.environ.update({'KMP_DUPLICATE_LIB_OK': 'TRUE'})

	class TravelDataset(Dataset):
	    """
	    Torch dataset that converts travel-planning rows into seq2seq samples

	    Each row provides the prompt fields read by format_input_text and a
	    'target' column holding the reference travel plan.
	    """

	    # Label value skipped by the loss; used to blank out padded target positions
	    IGNORE_INDEX = -100

	    def __init__(self, data, tokenizer, max_length=512):
	        """
	        Initialize the dataset for travel planning

	        Parameters:
	        - data: DataFrame containing travel planning data
	        - tokenizer: Tokenizer for encoding input and output
	        - max_length: Maximum sequence length
	        """
	        self.tokenizer = tokenizer
	        self.data = data
	        self.max_length = max_length

	        # Print dataset information
	        print(f"Dataset loaded with {len(data)} samples")
	        print("Columns:", list(data.columns))

	    def __len__(self):
	        """Return the number of rows in the underlying data"""
	        return len(self.data)

	    def _encode(self, text):
	        """Tokenize text, padded/truncated to exactly max_length, as 'pt' tensors"""
	        return self.tokenizer(
	            text,
	            max_length=self.max_length,
	            padding='max_length',
	            truncation=True,
	            return_tensors='pt'
	        )

	    def __getitem__(self, idx):
	        """
	        Prepare an individual training sample

	        Parameters:
	        - idx: Positional index of the row (looked up with .iloc)

	        Returns a dictionary with input_ids, attention_mask, and labels.
	        Padded positions of labels are set to IGNORE_INDEX so that padding
	        does not contribute to the training loss.
	        """
	        row = self.data.iloc[idx]

	        # Prompt built from the row, and the reference travel plan
	        input_encodings = self._encode(self.format_input_text(row))
	        target_encodings = self._encode(row['target'])

	        # squeeze(0) drops only the batch dimension added by return_tensors='pt';
	        # a bare squeeze() would also collapse the sequence axis when max_length == 1
	        labels = target_encodings['input_ids'].squeeze(0)
	        target_mask = target_encodings['attention_mask'].squeeze(0)

	        # Targets are padded to max_length; blank out the padding in the labels
	        labels = labels.masked_fill(target_mask == 0, self.IGNORE_INDEX)

	        return {
	            'input_ids': input_encodings['input_ids'].squeeze(0),
	            'attention_mask': input_encodings['attention_mask'].squeeze(0),
	            'labels': labels
	        }

	    @staticmethod
	    def format_input_text(row):
	        """
	        Format input text for the model

	        Builds the prompt from the 'destination', 'days', 'budget' and
	        'interests' entries of row (anything offering .get), falling back to
	        'Unknown', 3, 'Moderate' and 'Culture, Food' for missing keys.
	        """
	        destination = row.get('destination', 'Unknown')
	        days = row.get('days', 3)
	        budget = row.get('budget', 'Moderate')
	        interests = row.get('interests', 'Culture, Food')

	        return f"Plan a trip to {destination} for {days} days with a {budget} budget. Include activities related to: {interests}"

	def load_dataset(path="hf://datasets/osunlp/TravelPlanner/train.csv"):
	    """
	    Load the travel planning dataset from a CSV file

	    Parameters:
	    - path: Location handed to pandas.read_csv; defaults to the TravelPlanner
	      training split on HuggingFace

	    Returns:
	    - pandas DataFrame with the dataset

	    On any failure (unreadable source, missing required columns) the error is
	    printed and the process is terminated with sys.exit(1). That raises
	    SystemExit, which an 'except Exception' in a caller does not intercept.
	    """
	    required_columns = ['destination', 'days', 'budget', 'interests', 'target']

	    try:
	        # Load dataset from CSV
	        data = pd.read_csv(path)

	        # Basic data validation: report every missing column at once
	        missing = [col for col in required_columns if col not in data.columns]
	        if missing:
	            raise ValueError(f"Missing required column(s): {', '.join(missing)}")

	        # Print dataset info
	        print("Dataset successfully loaded")
	        print(f"Total samples: {len(data)}")
	        print("Columns:", list(data.columns))

	        return data
	    except Exception as e:
	        print(f"Error loading dataset: {e}")
	        sys.exit(1)

	def train_model():
	    """
	    Train the T5 model for travel planning

	    Loads the dataset, fine-tunes 't5-base' on the first 80% of the rows
	    (the remaining 20% are used for evaluation), and saves the model and
	    tokenizer to ./trained_travel_planner.

	    Returns:
	    - Trained model
	    - Tokenizer

	    Returns (None, None) if loading the data or training fails; the error is
	    printed rather than raised.
	    """
	    # load_dataset() signals failure through sys.exit(), i.e. SystemExit, which
	    # is not a subclass of Exception. Catch it explicitly so that a bad or
	    # unreachable dataset yields the (None, None) result callers check for
	    # instead of terminating the whole process.
	    try:
	        data = load_dataset()
	    except (Exception, SystemExit) as load_error:
	        print(f"Error during model training: failed to load dataset ({load_error!r})")
	        return None, None

	    try:
	        # Initialize model and tokenizer
	        print("Initializing T5 model and tokenizer...")
	        tokenizer = T5Tokenizer.from_pretrained('t5-base', legacy=False)
	        model = T5ForConditionalGeneration.from_pretrained('t5-base')

	        # Split data into training and validation sets (in file order, no shuffling)
	        train_size = int(0.8 * len(data))
	        train_data = data[:train_size]
	        val_data = data[train_size:]

	        print(f"Training set size: {len(train_data)}")
	        print(f"Validation set size: {len(val_data)}")

	        # Create datasets
	        train_dataset = TravelDataset(train_data, tokenizer)
	        val_dataset = TravelDataset(val_data, tokenizer)

	        # Training arguments; the output directory is timestamped, so every
	        # run writes its checkpoints to a new folder
	        training_args = TrainingArguments(
	            output_dir=f"./travel_planner_model_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
	            num_train_epochs=3,
	            per_device_train_batch_size=4,
	            per_device_eval_batch_size=4,
	            warmup_steps=500,
	            weight_decay=0.01,
	            logging_dir="./logs",
	            logging_steps=10,
	            evaluation_strategy="steps",
	            eval_steps=50,
	            save_steps=100,
	            load_best_model_at_end=True,
	        )

	        # Data collator
	        data_collator = DataCollatorForSeq2Seq(
	            tokenizer=tokenizer,
	            model=model,
	            padding=True
	        )

	        # Initialize trainer
	        trainer = Trainer(
	            model=model,
	            args=training_args,
	            train_dataset=train_dataset,
	            eval_dataset=val_dataset,
	            data_collator=data_collator,
	        )

	        # Train the model
	        print("Starting model training...")
	        trainer.train()

	        # Save the model and tokenizer
	        model_path = "./trained_travel_planner"
	        model.save_pretrained(model_path)
	        tokenizer.save_pretrained(model_path)

	        print("Model training completed and saved!")
	        return model, tokenizer

	    except Exception as e:
	        print(f"Error during model training: {str(e)}")
	        return None, None

	def generate_travel_plan(destination, days, interests, budget, model, tokenizer):
	    """
	    Generate a travel plan using the trained model

	    Parameters:
	    - destination: Travel destination
	    - days: Trip duration
	    - interests: User's interests (a list of strings; a single string is
	      treated as one interest)
	    - budget: Trip budget level
	    - model: Trained T5 model
	    - tokenizer: Model tokenizer

	    Returns:
	    - Generated travel plan, or the text "Could not generate travel plan."
	      if anything fails (the error is printed, not raised)
	    """
	    try:
	        # A bare string would otherwise be joined character by character
	        if isinstance(interests, str):
	            interests = [interests]

	        # Format input prompt
	        prompt = f"Plan a trip to {destination} for {days} days with a {budget} budget. Include activities related to: {', '.join(interests)}"

	        # Tokenize input
	        inputs = tokenizer(
	            prompt,
	            return_tensors="pt",
	            max_length=512,
	            padding="max_length",
	            truncation=True
	        )

	        # Move to GPU if available
	        if torch.cuda.is_available():
	            model = model.cuda()

	        # The model is handed over right after training; switch it to
	        # evaluation mode so dropout does not perturb generation
	        model.eval()

	        # Put the inputs wherever the model actually lives (CPU, CUDA, ...)
	        # rather than assuming CUDA is the only alternative to CPU
	        device = model.device
	        inputs = {k: v.to(device) for k, v in inputs.items()}

	        # Generate output
	        outputs = model.generate(
	            **inputs,
	            max_length=512,
	            num_beams=4,
	            no_repeat_ngram_size=3,
	            num_return_sequences=1
	        )

	        # Decode and return the travel plan
	        return tokenizer.decode(outputs[0], skip_special_tokens=True)

	    except Exception as e:
	        print(f"Error generating travel plan: {e}")
	        return "Could not generate travel plan."

	def main():
	"""
	Render the Streamlit travel-planner page

	Configures the page, offers a "Retrain Model" button in the sidebar, makes
	sure a model/tokenizer pair is stored in st.session_state (training one via
	train_model() when none is present), collects destination, days, budget and
	interests, and on "Generate Travel Plan" shows the generated plan in two
	tabs together with a download button.

	NOTE(review): every line of this function sits at the same indentation
	level, so the original nesting of the with/if blocks cannot be recovered
	with certainty from this file - restore it before running.
	"""
	st.set_page_config(
	page_title="AI Travel Planner",
	page_icon="✈️",
	layout="wide"
	)

	st.title("✈️ AI Travel Planner")
	st.markdown("### Plan your perfect trip with AI assistance!")

	# Add training button in sidebar only
	with st.sidebar:
	st.header("Model Management")
	if st.button("Retrain Model"):
	with st.spinner("Training new model... This will take a while..."):
	model, tokenizer = train_model()
	# train_model() returns (None, None) on failure; only a real model is stored
	if model is not None:
	st.session_state['model'] = model
	st.session_state['tokenizer'] = tokenizer
	st.success("Model training completed!")

	# Add model information
	st.markdown("### Model Information")
	if 'model' in st.session_state:
	st.success("✓ Model loaded")
	st.info("""
	This model was trained on travel plans for:
	- Destinations from HuggingFace dataset
	- Flexible days duration
	- Multiple budget levels
	- Various interest combinations
	""")

	# Load or train model
	# No model in the session yet: train_model() is called; nothing here loads
	# a previously saved model from disk
	if 'model' not in st.session_state:
	with st.spinner("Loading AI model... Please wait..."):
	model, tokenizer = train_model() # Changed from load_or_train_model
	# Abort rendering when training failed
	if model is None or tokenizer is None:
	st.error("Failed to load/train the AI model. Please try again.")
	return
	# Attribute-style access here; the retrain path above uses key-style access
	st.session_state.model = model
	st.session_state.tokenizer = tokenizer

	# Create two columns for input form
	col1, col2 = st.columns([2, 1])

	with col1:
	# Input form in a card-like container
	with st.container():
	st.markdown("### 🎯 Plan Your Trip")

	# Destination and Duration row
	dest_col, days_col = st.columns(2)
	with dest_col:
	destination = st.text_input(
	"🌍 Destination",
	placeholder="e.g., Paris, Tokyo, New York...",
	help="Enter the city you want to visit"
	)

	with days_col:
	days = st.slider(
	"📅 Number of days",
	min_value=1,
	max_value=14,
	value=3,
	help="Select the duration of your trip"
	)

	# Budget and Interests row
	budget_col, interests_col = st.columns(2)
	with budget_col:
	budget = st.selectbox(
	"💰 Budget Level",
	["Budget", "Moderate", "Luxury"],
	help="Select your preferred budget level"
	)

	with interests_col:
	# "Culture" and "Food" are pre-selected
	interests = st.multiselect(
	"🎯 Interests",
	["Culture", "History", "Food", "Nature", "Shopping",
	"Adventure", "Relaxation", "Art", "Museums"],
	["Culture", "Food"],
	help="Select up to three interests to personalize your plan"
	)

	with col2:
	# Tips and information
	st.markdown("### 💡 Travel Tips")
	st.info("""
	- Choose up to 3 interests for best results
	- Consider your travel season
	- Budget levels affect activity suggestions
	- Plans are customizable after generation
	""")

	# Generate button centered
	# NOTE: rebinds col1/col2, which previously held the input-form columns
	col1, col2, col3 = st.columns([1, 2, 1])
	with col2:
	generate_button = st.button(
	"🎨 Generate Travel Plan",
	type="primary",
	use_container_width=True
	)

	# Handle a click on the generate button: validate inputs, then generate
	if generate_button:
	if not destination:
	st.error("Please enter a destination!")
	return

	if not interests:
	st.error("Please select at least one interest!")
	return

	# More than three interests only produces a warning; there is no return here
	if len(interests) > 3:
	st.warning("For best results, please select up to 3 interests.")

	with st.spinner("🤖 Creating your personalized travel plan..."):
	travel_plan = generate_travel_plan(
	destination,
	days,
	interests,
	budget,
	st.session_state.model,
	st.session_state.tokenizer
	)

	# Create an expander for the success message with trip overview
	with st.expander("✨ Your travel plan is ready! Click to see trip overview", expanded=True):
	col1, col2, col3 = st.columns(3)
	with col1:
	st.metric("Destination", destination)
	with col2:
	# Singular/plural wording for the duration
	if days == 1:
	st.metric("Duration", f"{days} day")
	else:
	st.metric("Duration", f"{days} days")
	with col3:
	st.metric("Budget", budget)

	st.write("Selected Interests:", ", ".join(interests))

	# Display the plan in tabs with improved styling
	plan_tab, summary_tab = st.tabs(["📋 Detailed Itinerary", "ℹ️ Trip Summary"])

	with plan_tab:
	# Add a container for better spacing
	with st.container():
	# Add trip title
	st.markdown(f"## 🌍 {days}-Day Trip to {destination}")
	st.markdown("---")

	# Display the formatted plan
	# The generated text is passed to st.markdown as-is
	st.markdown(travel_plan)

	# Add export options in a nice container
	with st.container():
	st.markdown("---")
	col1, col2 = st.columns([1, 4])
	with col1:
	# File name is built from the destination: lower-cased, spaces -> underscores
	st.download_button(
	label="📥 Download Plan",
	data=travel_plan,
	file_name=f"travel_plan_{destination.lower().replace(' ', '_')}.md",
	mime="text/markdown",
	use_container_width=True
	)

	with summary_tab:
	# Create three columns for summary information with cards
	with st.container():
	st.markdown("## Trip Overview")
	sum_col1, sum_col2, sum_col3 = st.columns(3)

	with sum_col1:
	with st.container():
	st.markdown("### 📍 Destination Details")
	st.markdown(f"Location: {destination}")
	# Same singular/plural wording as in the overview expander
	if days == 1:
	st.markdown(f"Duration: {days} day")
	else:
	st.markdown(f"Duration: {days} days")
	st.markdown(f"Budget Level: {budget}")

	with sum_col2:
	with st.container():
	st.markdown("### 🎯 Trip Focus")
	st.markdown("Selected Interests:")
	# One bullet per selected interest
	for interest in interests:
	st.markdown(f"- {interest}")

	with sum_col3:
	with st.container():
	st.markdown("### ⚠️ Travel Tips")
	st.info(
	"• Verify opening hours\n"
	"• Check current prices\n"
	"• Confirm availability\n"
	"• Consider seasonal factors"
	)

	# Script entry point: build the page only when this file is executed as the
	# main module, not when it is imported
	if __name__ == "__main__":
	main()