import inspect
import os
import random
import sys
from datetime import datetime

import torch
import pandas as pd
import streamlit as st
from transformers import (
    T5ForConditionalGeneration,
    T5Tokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorForSeq2Seq,
)
from torch.utils.data import Dataset

# Ensure reproducibility
torch.manual_seed(42)
random.seed(42)

# Environment setup
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

# Location of the training CSV read by load_dataset().
_DATASET_URL = "hf://datasets/osunlp/TravelPlanner/train.csv"

# Columns load_dataset() insists on before the data is used for training.
_REQUIRED_COLUMNS = ('destination', 'days', 'budget', 'interests', 'target')

# Directory the fine-tuned model and tokenizer are written to after training.
_MODEL_SAVE_PATH = "./trained_travel_planner"

# Label id the Hugging Face seq2seq loss skips (see the T5 `labels` docs).
_IGNORE_INDEX = -100


def _build_prompt(destination, days, budget, interests):
    """
    Build the text prompt shared by training and inference

    Keeping the template in one place guarantees the model is queried with
    exactly the wording it was fine-tuned on.
    """
    return (
        f"Plan a trip to {destination} for {days} days with a {budget} budget. "
        f"Include activities related to: {interests}"
    )


def _eval_strategy_argument():
    """
    Return the TrainingArguments keyword that selects the evaluation schedule

    Newer transformers releases name it `eval_strategy`; older ones only
    accept `evaluation_strategy`. The installed signature is inspected so the
    training setup works with either.
    """
    parameters = inspect.signature(TrainingArguments.__init__).parameters
    if "eval_strategy" in parameters:
        return "eval_strategy"
    return "evaluation_strategy"


class TravelDataset(Dataset):
    """
    Torch dataset that turns rows of a travel-planning DataFrame into
    tokenized (prompt, target plan) pairs for seq2seq training
    """

    def __init__(self, data, tokenizer, max_length=512):
        """
        Initialize the dataset for travel planning

        Parameters:
        - data: DataFrame containing travel planning data
        - tokenizer: Tokenizer for encoding input and output
        - max_length: Maximum sequence length
        """
        self.tokenizer = tokenizer
        self.data = data
        self.max_length = max_length

        # Print dataset information
        print(f"Dataset loaded with {len(data)} samples")
        print("Columns:", list(data.columns))

    def __len__(self):
        """Return the number of rows in the underlying DataFrame"""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Prepare an individual training sample

        Parameters:
        - idx: Positional row index (looked up with `iloc`)

        Returns a dictionary with input_ids, attention_mask, and labels.
        Padding positions in `labels` are set to -100 so they are excluded
        from the loss instead of teaching the model to emit pad tokens.
        """
        row = self.data.iloc[idx]

        input_encodings = self._encode(self.format_input_text(row))
        target_encodings = self._encode(row['target'])

        # squeeze(0) drops only the batch dimension added by return_tensors='pt'
        labels = target_encodings['input_ids'].squeeze(0)
        pad_token_id = self.tokenizer.pad_token_id
        if pad_token_id is not None:
            labels = labels.masked_fill(labels == pad_token_id, _IGNORE_INDEX)

        return {
            'input_ids': input_encodings['input_ids'].squeeze(0),
            'attention_mask': input_encodings['attention_mask'].squeeze(0),
            'labels': labels,
        }

    def _encode(self, text):
        """Tokenize `text` to a fixed-length (padded/truncated) tensor batch of one"""
        return self.tokenizer(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

    @staticmethod
    def format_input_text(row):
        """
        Format input text for the model

        This method creates a prompt that the model will use to generate a
        travel plan.

        Parameters:
        - row: Mapping-like object supporting `.get` (e.g. a DataFrame row);
          missing keys fall back to the defaults below

        Returns:
        - Prompt string
        """
        destination = row.get('destination', 'Unknown')
        days = row.get('days', 3)
        budget = row.get('budget', 'Moderate')
        interests = row.get('interests', 'Culture, Food')
        return _build_prompt(destination, days, budget, interests)


def load_dataset():
    """
    Load the travel planning dataset from HuggingFace

    Returns:
    - pandas DataFrame with the dataset

    Raises:
    - RuntimeError: the CSV could not be read
    - ValueError: one or more required columns are missing

    Failures are raised rather than terminating the process, so the caller's
    `except Exception` handler can report them to the user.
    """
    try:
        # Load dataset from CSV
        data = pd.read_csv(_DATASET_URL)
    except Exception as e:
        print(f"Error loading dataset: {e}")
        raise RuntimeError(f"Error loading dataset: {e}") from e

    # Basic data validation
    missing = [col for col in _REQUIRED_COLUMNS if col not in data.columns]
    if missing:
        message = f"Missing required column(s): {', '.join(missing)}"
        print(f"Error loading dataset: {message}")
        raise ValueError(message)

    # Print dataset info
    print("Dataset successfully loaded")
    print(f"Total samples: {len(data)}")
    print("Columns:", list(data.columns))

    return data


def train_model():
    """
    Train the T5 model for travel planning

    Loads the dataset, fine-tunes `t5-base` on an 80/20 train/validation
    split (rows are split in file order, without shuffling) and saves the
    model and tokenizer to `./trained_travel_planner`.

    Returns:
    - Trained model
    - Tokenizer

    Both are None when any step fails; the error is printed.
    """
    try:
        # Load dataset
        data = load_dataset()

        # Initialize model and tokenizer
        print("Initializing T5 model and tokenizer...")
        tokenizer = T5Tokenizer.from_pretrained('t5-base', legacy=False)
        model = T5ForConditionalGeneration.from_pretrained('t5-base')

        # Split data into training and validation sets
        train_size = int(0.8 * len(data))
        train_data = data[:train_size]
        val_data = data[train_size:]

        print(f"Training set size: {len(train_data)}")
        print(f"Validation set size: {len(val_data)}")

        # Create datasets
        train_dataset = TravelDataset(train_data, tokenizer)
        val_dataset = TravelDataset(val_data, tokenizer)

        # Training arguments
        training_args = TrainingArguments(
            output_dir=f"./travel_planner_model_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            num_train_epochs=3,
            per_device_train_batch_size=4,
            per_device_eval_batch_size=4,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir="./logs",
            logging_steps=10,
            eval_steps=50,
            # save_steps stays a multiple of eval_steps, which
            # load_best_model_at_end requires
            save_steps=100,
            load_best_model_at_end=True,
            # keyword name differs between transformers releases
            **{_eval_strategy_argument(): "steps"},
        )

        # Data collator
        data_collator = DataCollatorForSeq2Seq(
            tokenizer=tokenizer,
            model=model,
            padding=True
        )

        # Initialize trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            data_collator=data_collator,
        )

        # Train the model
        print("Starting model training...")
        trainer.train()

        # Save the model and tokenizer
        model.save_pretrained(_MODEL_SAVE_PATH)
        tokenizer.save_pretrained(_MODEL_SAVE_PATH)

        print("Model training completed and saved!")
        return model, tokenizer

    except Exception as e:
        print(f"Error during model training: {str(e)}")
        return None, None


def generate_travel_plan(destination, days, interests, budget, model, tokenizer):
    """
    Generate a travel plan using the trained model

    Parameters:
    - destination: Travel destination
    - days: Trip duration
    - interests: User's interests (an iterable of strings, or a single
      already-formatted string)
    - budget: Trip budget level
    - model: Trained T5 model
    - tokenizer: Model tokenizer

    Returns:
    - Generated travel plan, or the text "Could not generate travel plan."
      when generation fails (the error is printed)
    """
    try:
        # A bare string must not be joined character by character
        if isinstance(interests, str):
            interests_text = interests
        else:
            interests_text = ', '.join(interests)

        # Format input prompt
        prompt = _build_prompt(destination, days, budget, interests_text)

        # Tokenize input; a single prompt needs no padding, and padding it
        # to 512 tokens would only make the encoder do extra work
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True
        )

        # Move to GPU if available
        if torch.cuda.is_available():
            model = model.cuda()

        # Inputs follow the model to whatever device it actually lives on
        device = model.device
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Disable dropout: the model may still be in training mode
        model.eval()

        # Generate output
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=512,
                num_beams=4,
                no_repeat_ngram_size=3,
                num_return_sequences=1
            )

        # Decode and return the travel plan
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        print(f"Error generating travel plan: {e}")
        return "Could not generate travel plan."
# Directory train_model() writes the fine-tuned model and tokenizer to.
_SAVED_MODEL_DIR = "./trained_travel_planner"

# Text generate_travel_plan() returns when generation fails.
_GENERATION_FAILED_TEXT = "Could not generate travel plan."

# Session-state key holding the most recently generated plan and its inputs.
_PLAN_STATE_KEY = "last_travel_plan"


def _format_duration(days):
    """Return the trip length as text, e.g. "1 day" or "3 days"."""
    return f"{days} day" if days == 1 else f"{days} days"


def _load_saved_model():
    """
    Load a previously saved model and tokenizer from disk

    Returns:
    - (model, tokenizer), or (None, None) when nothing has been saved yet
      or the saved files cannot be loaded
    """
    if not os.path.isdir(_SAVED_MODEL_DIR):
        return None, None
    try:
        tokenizer = T5Tokenizer.from_pretrained(_SAVED_MODEL_DIR, legacy=False)
        model = T5ForConditionalGeneration.from_pretrained(_SAVED_MODEL_DIR)
    except Exception as e:
        # An unreadable save is not fatal: the caller falls back to training
        print(f"Error loading saved model: {e}")
        return None, None
    return model, tokenizer


def _ensure_model():
    """
    Make sure a model and tokenizer are stored in the session state

    A saved model is reused when one exists; training only runs when there
    is nothing to load.

    Returns:
    - True when the session has a usable model, False otherwise (an error
      message has then been shown)
    """
    if 'model' in st.session_state and 'tokenizer' in st.session_state:
        return True

    with st.spinner("Loading AI model... Please wait..."):
        model, tokenizer = _load_saved_model()
        if model is None or tokenizer is None:
            model, tokenizer = train_model()

    if model is None or tokenizer is None:
        st.error("Failed to load/train the AI model. Please try again.")
        return False

    st.session_state['model'] = model
    st.session_state['tokenizer'] = tokenizer
    return True


def _render_sidebar():
    """Draw the sidebar: the retrain button and static model information"""
    with st.sidebar:
        st.header("Model Management")
        if st.button("Retrain Model"):
            with st.spinner("Training new model... This will take a while..."):
                model, tokenizer = train_model()
            if model is not None and tokenizer is not None:
                st.session_state['model'] = model
                st.session_state['tokenizer'] = tokenizer
                st.success("Model training completed!")
            else:
                # Session state is left untouched on failure
                st.error("Model training failed. Please try again.")

        # Add model information
        st.markdown("### Model Information")
        if 'model' in st.session_state:
            st.success("✓ Model loaded")
        st.info(
            "This model was trained on travel plans for:\n"
            "- Destinations from HuggingFace dataset\n"
            "- Flexible days duration\n"
            "- Multiple budget levels\n"
            "- Various interest combinations"
        )


def _render_trip_form():
    """
    Draw the trip input form and the tips panel

    Returns:
    - (destination, days, budget, interests) as currently entered
    """
    form_col, tips_col = st.columns([2, 1])

    with form_col:
        # Input form in a card-like container
        with st.container():
            st.markdown("### 🎯 Plan Your Trip")

            # Destination and Duration row
            dest_col, days_col = st.columns(2)
            with dest_col:
                destination = st.text_input(
                    "🌍 Destination",
                    placeholder="e.g., Paris, Tokyo, New York...",
                    help="Enter the city you want to visit"
                )
            with days_col:
                days = st.slider(
                    "📅 Number of days",
                    min_value=1,
                    max_value=14,
                    value=3,
                    help="Select the duration of your trip"
                )

            # Budget and Interests row
            budget_col, interests_col = st.columns(2)
            with budget_col:
                budget = st.selectbox(
                    "💰 Budget Level",
                    ["Budget", "Moderate", "Luxury"],
                    help="Select your preferred budget level"
                )
            with interests_col:
                interests = st.multiselect(
                    "🎯 Interests",
                    ["Culture", "History", "Food", "Nature", "Shopping",
                     "Adventure", "Relaxation", "Art", "Museums"],
                    ["Culture", "Food"],
                    help="Select up to three interests to personalize your plan"
                )

    with tips_col:
        # Tips and information
        st.markdown("### 💡 Travel Tips")
        st.info(
            "- Choose up to 3 interests for best results\n"
            "- Consider your travel season\n"
            "- Budget levels affect activity suggestions\n"
            "- Plans are customizable after generation"
        )

    return destination, days, budget, interests


def _render_plan(destination, days, budget, interests, travel_plan):
    """Draw the overview, the itinerary with its download button, and the summary"""
    duration = _format_duration(days)

    # Expander for the success message with trip overview
    with st.expander("✨ Your travel plan is ready! Click to see trip overview", expanded=True):
        dest_metric, duration_metric, budget_metric = st.columns(3)
        with dest_metric:
            st.metric("Destination", destination)
        with duration_metric:
            st.metric("Duration", duration)
        with budget_metric:
            st.metric("Budget", budget)
        st.write("**Selected Interests:**", ", ".join(interests))

    # Display the plan in tabs
    plan_tab, summary_tab = st.tabs(["📋 Detailed Itinerary", "ℹ️ Trip Summary"])

    with plan_tab:
        with st.container():
            # Add trip title
            st.markdown(f"## 🌍 {days}-Day Trip to {destination}")
            st.markdown("---")
            # Display the formatted plan
            st.markdown(travel_plan)

        # Export options
        with st.container():
            st.markdown("---")
            download_col, _ = st.columns([1, 4])
            with download_col:
                st.download_button(
                    label="📥 Download Plan",
                    data=travel_plan,
                    file_name=f"travel_plan_{destination.lower().replace(' ', '_')}.md",
                    mime="text/markdown",
                    use_container_width=True
                )

    with summary_tab:
        with st.container():
            st.markdown("## Trip Overview")
            sum_col1, sum_col2, sum_col3 = st.columns(3)

            with sum_col1:
                with st.container():
                    st.markdown("### 📍 Destination Details")
                    st.markdown(f"**Location:** {destination}")
                    st.markdown(f"**Duration:** {duration}")
                    st.markdown(f"**Budget Level:** {budget}")

            with sum_col2:
                with st.container():
                    st.markdown("### 🎯 Trip Focus")
                    st.markdown("**Selected Interests:**")
                    for interest in interests:
                        st.markdown(f"- {interest}")

            with sum_col3:
                with st.container():
                    st.markdown("### ⚠️ Travel Tips")
                    st.info(
                        "• Verify opening hours\n"
                        "• Check current prices\n"
                        "• Confirm availability\n"
                        "• Consider seasonal factors"
                    )


def main():
    """
    Run the Streamlit travel planner page

    Draws the sidebar and input form, makes sure a model is available,
    generates a plan on request and displays the most recent plan.

    The generated plan is kept in the session state: Streamlit reruns the
    script on every widget interaction and a button only reports True on
    the run triggered by its own click, so a plan rendered only inside the
    button branch would vanish as soon as the download button is pressed.
    """
    st.set_page_config(
        page_title="AI Travel Planner",
        page_icon="✈️",
        layout="wide"
    )

    st.title("✈️ AI Travel Planner")
    st.markdown("### Plan your perfect trip with AI assistance!")

    _render_sidebar()

    # Load or train model
    if not _ensure_model():
        return

    destination, days, budget, interests = _render_trip_form()

    # Generate button centered
    _, button_col, _ = st.columns([1, 2, 1])
    with button_col:
        generate_button = st.button(
            "🎨 Generate Travel Plan",
            type="primary",
            use_container_width=True
        )

    if generate_button:
        # Whitespace-only input counts as no destination
        destination = destination.strip()
        if not destination:
            st.error("Please enter a destination!")
            return

        if not interests:
            st.error("Please select at least one interest!")
            return

        if len(interests) > 3:
            st.warning("For best results, please select up to 3 interests.")

        with st.spinner("🤖 Creating your personalized travel plan..."):
            travel_plan = generate_travel_plan(
                destination,
                days,
                interests,
                budget,
                st.session_state['model'],
                st.session_state['tokenizer']
            )

        # Do not present the failure text (or an empty result) as a finished plan
        if not travel_plan or travel_plan == _GENERATION_FAILED_TEXT:
            if _PLAN_STATE_KEY in st.session_state:
                del st.session_state[_PLAN_STATE_KEY]
            st.error("Could not generate travel plan. Please try again.")
            return

        # Store the plan with the inputs that produced it, so later edits to
        # the form do not relabel an existing plan
        st.session_state[_PLAN_STATE_KEY] = {
            'destination': destination,
            'days': days,
            'budget': budget,
            'interests': list(interests),
            'travel_plan': travel_plan,
        }

    if _PLAN_STATE_KEY in st.session_state:
        _render_plan(**st.session_state[_PLAN_STATE_KEY])


if __name__ == "__main__":
    main()