# Hugging Face Space: abdullafahem/trip_planner (status: Sleeping)
# File size: 15,713 Bytes
# Revision: 7bdf2e1

import os
import sys
import torch
import pandas as pd
import streamlit as st
from datetime import datetime
from transformers import (
    T5ForConditionalGeneration, 
    T5Tokenizer,
    Trainer, 
    TrainingArguments,
    DataCollatorForSeq2Seq
)
from torch.utils.data import Dataset
import random

# Seed PyTorch and the standard-library RNG with one shared, fixed value so
# that repeated runs draw the same random numbers.
_SEED = 42
torch.manual_seed(_SEED)
random.seed(_SEED)

# Process-level environment configuration.
# NOTE(review): presumably this allows duplicate OpenMP runtimes to coexist
# instead of aborting - confirm it is still needed on the deployment target.
os.environ.update({'KMP_DUPLICATE_LIB_OK': 'TRUE'})

class TravelDataset(Dataset):
    """
    Torch dataset that turns rows of travel-planning data into seq2seq samples

    Each row is rendered into a text prompt (see `format_input_text`) and
    paired with the row's 'target' text. Both are tokenized to a fixed length
    of `max_length` tokens.
    """

    def __init__(self, data, tokenizer, max_length=512):
        """
        Initialize the dataset for travel planning

        Parameters:
        - data: DataFrame containing travel planning data
        - tokenizer: Tokenizer for encoding input and output
        - max_length: Maximum sequence length
        """
        self.tokenizer = tokenizer
        self.data = data
        self.max_length = max_length

        # Print dataset information
        print(f"Dataset loaded with {len(data)} samples")
        print("Columns:", list(data.columns))

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.data)

    def _encode(self, text):
        """Tokenize one text, padded/truncated to exactly `max_length` tokens."""
        return self.tokenizer(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

    def __getitem__(self, idx):
        """
        Prepare an individual training sample

        Parameters:
        - idx: Positional index of the row to encode

        Returns a dictionary with input_ids, attention_mask, and labels, each
        a 1-D tensor of length `max_length`. Padding positions in labels are
        set to -100 so the loss ignores them.
        """
        row = self.data.iloc[idx]

        # Prompt side and target side (the travel plan) of the sample
        input_encodings = self._encode(self.format_input_text(row))
        target_encodings = self._encode(row['target'])

        # squeeze(0) drops only the batch dimension added by return_tensors='pt';
        # a bare squeeze() would also collapse the sequence axis when
        # max_length == 1.
        labels = target_encodings['input_ids'].squeeze(0)

        # Padding in the labels must not contribute to the loss: -100 is the
        # index ignored by the cross-entropy loss used for seq2seq training.
        pad_token_id = getattr(self.tokenizer, 'pad_token_id', None)
        if pad_token_id is not None:
            labels[labels == pad_token_id] = -100

        return {
            'input_ids': input_encodings['input_ids'].squeeze(0),
            'attention_mask': input_encodings['attention_mask'].squeeze(0),
            'labels': labels
        }

    @staticmethod
    def format_input_text(row):
        """
        Format input text for the model

        This method creates a prompt that the model will use to generate a travel plan

        Parameters:
        - row: Mapping-like object supporting .get() (e.g. a pandas Series or
          a dict); missing keys fall back to the defaults below

        Returns:
        - The prompt string
        """
        # Format the input text based on available columns
        destination = row.get('destination', 'Unknown')
        days = row.get('days', 3)
        budget = row.get('budget', 'Moderate')
        interests = row.get('interests', 'Culture, Food')

        return f"Plan a trip to {destination} for {days} days with a {budget} budget. Include activities related to: {interests}"

def load_dataset(source="hf://datasets/osunlp/TravelPlanner/train.csv"):
    """
    Load the travel planning dataset and validate its columns

    Parameters:
    - source: Path or URL of the CSV file, passed to pandas.read_csv.
      Defaults to the TravelPlanner training split on HuggingFace.

    Returns:
    - pandas DataFrame with the dataset

    Raises:
    - RuntimeError: if the CSV cannot be read
    - ValueError: if one or more required columns are missing

    Errors are raised as ordinary exceptions rather than terminating the
    process, so a caller that catches Exception can report the failure.
    """
    # NOTE(review): confirm the hosted CSV actually exposes these column names.
    required_columns = ['destination', 'days', 'budget', 'interests', 'target']

    # Load dataset from CSV
    try:
        data = pd.read_csv(source)
    except Exception as e:
        print(f"Error loading dataset: {e}")
        raise RuntimeError(f"Error loading dataset: {e}") from e

    # Basic data validation: report every missing column at once
    missing = [col for col in required_columns if col not in data.columns]
    if missing:
        message = f"Missing required column(s): {', '.join(missing)}"
        print(f"Error loading dataset: {message}")
        raise ValueError(message)

    # Print dataset info
    print("Dataset successfully loaded")
    print(f"Total samples: {len(data)}")
    print("Columns:", list(data.columns))

    return data

def train_model():
    """
    Train the T5 model for travel planning

    Loads the dataset, fine-tunes 't5-base' on an 80/20 train/validation
    split, saves the model and tokenizer to "./trained_travel_planner", and
    returns the model switched to evaluation mode so it can be used for
    generation right away.

    Returns:
    - Trained model (or None if training failed)
    - Tokenizer (or None if training failed)
    """
    import traceback  # local import: only used on the failure path

    try:
        # Load dataset
        data = load_dataset()

        # Initialize model and tokenizer
        print("Initializing T5 model and tokenizer...")
        tokenizer = T5Tokenizer.from_pretrained('t5-base', legacy=False)
        model = T5ForConditionalGeneration.from_pretrained('t5-base')

        # Split data into training and validation sets (first 80% / last 20%,
        # in file order)
        train_size = int(0.8 * len(data))
        train_data = data[:train_size]
        val_data = data[train_size:]

        print(f"Training set size: {len(train_data)}")
        print(f"Validation set size: {len(val_data)}")

        # Create datasets
        train_dataset = TravelDataset(train_data, tokenizer)
        val_dataset = TravelDataset(val_data, tokenizer)

        # Training arguments; each run writes checkpoints to its own
        # timestamped output directory.
        # NOTE(review): newer transformers releases may expect `eval_strategy`
        # instead of `evaluation_strategy` - confirm against the pinned version.
        training_args = TrainingArguments(
            output_dir=f"./travel_planner_model_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            num_train_epochs=3,
            per_device_train_batch_size=4,
            per_device_eval_batch_size=4,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir="./logs",
            logging_steps=10,
            evaluation_strategy="steps",
            eval_steps=50,
            save_steps=100,
            load_best_model_at_end=True,
        )

        # Data collator
        data_collator = DataCollatorForSeq2Seq(
            tokenizer=tokenizer,
            model=model,
            padding=True
        )

        # Initialize trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            data_collator=data_collator,
        )

        # Train the model
        print("Starting model training...")
        trainer.train()

        # Save the model and tokenizer
        model_path = "./trained_travel_planner"
        model.save_pretrained(model_path)
        tokenizer.save_pretrained(model_path)

        # Training may leave the model in train mode (dropout active); the
        # caller uses it directly for generation, so switch to eval mode.
        model.eval()

        print("Model training completed and saved!")
        return model, tokenizer

    except Exception as e:
        print(f"Error during model training: {str(e)}")
        # Keep the full traceback in the logs; the message alone rarely
        # identifies which stage failed.
        traceback.print_exc()
        return None, None

def generate_travel_plan(destination, days, interests, budget, model, tokenizer):
    """
    Generate a travel plan using the trained model

    Parameters:
    - destination: Travel destination
    - days: Trip duration
    - interests: User's interests; either an iterable of interest names or a
      single, already comma-separated string
    - budget: Trip budget level
    - model: Trained T5 model
    - tokenizer: Model tokenizer

    Returns:
    - Generated travel plan, or the fallback message
      "Could not generate travel plan." if anything goes wrong
    """
    try:
        # A bare string would otherwise be joined character by character
        # ("C, u, l, ..."), so pass it through unchanged.
        if isinstance(interests, str):
            interests_text = interests
        else:
            interests_text = ', '.join(str(interest) for interest in interests)

        # Format input prompt
        prompt = f"Plan a trip to {destination} for {days} days with a {budget} budget. Include activities related to: {interests_text}"

        # Tokenize input. A single prompt needs no padding; only truncate to
        # the 512-token limit.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True
        )

        # Move to GPU if available
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
            model = model.cuda()

        # Make sure dropout is disabled: the model may still be in train mode
        # if it comes straight from training.
        model.eval()

        # Generate output
        outputs = model.generate(
            **inputs,
            max_length=512,
            num_beams=4,
            no_repeat_ngram_size=3,
            num_return_sequences=1
        )

        # Decode and return the travel plan
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        # Best effort: the UI displays the returned string, so report the
        # failure in the logs and fall back to a fixed message.
        print(f"Error generating travel plan: {e}")
        return "Could not generate travel plan."

def main() -> None:
    """
    Render the Streamlit travel planner page

    Steps, in order:
    - Configure the page and show the title.
    - Sidebar: a "Retrain Model" button that calls train_model() and stores
      the result in st.session_state; a model information panel; and, when no
      model is in st.session_state yet, a call to train_model() to obtain one
      (the function returns early with an error message if that fails).
    - Main area: inputs for destination, number of days, budget level and
      interests, plus a tips panel.
    - When the generate button is pressed: validate the inputs, call
      generate_travel_plan() with the model and tokenizer from
      st.session_state, and show the result with an overview, a detailed
      itinerary tab (including a download button) and a summary tab.

    Returns:
    - None
    """
    st.set_page_config(
        page_title="AI Travel Planner",
        page_icon="✈️",
        layout="wide"
    )

    st.title("✈️ AI Travel Planner")
    st.markdown("### Plan your perfect trip with AI assistance!")

    # Add training button in sidebar only
    with st.sidebar:
        st.header("Model Management")
        if st.button("Retrain Model"):
            with st.spinner("Training new model... This will take a while..."):
                model, tokenizer = train_model()
                # Only replace the stored model when training succeeded;
                # train_model() returns (None, None) on failure.
                if model is not None:
                    st.session_state['model'] = model
                    st.session_state['tokenizer'] = tokenizer
                    st.success("Model training completed!")

        # Add model information
        st.markdown("### Model Information")
        if 'model' in st.session_state:
            st.success("✓ Model loaded")
            st.info("""
            This model was trained on travel plans for:
            - Destinations from HuggingFace dataset
            - Flexible days duration
            - Multiple budget levels
            - Various interest combinations
            """)

        # Load or train model
        # (still inside the sidebar block, so the spinner and any error
        # message are rendered in the sidebar)
        if 'model' not in st.session_state:
            with st.spinner("Loading AI model... Please wait..."):
                model, tokenizer = train_model()  # Changed from load_or_train_model
                if model is None or tokenizer is None:
                    st.error("Failed to load/train the AI model. Please try again.")
                    # Without a model nothing below can work; stop rendering.
                    return
                st.session_state.model = model
                st.session_state.tokenizer = tokenizer

    # Create two columns for input form
    col1, col2 = st.columns([2, 1])

    with col1:
        # Input form in a card-like container
        with st.container():
            st.markdown("### 🎯 Plan Your Trip")

            # Destination and Duration row
            dest_col, days_col = st.columns(2)
            with dest_col:
                destination = st.text_input(
                    "🌍 Destination",
                    placeholder="e.g., Paris, Tokyo, New York...",
                    help="Enter the city you want to visit"
                )

            with days_col:
                days = st.slider(
                    "📅 Number of days",
                    min_value=1,
                    max_value=14,
                    value=3,
                    help="Select the duration of your trip"
                )

            # Budget and Interests row
            budget_col, interests_col = st.columns(2)
            with budget_col:
                budget = st.selectbox(
                    "💰 Budget Level",
                    ["Budget", "Moderate", "Luxury"],
                    help="Select your preferred budget level"
                )

            with interests_col:
                # The third positional argument is the default selection.
                interests = st.multiselect(
                    "🎯 Interests",
                    ["Culture", "History", "Food", "Nature", "Shopping", 
                    "Adventure", "Relaxation", "Art", "Museums"],
                    ["Culture", "Food"],
                    help="Select up to three interests to personalize your plan"
                )

    with col2:
        # Tips and information
        st.markdown("### 💡 Travel Tips")
        st.info("""
        - Choose up to 3 interests for best results
        - Consider your travel season
        - Budget levels affect activity suggestions
        - Plans are customizable after generation
        """)

    # Generate button centered
    # (col1/col2 are rebound here; the input-form columns above are no longer needed)
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        generate_button = st.button(
            "🎨 Generate Travel Plan",
            type="primary",
            use_container_width=True
        )

    if generate_button:
        # Input validation: both a destination and at least one interest are required.
        if not destination:
            st.error("Please enter a destination!")
            return

        if not interests:
            st.error("Please select at least one interest!")
            return

        # More than three interests is allowed; it only triggers a warning.
        if len(interests) > 3:
            st.warning("For best results, please select up to 3 interests.")

        with st.spinner("🤖 Creating your personalized travel plan..."):
            travel_plan = generate_travel_plan(
                destination,
                days,
                interests,
                budget,
                st.session_state.model,
                st.session_state.tokenizer
            )

            # Create an expander for the success message with trip overview
            with st.expander("✨ Your travel plan is ready! Click to see trip overview", expanded=True):
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.metric("Destination", destination)
                with col2:
                    # Singular/plural wording for the duration label
                    if days == 1:
                        st.metric("Duration", f"{days} day")
                    else:
                        st.metric("Duration", f"{days} days")
                with col3:
                    st.metric("Budget", budget)

                st.write("**Selected Interests:**", ", ".join(interests))

            # Display the plan in tabs with improved styling
            plan_tab, summary_tab = st.tabs(["📋 Detailed Itinerary", "ℹ️ Trip Summary"])

            with plan_tab:
                # Add a container for better spacing
                with st.container():
                    # Add trip title
                    st.markdown(f"## 🌍 {days}-Day Trip to {destination}")
                    st.markdown("---")

                    # Display the formatted plan
                    st.markdown(travel_plan)

                    # Add export options in a nice container
                    with st.container():
                        st.markdown("---")
                        col1, col2 = st.columns([1, 4])
                        with col1:
                            # The file name is derived from the destination:
                            # lower-cased, with spaces replaced by underscores.
                            st.download_button(
                                label="📥 Download Plan",
                                data=travel_plan,
                                file_name=f"travel_plan_{destination.lower().replace(' ', '_')}.md",
                                mime="text/markdown",
                                use_container_width=True
                            )

            with summary_tab:
                # Create three columns for summary information with cards
                with st.container():
                    st.markdown("## Trip Overview")
                    sum_col1, sum_col2, sum_col3 = st.columns(3)

                    with sum_col1:
                        with st.container():
                            st.markdown("### 📍 Destination Details")
                            st.markdown(f"**Location:** {destination}")
                            if days == 1:
                                st.markdown(f"**Duration:** {days} day")
                            else: 
                                st.markdown(f"**Duration:** {days} days")
                            st.markdown(f"**Budget Level:** {budget}")

                    with sum_col2:
                        with st.container():
                            st.markdown("### 🎯 Trip Focus")
                            st.markdown("**Selected Interests:**")
                            for interest in interests:
                                st.markdown(f"- {interest}")

                    with sum_col3:
                        with st.container():
                            st.markdown("### ⚠️ Travel Tips")
                            st.info(
                                "• Verify opening hours\n"
                                "• Check current prices\n"
                                "• Confirm availability\n"
                                "• Consider seasonal factors"
                            )

# Script entry point: build the page only when this file is run as the main
# module, not when it is imported.
if __name__ == "__main__":
    main()