# Hugging Face Spaces page residue captured with this file (Space status: Sleeping).
import os | |
import sys | |
import torch | |
import pandas as pd | |
import streamlit as st | |
from datetime import datetime | |
from transformers import ( | |
T5ForConditionalGeneration, | |
T5Tokenizer, | |
Trainer, | |
TrainingArguments, | |
DataCollatorForSeq2Seq | |
) | |
from torch.utils.data import Dataset | |
import random | |
# Seed both the standard-library and the PyTorch random generators with the
# same fixed value so repeated runs draw the same random numbers.
_SEED = 42
random.seed(_SEED)
torch.manual_seed(_SEED)

# Process-wide environment flag.
# NOTE(review): presumably a workaround for aborts when more than one OpenMP
# runtime gets loaded -- confirm it is still needed on the deployment target.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
class TravelDataset(Dataset):
    """
    Torch dataset that turns travel-planning rows into seq2seq training samples

    Each row is rendered into a text prompt by ``format_input_text`` and paired
    with the text stored in the row's ``target`` column.
    """

    def __init__(self, data, tokenizer, max_length=512):
        """
        Initialize the dataset for travel planning

        Parameters:
        - data: DataFrame containing travel planning data
        - tokenizer: Tokenizer for encoding input and output
        - max_length: Maximum sequence length
        """
        self.tokenizer = tokenizer
        self.data = data
        self.max_length = max_length
        # Print dataset information
        print(f"Dataset loaded with {len(data)} samples")
        print("Columns:", list(data.columns))

    def __len__(self):
        """Return the number of rows in the underlying data."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Prepare an individual training sample

        Parameters:
        - idx: Positional index of the row to encode

        Returns a dictionary with input_ids, attention_mask, and labels.
        Padding positions in labels are set to -100 so the loss skips them.
        """
        row = self.data.iloc[idx]
        # Prepare input text
        input_text = self.format_input_text(row)
        # Prepare target text (travel plan)
        target_text = row['target']
        # Tokenize inputs
        input_encodings = self.tokenizer(
            input_text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        # Tokenize targets
        target_encodings = self.tokenizer(
            target_text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        # squeeze(0) removes only the batch axis added by return_tensors='pt';
        # a bare squeeze() would also collapse a length-1 sequence axis.
        labels = target_encodings['input_ids'].squeeze(0)
        # Targets are padded to max_length with the pad token; without masking,
        # the loss would also be computed on those padding positions.
        pad_token_id = getattr(self.tokenizer, 'pad_token_id', None)
        if pad_token_id is not None:
            labels[labels == pad_token_id] = -100
        return {
            'input_ids': input_encodings['input_ids'].squeeze(0),
            'attention_mask': input_encodings['attention_mask'].squeeze(0),
            'labels': labels
        }

    @staticmethod
    def format_input_text(row):
        """
        Format input text for the model

        This method creates a prompt that the model will use to generate a travel plan.
        It is a static method because it reads no instance state; it was previously
        declared without ``self`` and so failed when called as ``self.format_input_text(row)``.

        Parameters:
        - row: Mapping-like object with a ``get(key, default)`` method
          (for example a pandas Series or a dict)

        Returns:
        - Prompt string; missing fields fall back to built-in defaults
        """
        # Format the input text based on available columns
        destination = row.get('destination', 'Unknown')
        days = row.get('days', 3)
        budget = row.get('budget', 'Moderate')
        interests = row.get('interests', 'Culture, Food')
        return f"Plan a trip to {destination} for {days} days with a {budget} budget. Include activities related to: {interests}"
def load_dataset(path="hf://datasets/osunlp/TravelPlanner/train.csv"):
    """
    Load the travel planning dataset from HuggingFace

    Parameters:
    - path: Location of the CSV file passed to ``pandas.read_csv``; defaults to
      the HuggingFace-hosted training split

    Returns:
    - pandas DataFrame with the dataset

    Raises:
    - RuntimeError: if the CSV cannot be read
    - ValueError: if one or more required columns are missing

    Failures are raised instead of terminating the process with ``sys.exit``,
    so callers that catch ``Exception`` can report the problem themselves.
    """
    required_columns = ['destination', 'days', 'budget', 'interests', 'target']
    # Keep the try body limited to the read so validation errors are not
    # mistaken for I/O failures.
    try:
        # Load dataset from CSV
        data = pd.read_csv(path)
    except Exception as e:
        print(f"Error loading dataset: {e}")
        raise RuntimeError(f"Could not read dataset from {path!r}: {e}") from e
    # Basic data validation: report every missing column at once
    missing = [col for col in required_columns if col not in data.columns]
    if missing:
        message = f"Missing required columns: {', '.join(missing)}"
        print(f"Error loading dataset: {message}")
        raise ValueError(message)
    # Print dataset info
    print("Dataset successfully loaded")
    print(f"Total samples: {len(data)}")
    print("Columns:", list(data.columns))
    return data
def train_model():
    """
    Fine-tune 't5-base' on the travel planning dataset

    The dataset is split positionally: the first 80% of rows are used for
    training and the remainder for validation. After training, the model and
    tokenizer are written to "./trained_travel_planner".

    Returns:
    - (model, tokenizer) on success
    - (None, None) if any exception is raised along the way
    """
    try:
        frame = load_dataset()

        print("Initializing T5 model and tokenizer...")
        tokenizer = T5Tokenizer.from_pretrained('t5-base', legacy=False)
        model = T5ForConditionalGeneration.from_pretrained('t5-base')

        # Positional 80/20 split (no shuffling)
        cutoff = int(0.8 * len(frame))
        train_rows, val_rows = frame[:cutoff], frame[cutoff:]
        print(f"Training set size: {len(train_rows)}")
        print(f"Validation set size: {len(val_rows)}")

        train_dataset = TravelDataset(train_rows, tokenizer)
        val_dataset = TravelDataset(val_rows, tokenizer)

        # Each run gets its own timestamped checkpoint directory
        run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        settings = dict(
            output_dir=f"./travel_planner_model_{run_stamp}",
            num_train_epochs=3,
            per_device_train_batch_size=4,
            per_device_eval_batch_size=4,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir="./logs",
            logging_steps=10,
            evaluation_strategy="steps",
            eval_steps=50,
            save_steps=100,
            load_best_model_at_end=True,
        )

        trainer = Trainer(
            model=model,
            args=TrainingArguments(**settings),
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            data_collator=DataCollatorForSeq2Seq(
                tokenizer=tokenizer,
                model=model,
                padding=True
            ),
        )

        print("Starting model training...")
        trainer.train()

        # Persist the model first, then the tokenizer, to the same directory
        model_path = "./trained_travel_planner"
        for artifact in (model, tokenizer):
            artifact.save_pretrained(model_path)
        print("Model training completed and saved!")
    except Exception as e:
        print(f"Error during model training: {str(e)}")
        return None, None
    return model, tokenizer
def _format_interests(interests):
    """Return interests as one comma-separated string (a plain string is kept as is)."""
    if isinstance(interests, str):
        # Joining a string would split it into single characters.
        return interests
    return ', '.join(str(item) for item in interests)


def generate_travel_plan(destination, days, interests, budget, model, tokenizer):
    """
    Generate a travel plan using the trained model

    Parameters:
    - destination: Travel destination
    - days: Trip duration
    - interests: User's interests; either an iterable of items or a single
      already-formatted string
    - budget: Trip budget level
    - model: Trained T5 model
    - tokenizer: Model tokenizer

    Returns:
    - Generated travel plan, or "Could not generate travel plan." if any
      step raises an exception
    """
    try:
        # Format input prompt
        prompt = f"Plan a trip to {destination} for {days} days with a {budget} budget. Include activities related to: {_format_interests(interests)}"
        # Tokenize input
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            padding="max_length",
            truncation=True
        )
        # Move to GPU if available
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
            model = model.cuda()
        # Generate output
        outputs = model.generate(
            **inputs,
            max_length=512,
            num_beams=4,
            no_repeat_ngram_size=3,
            num_return_sequences=1
        )
        # Decode and return the travel plan
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Best-effort boundary: the caller receives a fixed fallback message.
        print(f"Error generating travel plan: {e}")
        return "Could not generate travel plan."
def main() -> None:
    """
    Build the Streamlit page for the AI travel planner

    Flow, top to bottom:
    - configure the page and title
    - sidebar: a "Retrain Model" button that calls train_model() and stores the
      result in st.session_state, plus a short model-information panel
    - if no model is in st.session_state yet, call train_model() and stop with
      an error message when it returns None
    - input form: destination, number of days, budget level, interests
    - on "Generate Travel Plan": validate the inputs, call
      generate_travel_plan(), then show an overview, the plan text with a
      download button, and a summary tab

    NOTE(review): the block nesting below was reconstructed from a source with
    no indentation -- confirm it against the deployed file.
    NOTE(review): several string literals look like mis-decoded emoji; they are
    reproduced unchanged here -- confirm the intended characters.
    """
    st.set_page_config(
        page_title="AI Travel Planner",
        page_icon="βοΈ",
        layout="wide"
    )
    st.title("βοΈ AI Travel Planner")
    st.markdown("### Plan your perfect trip with AI assistance!")
    # Add training button in sidebar only
    with st.sidebar:
        st.header("Model Management")
        if st.button("Retrain Model"):
            with st.spinner("Training new model... This will take a while..."):
                model, tokenizer = train_model()
                # train_model() returns (None, None) on failure; in that case
                # the session state is left untouched and nothing is shown.
                if model is not None:
                    st.session_state['model'] = model
                    st.session_state['tokenizer'] = tokenizer
                    st.success("Model training completed!")
        # Add model information
        st.markdown("### Model Information")
        if 'model' in st.session_state:
            st.success("β Model loaded")
            st.info("""
This model was trained on travel plans for:
- Destinations from HuggingFace dataset
- Flexible days duration
- Multiple budget levels
- Various interest combinations
""")
    # Load or train model
    # Despite the "Loading" spinner text, this path always calls train_model().
    if 'model' not in st.session_state:
        with st.spinner("Loading AI model... Please wait..."):
            model, tokenizer = train_model()  # Changed from load_or_train_model
            if model is None or tokenizer is None:
                st.error("Failed to load/train the AI model. Please try again.")
                return
            st.session_state.model = model
            st.session_state.tokenizer = tokenizer
    # Create two columns for input form
    col1, col2 = st.columns([2, 1])
    with col1:
        # Input form in a card-like container
        with st.container():
            st.markdown("### π― Plan Your Trip")
            # Destination and Duration row
            dest_col, days_col = st.columns(2)
            with dest_col:
                destination = st.text_input(
                    "π Destination",
                    placeholder="e.g., Paris, Tokyo, New York...",
                    help="Enter the city you want to visit"
                )
            with days_col:
                days = st.slider(
                    "π Number of days",
                    min_value=1,
                    max_value=14,
                    value=3,
                    help="Select the duration of your trip"
                )
            # Budget and Interests row
            budget_col, interests_col = st.columns(2)
            with budget_col:
                budget = st.selectbox(
                    "π° Budget Level",
                    ["Budget", "Moderate", "Luxury"],
                    help="Select your preferred budget level"
                )
            with interests_col:
                # The third positional argument is the default selection.
                interests = st.multiselect(
                    "π― Interests",
                    ["Culture", "History", "Food", "Nature", "Shopping",
                     "Adventure", "Relaxation", "Art", "Museums"],
                    ["Culture", "Food"],
                    help="Select up to three interests to personalize your plan"
                )
    with col2:
        # Tips and information
        st.markdown("### π‘ Travel Tips")
        st.info("""
- Choose up to 3 interests for best results
- Consider your travel season
- Budget levels affect activity suggestions
- Plans are customizable after generation
""")
    # Generate button centered
    # col1/col2 are rebound here; the earlier form columns are no longer used.
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        generate_button = st.button(
            "π¨ Generate Travel Plan",
            type="primary",
            use_container_width=True
        )
    if generate_button:
        # Validation: an empty destination or no interests stops this run.
        if not destination:
            st.error("Please enter a destination!")
            return
        if not interests:
            st.error("Please select at least one interest!")
            return
        # More than three interests only triggers a warning; generation proceeds.
        if len(interests) > 3:
            st.warning("For best results, please select up to 3 interests.")
        with st.spinner("π€ Creating your personalized travel plan..."):
            travel_plan = generate_travel_plan(
                destination,
                days,
                interests,
                budget,
                st.session_state.model,
                st.session_state.tokenizer
            )
        # Create an expander for the success message with trip overview
        with st.expander("β¨ Your travel plan is ready! Click to see trip overview", expanded=True):
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Destination", destination)
            with col2:
                # Singular/plural wording for the duration
                if days == 1:
                    st.metric("Duration", f"{days} day")
                else:
                    st.metric("Duration", f"{days} days")
            with col3:
                st.metric("Budget", budget)
            st.write("**Selected Interests:**", ", ".join(interests))
        # Display the plan in tabs with improved styling
        plan_tab, summary_tab = st.tabs(["π Detailed Itinerary", "βΉοΈ Trip Summary"])
        with plan_tab:
            # Add a container for better spacing
            with st.container():
                # Add trip title
                st.markdown(f"## π {days}-Day Trip to {destination}")
                st.markdown("---")
                # Display the formatted plan
                # The generated text is rendered as markdown, unmodified.
                st.markdown(travel_plan)
            # Add export options in a nice container
            with st.container():
                st.markdown("---")
                col1, col2 = st.columns([1, 4])
                with col1:
                    # File name is derived from the destination: lower-cased,
                    # with spaces replaced by underscores.
                    st.download_button(
                        label="π₯ Download Plan",
                        data=travel_plan,
                        file_name=f"travel_plan_{destination.lower().replace(' ', '_')}.md",
                        mime="text/markdown",
                        use_container_width=True
                    )
        with summary_tab:
            # Create three columns for summary information with cards
            with st.container():
                st.markdown("## Trip Overview")
                sum_col1, sum_col2, sum_col3 = st.columns(3)
                with sum_col1:
                    with st.container():
                        st.markdown("### π Destination Details")
                        st.markdown(f"**Location:** {destination}")
                        if days == 1:
                            st.markdown(f"**Duration:** {days} day")
                        else:
                            st.markdown(f"**Duration:** {days} days")
                        st.markdown(f"**Budget Level:** {budget}")
                with sum_col2:
                    with st.container():
                        st.markdown("### π― Trip Focus")
                        st.markdown("**Selected Interests:**")
                        # One bullet per selected interest
                        for interest in interests:
                            st.markdown(f"- {interest}")
                with sum_col3:
                    with st.container():
                        st.markdown("### β οΈ Travel Tips")
                        st.info(
                            "β’ Verify opening hours\n"
                            "β’ Check current prices\n"
                            "β’ Confirm availability\n"
                            "β’ Consider seasonal factors"
                        )
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()