import streamlit as st
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from transformers import pipeline
from PyPDF2 import PdfReader

# Helper function to load different file types
def load_file(uploaded_file):
    """Read an uploaded CSV or Excel file into a DataFrame.

    The format is picked from the extension of ``uploaded_file.name``,
    compared case-insensitively so that names such as ``DATA.CSV`` work.

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute; it is
            passed directly to the pandas reader.

    Returns:
        The DataFrame produced by ``pd.read_csv`` or ``pd.read_excel``.

    Raises:
        NotImplementedError: If the file name ends in ``.pdf``.
        ValueError: If the extension is none of ``.csv``, ``.xlsx``, ``.pdf``.
    """
    filename = uploaded_file.name.lower()
    if filename.endswith('.csv'):
        return pd.read_csv(uploaded_file)
    if filename.endswith('.xlsx'):
        return pd.read_excel(uploaded_file)
    if filename.endswith('.pdf'):
        # Table extraction from PDFs is not written yet, so fail right away
        # instead of parsing the whole document and discarding the text.
        raise NotImplementedError("PDF parsing is not implemented.")
    raise ValueError("Unsupported file format. Please upload CSV, Excel, or PDF.")

# Train a new model on the given data and save it to disk
def train_model(data, model_path='solar_model.pkl'):
    """Fit a linear regression for solar output and pickle it to disk.

    An ``Hour`` column (hour of day parsed from ``Timestamp``) is added to
    *data* in place. The model is fitted on ``Hour``, ``Temperature`` and
    ``CloudCover`` against ``SolarOutput`` using the 80 % training part of
    a fixed-seed split, then written to *model_path*.

    Args:
        data: DataFrame with ``Timestamp``, ``Temperature``, ``CloudCover``
            and ``SolarOutput`` columns.
        model_path: Where the pickled model is written. Defaults to
            ``'solar_model.pkl'``.

    Returns:
        The fitted ``LinearRegression`` instance.

    Raises:
        KeyError: If any of the required columns is missing from *data*.
    """
    required = ('Timestamp', 'Temperature', 'CloudCover', 'SolarOutput')
    missing = [col for col in required if col not in data.columns]
    if missing:
        # Report every missing column at once, before mutating `data`.
        raise KeyError(f"Missing required column(s): {', '.join(missing)}")

    data['Hour'] = pd.to_datetime(data['Timestamp']).dt.hour
    X = data[['Hour', 'Temperature', 'CloudCover']]
    y = data['SolarOutput']
    # The held-out 20 % is not used here; only the training part is fitted.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)
    model = LinearRegression()
    model.fit(X_train, y_train)
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)
    return model

# Load the trained model
def load_model(model_path='solar_model.pkl'):
    """Load a previously pickled model from disk.

    Args:
        model_path: Path of the pickle file to read. Defaults to
            ``'solar_model.pkl'``.

    Returns:
        The unpickled object, or ``None`` when *model_path* does not exist.
    """
    try:
        with open(model_path, 'rb') as f:
            # SECURITY: unpickling can execute arbitrary code. Only point
            # this at files the application itself wrote, never at
            # user-supplied uploads.
            return pickle.load(f)
    except FileNotFoundError:
        return None

# Initialize the text generation pipeline
@st.cache_resource
def _load_generator():
    """Build the GPT-2 text-generation pipeline.

    Streamlit re-executes this script on every widget interaction. Caching
    the pipeline as a resource means the model is built once and reused
    across reruns instead of being reloaded each time.
    """
    # device=-1 is the transformers convention for running on CPU.
    return pipeline('text-generation', model='gpt2', device=-1)


generator = _load_generator()

def _render_predictions(data):
    """Predict solar output for *data* and show the table plus a recommendation.

    Adds ``Hour`` and ``PredictedSolarOutput`` columns to *data* in place.
    Problems with the upload are reported through ``st.error`` and the
    function returns early, so the rest of the page still renders.

    Args:
        data: DataFrame loaded from the uploaded file.
    """
    missing = [
        col for col in ('Timestamp', 'Temperature', 'CloudCover')
        if col not in data.columns
    ]
    if missing:
        st.error(f"Missing required column(s): {', '.join(missing)}")
        return
    if data.empty:
        st.error("The uploaded file contains no rows.")
        return

    # Load or train the model
    model = load_model()
    if model is None:
        # Training additionally needs the target column.
        if 'SolarOutput' not in data.columns:
            st.error("Missing required column(s): SolarOutput")
            return
        st.warning("No pre-trained model found. Training a new model...")
        model = train_model(data)
        st.success("Model trained and saved successfully!")

    # Preprocess data
    data['Hour'] = pd.to_datetime(data['Timestamp']).dt.hour
    data['PredictedSolarOutput'] = model.predict(data[['Hour', 'Temperature', 'CloudCover']])

    # Determine the best hour for maximum solar energy
    best_hour = int(data.loc[data['PredictedSolarOutput'].idxmax(), 'Hour'])
    # Wrap around midnight so the window never shows hours such as "25:00".
    end_hour = (best_hour + 2) % 24

    # Generate advice
    advice_prompt = f"The best time to use your appliances is between {best_hour}:00 and {end_hour}:00."
    advice = generator(advice_prompt, max_length=50)[0]['generated_text']

    # Display predictions and advice
    st.subheader("Predictions")
    st.write(data)
    st.subheader("Recommendation")
    st.write(advice)


# Streamlit app
st.title("Smart Home Energy Advisor")

# File uploader
uploaded_file = st.file_uploader("Upload your data file (CSV, Excel, or PDF)", type=['csv', 'xlsx', 'pdf'])
if uploaded_file:
    # Load and preview data. This is the UI boundary, so any loader failure
    # is shown to the user instead of crashing the page.
    try:
        data = load_file(uploaded_file)
        st.write("Data Preview:", data.head())
    except Exception as e:
        st.error(f"Error loading file: {e}")
        data = None

    if data is not None:
        _render_predictions(data)

# Chatbot feature
st.subheader("Chat with the Advisor")
user_query = st.text_input("Ask your question:")
# Ignore whitespace-only input so the model is not run on an empty question.
if user_query and user_query.strip():
    chatbot_response = generator(
        f"User asked: {user_query}. Response:",
        # max_length counts the prompt tokens too, so a long question left no
        # room to generate; max_new_tokens budgets only the reply.
        max_new_tokens=50,
        # Return just the continuation rather than echoing the prompt back.
        return_full_text=False,
    )[0]['generated_text']
    st.write("Advisor Response:", chatbot_response.strip())