import joblib
import numpy as np
import pandas as pd
import gradio as gr

# Load the saved models
encoder_file = "ordinal_encoder.joblib"
scaler_file = "min_max_scaler.joblib"
joblib_file = "best_random_forest_model_s.joblib"

loaded_encoder = joblib.load(encoder_file)
loaded_scaler = joblib.load(scaler_file)
loaded_model = joblib.load(joblib_file)

# Define the prediction function
def predict_from_csv(file_path):
    df = pd.read_csv(file_path)

    # Convert date columns to datetime
    df['appt_slot_date'] = pd.to_datetime(df['appt_slot_date'])
    df['appt_date'] = pd.to_datetime(df['appt_date'])

    # Calculate the days_difference
    df['days_difference'] = (df['appt_slot_date'] - df['appt_date']).dt.days

    input_columns = ['facilitytype', 'appt_type', 'service_name', 'facility_name', 'directorate_name', 'year', 'region', 'days_difference']
    test_data = df[input_columns]
    test_data = test_data.astype(str)

    test_data_encoded = loaded_encoder.transform(test_data)
    test_data_scaled = loaded_scaler.transform(test_data_encoded)

    predictions = loaded_model.predict(test_data_scaled)

    df['Prediction'] = predictions

    df['Prediction'] = np.where(predictions == 0, "No Show", "Show")
    print(df['Prediction'].value_counts())

    output_file = "predictions.csv"
    df.to_csv(output_file, index=False, encoding='utf-8-sig')

    return output_file

# Define the Gradio interface
interface = gr.Interface(
    fn=predict_from_csv,
    inputs=gr.File(label='Upload CSV file', type='filepath', file_count='single'),
    outputs=gr.File(label='Download CSV file with predictions'),
    title="Appointment Status Prediction in Saudi Arabian Health Care System",
    description="Upload a CSV file with the required columns to get predictions."
)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch(debug=True)