# ai-lover's picture
# Update app.py
# 4c36940 verified
# app.py
import streamlit as st
import pandas as pd
from transformers import pipeline
# Function to load data
def load_data(file):
    """Read a CSV source into a DataFrame.

    Args:
        file: Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns:
        The parsed DataFrame, or ``None`` when reading fails. On failure the
        error is shown to the user through ``st.error`` instead of being raised.
    """
    try:
        frame = pd.read_csv(file)
    except Exception as e:
        # Surface the problem in the UI and signal failure to the caller.
        st.error(f"Error loading file: {e}")
        return None
    else:
        return frame
# Function to generate dataset summary
def generate_summary(df):
    """Describe every column of *df* in a small summary table.

    Args:
        df: The DataFrame to describe.

    Returns:
        A DataFrame with one row per column of *df* and the columns
        ``Column``, ``Data Type``, ``Non-Null Count`` and ``Unique Values``.
    """
    # Use frame-level, positional aggregations instead of per-label lookups:
    # df[label] returns a DataFrame when labels are duplicated, which has no
    # single dtype and would make the per-label version raise.
    summary = {
        'Column': df.columns,
        'Data Type': [str(dtype) for dtype in df.dtypes],
        'Non-Null Count': df.notna().sum().to_numpy(),
        'Unique Values': df.nunique().tolist(),
    }
    return pd.DataFrame(summary)
# Function to generate insights (basic examples)
def generate_insights(df):
    """Derive simple textual insights from well-known columns of *df*.

    Two optional columns are inspected:

    * ``avg_training_score`` - reports the mean score.
    * ``length_of_service`` - reports how many rows exceed 5.

    Columns that are absent are skipped. Cells that cannot be read as numbers
    are treated as missing rather than raising.

    Args:
        df: The DataFrame to inspect.

    Returns:
        A list of insight strings (possibly empty).
    """
    insights = []

    if 'avg_training_score' in df.columns:
        # Coerce so a stray text cell becomes NaN instead of making mean() raise.
        scores = pd.to_numeric(df['avg_training_score'], errors='coerce')
        avg_score = scores.mean()
        # With no numeric value at all the mean is NaN; skip the insight rather
        # than telling the user the average is "nan".
        if pd.notna(avg_score):
            insights.append(f"The average training score is {avg_score:.2f}. Consider additional training for employees below this score.")

    if 'length_of_service' in df.columns:
        service = pd.to_numeric(df['length_of_service'], errors='coerce')
        # NaN > 5 evaluates to False, so missing/unparsable cells are not counted.
        experienced_employees = int((service > 5).sum())
        insights.append(f"{experienced_employees} employees have more than 5 years of service. Consider them for leadership roles.")

    return insights
# Query-prefixed summarization of the dataset text using a Hugging Face pipeline (no retrieval step is performed)
def generate_query_summary(df, query):
    """Summarize the beginning of *df*, prefixed by *query*.

    The rows of *df* are rendered as text (cells and rows separated by single
    spaces), cut to the first 1024 characters, prefixed with ``query`` and
    passed to a Hugging Face ``summarization`` pipeline.

    Args:
        df: The DataFrame whose content is summarized.
        query: Free text placed in front of the data text.

    Returns:
        The generated summary text, or a human-readable error string when the
        model cannot be loaded or the summarization fails.
    """
    # Truncate text to avoid token limit issues
    max_input_length = 1024

    # Rows are joined with one space each, so the first max_input_length + 1
    # rows always produce at least max_input_length characters. Later rows can
    # never reach the model, so they are not stringified at all.
    leading_rows = df.head(max_input_length + 1).astype(str)
    combined_text = " ".join(leading_rows.apply(lambda row: " ".join(row), axis=1))
    combined_text = combined_text[:max_input_length]

    try:
        # Built inside the try so a failed model load is reported like any
        # other summarization error instead of crashing the caller.
        # NOTE(review): the pipeline is rebuilt on every call; consider caching
        # it at the app level if load time matters.
        summarizer = pipeline("summarization")
        result = summarizer(
            query + " " + combined_text,
            max_length=100,
            min_length=30,
            do_sample=False,
            # The cut above is character-based; let the tokenizer enforce the
            # model's real token limit as well (e.g. for a very long query).
            truncation=True,
        )
        return result[0]['summary_text']
    except IndexError:
        return "Error: Unable to generate a summary. Ensure the query and data are valid."
    except Exception as e:
        return f"Error during summarization: {e}"
# Streamlit app
# Page header and short usage hint.
st.title("Employee Performance Dashboard")
st.markdown("Upload your cleaned dataset to generate insights and retrieve summaries.")

# File upload
uploaded_file = st.file_uploader("Upload CSV File", type="csv")

if uploaded_file is None:
    # Nothing uploaded yet: prompt the user.
    st.info("Please upload a CSV file.")
else:
    df = load_data(uploaded_file)
    # load_data returns None after reporting a read error; draw nothing more then.
    if df is not None:
        st.markdown("### Dataset Preview")
        st.dataframe(df.head())

        st.markdown("### Dataset Summary")
        st.dataframe(generate_summary(df))

        st.markdown("### Insights and Suggestions")
        for insight in generate_insights(df):
            st.write(f"- {insight}")

        st.markdown("### Query the Dataset")
        query = st.text_input("Enter your query:")
        if query:
            # Generate the text first so the heading appears only once it is ready.
            query_summary = generate_query_summary(df, query)
            st.markdown("#### Summary")
            st.write(query_summary)