|
|
|
import streamlit as st |
|
import pandas as pd |
|
from transformers import pipeline |
|
|
|
|
|
def load_data(file):
    """Read an uploaded CSV into a DataFrame.

    Args:
        file: Path or file-like object handed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` when reading fails (the failure is
        reported through ``st.error``).
    """
    try:
        return pd.read_csv(file)
    except Exception as exc:
        # Report the problem in the UI rather than letting the app crash.
        st.error(f"Error loading file: {exc}")
    return None
|
|
|
|
|
def generate_summary(df):
    """Build a per-column overview of ``df``.

    Args:
        df: The DataFrame to describe.

    Returns:
        A DataFrame with one row per column of ``df`` and the columns
        ``'Column'``, ``'Data Type'``, ``'Non-Null Count'`` and
        ``'Unique Values'``.
    """
    # Walk the columns positionally (``df.dtypes`` / ``df.items()``) instead of
    # looking them up by label: ``df[label]`` yields a DataFrame when a label
    # is duplicated, which has no ``.dtype`` and would raise.
    summary = {
        'Column': df.columns,
        'Data Type': [str(dtype) for dtype in df.dtypes],
        'Non-Null Count': df.notnull().sum().values,
        'Unique Values': [series.nunique() for _, series in df.items()],
    }
    return pd.DataFrame(summary)
|
|
|
|
|
def generate_insights(df):
    """Derive human-readable suggestions from known columns of ``df``.

    Two optional columns are inspected: ``avg_training_score`` (its mean is
    reported) and ``length_of_service`` (rows with a value above 5 are
    counted). Columns that are absent are skipped.

    Args:
        df: The DataFrame to inspect.

    Returns:
        A list of insight strings; empty when neither column is present.
    """
    available = df.columns
    findings = []

    if 'avg_training_score' in available:
        mean_score = df['avg_training_score'].mean()
        findings.append(
            f"The average training score is {mean_score:.2f}. "
            "Consider additional training for employees below this score."
        )

    if 'length_of_service' in available:
        long_serving = df['length_of_service'] > 5
        veteran_count = len(df[long_serving])
        findings.append(
            f"{veteran_count} employees have more than 5 years of service. "
            "Consider them for leadership roles."
        )

    return findings
|
|
|
|
|
def _load_summarizer():
    """Create the Hugging Face summarization pipeline (loads the model)."""
    return pipeline("summarization")


# Streamlit re-executes the script on every widget interaction, so without a
# cache the model would be rebuilt for each query. ``st.cache_resource`` keeps
# one instance alive across reruns; Streamlit releases that lack it fall back
# to the uncached loader.
if hasattr(st, "cache_resource"):
    _load_summarizer = st.cache_resource(_load_summarizer)


def generate_query_summary(df, query):
    """Summarize the dataset's text, prefixed by the user's query.

    Each row of ``df`` is stringified and joined with spaces; the result is
    cut to 1024 characters, prefixed with ``query`` and passed to the
    summarization pipeline.

    Args:
        df: DataFrame whose rows are flattened into the text to summarize.
        query: Free-text query placed in front of the data text.

    Returns:
        The generated summary text, or a readable error message when the
        model cannot be loaded or summarization fails.
    """
    max_input_length = 1024  # character budget for the flattened data text

    # Only the first ``max_input_length`` characters survive the cut below and
    # every row adds at least its separator, so later rows can never reach the
    # model; avoid stringifying them.
    window = df.head(max_input_length + 1)
    combined_text = " ".join(window.astype(str).apply(lambda x: " ".join(x), axis=1))
    combined_text = combined_text[:max_input_length]

    try:
        # Loading sits inside the ``try`` so that a failed model load is
        # reported the same way as a failed summarization.
        summarizer = _load_summarizer()
        result = summarizer(
            query + " " + combined_text,
            max_length=100,
            min_length=30,
            do_sample=False,
            # The cap above counts characters, not tokens; let the tokenizer
            # enforce the model's real input limit (e.g. for a long query).
            truncation=True,
        )
        return result[0]['summary_text']
    except IndexError:
        return "Error: Unable to generate a summary. Ensure the query and data are valid."
    except Exception as e:
        return f"Error during summarization: {e}"
|
|
|
|
|
# Page header.
st.title("Employee Performance Dashboard")
st.markdown("Upload your cleaned dataset to generate insights and retrieve summaries.")

# Dataset input.
uploaded_file = st.file_uploader("Upload CSV File", type="csv")

if uploaded_file is None:
    # Nothing uploaded yet: prompt the user and render nothing else.
    st.info("Please upload a CSV file.")
else:
    df = load_data(uploaded_file)

    # load_data reports read failures itself and hands back None.
    if df is not None:
        st.markdown("### Dataset Preview")
        st.dataframe(df.head())

        st.markdown("### Dataset Summary")
        summary = generate_summary(df)
        st.dataframe(summary)

        st.markdown("### Insights and Suggestions")
        insights = generate_insights(df)
        for insight in insights:
            st.write(f"- {insight}")

        st.markdown("### Query the Dataset")
        query = st.text_input("Enter your query:")
        # An empty text box means no query has been submitted yet.
        if query:
            summary = generate_query_summary(df, query)
            st.markdown("#### Summary")
            st.write(summary)