File size: 2,903 Bytes
f3db446
 
745810e
4be75f7
 
f3db446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
745810e
f3db446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c36940
 
 
 
 
 
 
 
 
 
 
 
 
f3db446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c36940
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# app.py
import streamlit as st
import pandas as pd
from transformers import pipeline

# Function to load data
def load_data(file):
    """Parse an uploaded CSV into a DataFrame.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` when reading fails (the failure
        is reported to the user through ``st.error``).
    """
    try:
        frame = pd.read_csv(file)
    except Exception as exc:
        # Surface the problem in the UI rather than crashing the app.
        st.error(f"Error loading file: {exc}")
        frame = None
    return frame

# Function to generate dataset summary
def generate_summary(df):
    """Build a per-column overview of *df*.

    Columns are visited positionally rather than looked up by label, so
    frames with duplicate column labels are summarized instead of failing
    (a label lookup returns a DataFrame, which has no ``dtype``).

    Args:
        df: The DataFrame to describe.

    Returns:
        A DataFrame with one row per column of *df* and the columns
        ``Column``, ``Data Type``, ``Non-Null Count`` and ``Unique Values``.
    """
    summary = {
        'Column': df.columns,
        'Data Type': [str(dtype) for dtype in df.dtypes],
        'Non-Null Count': df.notnull().sum().values,
        # items() yields each column as a Series, even for repeated labels.
        'Unique Values': [series.nunique() for _, series in df.items()],
    }
    return pd.DataFrame(summary)

# Function to generate insights (basic examples)
def generate_insights(df):
    """Derive simple textual suggestions from known columns of *df*.

    Each rule only fires when its column is present:

    * ``avg_training_score`` -> reports the mean score.
    * ``length_of_service`` -> counts rows with a value greater than 5.

    Args:
        df: The DataFrame to inspect.

    Returns:
        A list of insight strings, in the order listed above; empty when
        neither column exists.
    """
    available = df.columns
    findings = []

    if 'avg_training_score' in available:
        mean_score = df['avg_training_score'].mean()
        findings.append(
            f"The average training score is {mean_score:.2f}. Consider additional training for employees below this score."
        )

    if 'length_of_service' in available:
        long_tenure = df.loc[df['length_of_service'] > 5]
        headcount = long_tenure.shape[0]
        findings.append(
            f"{headcount} employees have more than 5 years of service. Consider them for leadership roles."
        )

    return findings

# Query-prefixed summarization using a Hugging Face pipeline (no retrieval step)
def generate_query_summary(df, query):
    """Summarize the leading rows of *df* with *query* prepended as context.

    The rows are flattened to space-separated text, capped at 1024
    characters, prefixed with ``query`` and passed to the default Hugging
    Face summarization pipeline.

    Args:
        df: DataFrame whose rows are rendered as text.
        query: Free-text query entered by the user.

    Returns:
        The generated summary text, or a human-readable error message when
        the model cannot be loaded or summarization fails.
    """
    # Character cap applied to the dataset text.
    max_input_length = 1024

    # Consecutive rows are joined by one separator character, so the first
    # ``max_input_length + 1`` rows already yield at least
    # ``max_input_length`` characters whenever the full dataset would.
    # Stringifying only that prefix produces the same truncated text without
    # materializing the whole dataset.
    leading_rows = df.head(max_input_length + 1).astype(str)
    combined_text = " ".join(leading_rows.apply(lambda x: " ".join(x), axis=1))
    combined_text = combined_text[:max_input_length]

    try:
        # Built inside the try so a model-loading failure is reported through
        # the same error-string channel as a summarization failure.
        # NOTE(review): the pipeline is rebuilt on every call; consider
        # caching it (e.g. with Streamlit's resource cache) — confirm the
        # installed Streamlit version supports it.
        summarizer = pipeline("summarization")
        result = summarizer(
            query + " " + combined_text,
            max_length=100,
            min_length=30,
            do_sample=False,
            # The cap above counts characters and does not bound the query;
            # let the tokenizer enforce the model's real token limit.
            truncation=True,
        )
        return result[0]['summary_text']
    except IndexError:
        return "Error: Unable to generate a summary. Ensure the query and data are valid."
    except Exception as e:
        return f"Error during summarization: {e}"

# Streamlit app
st.title("Employee Performance Dashboard")
st.markdown("Upload your cleaned dataset to generate insights and retrieve summaries.")

# Everything below is driven by a single uploaded CSV.
uploaded_file = st.file_uploader("Upload CSV File", type="csv")

if uploaded_file is None:
    st.info("Please upload a CSV file.")
else:
    df = load_data(uploaded_file)

    # load_data reports read failures itself and hands back None.
    if df is not None:
        # First rows of the raw data.
        st.markdown("### Dataset Preview")
        st.dataframe(df.head())

        # Per-column overview table.
        st.markdown("### Dataset Summary")
        summary_table = generate_summary(df)
        st.dataframe(summary_table)

        # Rule-based suggestions, rendered as a bullet list.
        st.markdown("### Insights and Suggestions")
        for insight in generate_insights(df):
            st.write(f"- {insight}")

        # Free-text query; nothing is generated until the user types one.
        st.markdown("### Query the Dataset")
        query = st.text_input("Enter your query:")
        if query:
            query_answer = generate_query_summary(df, query)
            st.markdown("#### Summary")
            st.write(query_answer)