Spaces:

ai-lover
/

employee-performance-dashboard

Sleeping

App Files Community

ai-lover committed on Dec 24, 2024

Commit

f3db446

verified ·

1 Parent(s): 4715ad1

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -90

app.py CHANGED Viewed

@@ -1,94 +1,76 @@
-# Import required modules
 import pandas as pd
-import numpy as np
-import faiss
-from groqflow import EmbeddingModel
 from transformers import pipeline
-import plotly.express as px
-# Set up Groq API key
-groqflow.api_key = "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"
-# Load the dataset
-def load_dataset():
-    data = {
-        "EmployeeID": [101, 102, 103],
-        "Name": ["John Doe", "Jane Smith", "Alice Johnson"],
-        "Role": ["Developer", "Manager", "Analyst"],
-        "Department": ["IT", "HR", "Finance"],
-        "KPIs": ["95% on-time delivery", "Improved team output", "Increased report accuracy"],
-        "Feedback": [
-            "Great coding skills, always meets deadlines.",
-            "Excellent leadership, improved team productivity.",
-            "Strong analytical skills, provides valuable insights."
-        ],
-        "Projects": ["Developed new feature X.", "Managed project Y successfully.", "Optimized process Z for efficiency."]
     }
-    return pd.DataFrame(data)
-df = load_dataset()
-# Initialize Groq embedding model
-model = EmbeddingModel()
-# Generate embeddings for the dataset
-performance_data = df["Feedback"] + " " + df["Projects"]  # Combine relevant fields
-embeddings = [model.encode(text) for text in performance_data]
-# Convert embeddings to a NumPy array
-embedding_matrix = np.array(embeddings)
-# Create a FAISS index and add embeddings
-dimension = embedding_matrix.shape[1]
-index = faiss.IndexFlatL2(dimension)
-index.add(embedding_matrix)
-# Initialize summarization pipeline
-summarizer = pipeline("summarization")
-# Function to handle user queries
-def query_performance(query):
-    # Generate embedding for the query
-    query_embedding = model.encode(query)
-    # Search for relevant rows
-    D, I = index.search(np.array([query_embedding]), k=5)  # Retrieve top 5 results
-    relevant_rows = df.iloc[I[0]]
-    # Prepare context for summarization
-    context = " ".join(relevant_rows["Feedback"] + " " + relevant_rows["Projects"])
-    # Generate a summary
-    summary = summarizer(context, max_length=100, min_length=50, do_sample=False)
-    return summary[0]['summary_text'], relevant_rows
-# Visualization function
-def visualize_performance():
-    # Example: Bar chart for KPIs by Department
-    kpi_counts = df["Department"].value_counts()
-    fig = px.bar(kpi_counts, x=kpi_counts.index, y=kpi_counts.values, labels={'x': 'Department', 'y': 'Count'}, title="Performance Metrics by Department")
-    fig.show()
-# Main interactive app
-def main():
-    print("Welcome to the Employee Performance Dashboard!")
-    while True:
-        print("\nOptions:\n1. View Dashboard\n2. Query Performance\n3. Exit")
-        choice = input("Enter your choice: ")
-        if choice == "1":
-            visualize_performance()
-        elif choice == "2":
-            query = input("Enter your performance query: ")
-            summary, rows = query_performance(query)
-            print("\nSummary:\n", summary)
-            print("\nRelevant Data:\n", rows)
-        elif choice == "3":
-            print("Goodbye!")
-            break
-        else:
-            print("Invalid choice. Please try again.")
-# Run the app
-if __name__ == "__main__":
-    main()

+# app.py
+import streamlit as st
 import pandas as pd
 from transformers import pipeline
+# Function to load data
+def load_data(file):
+    try:
+        df = pd.read_csv(file)
+        return df
+    except Exception as e:
+        st.error(f"Error loading file: {e}")
+        return None
+# Function to generate dataset summary
+def generate_summary(df):
+    summary = {
+        'Column': df.columns,
+        'Data Type': [str(df[col].dtype) for col in df.columns],
+        'Non-Null Count': df.notnull().sum().values,
+        'Unique Values': [df[col].nunique() for col in df.columns]
     }
+    return pd.DataFrame(summary)
+# Function to generate insights (basic examples)
+def generate_insights(df):
+    insights = []
+    if 'avg_training_score' in df.columns:
+        avg_score = df['avg_training_score'].mean()
+        insights.append(f"The average training score is {avg_score:.2f}. Consider additional training for employees below this score.")
+    if 'length_of_service' in df.columns:
+        experienced_employees = len(df[df['length_of_service'] > 5])
+        insights.append(f"{experienced_employees} employees have more than 5 years of service. Consider them for leadership roles.")
+    return insights
+# Query summarization using a Hugging Face pipeline (no retrieval step: the query is prepended to the text of every row)
+def generate_query_summary(df, query):
+    summarizer = pipeline("summarization")
+    combined_text = " ".join(df.astype(str).apply(lambda x: " ".join(x), axis=1))
+    result = summarizer(query + " " + combined_text, max_length=100, min_length=30, do_sample=False)
+    return result[0]['summary_text']
+# Streamlit app
+st.title("Employee Performance Dashboard")
+st.markdown("Upload your cleaned dataset to generate insights and retrieve summaries.")
+# File upload
+uploaded_file = st.file_uploader("Upload CSV File", type="csv")
+if uploaded_file is not None:
+    df = load_data(uploaded_file)
+    if df is not None:
+        st.markdown("### Dataset Preview")
+        st.dataframe(df.head())
+        st.markdown("### Dataset Summary")
+        summary = generate_summary(df)
+        st.dataframe(summary)
+        st.markdown("### Insights and Suggestions")
+        insights = generate_insights(df)
+        for insight in insights:
+            st.write(f"- {insight}")
+        st.markdown("### Query the Dataset")
+        query = st.text_input("Enter your query:")
+        if query:
+            summary = generate_query_summary(df, query)
+            st.markdown("#### Summary")
+            st.write(summary)
+else:
+    st.info("Please upload a CSV file.")