ai-lover committed on
Commit
4be75f7
·
verified ·
1 Parent(s): e5ab239

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -50
app.py CHANGED
@@ -1,65 +1,97 @@
1
- # app.py
2
- import streamlit as st
 
 
3
  import pandas as pd
4
  import numpy as np
 
 
 
 
 
 
 
5
 
6
def load_data(file):
    """Read *file* as CSV and return a DataFrame, or None if parsing fails."""
    try:
        return pd.read_csv(file)
    except Exception as exc:  # report the failure in the UI rather than crashing
        st.error(f"Error loading file: {exc}")
        return None
13
-
14
def generate_summary(df):
    """Build a per-column overview table (dtype, non-null/unique counts, sample)."""
    dtypes = []
    uniques = []
    samples = []
    for col in df.columns:
        series = df[col]
        dtypes.append(str(series.dtype))
        uniques.append(series.nunique())
        samples.append(series.iloc[0] if len(series) > 0 else None)
    overview = {
        'Column': df.columns,
        'Data Type': dtypes,
        'Non-Null Count': df.notnull().sum().values,
        'Unique Values': uniques,
        'Sample Value': samples,
    }
    return pd.DataFrame(overview)
 
 
 
 
 
 
 
 
 
23
 
24
def generate_insights(df):
    """Derive suggestion strings from well-known HR columns, when present."""
    insights = []
    cols = df.columns

    # Training: flag employees under the mean score
    if 'avg_training_score' in cols:
        mean_score = df['avg_training_score'].mean()
        insights.append(
            f"The average training score is {mean_score:.2f}. Consider additional training for employees below this score."
        )

    # Tenure: candidates for leadership roles
    if 'length_of_service' in cols:
        veteran_count = len(df[df['length_of_service'] > 5])
        insights.append(
            f"{veteran_count} employees have more than 5 years of service. Consider them for leadership roles."
        )

    # Recognition: total award tally
    if 'awards_won' in cols:
        total_awards = df['awards_won'].sum()
        insights.append(f"A total of {total_awards} awards have been won by employees.")

    return insights
 
 
41
 
42
# Streamlit app: title + uploader, then preview/summary/insights once a CSV arrives.
st.title("Employee Performance Dashboard")
st.markdown("Upload your cleaned dataset to generate insights and suggestions.")

# File upload widget (CSV only)
uploaded_file = st.file_uploader("Upload CSV File", type="csv")

if uploaded_file is not None:
    df = load_data(uploaded_file)

    # load_data already surfaced an st.error on failure, so only render on success
    if df is not None:
        st.markdown("### Dataset Preview")
        st.dataframe(df.head())

        st.markdown("### Dataset Summary")
        st.dataframe(generate_summary(df))

        st.markdown("### Insights and Suggestions")
        for insight in generate_insights(df):
            st.write(f"- {insight}")
else:
    st.info("Please upload a CSV file.")
 
# Dependencies (install once, from a shell, NOT inside this module):
#   pip install groqflow faiss-cpu pandas transformers matplotlib plotly
# NOTE(review): the original used the IPython magic "!pip install ...", which
# is a SyntaxError in a plain .py file; keep installs out of module code.

# Import required modules
import pandas as pd
import numpy as np
import faiss
import groqflow  # needed so the groqflow.api_key assignment below resolves (was a NameError)
from groqflow import EmbeddingModel
from transformers import pipeline
import plotly.express as px

# Set up Groq API key
# NOTE(review): prefer reading a real key from an environment variable instead
# of hardcoding a secret in source control.
groqflow.api_key = "your_groq_api_key"
 
# Load the dataset
def load_dataset():
    """Return the built-in demo employee-performance table as a DataFrame."""
    records = [
        (101, "John Doe", "Developer", "IT",
         "95% on-time delivery",
         "Great coding skills, always meets deadlines.",
         "Developed new feature X."),
        (102, "Jane Smith", "Manager", "HR",
         "Improved team output",
         "Excellent leadership, improved team productivity.",
         "Managed project Y successfully."),
        (103, "Alice Johnson", "Analyst", "Finance",
         "Increased report accuracy",
         "Strong analytical skills, provides valuable insights.",
         "Optimized process Z for efficiency."),
    ]
    columns = ["EmployeeID", "Name", "Role", "Department", "KPIs", "Feedback", "Projects"]
    return pd.DataFrame.from_records(records, columns=columns)
# Build the demo dataset and a FAISS similarity index over its text fields.
df = load_dataset()

# Initialize Groq embedding model
model = EmbeddingModel()

# Embed each employee's combined feedback + project description
performance_data = df["Feedback"] + " " + df["Projects"]
embeddings = [model.encode(text) for text in performance_data]
embedding_matrix = np.array(embeddings)

# Create a FAISS index (L2 distance over the embedding dimension) and load it
dimension = embedding_matrix.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embedding_matrix)

# Initialize summarization pipeline (default Hugging Face summarization model)
summarizer = pipeline("summarization")
 
51
 
52
# Function to handle user queries
def query_performance(query):
    """Semantic search over employee feedback/projects plus a text summary.

    Parameters
    ----------
    query : str
        Free-text performance question.

    Returns
    -------
    tuple
        (summary_text, relevant_rows) where relevant_rows is a slice of the
        module-level ``df``.
    """
    # Generate embedding for the query
    query_embedding = model.encode(query)

    # Search for up to 5 relevant rows, but never more than the index holds:
    # FAISS pads missing results with index -1, and df.iloc[-1] would then
    # silently return the LAST row as a fake match (bug: k=5 on a 3-row index).
    k = min(5, index.ntotal)
    D, I = index.search(np.array([query_embedding]), k=k)
    hits = [i for i in I[0] if i >= 0]  # drop any -1 padding defensively
    relevant_rows = df.iloc[hits]

    # Prepare context for summarization
    context = " ".join(relevant_rows["Feedback"] + " " + relevant_rows["Projects"])

    # Generate a summary
    summary = summarizer(context, max_length=100, min_length=50, do_sample=False)
    return summary[0]['summary_text'], relevant_rows
67
 
68
# Visualization function
def visualize_performance():
    """Show a bar chart of employee counts per department (module-level df)."""
    dept_counts = df["Department"].value_counts()
    figure = px.bar(
        dept_counts,
        x=dept_counts.index,
        y=dept_counts.values,
        labels={'x': 'Department', 'y': 'Count'},
        title="Performance Metrics by Department",
    )
    figure.show()
74
 
75
# Main interactive app
def main():
    """Console loop: view the dashboard, run ad-hoc queries, or exit."""
    print("Welcome to the Employee Performance Dashboard!")
    while True:
        print("\nOptions:\n1. View Dashboard\n2. Query Performance\n3. Exit")
        choice = input("Enter your choice: ")

        # Check exit first so the loop reads as guard-then-work
        if choice == "3":
            print("Goodbye!")
            break
        if choice == "1":
            visualize_performance()
        elif choice == "2":
            question = input("Enter your performance query: ")
            summary, rows = query_performance(question)
            print("\nSummary:\n", summary)
            print("\nRelevant Data:\n", rows)
        else:
            print("Invalid choice. Please try again.")


# Run the app
if __name__ == "__main__":
    main()