ai-lover committed on
Commit
f3db446
·
verified ·
1 Parent(s): 4715ad1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -90
app.py CHANGED
@@ -1,94 +1,76 @@
1
- # Import required modules
 
2
  import pandas as pd
3
- import numpy as np
4
- import faiss
5
- from groqflow import EmbeddingModel
6
  from transformers import pipeline
7
- import plotly.express as px
8
 
9
# Set up the Groq API key from the environment.
#
# Two problems with the previous line are fixed here:
#   * only `from groqflow import EmbeddingModel` was imported, so the bare name
#     `groqflow` was unbound and the assignment raised NameError;
#   * the key was written into the source file as a string literal.
#
# NOTE(review): whether groqflow actually reads a module-level `api_key`
# attribute cannot be determined from this file -- confirm against the library.
import os

import groqflow

# Fails fast with KeyError when GROQ_API_KEY is not set.
groqflow.api_key = os.environ["GROQ_API_KEY"]
11
-
12
def load_dataset():
    """Return the built-in three-employee sample table as a DataFrame.

    The frame has one row per employee and the columns ``EmployeeID``,
    ``Name``, ``Role``, ``Department``, ``KPIs``, ``Feedback`` and
    ``Projects``, in that order.
    """
    column_names = [
        "EmployeeID",
        "Name",
        "Role",
        "Department",
        "KPIs",
        "Feedback",
        "Projects",
    ]
    # One tuple per employee, fields in the same order as column_names.
    records = [
        (
            101,
            "John Doe",
            "Developer",
            "IT",
            "95% on-time delivery",
            "Great coding skills, always meets deadlines.",
            "Developed new feature X.",
        ),
        (
            102,
            "Jane Smith",
            "Manager",
            "HR",
            "Improved team output",
            "Excellent leadership, improved team productivity.",
            "Managed project Y successfully.",
        ),
        (
            103,
            "Alice Johnson",
            "Analyst",
            "Finance",
            "Increased report accuracy",
            "Strong analytical skills, provides valuable insights.",
            "Optimized process Z for efficiency.",
        ),
    ]
    return pd.DataFrame(records, columns=column_names)
28
-
29
# Build the retrieval state once, at import time.
df = load_dataset()

# Initialize Groq embedding model (used for the corpus here and for queries
# in query_performance).
model = EmbeddingModel()

# One text per employee: feedback and project description joined by a space.
performance_data = df["Feedback"] + " " + df["Projects"]
embeddings = [model.encode(text) for text in performance_data]

# FAISS works on C-contiguous float32 matrices. Force that layout explicitly
# instead of relying on whatever dtype model.encode() happens to return;
# some FAISS versions reject float64 input in index.add().
embedding_matrix = np.ascontiguousarray(embeddings, dtype=np.float32)

# Create a flat L2 FAISS index and add the embeddings.
dimension = embedding_matrix.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embedding_matrix)

# Summarization pipeline shared by every query.
summarizer = pipeline("summarization")
48
-
49
def query_performance(query, k=5):
    """Summarize the dataset rows closest to *query*.

    The query is embedded with the module-level ``model``, its nearest
    neighbours are looked up in the module-level FAISS ``index``, and the
    ``Feedback`` and ``Projects`` text of the matching ``df`` rows is passed
    to the module-level ``summarizer``.

    Args:
        query: Free-text question.
        k: Maximum number of rows to retrieve (default 5, as before).

    Returns:
        A ``(summary_text, relevant_rows)`` tuple, where ``relevant_rows`` is
        the slice of ``df`` that was summarized. If the index is empty the
        summary is ``""`` and ``relevant_rows`` has no rows.
    """
    # Asking for more neighbours than the index holds makes FAISS pad the
    # result with -1, and df.iloc[-1] would then silently return the *last*
    # row again. Cap k and drop any padding ids.
    k = min(k, index.ntotal)
    if k <= 0:
        return "", df.iloc[[]]

    # Generate the query embedding as a (1, dim) float32 matrix for FAISS.
    query_vector = np.ascontiguousarray([model.encode(query)], dtype=np.float32)

    # Search for the nearest rows; the distances are not used.
    _distances, neighbour_ids = index.search(query_vector, k)
    row_positions = [int(i) for i in neighbour_ids[0] if i >= 0]
    relevant_rows = df.iloc[row_positions]

    # Prepare context for summarization.
    context = " ".join(relevant_rows["Feedback"] + " " + relevant_rows["Projects"])

    # Generate a summary.
    summary = summarizer(context, max_length=100, min_length=50, do_sample=False)
    return summary[0]['summary_text'], relevant_rows
64
-
65
def visualize_performance():
    """Show a bar chart of the number of dataset rows per department.

    Reads the module-level ``df`` and opens the figure with ``fig.show()``;
    nothing is returned.
    """
    department_counts = df["Department"].value_counts()
    axis_labels = {'x': 'Department', 'y': 'Count'}
    chart = px.bar(
        department_counts,
        x=department_counts.index,
        y=department_counts.values,
        labels=axis_labels,
        title="Performance Metrics by Department",
    )
    chart.show()
71
-
72
def main():
    """Run the console menu until the user chooses "3" (Exit).

    Option "1" shows the department chart, option "2" prompts for a query
    and prints its summary together with the rows it was built from, and any
    other input prints an error and shows the menu again.
    """
    menu_text = "\nOptions:\n1. View Dashboard\n2. Query Performance\n3. Exit"

    print("Welcome to the Employee Performance Dashboard!")
    while True:
        print(menu_text)
        selection = input("Enter your choice: ")

        if selection == "3":
            print("Goodbye!")
            return

        if selection == "1":
            visualize_performance()
            continue

        if selection == "2":
            user_query = input("Enter your performance query: ")
            summary_text, matched_rows = query_performance(user_query)
            print("\nSummary:\n", summary_text)
            print("\nRelevant Data:\n", matched_rows)
            continue

        print("Invalid choice. Please try again.")


# Run the app only when executed as a script.
if __name__ == "__main__":
    main()
 
1
+ # app.py
2
+ import streamlit as st
3
  import pandas as pd
 
 
 
4
  from transformers import pipeline
 
5
 
6
def load_data(file):
    """Read an uploaded CSV into a DataFrame.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` when reading fails. On failure the
        exception message is shown to the user through ``st.error``.
    """
    try:
        return pd.read_csv(file)
    except Exception as exc:
        # UI boundary: report the problem on the page instead of crashing.
        st.error(f"Error loading file: {exc}")
    return None
14
+
15
def generate_summary(df):
    """Build a per-column overview of *df*.

    Args:
        df: The DataFrame to describe.

    Returns:
        A DataFrame with one row per column of *df* and the columns
        ``Column`` (the label), ``Data Type`` (the dtype as a string),
        ``Non-Null Count`` and ``Unique Values`` (``Series.nunique()``, which
        ignores missing values by default).
    """
    # Walk the columns by position rather than by label: with duplicated
    # labels df[label] is a DataFrame, which has no .dtype attribute.
    summary = {
        'Column': df.columns,
        'Data Type': [str(dtype) for dtype in df.dtypes],
        'Non-Null Count': df.notnull().sum().values,
        'Unique Values': [series.nunique() for _, series in df.items()],
    }
    return pd.DataFrame(summary)
24
+
25
def generate_insights(df):
    """Derive a few plain-language observations from *df*.

    Two optional columns are inspected:

    * ``avg_training_score`` -- reports the mean score.
    * ``length_of_service`` -- reports how many rows have a value above 5.

    Values are coerced to numbers first, so a column that was parsed as text
    (for example because the CSV contains "n/a") no longer raises; entries
    that cannot be converted are ignored. An insight is skipped when its
    column is absent or holds no usable number.

    Args:
        df: The uploaded dataset.

    Returns:
        A list of insight strings, possibly empty.
    """
    insights = []

    if 'avg_training_score' in df.columns:
        scores = pd.to_numeric(df['avg_training_score'], errors='coerce')
        # Without this guard an all-missing column would be reported as "nan".
        if scores.notna().any():
            avg_score = scores.mean()
            insights.append(f"The average training score is {avg_score:.2f}. Consider additional training for employees below this score.")

    if 'length_of_service' in df.columns:
        service_years = pd.to_numeric(df['length_of_service'], errors='coerce')
        if service_years.notna().any():
            # Missing values compare False, so they are never counted.
            experienced_employees = int((service_years > 5).sum())
            insights.append(f"{experienced_employees} employees have more than 5 years of service. Consider them for leadership roles.")

    return insights
38
+
39
@st.cache_resource
def _load_summarizer():
    """Create the default Hugging Face summarization pipeline once and reuse it.

    Streamlit re-runs the script on every interaction; without caching the
    pipeline would be rebuilt for each query.
    """
    return pipeline("summarization")


def generate_query_summary(df, query):
    """Summarize the whole dataset with the user's query prepended.

    Every cell of *df* is converted to text, the cells of each row are joined
    with spaces, the rows are joined with spaces, and the result is appended
    to *query* before being passed to the summarization pipeline. No
    retrieval or filtering by the query takes place.

    Args:
        df: The uploaded dataset.
        query: Text entered by the user.

    Returns:
        The ``summary_text`` of the first pipeline result.
    """
    summarizer = _load_summarizer()

    # Iterate rows explicitly: DataFrame.apply(axis=1) on a frame with no rows
    # returns an empty DataFrame, and joining that would join column labels.
    row_texts = [
        " ".join(cells)
        for cells in df.astype(str).itertuples(index=False, name=None)
    ]
    combined_text = " ".join(row_texts)

    result = summarizer(
        query + " " + combined_text,
        max_length=100,
        min_length=30,
        do_sample=False,
        # A full CSV easily exceeds the model's input limit; truncate rather
        # than fail inside the model.
        truncation=True,
    )
    return result[0]['summary_text']
45
+
46
# Streamlit page: header, CSV upload, then preview / summary / insights / query.
st.title("Employee Performance Dashboard")
st.markdown("Upload your cleaned dataset to generate insights and retrieve summaries.")

# File upload
uploaded_file = st.file_uploader("Upload CSV File", type="csv")

if uploaded_file is None:
    # Nothing uploaded yet: prompt the user and render nothing else.
    st.info("Please upload a CSV file.")
else:
    df = load_data(uploaded_file)

    # load_data returns None (after showing an error) when parsing fails.
    if df is not None:
        st.markdown("### Dataset Preview")
        st.dataframe(df.head())

        st.markdown("### Dataset Summary")
        st.dataframe(generate_summary(df))

        st.markdown("### Insights and Suggestions")
        for insight in generate_insights(df):
            st.write(f"- {insight}")

        st.markdown("### Query the Dataset")
        query = st.text_input("Enter your query:")
        if query:
            # Compute the summary first so the heading appears together with it.
            query_summary = generate_query_summary(df, query)
            st.markdown("#### Summary")
            st.write(query_summary)