Update app.py
Browse files
app.py
CHANGED
@@ -1,94 +1,76 @@
|
|
1 |
-
#
|
|
|
2 |
import pandas as pd
|
3 |
-
import numpy as np
|
4 |
-
import faiss
|
5 |
-
from groqflow import EmbeddingModel
|
6 |
from transformers import pipeline
|
7 |
-
import plotly.express as px
|
8 |
|
9 |
-
#
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
"
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
]
|
25 |
-
"Projects": ["Developed new feature X.", "Managed project Y successfully.", "Optimized process Z for efficiency."]
|
26 |
}
|
27 |
-
return pd.DataFrame(
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
elif choice == "2":
|
82 |
-
query = input("Enter your performance query: ")
|
83 |
-
summary, rows = query_performance(query)
|
84 |
-
print("\nSummary:\n", summary)
|
85 |
-
print("\nRelevant Data:\n", rows)
|
86 |
-
elif choice == "3":
|
87 |
-
print("Goodbye!")
|
88 |
-
break
|
89 |
-
else:
|
90 |
-
print("Invalid choice. Please try again.")
|
91 |
-
|
92 |
-
# Run the app
|
93 |
-
if __name__ == "__main__":
|
94 |
-
main()
|
|
|
1 |
+
# app.py
|
2 |
+
import streamlit as st
|
3 |
import pandas as pd
|
|
|
|
|
|
|
4 |
from transformers import pipeline
|
|
|
5 |
|
6 |
+
# Function to load data
|
7 |
+
def load_data(file):
    """Read a CSV source into a DataFrame.

    Args:
        file: Anything ``pd.read_csv`` accepts (path, buffer, uploaded file).

    Returns:
        The parsed DataFrame, or ``None`` when reading fails. On failure the
        error is shown to the user through ``st.error`` instead of being
        raised.
    """
    try:
        frame = pd.read_csv(file)
    except Exception as e:
        # UI boundary: surface the problem in the page rather than crash.
        st.error(f"Error loading file: {e}")
        return None
    return frame
|
14 |
+
|
15 |
+
# Function to generate dataset summary
|
16 |
+
def generate_summary(df):
    """Build a per-column overview of a DataFrame.

    Args:
        df: The DataFrame to describe.

    Returns:
        A DataFrame with one row per column of ``df`` and the columns
        'Column', 'Data Type' (dtype rendered as a string),
        'Non-Null Count' and 'Unique Values' (as reported by
        ``Series.nunique()``).
    """
    column_index = df.columns

    # Walk the columns once, collecting both per-column facts together.
    dtype_labels = []
    distinct_counts = []
    for name in column_index:
        series = df[name]
        dtype_labels.append(str(series.dtype))
        distinct_counts.append(series.nunique())

    filled_counts = df.notnull().sum().values

    return pd.DataFrame({
        'Column': column_index,
        'Data Type': dtype_labels,
        'Non-Null Count': filled_counts,
        'Unique Values': distinct_counts,
    })
|
24 |
+
|
25 |
+
# Function to generate insights (basic examples)
|
26 |
+
def generate_insights(df):
    """Derive simple, human-readable observations from known columns.

    Two optional columns are inspected:

    * ``avg_training_score`` - reports the mean score.
    * ``length_of_service`` - counts rows with a value greater than 5.

    Both columns are coerced to numbers first, so stray text in an uploaded
    CSV (e.g. "n/a") is treated as missing instead of raising ``TypeError``.

    Args:
        df: The DataFrame to analyse. Columns other than the two above are
            ignored.

    Returns:
        A list of insight strings; empty when neither column is present.
    """
    insights = []

    if 'avg_training_score' in df.columns:
        scores = pd.to_numeric(df['avg_training_score'], errors='coerce')
        avg_score = scores.mean()
        # An empty or fully non-numeric column has no meaningful average;
        # skip the insight rather than report "nan".
        if pd.notna(avg_score):
            insights.append(f"The average training score is {avg_score:.2f}. Consider additional training for employees below this score.")

    if 'length_of_service' in df.columns:
        service_years = pd.to_numeric(df['length_of_service'], errors='coerce')
        # Missing values compare as False, so they are never counted.
        experienced_employees = int((service_years > 5).sum())
        insights.append(f"{experienced_employees} employees have more than 5 years of service. Consider them for leadership roles.")

    return insights
|
38 |
+
|
39 |
+
# Query-prefixed dataset summarization with a Hugging Face pipeline (no retrieval step)
|
40 |
+
@st.cache_resource
def _load_summarizer():
    """Create the Hugging Face summarization pipeline once and reuse it.

    Streamlit re-executes the script on every interaction, so an uncached
    pipeline would be rebuilt for each query.
    """
    return pipeline("summarization")


def generate_query_summary(df, query):
    """Summarize the dataset text with the user's query prepended.

    Every row of ``df`` is rendered as space-joined strings, all rows are
    joined into one text, and ``query`` is placed in front of it before the
    text is handed to the summarization pipeline.

    Args:
        df: The DataFrame whose contents are summarized.
        query: Free-text query prepended to the dataset text.

    Returns:
        The ``summary_text`` of the first pipeline result.
    """
    summarizer = _load_summarizer()

    row_texts = df.astype(str).apply(lambda x: " ".join(x), axis=1)
    combined_text = " ".join(row_texts)

    result = summarizer(
        query + " " + combined_text,
        max_length=100,
        min_length=30,
        do_sample=False,
        # Uploaded datasets readily exceed the model's input limit; truncate
        # the input instead of failing inside the model.
        truncation=True,
    )
    return result[0]['summary_text']
|
45 |
+
|
46 |
+
# Streamlit app
|
47 |
+
# Streamlit page: header, file upload, then the dataset sections.
st.title("Employee Performance Dashboard")
st.markdown("Upload your cleaned dataset to generate insights and retrieve summaries.")

# File upload
uploaded_file = st.file_uploader("Upload CSV File", type="csv")

if uploaded_file is None:
    # Nothing uploaded yet: prompt the user and render nothing else.
    st.info("Please upload a CSV file.")
else:
    df = load_data(uploaded_file)

    # load_data already reported any error and returned None, so the
    # remaining sections are only rendered for a successfully parsed file.
    if df is not None:
        st.markdown("### Dataset Preview")
        st.dataframe(df.head())

        st.markdown("### Dataset Summary")
        st.dataframe(generate_summary(df))

        st.markdown("### Insights and Suggestions")
        for insight in generate_insights(df):
            st.write(f"- {insight}")

        st.markdown("### Query the Dataset")
        query = st.text_input("Enter your query:")
        if query:
            # Compute the summary first, then render its heading and text.
            query_summary = generate_query_summary(df, query)
            st.markdown("#### Summary")
            st.write(query_summary)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|