# app.py
"""Streamlit dashboard for exploring an uploaded employee-performance CSV.

The page shows a preview of the data, a per-column summary, a couple of
rule-based insights, and a free-text box whose input is fed, together with a
prefix of the data, to a Hugging Face summarization pipeline.
"""
import pandas as pd
import streamlit as st
from transformers import pipeline

# Upper bound (in characters) on the amount of dataset text handed to the
# summarizer; the pipeline additionally truncates at the token level.
MAX_INPUT_CHARS = 1024


# Function to load data
def load_data(file):
    """Read a CSV into a DataFrame.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv`` (here, the
            object returned by ``st.file_uploader``).

    Returns:
        The parsed DataFrame, or ``None`` if reading failed. On failure the
        error is shown in the UI via ``st.error``.
    """
    try:
        return pd.read_csv(file)
    except Exception as e:  # UI boundary: surface any read/parse failure
        st.error(f"Error loading file: {e}")
        return None


# Function to generate dataset summary
def generate_summary(df):
    """Build a per-column overview of ``df``.

    Args:
        df: DataFrame to describe.

    Returns:
        A DataFrame with one row per column of ``df`` and the columns
        'Column', 'Data Type', 'Non-Null Count' and 'Unique Values'.
    """
    return pd.DataFrame(
        {
            'Column': df.columns,
            'Data Type': [str(dtype) for dtype in df.dtypes],
            'Non-Null Count': df.notnull().sum().values,
            'Unique Values': df.nunique().values,
        }
    )


# Function to generate insights (basic examples)
def generate_insights(df):
    """Derive simple rule-based insights from well-known columns.

    Only the columns 'avg_training_score' and 'length_of_service' are
    inspected; each is skipped when absent.

    Args:
        df: DataFrame to inspect.

    Returns:
        A list of human-readable insight strings (possibly empty).
    """
    insights = []

    if 'avg_training_score' in df.columns:
        # Coerce so that string-typed or partially dirty columns yield NaN
        # instead of raising inside .mean().
        scores = pd.to_numeric(df['avg_training_score'], errors='coerce')
        avg_score = scores.mean()
        # An empty or entirely non-numeric column has no meaningful average.
        if pd.notna(avg_score):
            insights.append(
                f"The average training score is {avg_score:.2f}. "
                "Consider additional training for employees below this score."
            )

    if 'length_of_service' in df.columns:
        service = pd.to_numeric(df['length_of_service'], errors='coerce')
        # NaN compares False, so unparseable rows are simply not counted.
        experienced_employees = int((service > 5).sum())
        insights.append(
            f"{experienced_employees} employees have more than 5 years of service. "
            "Consider them for leadership roles."
        )

    return insights


@st.cache_resource
def _load_summarizer():
    """Create the summarization pipeline once and reuse it across reruns."""
    return pipeline("summarization")


def _rows_as_text(df, max_chars):
    """Return the first ``max_chars`` characters of ``df`` rendered as text.

    Cells within a row and rows themselves are joined with single spaces.
    """
    # Joining k rows produces at least k - 1 characters (the separators), so
    # the first max_chars + 1 rows always cover the requested prefix; there is
    # no need to stringify the whole frame.
    sample = df.head(max_chars + 1).astype(str)
    text = " ".join(
        " ".join(row) for row in sample.itertuples(index=False, name=None)
    )
    return text[:max_chars]


# Query summary using Hugging Face summarization
def generate_query_summary(df, query):
    """Summarize the query together with a prefix of the dataset text.

    No retrieval step is performed: the query is prepended to the first
    ``MAX_INPUT_CHARS`` characters of the stringified rows and the result is
    passed to the summarization pipeline.

    Args:
        df: DataFrame whose rows provide the context text.
        query: Free-text query entered by the user.

    Returns:
        The generated summary, or an error message string if the model could
        not be loaded or summarization failed. This function does not raise.
    """
    try:
        # Loading is inside the try so a failed model download is reported in
        # the UI like any other summarization error instead of crashing.
        summarizer = _load_summarizer()
        combined_text = _rows_as_text(df, MAX_INPUT_CHARS)
        result = summarizer(
            query + " " + combined_text,
            max_length=100,
            min_length=30,
            do_sample=False,
            # The character cap above does not bound the token count (and the
            # query is added on top), so let the tokenizer enforce the limit.
            truncation=True,
        )
        return result[0]['summary_text']
    except IndexError:
        return "Error: Unable to generate a summary. Ensure the query and data are valid."
    except Exception as e:
        return f"Error during summarization: {e}"


# Streamlit app
st.title("Employee Performance Dashboard")
st.markdown("Upload your cleaned dataset to generate insights and retrieve summaries.")

# File upload
uploaded_file = st.file_uploader("Upload CSV File", type="csv")

if uploaded_file is not None:
    df = load_data(uploaded_file)
    if df is not None:
        st.markdown("### Dataset Preview")
        st.dataframe(df.head())

        st.markdown("### Dataset Summary")
        summary = generate_summary(df)
        st.dataframe(summary)

        st.markdown("### Insights and Suggestions")
        insights = generate_insights(df)
        for insight in insights:
            st.write(f"- {insight}")

        st.markdown("### Query the Dataset")
        query = st.text_input("Enter your query:")
        if query:
            # Distinct name so the dataset summary above is not overwritten.
            query_summary = generate_query_summary(df, query)
            st.markdown("#### Summary")
            st.write(query_summary)
else:
    st.info("Please upload a CSV file.")