# app.py
"""Streamlit front end for the task-management workflow.

The page has three tabs:

* Dashboard - shows the most recent tasks read back from Notion.
* Upload and Process - reads a .docx file, extracts tasks from it, lets the
  user edit them, saves them and runs a comparison against the latest
  consolidated entry.
* Review Updated Tasks - lets the user edit the comparison result, store it
  as the new consolidated entry and push it to Notion.
"""
# The star import is deliberately first so that every explicit import below
# takes precedence over any name the module happens to re-export.
# NOTE(review): insert_weekly_task_data, fetch_recent_two_entries and
# get_database are presumably provided by this module - confirm.
from database.mongo_integration import *  # noqa: F401,F403

import json
import os
from datetime import datetime

import pandas as pd
import streamlit as st
from notion_client import Client
from pymongo import DESCENDING, MongoClient
from st_aggrid import AgGrid, GridOptionsBuilder

from data_ingestion.ingest_data import read_document
from data_ingestion.preprocess_data import preprocess_text
from llm_integration.task_comparison import compare_task_data
from llm_integration.task_extraction import extract_tasks_from_text

# from database.mongo_integration import save_tasks_to_mongo

os.makedirs("data/output", exist_ok=True)

# SECURITY: the defaults below are live credentials committed to source
# control. They are kept only so existing deployments keep working; set the
# MONGODB_URI / NOTION_TOKEN environment variables, rotate these secrets and
# then remove the literals.
mongo_client = MongoClient(
    os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://shahid:Protondev%40456@cluster0.ruurd.mongodb.net/",
    )
)
db = mongo_client["task_management"]
employee_project_collection = db["employee_project"]

notion = Client(
    auth=os.environ.get(
        "NOTION_TOKEN",
        "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",
    )
)
parent_page_id = "148b2f92b9948099a854e8b21a0640a3"
notion_database_id = "14db2f92-b994-81fb-9132-f4e4cb46ac13"

# Column order used when flattening the nested task JSON into a DataFrame.
_TASK_COLUMNS = [
    "Project",
    "Task Name",
    "Description",
    "Priority",
    "Assigned To",
    "Status",
]


def fetch_latest_task_entry():
    """
    Fetch the most recent entry from MongoDB.

    The entry is the document of ``employee_project_collection`` with the
    greatest ``created_at`` value.

    Returns:
        dict: The latest task entry as a dictionary.

    Raises:
        ValueError: If the collection holds no documents.
    """
    latest_entry = employee_project_collection.find_one(
        sort=[("created_at", DESCENDING)]
    )
    if not latest_entry:
        raise ValueError("No entries found in MongoDB.")
    return latest_entry


def _collect_notion_page_ids(database_id):
    """Return the ids of all pages in a Notion database, following pagination."""
    page_ids = []
    query_args = {"database_id": database_id}
    while True:
        response = notion.databases.query(**query_args)
        page_ids.extend(page["id"] for page in response["results"])
        if not response.get("has_more"):
            return page_ids
        # A single query returns one page of results; continue from the cursor.
        query_args["start_cursor"] = response["next_cursor"]


def _text_property(content):
    """Build the Notion text fragment list shared by title and rich_text values."""
    return [{"type": "text", "text": {"content": content}}]


def push_to_notion(latest_entry):
    """
    Push tasks from the latest entry to the Notion database.

    Existing pages of the database are archived first, then one page is
    created per task found under ``consolidated_final_task``. Failures of
    either step are reported through Streamlit instead of being raised, and
    a failed archive step does not prevent the push step from running.

    Args:
        latest_entry (dict): The most recent task data from MongoDB.

    Returns:
        bool: True when both the archive step and the push step completed
        without an error, False otherwise.
    """
    tasks = latest_entry.get("consolidated_final_task", {})
    created_at = latest_entry.get("created_at", None)
    # One timestamp for the whole batch; falls back to "now" when the entry
    # carries no creation time.
    created_at_iso = (created_at if created_at else datetime.utcnow()).isoformat()

    # Step 1: Archive existing tasks in Notion database
    archived_ok = False
    with st.spinner("Archiving existing tasks in Notion..."):
        try:
            # Collect every id before archiving so that pagination is not
            # disturbed by pages disappearing from the result set.
            for page_id in _collect_notion_page_ids(notion_database_id):
                notion.pages.update(page_id=page_id, archived=True)
            st.info("Old tasks archived in Notion successfully.")
            archived_ok = True
        except Exception as e:
            st.error(f"Failed to archive tasks in Notion: {e}")

    # Step 2: Push new tasks to Notion
    pushed_ok = False
    with st.spinner("Pushing new tasks to Notion..."):
        try:
            for project_name, task_list in tasks.items():
                for task_id, task_details in task_list.items():
                    # Map MongoDB fields to Notion properties
                    notion_task = {
                        "parent": {"database_id": notion_database_id},
                        "properties": {
                            "Project Name": {"title": _text_property(project_name)},
                            "Task ID": {"rich_text": _text_property(task_id)},
                            "Description": {
                                "rich_text": _text_property(
                                    task_details.get("description", "")
                                )
                            },
                            "Priority": {
                                "select": {"name": task_details.get("priority", "low")}
                            },
                            "Assigned To": {
                                "rich_text": _text_property(
                                    task_details.get("assigned_to", "")
                                )
                            },
                            "Current Status": {
                                "select": {
                                    "name": task_details.get("current_status", "pending")
                                }
                            },
                            "Created At": {"date": {"start": created_at_iso}},
                        },
                    }
                    response = notion.pages.create(**notion_task)
                    print(f"Task pushed to Notion: {response['id']}")
            st.success("New tasks pushed to Notion successfully!")
            pushed_ok = True
        except Exception as e:
            st.error(f"Failed to push tasks to Notion: {e}")

    return archived_ok and pushed_ok


def json_to_dataframe(json_data):
    """
    Converts a nested JSON structure into a user-friendly Pandas DataFrame for display.

    The input maps project name -> task id -> task details. A task that lacks
    one of the detail fields gets an empty string in that column, so an
    incomplete task does not abort the conversion.

    Args:
        json_data (dict): The JSON object containing projects and tasks.

    Returns:
        pd.DataFrame: One row per task with the columns Project, Task Name,
        Description, Priority, Assigned To and Status. Empty input yields an
        empty frame that still has these columns.
    """
    rows = [
        {
            "Project": project_name,
            "Task Name": task_id,
            "Description": task_details.get("description", ""),
            "Priority": task_details.get("priority", ""),
            "Assigned To": task_details.get("assigned_to", ""),
            "Status": task_details.get("current_status", ""),
        }
        for project_name, tasks in json_data.items()
        for task_id, task_details in tasks.items()
    ]
    return pd.DataFrame(rows, columns=_TASK_COLUMNS)


def dataframe_to_json(df):
    """
    Converts a Pandas DataFrame back into a nested JSON structure.

    Rows that share the same Project and Task Name overwrite each other; the
    last such row wins.

    Args:
        df (pd.DataFrame): The DataFrame containing projects and tasks.

    Returns:
        dict: A nested dictionary mapping project name -> task id -> details.
    """
    json_data = {}
    for _, row in df.iterrows():
        project_tasks = json_data.setdefault(row["Project"], {})
        project_tasks[row["Task Name"]] = {
            "description": row["Description"],
            "priority": row["Priority"],
            "assigned_to": row["Assigned To"],
            "current_status": row["Status"],
        }
    return json_data


def _first_text(prop, key):
    """Return the text of the first fragment of a title/rich_text property, or ""."""
    fragments = (prop or {}).get(key) or []
    if not fragments:
        return ""
    first = fragments[0]
    text = (first.get("text") or {}).get("content")
    # Fragments without a "text" payload fall back to "plain_text" if present.
    return text if text is not None else first.get("plain_text", "")


def _select_name(prop):
    """Return the option name of a select property, or "" when nothing is selected."""
    return ((prop or {}).get("select") or {}).get("name", "")


# Function to fetch the most recent tasks from Notion
def fetch_recent_tasks_from_notion(page_size=20):
    """
    Fetch the most recent tasks from the Notion database and return it as a list of dicts.

    Pages are sorted by their "Created At" property, newest first. A page with
    an empty or missing property contributes an empty string for that field
    instead of making the whole fetch fail.

    Args:
        page_size (int): Maximum number of tasks to request. Defaults to 20.

    Returns:
        list[dict]: One dict per task with the keys Project Name, Task ID,
        Description, Priority, Assigned To, Current Status and Created At.
        An empty list is returned when the query itself fails.
    """
    try:
        query_response = notion.databases.query(
            database_id=notion_database_id,
            sorts=[{"property": "Created At", "direction": "descending"}],
            page_size=page_size,
        )
    except Exception as e:
        print(f"Error fetching tasks from Notion: {e}")
        return []

    tasks = []
    for result in query_response.get("results", []):
        props = result.get("properties", {})
        created = (props.get("Created At") or {}).get("date") or {}
        tasks.append(
            {
                "Project Name": _first_text(props.get("Project Name"), "title"),
                "Task ID": _first_text(props.get("Task ID"), "rich_text"),
                "Description": _first_text(props.get("Description"), "rich_text"),
                "Priority": _select_name(props.get("Priority")),
                "Assigned To": _first_text(props.get("Assigned To"), "rich_text"),
                "Current Status": _select_name(props.get("Current Status")),
                "Created At": created.get("start", ""),
            }
        )
    return tasks


# Function to display recent tasks in DataFrame on the dashboard
def display_recent_tasks_on_dashboard():
    """
    Fetch and display the most recent tasks from Notion in a DataFrame on the Streamlit dashboard.

    Shows a short notice instead of a table when no tasks were fetched.
    """
    tasks = fetch_recent_tasks_from_notion()
    if not tasks:
        st.write("No tasks found in the Notion database.")
        return

    st.subheader("Most Recent Tasks from Notion")
    st.dataframe(pd.DataFrame(tasks))


# Initialize Streamlit app
st.set_page_config(
    page_title="Task Management",
    page_icon="📋",
    layout="wide"
)

# Define session state for managing intermediate data
if "processed_tasks" not in st.session_state:
    st.session_state.processed_tasks = None
if "edited_df" not in st.session_state:
    st.session_state.edited_df = None
if "comparison_results" not in st.session_state:
    st.session_state.comparison_results = None

tab1, tab2, tab3 = st.tabs(["Dashboard", "Upload and Process", "Review Updated Tasks"])

# Initialize session state for tab navigation
if "active_tab" not in st.session_state:
    st.session_state.active_tab = 0


# Function to switch tabs
def switch_tab(tab_index):
    """Record ``tab_index`` as the active tab in the session state."""
    st.session_state.active_tab = tab_index


# -------------------------------
# Tab 1: Dashboard
# -------------------------------
with tab1:
    with st.container():
        st.title("📋 Task Management Dashboard")

        # Display recent tasks from Notion in a DataFrame
        display_recent_tasks_on_dashboard()

# -------------------------------
# Tab 2: Upload and Process
# -------------------------------
with tab2:
    with st.container():
        st.title("📤 Upload and Process Tasks")
        uploaded_file = st.file_uploader("Upload a .docx file", type=["docx"])

        if uploaded_file is not None:
            with st.spinner("Processing uploaded file..."):
                # Step 1: Extract cleaned text
                raw_data = read_document(uploaded_file)
                cleaned_text = preprocess_text(raw_data)
                cleaned_text = "\n".join(
                    f"{entry['author']}: {entry['text']}" for entry in cleaned_text
                )

                # Step 2: Extract tasks. The DataFrame is kept in the session
                # state so reruns triggered by editing do not extract again.
                if 'df' not in st.session_state:
                    extracted_tasks = extract_tasks_from_text(cleaned_text)
                    st.subheader("Processed Tasks (DataFrame View)")
                    st.session_state.df = json_to_dataframe(extracted_tasks)

            # Display the DataFrame for editing
            edited_df = st.data_editor(st.session_state.df)
            st.session_state.edited_df = edited_df
            edited_extracted_tasks_json = dataframe_to_json(edited_df)
            st.session_state.processed_tasks = edited_extracted_tasks_json
            st.success("Tasks extracted successfully!")

            # Step 3: Push extracted tasks to MongoDB
            if st.button("Save tasks & Compare"):
                with st.spinner("Saving tasks to MongoDB..."):
                    try:
                        insert_weekly_task_data(edited_extracted_tasks_json)
                        st.success("Tasks successfully saved to the database!")
                    except Exception as e:
                        st.error(f"Failed to save tasks to the database: {e}")

                if 'df' in st.session_state:
                    del st.session_state['df']
                    st.info("Temporary data removed from session state.")

                # Step 4: Run comparison
                with st.spinner("Running task comparison..."):
                    st.write("Running task comparison...")
                    recent_entries = fetch_recent_two_entries()
                    try:
                        latest_entry = fetch_latest_task_entry()
                    except ValueError as e:
                        # No consolidated entry exists yet; report it instead
                        # of crashing the page.
                        latest_entry = None
                        st.warning(f"Cannot run comparison: {e}")

                    if latest_entry is not None:
                        if len(recent_entries) >= 2:
                            old_tasks = latest_entry.get("consolidated_final_task", {})
                            new_tasks = recent_entries[0]["tasks"]
                            comparison_results = compare_task_data(old_tasks, new_tasks)
                            st.session_state.comparison_results = comparison_results
                            # Drop the cached review table so the review tab is
                            # rebuilt from the new comparison results.
                            st.session_state.pop("compared_df", None)
                            st.success("Task comparison completed! Please move to Review section")
                        else:
                            st.warning("Not enough data to run comparison.")

# Tab 3: Review and Approve Tasks
with tab3:
    st.title("🔍 Review and Approve Tasks")

    if st.session_state.comparison_results is None:
        st.warning("No comparison results available. Please upload and process tasks first.")
    else:
        # Display comparison results
        if st.session_state.comparison_results:
            if "compared_df" not in st.session_state:
                st.session_state.compared_df = json_to_dataframe(
                    st.session_state.comparison_results
                )

            # Inline editing of tasks
            st.subheader("Edit Tasks")
            final_edited_df = st.data_editor(st.session_state.compared_df)
            st.session_state.final_edited_df = final_edited_df
            final_extracted_tasks_json = dataframe_to_json(final_edited_df)

            # Approval and finalization
            if st.button("Approve and Finalize Tasks"):
                with st.spinner("Finalizing tasks..."):
                    try:
                        db = get_database()
                        updated_collection = db["employee_project"]
                        document = {
                            "consolidated_final_task": final_extracted_tasks_json,
                            "created_at": datetime.now()
                        }
                        updated_collection.insert_one(document)
                        st.success("Finalized tasks saved successfully!")
                    except Exception as e:
                        st.error(f"Failed to save tasks: {e}")

            if st.button("Push to Notion Dashboard"):
                with st.spinner("Pushing to Notion..."):
                    try:
                        latest_entry = fetch_latest_task_entry()
                    except ValueError as e:
                        st.error(f"Nothing to push to Notion: {e}")
                    else:
                        # Only claim success when both Notion steps worked;
                        # push_to_notion has already reported any failure.
                        if push_to_notion(latest_entry):
                            st.success("Notion Dashboard has been updated")