Spaces:
Sleeping
Sleeping
# app.py | |
import streamlit as st | |
from st_aggrid import AgGrid, GridOptionsBuilder | |
import pandas as pd | |
import os, json | |
from datetime import datetime | |
from data_ingestion.ingest_data import read_document | |
from data_ingestion.preprocess_data import preprocess_text | |
from llm_integration.task_extraction import extract_tasks_from_text | |
from llm_integration.task_comparison import compare_task_data | |
from database.mongo_integration import * | |
from notion_client import Client | |
from pymongo import MongoClient | |
from datetime import datetime | |
# from database.mongo_integration import save_tasks_to_mongo | |
os.makedirs("data/output", exist_ok=True) | |
import pandas as pd | |
# ---------------------------------------------------------------------------
# External service configuration
# ---------------------------------------------------------------------------
# SECURITY: the literal values below were committed to source control and
# should be treated as compromised. Rotate them and provide the real values
# through the environment variables named here; the literals are kept only as
# backward-compatible fallbacks so existing deployments keep starting.
# NOTE(review): the fallback MongoDB URI looks mangled (its host part reads
# like an e-mail-obfuscation placeholder) — confirm against the deployment.
MONGO_URI = os.environ.get(
    "MONGO_URI",
    "mongodb+srv://shahid:Protondev%[email protected]/",
)
mongo_client = MongoClient(MONGO_URI)
db = mongo_client["task_management"]
employee_project_collection = db["employee_project"]

# Notion client plus the identifiers of the page/database this app writes to.
notion = Client(
    auth=os.environ.get(
        "NOTION_TOKEN",
        "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",
    )
)
parent_page_id = os.environ.get(
    "NOTION_PARENT_PAGE_ID", "148b2f92b9948099a854e8b21a0640a3"
)
notion_database_id = os.environ.get(
    "NOTION_DATABASE_ID", "14db2f92-b994-81fb-9132-f4e4cb46ac13"
)
def fetch_latest_task_entry():
    """
    Fetch the most recent entry from the ``employee_project`` MongoDB collection.

    "Most recent" is the document that sorts first by ``created_at`` in
    descending order.

    Returns:
        dict: The latest task entry as a dictionary.

    Raises:
        ValueError: If the query returns no document.
    """
    # Imported explicitly so the sort constant does not depend on whatever the
    # wildcard import from database.mongo_integration happens to export.
    from pymongo import DESCENDING

    latest_entry = employee_project_collection.find_one(
        sort=[("created_at", DESCENDING)]
    )
    if not latest_entry:
        raise ValueError("No entries found in MongoDB.")
    return latest_entry
def push_to_notion(latest_entry):
    """
    Replace the contents of the Notion task database with the tasks in ``latest_entry``.

    All pages currently returned by the database query are archived first, then
    one page is created per task. Progress and failures are reported through
    Streamlit (spinner / info / success / error); exceptions raised by the
    Notion client are caught here and are not re-raised.

    Args:
        latest_entry (dict): The most recent task data from MongoDB. Its
            "consolidated_final_task" value is read as
            ``{project_name: {task_id: task_details}}``; its optional
            "created_at" value becomes the "Created At" date of every page.
    """
    # ``or {}`` also covers a key that is present but holds None.
    tasks = latest_entry.get("consolidated_final_task") or {}
    created_at = latest_entry.get("created_at")

    # Compute the timestamp once so every task of this batch carries the same
    # value, and so a non-datetime "created_at" does not abort the push.
    if not created_at:
        created_iso = datetime.utcnow().isoformat()
    elif hasattr(created_at, "isoformat"):
        created_iso = created_at.isoformat()
    else:
        created_iso = str(created_at)

    def text_property(value):
        # Shape shared by every rich_text property written below.
        return {"rich_text": [{"type": "text", "text": {"content": value}}]}

    # Step 1: Archive existing tasks in the Notion database
    with st.spinner("Archiving existing tasks in Notion..."):
        try:
            # The query endpoint is paginated; follow the cursor so that every
            # existing page is collected, not just the first batch.
            stale_page_ids = []
            cursor = None
            while True:
                query_args = {"database_id": notion_database_id}
                if cursor:
                    query_args["start_cursor"] = cursor
                batch = notion.databases.query(**query_args)
                stale_page_ids.extend(page["id"] for page in batch["results"])
                cursor = batch.get("next_cursor")
                if not batch.get("has_more") or not cursor:
                    break

            for page_id in stale_page_ids:
                notion.pages.update(page_id=page_id, archived=True)
            st.info("Old tasks archived in Notion successfully.")
        except Exception as e:
            # A failed archive is reported but does not stop the push below.
            st.error(f"Failed to archive tasks in Notion: {e}")

    # Step 2: Push new tasks to Notion
    with st.spinner("Pushing new tasks to Notion..."):
        try:
            for project_name, task_list in tasks.items():
                for task_id, task_details in (task_list or {}).items():
                    # Map MongoDB fields to Notion properties. ``or`` supplies
                    # the default for missing, None and empty values alike.
                    notion_task = {
                        "parent": {"database_id": notion_database_id},
                        "properties": {
                            "Project Name": {
                                "title": [{"type": "text", "text": {"content": project_name}}]
                            },
                            "Task ID": text_property(task_id),
                            "Description": text_property(task_details.get("description") or ""),
                            "Priority": {"select": {"name": task_details.get("priority") or "low"}},
                            "Assigned To": text_property(task_details.get("assigned_to") or ""),
                            "Current Status": {
                                "select": {"name": task_details.get("current_status") or "pending"}
                            },
                            "Created At": {"date": {"start": created_iso}},
                        },
                    }
                    response = notion.pages.create(**notion_task)
                    print(f"Task pushed to Notion: {response['id']}")
            st.success("New tasks pushed to Notion successfully!")
        except Exception as e:
            st.error(f"Failed to push tasks to Notion: {e}")
def json_to_dataframe(json_data):
    """
    Convert a nested ``{project: {task_id: details}}`` mapping into a flat DataFrame.

    One row is produced per task. The frame always has the same six columns,
    even when there are no tasks, so downstream code can rely on them.
    Fields missing from a task's details fall back to the defaults that
    ``push_to_notion`` uses ("" for text, "low" priority, "pending" status).

    Args:
        json_data (dict): The JSON object containing projects and tasks.
            ``None`` or an empty mapping (also per project) yields no rows.

    Returns:
        pd.DataFrame: Columns "Project", "Task Name", "Description",
        "Priority", "Assigned To" and "Status".
    """
    columns = ["Project", "Task Name", "Description", "Priority", "Assigned To", "Status"]
    rows = []
    for project_name, tasks in (json_data or {}).items():
        for task_id, task_details in (tasks or {}).items():
            rows.append({
                "Project": project_name,
                "Task Name": task_id,
                "Description": task_details.get("description", ""),
                "Priority": task_details.get("priority", "low"),
                "Assigned To": task_details.get("assigned_to", ""),
                "Status": task_details.get("current_status", "pending"),
            })
    # Passing ``columns`` keeps the schema stable for an empty ``rows`` list.
    return pd.DataFrame(rows, columns=columns)
def dataframe_to_json(df):
    """
    Rebuild the nested ``{project: {task: details}}`` mapping from a task table.

    Rows are processed top to bottom; when the same project/task pair occurs
    more than once, the last row wins.

    Args:
        df (pd.DataFrame): Table with the columns "Project", "Task Name",
            "Description", "Priority", "Assigned To" and "Status".

    Returns:
        dict: Projects mapped to their tasks, each task holding the keys
        "description", "priority", "assigned_to" and "current_status".
    """
    nested = {}
    for _, record in df.iterrows():
        project, task = record['Project'], record['Task Name']
        # setdefault creates the per-project bucket on first sight
        nested.setdefault(project, {})[task] = {
            "description": record['Description'],
            "priority": record['Priority'],
            "assigned_to": record['Assigned To'],
            "current_status": record['Status'],
        }
    return nested
# Helpers and entry point for reading the most recent tasks back from Notion
def _notion_text(prop):
    """Return the concatenated text of a Notion title/rich_text property ("" if empty)."""
    prop = prop or {}
    segments = prop.get("title") or prop.get("rich_text") or []
    return "".join(
        seg.get("plain_text") or (seg.get("text") or {}).get("content", "")
        for seg in segments
    )


def _notion_select_name(prop):
    """Return the option name of a Notion select property ("" when nothing is selected)."""
    return ((prop or {}).get("select") or {}).get("name", "")


def _notion_date_start(prop):
    """Return the start value of a Notion date property (None when the date is unset)."""
    return ((prop or {}).get("date") or {}).get("start")


def fetch_recent_tasks_from_notion(page_size=20):
    """
    Fetch the most recent tasks from the Notion database as a list of dicts.

    Pages are sorted by their "Created At" property, newest first. Properties
    that are empty or absent on a page are returned as "" (None for the date)
    instead of discarding the whole result. Text properties made of several
    segments are returned in full rather than truncated to the first segment.

    Args:
        page_size (int): Maximum number of tasks to request. Defaults to 20.
            NOTE(review): the Notion API caps a single query at 100 results —
            confirm before raising this.

    Returns:
        list[dict]: One dict per task with the keys "Project Name", "Task ID",
        "Description", "Priority", "Assigned To", "Current Status" and
        "Created At". An empty list is returned when the query fails; the
        error is printed rather than raised.
    """
    try:
        query_response = notion.databases.query(
            database_id=notion_database_id,
            sorts=[{"property": "Created At", "direction": "descending"}],
            page_size=page_size,
        )

        tasks = []
        for result in query_response.get("results", []):
            props = result.get("properties") or {}
            tasks.append({
                "Project Name": _notion_text(props.get("Project Name")),
                "Task ID": _notion_text(props.get("Task ID")),
                "Description": _notion_text(props.get("Description")),
                "Priority": _notion_select_name(props.get("Priority")),
                "Assigned To": _notion_text(props.get("Assigned To")),
                "Current Status": _notion_select_name(props.get("Current Status")),
                "Created At": _notion_date_start(props.get("Created At")),
            })
        return tasks
    except Exception as e:
        # Boundary with the external service: the dashboard treats any failure
        # as "no tasks", so report it and return an empty list.
        print(f"Error fetching tasks from Notion: {e}")
        return []
# Render the latest Notion tasks as a table on the dashboard
def display_recent_tasks_on_dashboard():
    """
    Show the tasks returned by ``fetch_recent_tasks_from_notion`` in Streamlit.

    When the fetch returns nothing, a short notice is written instead of a table.
    """
    recent = fetch_recent_tasks_from_notion()

    # Guard clause: nothing to tabulate
    if not recent:
        st.write("No tasks found in the Notion database.")
        return

    table = pd.DataFrame(recent)
    st.subheader("Most Recent Tasks from Notion")
    st.dataframe(table)
# Page-level settings for the Streamlit app
st.set_page_config(page_title="Task Management", page_icon="π", layout="wide")

# Seed the session-state slots that carry intermediate data between reruns;
# values that already exist are left untouched.
for _state_key in ("processed_tasks", "edited_df", "comparison_results"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None

# The three top-level tabs of the app
tab1, tab2, tab3 = st.tabs(["Dashboard", "Upload and Process", "Review Updated Tasks"])

# Tab-navigation slot; starts at 0 the first time the script runs in a session
if "active_tab" not in st.session_state:
    st.session_state["active_tab"] = 0
# Function to switch tabs | |
def switch_tab(tab_index):
    """Record ``tab_index`` under the "active_tab" key of the session state."""
    st.session_state["active_tab"] = tab_index
# ===============================
# Tab 1: Dashboard
# ===============================
# (previously gated by: if st.session_state.active_tab == 0)
with tab1, st.container():
    st.title("π Task Management Dashboard")

    # Table of the most recent tasks stored in Notion
    display_recent_tasks_on_dashboard()

    # Quick actions are currently disabled:
    # st.subheader("Quick Actions")
    # if st.button("Upload and Process New Tasks"):
    #     switch_tab(1)
    # if st.button("Review and Approve Tasks"):
    #     switch_tab(2)
# -------------------------------
# Tab 2: Upload and Process
# -------------------------------
# Flow: upload a .docx -> extract tasks once per upload -> let the user edit
# them -> on request save them to MongoDB and compare against the latest
# finalized tasks. Streamlit re-executes this script on every interaction, so
# the extracted table is cached in st.session_state.df.
# NOTE(review): the nesting below was reconstructed from unindented source —
# confirm it matches the intended layout.
with tab2, st.container():
    st.title("π€ Upload and Process Tasks")
    uploaded_file = st.file_uploader("Upload a .docx file", type=["docx"])

    if uploaded_file is not None:
        # Identify the upload so that choosing a different file invalidates the
        # cached extraction instead of showing the previous file's tasks.
        upload_key = (uploaded_file.name, uploaded_file.size)
        if "df" in st.session_state and st.session_state.get("df_source") != upload_key:
            del st.session_state["df"]

        # Steps 1-2: read, clean and extract — only when nothing is cached, so
        # the document is not re-read and re-sent for extraction on each rerun.
        if "df" not in st.session_state:
            with st.spinner("Processing uploaded file..."):
                raw_data = read_document(uploaded_file)
                cleaned_entries = preprocess_text(raw_data)
                cleaned_text = "\n".join(
                    f"{entry['author']}: {entry['text']}" for entry in cleaned_entries
                )
                extracted_tasks = extract_tasks_from_text(cleaned_text)
                st.session_state.df = json_to_dataframe(extracted_tasks)
                st.session_state.df_source = upload_key

        # Display the DataFrame for editing
        st.subheader("Processed Tasks (DataFrame View)")
        edited_df = st.data_editor(st.session_state.df)
        st.session_state.edited_df = edited_df
        edited_extracted_tasks_json = dataframe_to_json(edited_df)
        st.session_state.processed_tasks = edited_extracted_tasks_json
        st.success("Tasks extracted successfully!")

        # Step 3: Push extracted tasks to MongoDB
        if st.button("Save tasks & Compare"):
            saved = False
            with st.spinner("Saving tasks to MongoDB..."):
                try:
                    insert_weekly_task_data(edited_extracted_tasks_json)
                except Exception as e:
                    st.error(f"Failed to save tasks to the database: {e}")
                else:
                    saved = True
                    st.success("Tasks successfully saved to the database!")

            # Only continue after a successful save: a failed save must not
            # throw away the user's edits or compare against unsaved data.
            if saved:
                if "df" in st.session_state:
                    del st.session_state["df"]
                    st.info("Temporary data removed from session state.")

                # Step 4: Run comparison
                with st.spinner("Running task comparison..."):
                    st.write("Running task comparison...")
                    recent_entries = fetch_recent_two_entries()
                    if len(recent_entries) >= 2:
                        try:
                            latest_entry = fetch_latest_task_entry()
                        except ValueError as e:
                            # No finalized entry exists yet; report it instead
                            # of letting the exception abort the script run.
                            st.warning(f"Cannot run comparison: {e}")
                        else:
                            old_tasks = latest_entry.get("consolidated_final_task", {})
                            new_tasks = recent_entries[0]["tasks"]
                            comparison_results = compare_task_data(old_tasks, new_tasks)
                            st.session_state.comparison_results = comparison_results
                            # Drop the review table cached from an earlier
                            # comparison so the review tab rebuilds it.
                            if "compared_df" in st.session_state:
                                del st.session_state["compared_df"]
                            st.success("Task comparison completed! Please move to Review section")
                    else:
                        st.warning("Not enough data to run comparison.")
# -------------------------------
# Tab 3: Review and Approve Tasks
# -------------------------------
# Shows the stored comparison results in an editable table, lets the user save
# the edited result as a new finalized entry, and push the latest finalized
# entry to Notion.
# NOTE(review): the nesting below was reconstructed from unindented source —
# confirm it matches the intended layout.
with tab3:
    st.title("π Review and Approve Tasks")
    comparison = st.session_state.comparison_results

    if comparison is None:
        st.warning("No comparison results available. Please upload and process tasks first.")
    elif not comparison:
        # A comparison ran but returned nothing; say so rather than leaving
        # the tab blank.
        st.info("The comparison produced no tasks to review.")
    else:
        # Build the editable table once and keep it across reruns
        if "compared_df" not in st.session_state:
            st.session_state.compared_df = json_to_dataframe(comparison)

        # Inline editing of tasks
        st.subheader("Edit Tasks")
        final_edited_df = st.data_editor(st.session_state.compared_df)
        st.session_state.final_edited_df = final_edited_df
        final_extracted_tasks_json = dataframe_to_json(final_edited_df)

        # Approval and finalization
        if st.button("Approve and Finalize Tasks"):
            with st.spinner("Finalizing tasks..."):
                try:
                    # Separate name so the module-level ``db`` handle is not
                    # rebound by this script section.
                    review_db = get_database()
                    updated_collection = review_db["employee_project"]
                    document = {
                        "consolidated_final_task": final_extracted_tasks_json,
                        "created_at": datetime.now(),
                    }
                    updated_collection.insert_one(document)
                    st.success("Finalized tasks saved successfully!")
                except Exception as e:
                    st.error(f"Failed to save tasks: {e}")

        if st.button("Push to Notion Dashboard"):
            with st.spinner("Pushing to Notion..."):
                try:
                    latest_entry = fetch_latest_task_entry()
                except ValueError as e:
                    # Nothing has been finalized yet; report it instead of
                    # letting the exception abort the script run.
                    st.error(f"Nothing to push to Notion: {e}")
                else:
                    # push_to_notion shows its own success/error messages, so
                    # no unconditional confirmation is added here (it would
                    # contradict a reported failure).
                    push_to_notion(latest_entry)