# Smart-Tasker / app.py
# Shahid
# Added first commit
# af30a30
# app.py
import streamlit as st
from st_aggrid import AgGrid, GridOptionsBuilder
import pandas as pd
import os, json
from datetime import datetime
from data_ingestion.ingest_data import read_document
from data_ingestion.preprocess_data import preprocess_text
from llm_integration.task_extraction import extract_tasks_from_text
from llm_integration.task_comparison import compare_task_data
from database.mongo_integration import *
from notion_client import Client
from pymongo import MongoClient
from datetime import datetime
# from database.mongo_integration import save_tasks_to_mongo
# Make sure the output directory exists; exist_ok avoids an error on reruns.
os.makedirs("data/output", exist_ok=True)
import pandas as pd  # duplicate of the pandas import above; left in place

# ---------------------------------------------------------------------------
# External service configuration
#
# SECURITY: the fallback literals below are credentials/identifiers committed
# to source control. Each one can now be overridden with an environment
# variable so deployments do not have to rely on them; the literals are kept
# only so existing setups keep working. They should be rotated and removed.
#
# NOTE(review): the MongoDB fallback URI contains "[email protected]", which
# looks like an e-mail-obfuscation placeholder rather than a real host -
# confirm the real URI and supply it through MONGO_URI.
# ---------------------------------------------------------------------------
mongo_client = MongoClient(
    os.environ.get("MONGO_URI")
    or "mongodb+srv://shahid:Protondev%[email protected]/"  # Replace with your MongoDB URI
)
db = mongo_client["task_management"]
employee_project_collection = db["employee_project"]

notion = Client(
    auth=os.environ.get("NOTION_TOKEN")
    or "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"
)
parent_page_id = (
    os.environ.get("NOTION_PARENT_PAGE_ID") or "148b2f92b9948099a854e8b21a0640a3"
)
notion_database_id = (
    os.environ.get("NOTION_DATABASE_ID") or "14db2f92-b994-81fb-9132-f4e4cb46ac13"
)
def fetch_latest_task_entry():
    """
    Return the newest document stored in ``employee_project_collection``.

    "Newest" means the first document when the collection is sorted by its
    ``created_at`` field in descending order.

    Returns:
        dict: The document with the greatest ``created_at`` value.

    Raises:
        ValueError: If the query returns nothing.
    """
    newest_first = [("created_at", DESCENDING)]
    document = employee_project_collection.find_one(sort=newest_first)
    if not document:
        raise ValueError("No entries found in MongoDB.")
    return document
def push_to_notion(latest_entry):
    """
    Replace the contents of the Notion task database with the given tasks.

    The function works in two independent, best-effort steps, each reporting
    its outcome through Streamlit messages instead of raising:

    1. Archive every page currently in the Notion database.
    2. Create one Notion page per task found under
       ``latest_entry["consolidated_final_task"]``.

    A failure in step 1 is reported but does not prevent step 2 from running.

    Args:
        latest_entry (dict): The most recent task data from MongoDB. Read keys:
            ``consolidated_final_task`` (mapping of project name to a mapping
            of task id to task details) and ``created_at`` (an object with an
            ``isoformat()`` method; the current UTC time is used when it is
            missing or falsy).
    """
    tasks = latest_entry.get("consolidated_final_task", {})
    created_at = latest_entry.get("created_at", None)

    # Step 1: Archive existing tasks in Notion database
    with st.spinner("Archiving existing tasks in Notion..."):
        try:
            # A database query returns one page of results at a time, so keep
            # following the cursor until Notion reports there is nothing left.
            # IDs are collected first so archiving cannot disturb the paging.
            stale_page_ids = []
            query_args = {"database_id": notion_database_id}
            while True:
                batch = notion.databases.query(**query_args)
                stale_page_ids.extend(page["id"] for page in batch["results"])
                next_cursor = batch.get("next_cursor")
                if not batch.get("has_more") or not next_cursor:
                    break
                query_args["start_cursor"] = next_cursor

            for page_id in stale_page_ids:
                notion.pages.update(page_id=page_id, archived=True)
            st.info("Old tasks archived in Notion successfully.")
        except Exception as e:
            st.error(f"Failed to archive tasks in Notion: {e}")

    # Step 2: Push new tasks to Notion
    with st.spinner("Pushing new tasks to Notion..."):
        try:
            # Computed once so every task in the batch carries the same
            # timestamp, even when falling back to "now".
            created_at_iso = (created_at if created_at else datetime.utcnow()).isoformat()

            for project_name, task_list in tasks.items():
                for task_id, task_details in task_list.items():
                    # Map MongoDB fields to Notion properties
                    notion_task = {
                        "parent": {"database_id": notion_database_id},
                        "properties": {
                            "Project Name": {"title": [{"type": "text", "text": {"content": project_name}}]},
                            "Task ID": {"rich_text": [{"type": "text", "text": {"content": task_id}}]},
                            "Description": {"rich_text": [{"type": "text", "text": {"content": task_details.get("description", "")}}]},
                            "Priority": {"select": {"name": task_details.get("priority", "low")}},
                            "Assigned To": {"rich_text": [{"type": "text", "text": {"content": task_details.get("assigned_to", "")}}]},
                            "Current Status": {"select": {"name": task_details.get("current_status", "pending")}},
                            "Created At": {"date": {"start": created_at_iso}},
                        },
                    }
                    response = notion.pages.create(**notion_task)
                    print(f"Task pushed to Notion: {response['id']}")
            st.success("New tasks pushed to Notion successfully!")
        except Exception as e:
            st.error(f"Failed to push tasks to Notion: {e}")
def json_to_dataframe(json_data):
    """
    Flatten a nested ``{project: {task_id: details}}`` mapping into a DataFrame.

    Each task becomes one row. Fields missing from a task's details are filled
    with the same fallbacks ``push_to_notion`` uses (empty description and
    assignee, ``"low"`` priority, ``"pending"`` status) instead of raising
    ``KeyError``, so a partially filled task cannot break the whole table.

    Args:
        json_data (dict): The JSON object containing projects and tasks.

    Returns:
        pd.DataFrame: One row per task with the columns ``Project``,
        ``Task Name``, ``Description``, ``Priority``, ``Assigned To`` and
        ``Status``. The columns are present even when there are no tasks.
    """
    columns = ["Project", "Task Name", "Description", "Priority", "Assigned To", "Status"]
    rows = []
    for project_name, tasks in json_data.items():
        # Treat a project whose value is None/empty as having no tasks.
        for task_id, task_details in (tasks or {}).items():
            details = task_details or {}
            rows.append({
                "Project": project_name,
                "Task Name": task_id,
                "Description": details.get("description", ""),
                "Priority": details.get("priority", "low"),
                "Assigned To": details.get("assigned_to", ""),
                "Status": details.get("current_status", "pending"),
            })
    # Passing columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(rows, columns=columns)
def dataframe_to_json(df):
    """
    Rebuild the nested ``{project: {task: details}}`` mapping from a task table.

    Rows are processed top to bottom; when two rows share the same project and
    task name, the later row replaces the earlier one.

    Args:
        df (pd.DataFrame): Table with the columns ``Project``, ``Task Name``,
            ``Description``, ``Priority``, ``Assigned To`` and ``Status``.

    Returns:
        dict: Project name -> task name -> dict with the keys ``description``,
        ``priority``, ``assigned_to`` and ``current_status``.
    """
    # (output key, source column) pairs, in the order the keys are emitted.
    field_to_column = (
        ("description", "Description"),
        ("priority", "Priority"),
        ("assigned_to", "Assigned To"),
        ("current_status", "Status"),
    )

    nested = {}
    for _, record in df.iterrows():
        project = record["Project"]
        task = record["Task Name"]
        # setdefault creates the project bucket the first time it is seen.
        nested.setdefault(project, {})[task] = {
            field: record[column] for field, column in field_to_column
        }
    return nested
# Function to fetch the most recent tasks from Notion
def _notion_first_text(prop, kind):
    """Return the text content of the first fragment of a title/rich_text property, or "" if there is none."""
    fragments = (prop or {}).get(kind) or []
    if not fragments:
        return ""
    return (fragments[0].get("text") or {}).get("content", "")


def _notion_nested_value(prop, outer, inner):
    """Return ``prop[outer][inner]``, or "" when the property or its value is missing/None."""
    return ((prop or {}).get(outer) or {}).get(inner, "")


def fetch_recent_tasks_from_notion():
    """
    Fetch up to 20 tasks from the Notion database, newest first.

    The database is queried sorted by its "Created At" property in descending
    order. Each returned page is flattened into a plain dict. Properties that
    are missing or empty (no text fragments, no select option, no date) yield
    an empty string, so a single incomplete page does not discard the rest of
    the list.

    Returns:
        list[dict]: One dict per page with the keys "Project Name", "Task ID",
        "Description", "Priority", "Assigned To", "Current Status" and
        "Created At". An empty list is returned when the query fails; the
        error is printed rather than raised.
    """
    try:
        query_response = notion.databases.query(
            database_id=notion_database_id,
            sorts=[{"property": "Created At", "direction": "descending"}],
            page_size=20,  # Upper bound on the number of tasks shown; adjust as needed
        )

        tasks = []
        for result in query_response.get("results", []):
            props = result.get("properties") or {}
            tasks.append({
                "Project Name": _notion_first_text(props.get("Project Name"), "title"),
                "Task ID": _notion_first_text(props.get("Task ID"), "rich_text"),
                "Description": _notion_first_text(props.get("Description"), "rich_text"),
                "Priority": _notion_nested_value(props.get("Priority"), "select", "name"),
                "Assigned To": _notion_first_text(props.get("Assigned To"), "rich_text"),
                "Current Status": _notion_nested_value(props.get("Current Status"), "select", "name"),
                "Created At": _notion_nested_value(props.get("Created At"), "date", "start"),
            })
        return tasks
    except Exception as e:
        print(f"Error fetching tasks from Notion: {e}")
        return []
# Function to display recent tasks in DataFrame on the dashboard
def display_recent_tasks_on_dashboard():
    """
    Render the tasks returned by ``fetch_recent_tasks_from_notion`` on the page.

    When tasks are available they are shown as a table under a sub-heading;
    otherwise a short "nothing found" message is written instead.
    """
    recent_tasks = fetch_recent_tasks_from_notion()

    # Nothing to tabulate: tell the user and stop.
    if not recent_tasks:
        st.write("No tasks found in the Notion database.")
        return

    task_table = pd.DataFrame(recent_tasks)
    st.subheader("Most Recent Tasks from Notion")
    st.dataframe(task_table)
# Initialize Streamlit app
# Page-level configuration. The icon literal had been corrupted by an encoding
# round-trip; it is restored to the clipboard emoji.
st.set_page_config(
    page_title="Task Management",
    page_icon="📋",
    layout="wide",
)

# Seed the session-state slots used to carry intermediate data across reruns.
# Existing values are left untouched.
for _slot in ("processed_tasks", "edited_df", "comparison_results"):
    if _slot not in st.session_state:
        st.session_state[_slot] = None

tab1, tab2, tab3 = st.tabs(["Dashboard", "Upload and Process", "Review Updated Tasks"])

# Initialize session state for tab navigation
if "active_tab" not in st.session_state:
    st.session_state.active_tab = 0
# Function to switch tabs
def switch_tab(tab_index):
    """
    Remember ``tab_index`` as the active tab in Streamlit's session state.

    Only the ``active_tab`` session value is written; the visible calls to
    this function and the checks of ``active_tab`` are commented out.
    """
    st.session_state["active_tab"] = tab_index
# -------------------------------
# Tab 1: Dashboard
# -------------------------------
# if st.session_state.active_tab == 0:
# Dashboard tab: a title followed by the most recent tasks read from Notion.
with tab1:
    with st.container():
        # Title emoji restored from its mis-encoded form (clipboard emoji).
        st.title("📋 Task Management Dashboard")
        # Display recent tasks from Notion in a DataFrame
        display_recent_tasks_on_dashboard()
# # Quick actions
# st.subheader("Quick Actions")
# if st.button("Upload and Process New Tasks"):
# switch_tab(1)
# if st.button("Review and Approve Tasks"):
# switch_tab(2)
# -------------------------------
# Tab 2: Upload and Process
# -------------------------------
# elif st.session_state.active_tab == 1:
# Tab 2: Upload and Process
# Upload tab: parse an uploaded .docx, let the user edit the extracted tasks,
# then save them and compare against the latest finalized task set.
with tab2:
    with st.container():
        # Title emoji restored from its mis-encoded form.
        # NOTE(review): presumably the "outbox tray" emoji - confirm against the repository.
        st.title("📤 Upload and Process Tasks")
        uploaded_file = st.file_uploader("Upload a .docx file", type=["docx"])

        if uploaded_file is not None:
            with st.spinner("Processing uploaded file..."):
                # Steps 1-2 only run when no extracted table is cached yet.
                # Streamlit reruns this script on every widget interaction, so
                # parsing the document each time would repeat work whose
                # result is only needed for the extraction call below.
                if 'df' not in st.session_state:
                    # Step 1: Extract cleaned text
                    raw_data = read_document(uploaded_file)
                    cleaned_text = preprocess_text(raw_data)
                    cleaned_text = "\n".join(
                        f"{entry['author']}: {entry['text']}" for entry in cleaned_text
                    )
                    # Step 2: Extract tasks
                    extracted_tasks = extract_tasks_from_text(cleaned_text)
                    st.session_state.df = json_to_dataframe(extracted_tasks)

                # Shown on every rerun so the editor keeps its heading
                # (previously it was only rendered on the first pass).
                st.subheader("Processed Tasks (DataFrame View)")

                # Display the DataFrame for editing
                edited_df = st.data_editor(st.session_state.df)
                st.session_state.edited_df = edited_df
                edited_extracted_tasks_json = dataframe_to_json(edited_df)
                st.session_state.processed_tasks = edited_extracted_tasks_json
                st.success("Tasks extracted successfully!")

            # Step 3: Push extracted tasks to MongoDB
            if st.button("Save tasks & Compare"):
                with st.spinner("Saving tasks to MongoDB..."):
                    try:
                        insert_weekly_task_data(edited_extracted_tasks_json)
                        st.success("Tasks successfully saved to the database!")
                    except Exception as e:
                        st.error(f"Failed to save tasks to the database: {e}")

                # Drop the cached table so the next upload is extracted afresh.
                if 'df' in st.session_state:
                    del st.session_state['df']
                    st.info("Temporary data removed from session state.")

                # Step 4: Run comparison
                with st.spinner("Running task comparison..."):
                    st.write("Running task comparison...")
                    recent_entries = fetch_recent_two_entries()
                    try:
                        latest_entry = fetch_latest_task_entry()
                    except ValueError as e:
                        # No finalized entry exists yet; report it instead of
                        # letting the exception abort the page.
                        st.warning(f"Cannot run comparison: {e}")
                    else:
                        if len(recent_entries) >= 2:
                            old_tasks = latest_entry.get("consolidated_final_task", {})
                            new_tasks = recent_entries[0]["tasks"]
                            comparison_results = compare_task_data(old_tasks, new_tasks)
                            st.session_state.comparison_results = comparison_results
                            # The review tab caches its table under
                            # "compared_df"; discard it so the new comparison
                            # is shown rather than the previous one.
                            if "compared_df" in st.session_state:
                                del st.session_state["compared_df"]
                            st.success("Task comparison completed! Please move to Review section")
                        else:
                            st.warning("Not enough data to run comparison.")
# Tab 3: Review and Approve Tasks
# Review tab: edit the comparison result, store the approved version in
# MongoDB, and optionally publish the latest stored version to Notion.
with tab3:
    # Title emoji restored from its mis-encoded form.
    # NOTE(review): presumably the magnifying-glass emoji - confirm against the repository.
    st.title("🔍 Review and Approve Tasks")

    if st.session_state.comparison_results is None:
        st.warning("No comparison results available. Please upload and process tasks first.")
    elif st.session_state.comparison_results:
        # Build the editable table once and keep it across reruns.
        if "compared_df" not in st.session_state:
            st.session_state.compared_df = json_to_dataframe(st.session_state.comparison_results)

        # Inline editing of tasks
        st.subheader("Edit Tasks")
        final_edited_df = st.data_editor(st.session_state.compared_df)
        st.session_state.final_edited_df = final_edited_df
        final_extracted_tasks_json = dataframe_to_json(final_edited_df)

        # Approval and finalization
        if st.button("Approve and Finalize Tasks"):
            with st.spinner("Finalizing tasks..."):
                try:
                    # Accessed without binding the name `db`, so the
                    # module-level database handle is not overwritten.
                    updated_collection = get_database()["employee_project"]
                    document = {
                        "consolidated_final_task": final_extracted_tasks_json,
                        "created_at": datetime.now(),
                    }
                    updated_collection.insert_one(document)
                    st.success("Finalized tasks saved successfully!")
                except Exception as e:
                    st.error(f"Failed to save tasks: {e}")

        if st.button("Push to Notion Dashboard"):
            pushed = False
            with st.spinner("Pushing to Notion..."):
                try:
                    latest_entry = fetch_latest_task_entry()
                except ValueError as e:
                    # Nothing has been finalized yet; report it instead of
                    # letting the exception abort the page.
                    st.error(f"Nothing to push to Notion: {e}")
                else:
                    push_to_notion(latest_entry)
                    pushed = True
            # Only announce an update when a push was actually attempted.
            if pushed:
                st.success("Notion Dashboard has been updated")