Spaces:

Proton-Datalabs-dev
/

Smart-Tasker

Sleeping

File size: 14,248 Bytes

af30a30

# app.py
import streamlit as st
from st_aggrid import AgGrid, GridOptionsBuilder
import pandas as pd
import os, json
from datetime import datetime
from data_ingestion.ingest_data import read_document
from data_ingestion.preprocess_data import preprocess_text
from llm_integration.task_extraction import extract_tasks_from_text
from llm_integration.task_comparison import compare_task_data
from database.mongo_integration import *
from notion_client import Client
from pymongo import MongoClient
from datetime import datetime
# from database.mongo_integration import save_tasks_to_mongo
os.makedirs("data/output", exist_ok=True)
import pandas as pd

# Connection settings. Each value can be overridden through an environment
# variable; the literals below are only fallbacks so existing deployments keep
# working unchanged.
# SECURITY: the fallback MongoDB URI and Notion token are credentials committed
# to source. Rotate them and supply the real values via the environment.
# NOTE(review): the host part of the fallback URI looks mangled — confirm it
# against the deployed configuration.
mongo_client = MongoClient(
    os.getenv("MONGO_URI")
    or "mongodb+srv://shahid:Protondev%[email protected]/"
)
db = mongo_client["task_management"]
employee_project_collection = db["employee_project"]

notion = Client(
    auth=os.getenv("NOTION_TOKEN")
    or "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"
)
parent_page_id = os.getenv("NOTION_PARENT_PAGE_ID") or "148b2f92b9948099a854e8b21a0640a3"
notion_database_id = os.getenv("NOTION_DATABASE_ID") or "14db2f92-b994-81fb-9132-f4e4cb46ac13"

def fetch_latest_task_entry():
    """
    Return the newest document in the employee_project collection.

    "Newest" is the first document when sorting by ``created_at`` in
    descending order.

    Returns:
        dict: The matching document.

    Raises:
        ValueError: If the query yields nothing.
    """
    newest = employee_project_collection.find_one(sort=[("created_at", DESCENDING)])
    if not newest:
        raise ValueError("No entries found in MongoDB.")
    return newest
    

def _list_notion_page_ids():
    """Return the ids of all pages in the Notion task database, following pagination."""
    page_ids = []
    cursor = None
    while True:
        query = {"database_id": notion_database_id}
        if cursor:
            query["start_cursor"] = cursor
        batch = notion.databases.query(**query)
        page_ids.extend(page["id"] for page in batch["results"])
        cursor = batch.get("next_cursor")
        if not batch.get("has_more") or not cursor:
            return page_ids


def _build_notion_task(project_name, task_id, task_details, created_iso):
    """Map one task dict onto the Notion page-creation payload."""
    def text(content):
        return [{"type": "text", "text": {"content": content}}]

    return {
        "parent": {"database_id": notion_database_id},
        "properties": {
            "Project Name": {"title": text(project_name)},
            "Task ID": {"rich_text": text(task_id)},
            "Description": {"rich_text": text(task_details.get("description", ""))},
            "Priority": {"select": {"name": task_details.get("priority", "low")}},
            "Assigned To": {"rich_text": text(task_details.get("assigned_to", ""))},
            "Current Status": {"select": {"name": task_details.get("current_status", "pending")}},
            "Created At": {"date": {"start": created_iso}},
        },
    }


def push_to_notion(latest_entry):
    """
    Replace the contents of the Notion database with the tasks of an entry.

    First every existing page in the database is archived, then one page is
    created per task found under ``consolidated_final_task``. Both steps are
    best-effort: a failure is reported through ``st.error`` and does not raise.
    A failed archive step does not stop the push step.

    Args:
        latest_entry (dict): The most recent task data from MongoDB. Its
            ``consolidated_final_task`` maps project name -> task id -> task
            details; ``created_at`` is used as the "Created At" date.

    Returns:
        bool: True when both the archive and the push step completed without
        an error, False otherwise.
    """
    tasks = latest_entry.get("consolidated_final_task") or {}
    created_at = latest_entry.get("created_at", None)

    # Step 1: Archive existing tasks in Notion database
    archived_ok = True
    with st.spinner("Archiving existing tasks in Notion..."):
        try:
            # Query results are paginated, so collect every page id before
            # archiving; a single query would only cover the first batch.
            for page_id in _list_notion_page_ids():
                notion.pages.update(page_id=page_id, archived=True)
            st.info("Old tasks archived in Notion successfully.")
        except Exception as e:
            archived_ok = False
            st.error(f"Failed to archive tasks in Notion: {e}")

    # Step 2: Push new tasks to Notion
    pushed_ok = True
    with st.spinner("Pushing new tasks to Notion..."):
        try:
            # Computed once so every task of this push carries the same timestamp.
            created_iso = created_at.isoformat() if created_at else datetime.utcnow().isoformat()
            for project_name, task_list in tasks.items():
                for task_id, task_details in task_list.items():
                    payload = _build_notion_task(project_name, task_id, task_details, created_iso)
                    response = notion.pages.create(**payload)
                    print(f"Task pushed to Notion: {response['id']}")
            st.success("New tasks pushed to Notion successfully!")
        except Exception as e:
            pushed_ok = False
            st.error(f"Failed to push tasks to Notion: {e}")

    return archived_ok and pushed_ok


def json_to_dataframe(json_data):
    """
    Converts a nested JSON structure into a user-friendly Pandas DataFrame for display.

    Task fields that are missing fall back to the same defaults used when
    pushing to Notion ("" for description/assignee, "low" priority, "pending"
    status) instead of raising KeyError. The column set is fixed, so an empty
    input still yields a DataFrame with the expected headers.

    Args:
        json_data (dict): Mapping of project name -> task id -> task details.
            ``None`` or an empty dict is treated as "no tasks".

    Returns:
        pd.DataFrame: One row per task with the columns Project, Task Name,
        Description, Priority, Assigned To and Status.
    """
    columns = ["Project", "Task Name", "Description", "Priority", "Assigned To", "Status"]
    rows = []
    for project_name, tasks in (json_data or {}).items():
        for task_id, task_details in (tasks or {}).items():
            details = task_details or {}
            rows.append({
                "Project": project_name,
                "Task Name": task_id,
                "Description": details.get("description", ""),
                "Priority": details.get("priority", "low"),
                "Assigned To": details.get("assigned_to", ""),
                "Status": details.get("current_status", "pending"),
            })

    return pd.DataFrame(rows, columns=columns)

def dataframe_to_json(df):
    """
    Rebuild the nested project/task dictionary from a task DataFrame.

    Rows are processed top to bottom; when two rows share the same project and
    task name, the later row replaces the earlier one.

    Args:
        df (pd.DataFrame): Frame with the columns Project, Task Name,
            Description, Priority, Assigned To and Status.

    Returns:
        dict: Mapping of project name -> task name -> task details.
    """
    nested = {}

    for _, record in df.iterrows():
        # Fetch the project's bucket, creating it on first sight
        project_tasks = nested.setdefault(record['Project'], {})
        project_tasks[record['Task Name']] = {
            "description": record['Description'],
            "priority": record['Priority'],
            "assigned_to": record['Assigned To'],
            "current_status": record['Status'],
        }

    return nested

def _notion_plain_text(fragments):
    """Join the text of a Notion title/rich_text fragment list; '' when it is empty."""
    parts = []
    for fragment in fragments or []:
        text = fragment.get("plain_text")
        if text is None:
            text = (fragment.get("text") or {}).get("content", "")
        parts.append(text)
    return "".join(parts)


def _notion_select_name(prop):
    """Return the chosen option name of a Notion select property, '' when unset."""
    option = (prop or {}).get("select")
    return option["name"] if option else ""


# Function to fetch the most recent tasks from Notion
def fetch_recent_tasks_from_notion():
    """
    Fetch the most recent tasks from the Notion database and return it as a list of dicts.

    At most 20 pages are requested, newest "Created At" first. Empty text
    fields, unset selects and unset dates are returned as "" / None rather
    than aborting the whole listing.

    Returns:
        list[dict]: One dict per page with the keys Project Name, Task ID,
        Description, Priority, Assigned To, Current Status and Created At.
        An empty list is returned when the query fails; the error is printed.
    """
    try:
        # Query the database to get the most recent tasks
        query_response = notion.databases.query(
            **{
                "database_id": notion_database_id,
                "sorts": [{"property": "Created At", "direction": "descending"}],
                "page_size": 20  # Upper bound on the number of tasks shown; adjust as needed
            }
        )

        # Extract tasks from the query response
        tasks = []
        for result in query_response.get("results", []):
            props = result.get("properties") or {}
            created = (props.get("Created At") or {}).get("date") or {}
            tasks.append({
                "Project Name": _notion_plain_text((props.get("Project Name") or {}).get("title")),
                "Task ID": _notion_plain_text((props.get("Task ID") or {}).get("rich_text")),
                "Description": _notion_plain_text((props.get("Description") or {}).get("rich_text")),
                "Priority": _notion_select_name(props.get("Priority")),
                "Assigned To": _notion_plain_text((props.get("Assigned To") or {}).get("rich_text")),
                "Current Status": _notion_select_name(props.get("Current Status")),
                "Created At": created.get("start"),
            })

        return tasks

    except Exception as e:
        # Best-effort: the dashboard falls back to "no tasks" when Notion is unreachable
        print(f"Error fetching tasks from Notion: {e}")
        return []

# Render the recent Notion tasks as a table on the dashboard
def display_recent_tasks_on_dashboard():
    """
    Show the tasks returned by fetch_recent_tasks_from_notion() on the page.

    When tasks are returned they are rendered as a DataFrame under a
    subheader; otherwise a short "nothing found" message is written instead.
    """
    recent = fetch_recent_tasks_from_notion()

    # Nothing to tabulate: say so and stop
    if not recent:
        st.write("No tasks found in the Notion database.")
        return

    table = pd.DataFrame(recent)
    st.subheader("Most Recent Tasks from Notion")
    st.dataframe(table)

# Page-level Streamlit configuration
st.set_page_config(page_title="Task Management", page_icon="📋", layout="wide")

# Seed the session-state slots used for intermediate data (only when absent)
for _slot in ("processed_tasks", "edited_df", "comparison_results"):
    if _slot not in st.session_state:
        st.session_state[_slot] = None

# The three top-level tabs of the app
tab1, tab2, tab3 = st.tabs(["Dashboard", "Upload and Process", "Review Updated Tasks"])

# Seed the tab-navigation index (only when absent)
if "active_tab" not in st.session_state:
    st.session_state["active_tab"] = 0

# Record which tab should be considered active
def switch_tab(tab_index):
    """
    Store ``tab_index`` under ``active_tab`` in the session state.

    Only the stored value changes; no active code in this file reads
    ``active_tab`` to pick the displayed tab.
    """
    st.session_state["active_tab"] = tab_index

# -------------------------------
# Tab 1: Dashboard
# -------------------------------

# if st.session_state.active_tab == 0:
# Dashboard tab: a title followed by the table of recent Notion tasks
with tab1, st.container():
    st.title("📋 Task Management Dashboard")
    display_recent_tasks_on_dashboard()

        # # Quick actions
        # st.subheader("Quick Actions")
        # if st.button("Upload and Process New Tasks"):
        #     switch_tab(1)
        # if st.button("Review and Approve Tasks"):
        #     switch_tab(2)

# -------------------------------
# Tab 2: Upload and Process
# -------------------------------
# elif st.session_state.active_tab == 1:

# Tab 2: Upload and Process
with tab2:
    with st.container():
        st.title("📤 Upload and Process Tasks")

        uploaded_file = st.file_uploader("Upload a .docx file", type=["docx"])

        if uploaded_file is not None:
            # Identify the upload so a cached extraction is never shown for a
            # different file than the one it was produced from.
            upload_key = (uploaded_file.name, uploaded_file.size)
            needs_extraction = (
                'df' not in st.session_state
                or st.session_state.get("df_source") != upload_key
            )

            # Reading, cleaning and task extraction only run when there is no
            # cached result for this upload; Streamlit reruns the script on
            # every interaction and the extraction is the expensive part.
            if needs_extraction:
                with st.spinner("Processing uploaded file..."):
                    # Step 1: Extract cleaned text
                    raw_data = read_document(uploaded_file)
                    cleaned_text = preprocess_text(raw_data)
                    cleaned_text = "\n".join([f"{entry['author']}: {entry['text']}" for entry in cleaned_text])

                    # Step 2: Extract tasks
                    extracted_tasks = extract_tasks_from_text(cleaned_text)
                    st.session_state.df = json_to_dataframe(extracted_tasks)
                    st.session_state.df_source = upload_key

            st.subheader("Processed Tasks (DataFrame View)")

            # Display the DataFrame for editing
            edited_df = st.data_editor(st.session_state.df)
            st.session_state.edited_df = edited_df

            edited_extracted_tasks_json = dataframe_to_json(edited_df)
            st.session_state.processed_tasks = edited_extracted_tasks_json
            st.success("Tasks extracted successfully!")

            # Step 3: Push extracted tasks to MongoDB
            if st.button("Save tasks & Compare"):
                saved = False
                with st.spinner("Saving tasks to MongoDB..."):
                    try:
                        insert_weekly_task_data(edited_extracted_tasks_json)
                    except Exception as e:
                        st.error(f"Failed to save tasks to the database: {e}")
                    else:
                        saved = True
                        st.success("Tasks successfully saved to the database!")

                # Only continue when the save worked: the comparison reads the
                # most recent stored entries, and keeping the cached frame
                # after a failure lets the user retry without re-extracting.
                if saved:
                    if 'df' in st.session_state:
                        del st.session_state['df']
                        st.info("Temporary data removed from session state.")

                    # Step 4: Run comparison
                    with st.spinner("Running task comparison..."):
                        st.write("Running task comparison...")
                        recent_entries = fetch_recent_two_entries()
                        try:
                            latest_entry = fetch_latest_task_entry()
                        except ValueError as e:
                            # Raised when no finalized entry exists yet
                            latest_entry = None
                            st.warning(f"Not enough data to run comparison: {e}")

                        if latest_entry is not None:
                            if len(recent_entries) >= 2:
                                old_tasks = latest_entry.get("consolidated_final_task", {})
                                new_tasks = recent_entries[0]["tasks"]
                                comparison_results = compare_task_data(old_tasks, new_tasks)
                                st.session_state.comparison_results = comparison_results
                                st.success("Task comparison completed! Please move to Review section")
                            else:
                                st.warning("Not enough data to run comparison.")
                
# Tab 3: Review and Approve Tasks
with tab3:
    st.title("🔍 Review and Approve Tasks")

    if st.session_state.comparison_results is None:
        st.warning("No comparison results available. Please upload and process tasks first.")
    else:
        # Defined up front so the approve handler never hits an unbound name
        # when the comparison produced no tasks.
        final_extracted_tasks_json = {}

        # Display comparison results
        if st.session_state.comparison_results:
            comparison_results = st.session_state.comparison_results

            # Rebuild the editable frame whenever the comparison it was built
            # from has been replaced; otherwise a stale table would be shown.
            if (
                "compared_df" not in st.session_state
                or st.session_state.get("compared_df_source") != comparison_results
            ):
                st.session_state.compared_df = json_to_dataframe(comparison_results)
                st.session_state.compared_df_source = comparison_results

            # Inline editing of tasks
            st.subheader("Edit Tasks")
            final_edited_df = st.data_editor(st.session_state.compared_df)
            st.session_state.final_edited_df = final_edited_df

            final_extracted_tasks_json = dataframe_to_json(final_edited_df)

        # Approval and finalization
        if st.button("Approve and Finalize Tasks"):
            if not final_extracted_tasks_json:
                st.warning("No tasks to finalize.")
            else:
                with st.spinner("Finalizing tasks..."):
                    try:
                        # Local name on purpose: assigning to `db` here would
                        # overwrite the module-level `db` handle.
                        task_db = get_database()
                        updated_collection = task_db["employee_project"]
                        document = {
                            "consolidated_final_task": final_extracted_tasks_json,
                            # NOTE(review): naive local time, while push_to_notion
                            # falls back to utcnow() — confirm the intended timezone.
                            "created_at": datetime.now()
                        }
                        updated_collection.insert_one(document)
                        st.success("Finalized tasks saved successfully!")
                    except Exception as e:
                        st.error(f"Failed to save tasks: {e}")

        if st.button("Push to Notion Dashboard"):
            with st.spinner("Pushing to Notion..."):
                try:
                    latest_entry = fetch_latest_task_entry()
                except ValueError as e:
                    # Raised when no finalized entry exists yet
                    st.error(f"Nothing to push to Notion: {e}")
                else:
                    # Only an explicit False counts as failure, so a
                    # push_to_notion that returns nothing still reports success.
                    if push_to_notion(latest_entry) is not False:
                        st.success("Notion Dashboard has been updated")