File size: 14,248 Bytes
af30a30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
# app.py
import streamlit as st
from st_aggrid import AgGrid, GridOptionsBuilder
import pandas as pd
import os, json
from datetime import datetime
from data_ingestion.ingest_data import read_document
from data_ingestion.preprocess_data import preprocess_text
from llm_integration.task_extraction import extract_tasks_from_text
from llm_integration.task_comparison import compare_task_data
from database.mongo_integration import *
from notion_client import Client
from pymongo import MongoClient
from datetime import datetime
# from database.mongo_integration import save_tasks_to_mongo
# Create the data/output directory if it is missing; exist_ok=True means an
# already-existing directory is not treated as an error.
os.makedirs("data/output", exist_ok=True)
import pandas as pd

# Connection settings for MongoDB and Notion.
#
# Each value can be supplied through an environment variable. The literals are
# kept only as fallbacks so existing deployments behave exactly as before.
# NOTE(review): the fallback literals are credentials committed to source
# control -- rotate them and rely on the environment variables instead.
mongo_client = MongoClient(
    os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://shahid:Protondev%[email protected]/",
    )
)
db = mongo_client["task_management"]
employee_project_collection = db["employee_project"]

notion = Client(
    auth=os.environ.get(
        "NOTION_TOKEN",
        "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",
    )
)
parent_page_id = os.environ.get(
    "NOTION_PARENT_PAGE_ID", "148b2f92b9948099a854e8b21a0640a3"
)
notion_database_id = os.environ.get(
    "NOTION_DATABASE_ID", "14db2f92-b994-81fb-9132-f4e4cb46ac13"
)

def fetch_latest_task_entry():
    """
    Look up the newest document in the employee_project collection.

    The collection is sorted by ``created_at`` in descending order and the
    first document is taken.

    Returns:
        dict: The document with the greatest ``created_at`` value.

    Raises:
        ValueError: If the lookup yields no (or an empty) document.
    """
    newest = employee_project_collection.find_one(sort=[("created_at", DESCENDING)])
    if not newest:
        raise ValueError("No entries found in MongoDB.")
    return newest
    

def _notion_text(content):
    """Wrap a plain string in the single-fragment text list used for Notion properties."""
    return [{"type": "text", "text": {"content": content}}]


def _build_notion_task(project_name, task_id, task_details, created_iso):
    """Build the page-creation payload for one task in the Notion database."""
    return {
        "parent": {"database_id": notion_database_id},
        "properties": {
            "Project Name": {"title": _notion_text(project_name)},
            "Task ID": {"rich_text": _notion_text(task_id)},
            "Description": {"rich_text": _notion_text(task_details.get("description", ""))},
            "Priority": {"select": {"name": task_details.get("priority", "low")}},
            "Assigned To": {"rich_text": _notion_text(task_details.get("assigned_to", ""))},
            "Current Status": {"select": {"name": task_details.get("current_status", "pending")}},
            "Created At": {"date": {"start": created_iso}},
        },
    }


def _list_notion_page_ids():
    """Collect the id of every page in the Notion database, following pagination."""
    page_ids = []
    cursor = None
    while True:
        query = {"database_id": notion_database_id}
        if cursor:
            query["start_cursor"] = cursor
        response = notion.databases.query(**query)
        page_ids.extend(page["id"] for page in response["results"])
        # A query returns one page of results at a time; keep going while the
        # API reports more and hands back a cursor to continue from.
        cursor = response.get("next_cursor") if response.get("has_more") else None
        if not cursor:
            return page_ids


def push_to_notion(latest_entry):
    """
    Replace the tasks in the Notion database with those of the given entry.

    Existing pages in the database are archived first, then one page is
    created per task found under ``consolidated_final_task``. Failures in
    either step are reported in the Streamlit UI rather than raised, and a
    failed archive step does not prevent the push step from running.

    Args:
        latest_entry (dict): The most recent task data from MongoDB. Its
            ``consolidated_final_task`` value maps project name -> task id ->
            task details; ``created_at`` is used for the "Created At" date.

    Returns:
        bool: True when both archiving and pushing finished without error,
        False otherwise.
    """
    # A stored null is treated the same as a missing key.
    tasks = latest_entry.get("consolidated_final_task") or {}
    created_at = latest_entry.get("created_at", None)

    # Step 1: Archive existing tasks in Notion database
    archived_ok = True
    with st.spinner("Archiving existing tasks in Notion..."):
        try:
            # Gather every id before archiving anything so the result set is
            # not modified while it is still being paged through.
            for page_id in _list_notion_page_ids():
                notion.pages.update(page_id=page_id, archived=True)
            st.info("Old tasks archived in Notion successfully.")
        except Exception as e:
            archived_ok = False
            st.error(f"Failed to archive tasks in Notion: {e}")

    # Step 2: Push new tasks to Notion
    pushed_ok = True
    with st.spinner("Pushing new tasks to Notion..."):
        try:
            # The timestamp is the same for every task of this entry, so it is
            # computed once instead of once per task.
            created_iso = created_at.isoformat() if created_at else datetime.utcnow().isoformat()
            for project_name, task_list in tasks.items():
                for task_id, task_details in task_list.items():
                    notion_task = _build_notion_task(
                        project_name, task_id, task_details, created_iso
                    )
                    response = notion.pages.create(**notion_task)
                    print(f"Task pushed to Notion: {response['id']}")
            st.success("New tasks pushed to Notion successfully!")
        except Exception as e:
            pushed_ok = False
            st.error(f"Failed to push tasks to Notion: {e}")

    return archived_ok and pushed_ok


def json_to_dataframe(json_data):
    """
    Converts a nested JSON structure into a user-friendly Pandas DataFrame for display.

    The result always carries the same six columns, even when there are no
    tasks, so the frame can be edited and converted back without special
    cases. A task that lacks a field gets a default instead of aborting the
    whole conversion: "" for description and assignee, "low" for priority and
    "pending" for status.

    Args:
        json_data (dict): Maps project name -> task id -> task details, where
            the details may hold ``description``, ``priority``,
            ``assigned_to`` and ``current_status``. ``None`` or an empty dict
            yields an empty frame.

    Returns:
        pd.DataFrame: One row per task with the columns Project, Task Name,
        Description, Priority, Assigned To and Status.
    """
    columns = ["Project", "Task Name", "Description", "Priority", "Assigned To", "Status"]
    rows = []
    for project_name, tasks in (json_data or {}).items():
        # A project stored with a null task map contributes no rows.
        for task_id, task_details in (tasks or {}).items():
            rows.append({
                "Project": project_name,
                "Task Name": task_id,
                "Description": task_details.get("description", ""),
                "Priority": task_details.get("priority", "low"),
                "Assigned To": task_details.get("assigned_to", ""),
                "Status": task_details.get("current_status", "pending"),
            })

    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(rows, columns=columns)

def dataframe_to_json(df):
    """
    Rebuild the nested project/task dictionary from a task DataFrame.

    Rows are processed in order; when two rows share the same project and
    task name, the later row replaces the earlier one.

    Args:
        df (pd.DataFrame): Frame with the columns Project, Task Name,
            Description, Priority, Assigned To and Status.

    Returns:
        dict: Maps project name -> task name -> dict with the keys
        ``description``, ``priority``, ``assigned_to`` and ``current_status``.
    """
    # Key of the nested task record -> DataFrame column it is read from.
    field_columns = {
        "description": 'Description',
        "priority": 'Priority',
        "assigned_to": 'Assigned To',
        "current_status": 'Status',
    }

    nested = {}
    for _, record in df.iterrows():
        project, task = record['Project'], record['Task Name']
        # Create the project's task map on first sight, then add/overwrite the task.
        project_tasks = nested.setdefault(project, {})
        project_tasks[task] = {
            field: record[column] for field, column in field_columns.items()
        }

    return nested

def _notion_plain_text(prop, kind):
    """Join the text of every fragment under ``prop[kind]``; '' when absent or empty."""
    fragments = (prop or {}).get(kind) or []
    return "".join(
        (fragment.get("text") or {}).get("content") or fragment.get("plain_text", "")
        for fragment in fragments
    )


def _notion_select_name(prop):
    """Return the chosen option name of a select property, or '' when nothing is selected."""
    return ((prop or {}).get("select") or {}).get("name", "")


def _notion_date_start(prop):
    """Return the start value of a date property, or None when no date is set."""
    return ((prop or {}).get("date") or {}).get("start")


# Function to fetch the most recent tasks from Notion
def fetch_recent_tasks_from_notion():
    """
    Fetch the most recent tasks from the Notion database.

    Up to 20 pages are requested, newest "Created At" first. Properties that
    are empty or missing on a page are reported as "" (``None`` for the date)
    so one incomplete page does not discard every other task. Text properties
    made of several fragments are concatenated.

    Returns:
        list[dict]: One dict per page with the keys Project Name, Task ID,
        Description, Priority, Assigned To, Current Status and Created At.
        An empty list is returned when the query fails; the error is printed.
    """
    try:
        query_response = notion.databases.query(
            database_id=notion_database_id,
            sorts=[{"property": "Created At", "direction": "descending"}],
            page_size=20,  # number of most recent tasks to show; adjust as needed
        )

        tasks = []
        for result in query_response.get("results", []):
            props = result.get("properties") or {}
            tasks.append({
                "Project Name": _notion_plain_text(props.get("Project Name"), "title"),
                "Task ID": _notion_plain_text(props.get("Task ID"), "rich_text"),
                "Description": _notion_plain_text(props.get("Description"), "rich_text"),
                "Priority": _notion_select_name(props.get("Priority")),
                "Assigned To": _notion_plain_text(props.get("Assigned To"), "rich_text"),
                "Current Status": _notion_select_name(props.get("Current Status")),
                "Created At": _notion_date_start(props.get("Created At")),
            })

        return tasks

    except Exception as e:
        # Best-effort: the dashboard shows "no tasks" rather than crashing.
        print(f"Error fetching tasks from Notion: {e}")
        return []

# Renders the most recent Notion tasks as a table on the dashboard
def display_recent_tasks_on_dashboard():
    """
    Show the tasks returned by fetch_recent_tasks_from_notion() in Streamlit.

    When tasks are available they are rendered as a DataFrame under a
    subheader; otherwise a short "no tasks" message is written instead.
    """
    recent_tasks = fetch_recent_tasks_from_notion()

    # An empty result gets the placeholder message and nothing else.
    if not recent_tasks:
        st.write("No tasks found in the Notion database.")
        return

    task_table = pd.DataFrame(recent_tasks)
    st.subheader("Most Recent Tasks from Notion")
    st.dataframe(task_table)

# Page-level configuration for the Streamlit app
st.set_page_config(page_title="Task Management", page_icon="πŸ“‹", layout="wide")

# Default the session-state entries used for intermediate data to None
for state_key in ("processed_tasks", "edited_df", "comparison_results"):
    if state_key not in st.session_state:
        st.session_state[state_key] = None

# The three top-level tabs of the app
tab1, tab2, tab3 = st.tabs(["Dashboard", "Upload and Process", "Review Updated Tasks"])

# Index used for tab navigation; starts at 0
if "active_tab" not in st.session_state:
    st.session_state["active_tab"] = 0

# Helper for changing the active tab
def switch_tab(tab_index):
    """Store ``tab_index`` as ``active_tab`` in the Streamlit session state."""
    st.session_state["active_tab"] = tab_index

# -------------------------------
# Tab 1: Dashboard
# -------------------------------
with tab1, st.container():
    st.title("πŸ“‹ Task Management Dashboard")
    # Table of the most recent tasks pulled from Notion
    display_recent_tasks_on_dashboard()

    # NOTE: the "Quick Actions" buttons that called switch_tab(1) / switch_tab(2)
    # are currently disabled.

# -------------------------------
# Tab 2: Upload and Process
# -------------------------------
with tab2:
    with st.container():
        st.title("πŸ“€ Upload and Process Tasks")

        uploaded_file = st.file_uploader("Upload a .docx file", type=["docx"])

        if uploaded_file is not None:
            # Identify the upload so that replacing the file triggers a fresh
            # extraction instead of showing the previous file's tasks.
            upload_signature = (uploaded_file.name, uploaded_file.size)

            # Parsing and task extraction run only while no table for this
            # upload is cached, not on every rerun of the script.
            if 'df' not in st.session_state or st.session_state.get("df_source") != upload_signature:
                with st.spinner("Processing uploaded file..."):
                    # Step 1: Extract cleaned text
                    raw_data = read_document(uploaded_file)
                    cleaned_entries = preprocess_text(raw_data)
                    cleaned_text = "\n".join(
                        f"{entry['author']}: {entry['text']}" for entry in cleaned_entries
                    )

                    # Step 2: Extract tasks
                    extracted_tasks = extract_tasks_from_text(cleaned_text)
                    st.session_state.df = json_to_dataframe(extracted_tasks)
                    st.session_state.df_source = upload_signature

            # Heading is rendered on every run so it stays above the editor.
            st.subheader("Processed Tasks (DataFrame View)")

            # Display the DataFrame for editing
            edited_df = st.data_editor(st.session_state.df)
            st.session_state.edited_df = edited_df

            edited_extracted_tasks_json = dataframe_to_json(edited_df)
            st.session_state.processed_tasks = edited_extracted_tasks_json
            st.success("Tasks extracted successfully!")

            # Step 3: Push extracted tasks to MongoDB
            if st.button("Save tasks & Compare"):
                saved = False
                with st.spinner("Saving tasks to MongoDB..."):
                    try:
                        insert_weekly_task_data(edited_extracted_tasks_json)
                    except Exception as e:
                        st.error(f"Failed to save tasks to the database: {e}")
                    else:
                        saved = True
                        st.success("Tasks successfully saved to the database!")

                # Only continue when the save worked: on failure the edited
                # table is kept so the user can retry, and no comparison is
                # run against data that was never stored.
                if saved:
                    if 'df' in st.session_state:
                        del st.session_state['df']
                        st.info("Temporary data removed from session state.")

                    # Step 4: Run comparison
                    with st.spinner("Running task comparison..."):
                        st.write("Running task comparison...")
                        recent_entries = fetch_recent_two_entries()

                        # fetch_latest_task_entry raises ValueError when no
                        # finalized entry exists yet; treat that like having
                        # too little data instead of crashing the page.
                        latest_entry = None
                        if len(recent_entries) >= 2:
                            try:
                                latest_entry = fetch_latest_task_entry()
                            except ValueError:
                                latest_entry = None

                        if latest_entry is None:
                            st.warning("Not enough data to run comparison.")
                        else:
                            old_tasks = latest_entry.get("consolidated_final_task", {})
                            new_tasks = recent_entries[0]["tasks"]
                            comparison_results = compare_task_data(old_tasks, new_tasks)
                            st.session_state.comparison_results = comparison_results
                            # Drop the review table cached from an earlier
                            # comparison so the review tab rebuilds it from
                            # these results.
                            if "compared_df" in st.session_state:
                                del st.session_state["compared_df"]
                            st.success("Task comparison completed! Please move to Review section")
                
# -------------------------------
# Tab 3: Review and Approve Tasks
# -------------------------------
with tab3:
    st.title("πŸ” Review and Approve Tasks")

    if st.session_state.comparison_results is None:
        st.warning("No comparison results available. Please upload and process tasks first.")
    else:
        # Stays None when the comparison produced nothing to review, so the
        # approval step can tell "nothing to save" apart from real data.
        final_extracted_tasks_json = None

        # Display comparison results
        if st.session_state.comparison_results:
            # The review table is built once and then reused across reruns.
            if "compared_df" not in st.session_state:
                st.session_state.compared_df = json_to_dataframe(st.session_state.comparison_results)

            # Inline editing of tasks
            st.subheader("Edit Tasks")
            final_edited_df = st.data_editor(st.session_state.compared_df)
            st.session_state.final_edited_df = final_edited_df

            final_extracted_tasks_json = dataframe_to_json(final_edited_df)

        # Approval and finalization
        if st.button("Approve and Finalize Tasks"):
            if final_extracted_tasks_json is None:
                # Previously this path failed with a NameError reported as a
                # save failure; say what is actually wrong instead.
                st.warning("The comparison produced no tasks to finalize.")
            else:
                with st.spinner("Finalizing tasks..."):
                    try:
                        # Looked up without rebinding the module-level `db`.
                        updated_collection = get_database()["employee_project"]
                        document = {
                            "consolidated_final_task": final_extracted_tasks_json,
                            "created_at": datetime.now()
                        }
                        updated_collection.insert_one(document)
                        st.success("Finalized tasks saved successfully!")
                    except Exception as e:
                        st.error(f"Failed to save tasks: {e}")

        if st.button("Push to Notion Dashboard"):
            with st.spinner("Pushing to Notion..."):
                try:
                    latest_entry = fetch_latest_task_entry()
                except ValueError as e:
                    # Raised when MongoDB holds no finalized entry yet.
                    st.error(f"Nothing to push to Notion: {e}")
                else:
                    # push_to_notion reports its own errors through st.error.
                    # An explicit False return is treated as a reported
                    # failure; any other value (including None) keeps the
                    # confirmation message.
                    if push_to_notion(latest_entry) is not False:
                        st.success("Notion Dashboard has been updated")