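"""Streamlit front end for document Q&A.

A PDF uploaded in the sidebar is parsed with the GPP pipeline, and the
resulting chunks are queried through AnswerGenerator. Chat history and the
evidence behind the latest answer are kept in Streamlit session state.
"""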
import base64
import os
import re
import streamlit as st
import streamlit.components.v1 as components
from datetime import datetime
from werkzeug.utils import secure_filename

from src.gpp import GPP, GPPConfig
from src.qa import AnswerGenerator

class ContextAwareAnswerGenerator:
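    """Wrap AnswerGenerator so follow-up questions keep conversational context.

    Recent user/assistant turns are restated in the prompt before delegating
    to the underlying generator.
    """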
    def __init__(self, chunks):
        self.chunks = chunks
        self.original_generator = AnswerGenerator(chunks)

    def answer(self, question, conversation_context=None):
        if not conversation_context or len(conversation_context) <= 1:
            return self.original_generator.answer(question)
        context_prompt = "Based on our conversation so far:\n"
        max_history = min(len(conversation_context) - 1, 4)
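        # Replay up to the last two question/answer pairs (user turn at i,
        # assistant turn at i + 1) so the prompt carries recent context.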
        for i in range(max(0, len(conversation_context) - max_history - 1), len(conversation_context) - 1, 2):
            user_q = conversation_context[i]["content"]
            assistant_a = conversation_context[i+1]["content"]
            context_prompt += f"You were asked: '{user_q}'\n"
            context_prompt += f"You answered: '{assistant_a}'\n"
        context_prompt += f"\nNow answer this follow-up question: {question}"
        return self.original_generator.answer(context_prompt)

# --- Page Config ---
st.set_page_config(
    page_title="Document Q&A",
    page_icon="πŸ“„",
    layout="wide"
)

# --- Session State ---
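# parsed holds the GPP output for the current document; selected_chunks holds
# the evidence behind the latest answer.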
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
if 'parsed' not in st.session_state:
    st.session_state.parsed = None
if 'selected_chunks' not in st.session_state:
    st.session_state.selected_chunks = []
if 'conversation_context' not in st.session_state:
    st.session_state.conversation_context = []

# --- Global CSS ---
st.markdown(r"""
<style>
body { background-color: #ffffff; font-family: 'Helvetica Neue', sans-serif; }
/* Chat */
.chat-container { display: flex; flex-direction: column; gap: 12px; margin: 20px 0; }
.chat-message { display: flex; }
.user-message { justify-content: flex-end; }
.assistant-message { justify-content: flex-start; }
.message-content { padding: 12px 16px; border-radius: 18px; max-width: 100%; overflow-wrap: break-word; }
.user-message .message-content { background-color: #4A90E2; color: white; border-bottom-right-radius: 4px; }
.assistant-message .message-content { background-color: #f1f1f1; color: #333; border-bottom-left-radius: 4px; }
/* Input */
.stTextInput>div>div>input { border-radius: 20px; border: 1px solid #ccc; padding: 8px 12px; }
.stButton>button { background-color: #4A90E2; color: white; border-radius: 20px; padding: 8px 16px; }
.stButton>button:hover { background-color: #357ABD; }
/* Evidence */
.evidence-content { overflow-wrap: break-word; margin-bottom: 1rem; }
</style>
""", unsafe_allow_html=True)

# --- Sidebar Upload ---
with st.sidebar:
    st.title("Document Intelligence")
    st.image("https://img.icons8.com/ios-filled/50/4A90E2/document.png", width=40)
    st.caption(f"Last updated: {datetime.now():%Y-%m-%d}")
    st.markdown("---")
    st.subheader("Upload Document")
    uploaded_file = st.file_uploader("Select a PDF", type=["pdf"], help="Upload a PDF to analyze")
    if uploaded_file:
        filename = secure_filename(uploaded_file.name)
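        # secure_filename already strips path separators; the regex below is a
        # second, stricter whitelist on the remaining characters.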
        if not re.match(r'^[\w\-. ]+$', filename):
            st.error("Invalid file name. Please rename your file.")
        else:
            if st.button("Parse PDF", use_container_width=True):
                output_dir = os.path.join("./parsed", filename)
                os.makedirs(output_dir, exist_ok=True)
                pdf_path = os.path.join(output_dir, filename)
                with open(pdf_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                with st.spinner("Parsing document..."):
                    try:
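                        # GPP.run is expected to return a dict exposing at least
                        # 'layout_pdf', 'chunks' and 'images_dir' (the keys used below).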
                        gpp = GPP(GPPConfig())
                        parsed = gpp.run(pdf_path, output_dir)
                        st.session_state.parsed = parsed
                        st.session_state.chat_history.clear()
                        st.session_state.conversation_context.clear()
                        st.session_state.selected_chunks.clear()
                        st.success("Document parsed successfully!")
                    except Exception as e:
                        st.error(f"Parsing failed: {e}")

# --- Main Area ---
main_col, evidence_col = st.columns([3, 1])
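# Left column: chat over the parsed document; right column: evidence chunks
# backing the most recent answer.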
with main_col:
    st.title("Document Q&A")
    if not st.session_state.parsed:
        st.info("πŸ‘ˆ Upload and parse a document to start")
    else:
        parsed = st.session_state.parsed
        layout_pdf = parsed.get("layout_pdf")
        if layout_pdf and os.path.exists(layout_pdf):
            st.subheader("Layout Preview")
            # components.iframe needs a URL; embed the local PDF as a base64 data URI instead.
            with open(layout_pdf, "rb") as f:
                encoded_pdf = base64.b64encode(f.read()).decode("utf-8")
            components.html(f'<iframe src="data:application/pdf;base64,{encoded_pdf}" width="400" height="300"></iframe>', height=300)
        # Chat display
        st.markdown("<div class='chat-container'>", unsafe_allow_html=True)
        if not st.session_state.chat_history:
            st.markdown("<p style='color:#888;'>No messages yet. Start the conversation below.</p>", unsafe_allow_html=True)
        else:
            for msg in st.session_state.chat_history:
                cls = 'user-message' if msg['role']=='user' else 'assistant-message'
                st.markdown(f"<div class='chat-message {cls}'><div class='message-content'>{msg['content']}</div></div>", unsafe_allow_html=True)
        st.markdown("</div>", unsafe_allow_html=True)
        # Input
        question = st.text_input("Your question", key="question_input", placeholder="Type your question...", label_visibility="collapsed")
        col_btn1, col_btn2 = st.columns([4, 1])
        with col_btn1:
            submit = st.button("Send", use_container_width=True)
        with col_btn2:
            clear = st.button("Clear", use_container_width=True)
        if clear:
            st.session_state.chat_history.clear()
            st.session_state.conversation_context.clear()
            st.session_state.selected_chunks.clear()
            st.rerun()
        if submit and question:
            st.session_state.chat_history.append({"role": "user", "content": question})
            gen = ContextAwareAnswerGenerator(parsed['chunks'])
            # gen.answer is expected to return (answer_text, supporting_chunks),
            # matching the underlying AnswerGenerator interface.
            answer, chunks = gen.answer(question, conversation_context=st.session_state.chat_history)
            st.session_state.chat_history.append({"role": "assistant", "content": answer})
            st.session_state.selected_chunks = chunks
            # Rerun so the turn appended above is rendered by the chat block,
            # which already executed earlier in this script run.
            st.rerun()

with evidence_col:
    if st.session_state.parsed:
        st.markdown("### Evidence")
        if not st.session_state.selected_chunks:
            st.info("Evidence appears here after asking a question.")
        else:
            for i, chunk in enumerate(st.session_state.selected_chunks,1):
                with st.expander(f"#{i}", expanded=False):
                    st.markdown(f"**Type:** {chunk.get('type','')}")
                    st.markdown(f"<div class='evidence-content'>{chunk.get('narration','')}</div>", unsafe_allow_html=True)
                    if 'table_structure' in chunk:
                        st.write(chunk['table_structure'])
                    for blk in chunk.get('blocks',[]):
                        if blk.get('type')=='img_path':
                            img_path = os.path.join(st.session_state.parsed['images_dir'], blk['img_path'])
                            if os.path.exists(img_path):
                                st.image(img_path, use_container_width=True)