File size: 7,399 Bytes
04db7e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import os
import re
import streamlit as st
import streamlit.components.v1 as components
from datetime import datetime
from werkzeug.utils import secure_filename
from src.gpp import GPP, GPPConfig
from src.qa import AnswerGenerator
class ContextAwareAnswerGenerator:
def __init__(self, chunks):
self.chunks = chunks
self.original_generator = AnswerGenerator(chunks)
def answer(self, question, conversation_context=None):
if not conversation_context or len(conversation_context) <= 1:
return self.original_generator.answer(question)
context_prompt = "Based on our conversation so far:\n"
max_history = min(len(conversation_context) - 1, 4)
for i in range(max(0, len(conversation_context) - max_history - 1), len(conversation_context) - 1, 2):
user_q = conversation_context[i]["content"]
assistant_a = conversation_context[i+1]["content"]
context_prompt += f"You were asked: '{user_q}'\n"
context_prompt += f"You answered: '{assistant_a}'\n"
context_prompt += f"\nNow answer this follow-up question: {question}"
return self.original_generator.answer(context_prompt)
# --- Page Config ---
st.set_page_config(
page_title="Document Q&A",
page_icon="π",
layout="wide"
)
# --- Session State ---
if 'chat_history' not in st.session_state:
st.session_state.chat_history = []
if 'parsed' not in st.session_state:
st.session_state.parsed = None
if 'selected_chunks' not in st.session_state:
st.session_state.selected_chunks = []
if 'conversation_context' not in st.session_state:
st.session_state.conversation_context = []
# --- Global CSS ---
st.markdown(r"""
<style>
body { background-color: #ffffff; font-family: 'Helvetica Neue', sans-serif; }
/* Chat */
.chat-container { display: flex; flex-direction: column; gap: 12px; margin: 20px 0; }
.chat-message { display: flex; }
.user-message { justify-content: flex-end; }
.assistant-message { justify-content: flex-start; }
.message-content { padding: 12px 16px; border-radius: 18px; max-width: 100%; overflow-wrap: break-word; }
.user-message .message-content { background-color: #4A90E2; color: white; border-bottom-right-radius: 4px; }
.assistant-message .message-content { background-color: #f1f1f1; color: #333; border-bottom-left-radius: 4px; }
/* Input */
.stTextInput>div>div>input { border-radius: 20px; border: 1px solid #ccc; padding: 8px 12px; }
.stButton>button { background-color: #4A90E2; color: white; border-radius: 20px; padding: 8px 16px; }
.stButton>button:hover { background-color: #357ABD; }
/* Evidence */
.evidence-content { overflow-wrap: break-word; margin-bottom: 1rem; }
</style>
""", unsafe_allow_html=True)
# --- Sidebar Upload ---
with st.sidebar:
st.title("Document Intelligence")
st.image("https://img.icons8.com/ios-filled/50/4A90E2/document.png", width=40)
st.caption(f"Last updated: {datetime.now():%Y-%m-%d}")
st.markdown("---")
st.subheader("Upload Document")
uploaded_file = st.file_uploader("Select a PDF", type=["pdf"], help="Upload a PDF to analyze")
if uploaded_file:
filename = secure_filename(uploaded_file.name)
if not re.match(r'^[\w\-. ]+$', filename):
st.error("Invalid file name. Please rename your file.")
else:
if st.button("Parse PDF", use_container_width=True):
output_dir = os.path.join("./parsed", filename)
os.makedirs(output_dir, exist_ok=True)
pdf_path = os.path.join(output_dir, filename)
with open(pdf_path, "wb") as f:
f.write(uploaded_file.getbuffer())
with st.spinner("Parsing document..."):
try:
gpp = GPP(GPPConfig())
parsed = gpp.run(pdf_path, output_dir)
st.session_state.parsed = parsed
st.session_state.chat_history.clear()
st.session_state.conversation_context.clear()
st.session_state.selected_chunks.clear()
st.success("Document parsed successfully!")
except Exception as e:
st.error(f"Parsing failed: {e}")
# removed content preview
# --- Main Area ---
main_col, evidence_col = st.columns([3, 1])
with main_col:
st.title("Document Q&A")
if not st.session_state.parsed:
st.info("π Upload and parse a document to start")
else:
parsed = st.session_state.parsed
layout_pdf = parsed.get("layout_pdf")
if layout_pdf and os.path.exists(layout_pdf):
st.subheader("Layout Preview")
components.iframe(layout_pdf, height=300, width=400)
# Chat display
st.markdown("<div class='chat-container'>", unsafe_allow_html=True)
if not st.session_state.chat_history:
st.markdown("<p style='color:#888;'>No messages yet. Start the conversation below.</p>", unsafe_allow_html=True)
else:
for msg in st.session_state.chat_history:
cls = 'user-message' if msg['role']=='user' else 'assistant-message'
st.markdown(f"<div class='chat-message {cls}'><div class='message-content'>{msg['content']}</div></div>", unsafe_allow_html=True)
st.markdown("</div>", unsafe_allow_html=True)
# Input
question = st.text_input("", key="question_input", placeholder="Type your question...", on_change=None)
col_btn1, col_btn2 = st.columns([4, 1])
with col_btn1:
submit = st.button("Send", use_container_width=True)
with col_btn2:
clear = st.button("Clear", use_container_width=True)
if clear:
st.session_state.chat_history.clear()
st.session_state.conversation_context.clear()
st.session_state.selected_chunks.clear()
st.experimental_rerun()
if submit and question:
st.session_state.chat_history.append({"role":"user","content":question})
gen = ContextAwareAnswerGenerator(parsed['chunks'])
answer, chunks = gen.answer(question, conversation_context=st.session_state.chat_history)
st.session_state.chat_history.append({"role":"assistant","content":answer})
st.session_state.selected_chunks = chunks
with evidence_col:
if st.session_state.parsed:
st.markdown("### Evidence")
if not st.session_state.selected_chunks:
st.info("Evidence appears here after asking a question.")
else:
for i, chunk in enumerate(st.session_state.selected_chunks,1):
with st.expander(f"#{i}", expanded=False):
st.markdown(f"**Type:** {chunk.get('type','')}")
st.markdown(f"<div class='evidence-content'>{chunk.get('narration','')}</div>", unsafe_allow_html=True)
if 'table_structure' in chunk:
st.write(chunk['table_structure'])
for blk in chunk.get('blocks',[]):
if blk.get('type')=='img_path':
img_path = os.path.join(parsed['images_dir'], blk['img_path'])
if os.path.exists(img_path):
st.image(img_path, use_column_width=True)
|