import os import re import streamlit as st import streamlit.components.v1 as components from datetime import datetime from werkzeug.utils import secure_filename from src.gpp import GPP, GPPConfig from src.qa import AnswerGenerator class ContextAwareAnswerGenerator: def __init__(self, chunks): self.chunks = chunks self.original_generator = AnswerGenerator(chunks) def answer(self, question, conversation_context=None): if not conversation_context or len(conversation_context) <= 1: return self.original_generator.answer(question) context_prompt = "Based on our conversation so far:\n" max_history = min(len(conversation_context) - 1, 4) for i in range(max(0, len(conversation_context) - max_history - 1), len(conversation_context) - 1, 2): user_q = conversation_context[i]["content"] assistant_a = conversation_context[i+1]["content"] context_prompt += f"You were asked: '{user_q}'\n" context_prompt += f"You answered: '{assistant_a}'\n" context_prompt += f"\nNow answer this follow-up question: {question}" return self.original_generator.answer(context_prompt) # --- Page Config --- st.set_page_config( page_title="Document Q&A", page_icon="📄", layout="wide" ) # --- Session State --- if 'chat_history' not in st.session_state: st.session_state.chat_history = [] if 'parsed' not in st.session_state: st.session_state.parsed = None if 'selected_chunks' not in st.session_state: st.session_state.selected_chunks = [] if 'conversation_context' not in st.session_state: st.session_state.conversation_context = [] # --- Global CSS --- st.markdown(r""" """, unsafe_allow_html=True) # --- Sidebar Upload --- with st.sidebar: st.title("Document Intelligence") st.image("https://img.icons8.com/ios-filled/50/4A90E2/document.png", width=40) st.caption(f"Last updated: {datetime.now():%Y-%m-%d}") st.markdown("---") st.subheader("Upload Document") uploaded_file = st.file_uploader("Select a PDF", type=["pdf"], help="Upload a PDF to analyze") if uploaded_file: filename = secure_filename(uploaded_file.name) if not re.match(r'^[\w\-. ]+$', filename): st.error("Invalid file name. Please rename your file.") else: if st.button("Parse PDF", use_container_width=True): output_dir = os.path.join("./parsed", filename) os.makedirs(output_dir, exist_ok=True) pdf_path = os.path.join(output_dir, filename) with open(pdf_path, "wb") as f: f.write(uploaded_file.getbuffer()) with st.spinner("Parsing document..."): try: gpp = GPP(GPPConfig()) parsed = gpp.run(pdf_path, output_dir) st.session_state.parsed = parsed st.session_state.chat_history.clear() st.session_state.conversation_context.clear() st.session_state.selected_chunks.clear() st.success("Document parsed successfully!") except Exception as e: st.error(f"Parsing failed: {e}") # removed content preview # --- Main Area --- main_col, evidence_col = st.columns([3, 1]) with main_col: st.title("Document Q&A") if not st.session_state.parsed: st.info("👈 Upload and parse a document to start") else: parsed = st.session_state.parsed layout_pdf = parsed.get("layout_pdf") if layout_pdf and os.path.exists(layout_pdf): st.subheader("Layout Preview") components.iframe(layout_pdf, height=300, width=400) # Chat display st.markdown("
No messages yet. Start the conversation below.
", unsafe_allow_html=True) else: for msg in st.session_state.chat_history: cls = 'user-message' if msg['role']=='user' else 'assistant-message' st.markdown(f" ", unsafe_allow_html=True) st.markdown("