# mgbam's picture
# Update app.py
# 9794246 verified
# raw
# history blame
# 5.48 kB
import streamlit as st
import transformers
import altair as alt
import pandas as pd
from difflib import SequenceMatcher
# ------------------------------
# Simple Authentication Setup
# ------------------------------
# Demonstration-only shared password.
# NOTE(review): a credential hard-coded in source is not suitable for a real
# deployment; load it from st.secrets or an environment variable instead.
PASSWORD = "password123"

# Initialize authentication state the first time this session runs the script.
if 'authenticated' not in st.session_state:
    st.session_state['authenticated'] = False

# Show the login form in the sidebar until the session is authenticated.
if not st.session_state['authenticated']:
    st.sidebar.title("Login")
    password_input = st.sidebar.text_input("Enter password:", type="password")
    if st.sidebar.button("Login"):
        if password_input == PASSWORD:
            st.session_state['authenticated'] = True
            st.sidebar.success("Authenticated!")
        else:
            st.sidebar.error("Incorrect password. Please try again.")
    # Halt only while the session is still unauthenticated. Stopping
    # unconditionally here would leave the page blank right after a correct
    # password, until the user triggered another rerun.
    if not st.session_state['authenticated']:
        st.stop()

st.sidebar.write("Welcome!")
# ------------------------------
# Load Models
# ------------------------------
@st.cache_resource
def load_qwen():
    """Build the Qwen2.5-14B pipeline used for document summarization.

    Qwen2.5 is a decoder-only (causal) language model, so it is loaded with
    the "text-generation" task; "text2text-generation" targets
    encoder-decoder models. ``return_full_text=False`` is set as a pipeline
    default so that ``generated_text`` contains only the newly generated
    text rather than the prompt followed by the completion.

    Returns:
        The ``transformers`` pipeline object. ``st.cache_resource`` caches it
        so the model is not rebuilt on every Streamlit rerun.
    """
    return transformers.pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-14B",
        # Same dtype setting as load_phi: take the dtype from the checkpoint.
        model_kwargs={"torch_dtype": "auto"},
        return_full_text=False,
        device_map="auto",
    )
@st.cache_resource
def load_phi():
    """Build the microsoft/phi-4 text-generation pipeline.

    Returns:
        The ``transformers`` pipeline object. ``st.cache_resource`` caches it
        so the model is not rebuilt on every Streamlit rerun.
    """
    pipeline_options = {
        "model": "microsoft/phi-4",
        "model_kwargs": {"torch_dtype": "auto"},
        "device_map": "auto",
    }
    return transformers.pipeline("text-generation", **pipeline_options)
# Instantiate both pipelines at script start (Qwen first, then Phi); the
# utility functions below read these module-level names.
qwen_pipeline, phi_pipeline = load_qwen(), load_phi()
# ------------------------------
# Utility Functions
# ------------------------------
def summarize_document(document_text):
    """Ask the Qwen pipeline for a summary of *document_text*.

    Args:
        document_text: Raw text of the document to summarize.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    outputs = qwen_pipeline(
        f"Summarize the following document and highlight key insights:\n\n{document_text}",
        max_new_tokens=1024,
    )
    first_result = outputs[0]
    return first_result['generated_text']
def answer_question(summary, question):
    """Answer *question* using *summary* as the only context.

    Args:
        summary: Summary text that the answer should be based on.
        question: The user's question about the document.

    Returns:
        The text generated by the Phi pipeline for the constructed prompt.
    """
    prompt = f"Based on the following summary:\n\n{summary}\n\nAnswer the question: {question}"
    # A text-generation pipeline returns the prompt followed by the completion
    # by default; request only the completion so the answer shown to the user
    # does not echo the whole summary and question back.
    outputs = phi_pipeline(prompt, max_new_tokens=256, return_full_text=False)
    return outputs[0]['generated_text']
def find_similar_chunks(original, output):
    """Split *output* into consecutive segments flagged by overlap with *original*.

    Args:
        original: Reference string.
        output: String to segment; the segment texts concatenate back to it.

    Returns:
        A list of ``{'text': str, 'match': bool}`` dicts in output order.
        ``match`` is True for spans that ``SequenceMatcher`` aligned with
        *original* and False for the spans in between. No segment is empty.
    """
    matcher = SequenceMatcher(None, original, output)
    segments = []
    left = 0  # index in `output` just past the last emitted segment
    for _, j, n in matcher.get_matching_blocks():
        if left < j:
            segments.append({'text': output[left:j], 'match': False})
        # get_matching_blocks() always ends with a zero-length sentinel block;
        # skip it so no empty "matching" segment is emitted.
        if n:
            segments.append({'text': output[j:j + n], 'match': True})
        left = j + n
    return segments
# ------------------------------
# Streamlit App Layout
# ------------------------------
st.title("SmartDoc Analyzer")
st.markdown("Analyze Financial & Health Documents with AI")

# One tab per feature; the sections below index into `tabs` by position.
tab_labels = [
    "Document Summarization",
    "Interactive Q&A",
    "Visualization & Data Extraction",
]
tabs = st.tabs(tab_labels)
# -------- Document Summarization Tab --------
with tabs[0]:
    st.header("Document Summarization")
    document_text = st.text_area("Paste Document Text:", height=300)
    if st.button("Summarize Document"):
        # Treat whitespace-only input as empty so a blank document is never
        # sent to the model.
        if document_text.strip():
            # Generation can take a while; show progress while it runs.
            with st.spinner("Summarizing document..."):
                summary = summarize_document(document_text)
            st.subheader("Summary")
            st.write(summary)
            # Save summary in session for use in Q&A tab
            st.session_state['last_summary'] = summary
        else:
            st.warning("Please paste document text to summarize.")
# -------- Interactive Q&A Tab --------
with tabs[1]:
    st.header("Interactive Q&A")
    # Pre-fill the context with the most recent summary, if one was produced.
    default_summary = st.session_state.get('last_summary', '')
    summary_context = st.text_area("Summary Context:", value=default_summary, height=150)
    question = st.text_input("Enter your question about the document:")
    if st.button("Get Answer"):
        # Treat whitespace-only fields as missing so the model is never
        # queried with a blank context or a blank question.
        if summary_context.strip() and question.strip():
            # Generation can take a while; show progress while it runs.
            with st.spinner("Generating answer..."):
                answer = answer_question(summary_context, question)
            st.subheader("Answer")
            st.write(answer)
        else:
            st.warning("Please provide both a summary context and a question.")
# -------- Visualization & Data Extraction Tab --------
with tabs[2]:
    st.header("Visualization & Data Extraction")

    st.subheader("Visualization Placeholder")
    st.markdown("An interactive chart can be displayed here using Altair or Plotly.")

    # Static sample figures; to be replaced by data extracted from documents.
    data = pd.DataFrame(
        {
            'Year': [2019, 2020, 2021, 2022],
            'Revenue': [150, 200, 250, 300],
        }
    )
    # Line chart with point markers: Year as an ordinal x axis, Revenue as a
    # quantitative y axis, both shown in the tooltip.
    line_with_points = alt.Chart(data).mark_line(point=True)
    encoded_chart = line_with_points.encode(
        x='Year:O',
        y='Revenue:Q',
        tooltip=['Year', 'Revenue'],
    )
    chart = encoded_chart.interactive()
    st.altair_chart(chart, use_container_width=True)

    st.subheader("Data Extraction Placeholder")
    st.markdown("Implement NLP techniques or model prompts to extract structured data here.")
    uploaded_file = st.file_uploader(
        "Upload a document file for extraction",
        type=["pdf", "docx", "txt"],
    )
    if uploaded_file is not None:
        # The upload is only acknowledged for now; nothing reads the file yet.
        st.info("File uploaded successfully. Data extraction logic would process this file.")
        # Add logic to extract tables, key figures, etc. from the uploaded file.
# ------------------------------
# Safety & Compliance Layer (Placeholder)
# ------------------------------
st.sidebar.markdown("### Safety & Compliance")
# Disclaimer shown in the sidebar beneath the section heading.
disclaimer_text = (
    "This tool provides AI-driven insights. "
    "Please note that summaries and answers are for informational purposes only and should not be "
    "considered professional financial or medical advice."
)
st.sidebar.info(disclaimer_text)
# ------------------------------
# End of Application
# ------------------------------