import streamlit as st
import transformers
import altair as alt
import pandas as pd
from difflib import SequenceMatcher

# ------------------------------
# Simple Authentication Setup
# ------------------------------
# Define a simple password for demonstration purposes.
PASSWORD = "password123"

# Initialize authentication state
if 'authenticated' not in st.session_state:
    st.session_state['authenticated'] = False

# Simple password input in the sidebar for authentication
if not st.session_state['authenticated']:
    st.sidebar.title("Login")
    password_input = st.sidebar.text_input("Enter password:", type="password")
    if st.sidebar.button("Login"):
        if password_input == PASSWORD:
            st.session_state['authenticated'] = True
            st.sidebar.success("Authenticated!")
            st.rerun()  # Re-run immediately so the main app renders after login
        else:
            st.sidebar.error("Incorrect password. Please try again.")
    st.stop()  # Stop app execution until authenticated

st.sidebar.write("Welcome!")

# ------------------------------
# Load Models
# ------------------------------
# Cache the pipelines so the large models load once, not on every Streamlit rerun.
@st.cache_resource
def load_qwen():
    # Qwen2.5 is a decoder-only model, so it runs under the "text-generation" task.
    return transformers.pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-14B",
        device_map="auto"
    )

@st.cache_resource
def load_phi():
    return transformers.pipeline(
        "text-generation",
        model="microsoft/phi-4",
        model_kwargs={"torch_dtype": "auto"},
        device_map="auto"
    )

qwen_pipeline = load_qwen()
phi_pipeline = load_phi()

# ------------------------------
# Utility Functions
# ------------------------------
def summarize_document(document_text):
    prompt = f"Summarize the following document and highlight key insights:\n\n{document_text}"
    # return_full_text=False keeps the prompt (and the pasted document) out of the returned summary.
    summary = qwen_pipeline(prompt, max_new_tokens=1024, return_full_text=False)[0]['generated_text']
    return summary

def answer_question(summary, question):
    prompt = f"Based on the following summary:\n\n{summary}\n\nAnswer the question: {question}"
    answer = phi_pipeline(prompt, max_new_tokens=256, return_full_text=False)[0]['generated_text']
    return answer

def find_similar_chunks(original, output):
    """Split `output` into segments, flagging those that also appear verbatim in `original`."""
    matcher = SequenceMatcher(None, original, output)
    segments = []
    left = 0
    for _, j, n in matcher.get_matching_blocks():
        # Non-matching text between the previous match and this one.
        if left < j:
            segments.append({'text': output[left:j], 'match': False})
        # Matching text shared with the original (the final block always has n == 0).
        if n > 0:
            segments.append({'text': output[j:j + n], 'match': True})
        left = j + n
    return segments
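
# ------------------------------------------------------------------
# Illustrative sketch (not part of the original app): one way to use
# find_similar_chunks is to highlight, in bold, the parts of a model
# output that were copied verbatim from the source document, e.g. via
# st.markdown(highlight_matches(document_text, summary)).
# ------------------------------------------------------------------
def highlight_matches(original, output):
    segments = find_similar_chunks(original, output)
    # Bold the matching segments so st.markdown can render the overlap.
    return "".join(
        f"**{seg['text']}**" if seg['match'] else seg['text']
        for seg in segments
    )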

# ------------------------------
# Streamlit App Layout
# ------------------------------
st.title("SmartDoc Analyzer")
st.markdown("Analyze Financial & Health Documents with AI")

# Tabs for different functionalities
tabs = st.tabs(["Document Summarization", "Interactive Q&A", "Visualization & Data Extraction"])

# -------- Document Summarization Tab --------
with tabs[0]:
    st.header("Document Summarization")
    document_text = st.text_area("Paste Document Text:", height=300)
    if st.button("Summarize Document"):
        if document_text:
            summary = summarize_document(document_text)
            st.subheader("Summary")
            st.write(summary)
            # Save summary in session for use in Q&A tab
            st.session_state['last_summary'] = summary
        else:
            st.warning("Please paste document text to summarize.")

# -------- Interactive Q&A Tab --------
with tabs[1]:
    st.header("Interactive Q&A")
    default_summary = st.session_state.get('last_summary', '')
    summary_context = st.text_area("Summary Context:", value=default_summary, height=150)
    question = st.text_input("Enter your question about the document:")
    if st.button("Get Answer"):
        if summary_context and question:
            answer = answer_question(summary_context, question)
            st.subheader("Answer")
            st.write(answer)
        else:
            st.warning("Please provide both a summary context and a question.")

# -------- Visualization & Data Extraction Tab --------
with tabs[2]:
    st.header("Visualization & Data Extraction")
    st.subheader("Visualization Placeholder")
    st.markdown("An interactive chart can be displayed here using Altair or Plotly.")
    # Example static Altair chart (replace with dynamic data extraction logic)
    data = pd.DataFrame({
        'Year': [2019, 2020, 2021, 2022],
        'Revenue': [150, 200, 250, 300]
    })
    chart = alt.Chart(data).mark_line(point=True).encode(
        x='Year:O',
        y='Revenue:Q',
        tooltip=['Year', 'Revenue']
    ).interactive()
    st.altair_chart(chart, use_container_width=True)

    st.subheader("Data Extraction Placeholder")
    st.markdown("Implement NLP techniques or model prompts to extract structured data here.")
    uploaded_file = st.file_uploader("Upload a document file for extraction", type=["pdf", "docx", "txt"])
    if uploaded_file is not None:
        st.info("File uploaded successfully. Data extraction logic would process this file.")
        # Add logic to extract tables, key figures, etc. from the uploaded file.
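        # --------------------------------------------------------------
        # Illustrative sketch (an assumption, not the original extraction
        # logic): for plain-text uploads, pull out currency-like figures
        # with a regular expression and show them in a table. PDF/DOCX
        # parsing would need extra libraries (e.g. pypdf, python-docx).
        # --------------------------------------------------------------
        if uploaded_file.name.lower().endswith(".txt"):
            import re
            raw_text = uploaded_file.read().decode("utf-8", errors="ignore")
            figures = re.findall(r"\$\s?[\d,]+(?:\.\d+)?", raw_text)
            if figures:
                st.dataframe(pd.DataFrame({"Extracted figure": figures}))
            else:
                st.write("No currency figures found in the uploaded text.")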

# ------------------------------
# Safety & Compliance Layer (Placeholder)
# ------------------------------
st.sidebar.markdown("### Safety & Compliance")
st.sidebar.info(
    "This tool provides AI-driven insights. "
    "Please note that summaries and answers are for informational purposes only and should not be "
    "considered professional financial or medical advice."
)

# ------------------------------
# End of Application
# ------------------------------