Create app.py
app.py
ADDED
@@ -0,0 +1,172 @@
import streamlit as st
import transformers
import altair as alt
import pandas as pd
import streamlit_authenticator as stauth
from difflib import SequenceMatcher

# ------------------------------
# User Authentication Setup
# ------------------------------
# Sample configuration for authentication
config = {
    'credentials': {
        'usernames': {
            'demo_user': {
                'name': 'Demo User',
                'password': stauth.Hasher(['password123']).generate()[0]  # hashed password
            }
        }
    },
    'cookie': {
        'expiry_days': 30,
        'key': 'some_signature_key',
        'name': 'some_cookie_name'
    },
    'preauthorized': {
        'emails': []
    }
}

authenticator = stauth.Authenticate(
    config['credentials'],
    config['cookie']['name'],
    config['cookie']['key'],
    config['cookie']['expiry_days']
)

name, authentication_status, username = authenticator.login('Login', 'main')
# authentication_status is False on bad credentials and None before the
# login form has been submitted; handle the two cases separately.
if authentication_status is False:
    st.error('Authentication failed. Please check your username and password.')
    st.stop()
elif authentication_status is None:
    st.warning('Please enter your username and password.')
    st.stop()

st.sidebar.write(f"Welcome *{name}*")
authenticator.logout('Logout', 'sidebar')
# ------------------------------
# Load Models
# ------------------------------
@st.cache_resource
def load_qwen():
    # Qwen2.5 is a decoder-only model, so it is served through the
    # "text-generation" task rather than "text2text-generation".
    return transformers.pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-14B",
        model_kwargs={"torch_dtype": "auto"},
        device_map="auto"
    )

@st.cache_resource
def load_phi():
    return transformers.pipeline(
        "text-generation",
        model="microsoft/phi-4",
        model_kwargs={"torch_dtype": "auto"},
        device_map="auto"
    )

qwen_pipeline = load_qwen()
phi_pipeline = load_phi()
# ------------------------------
# Utility Functions
# ------------------------------
def summarize_document(document_text):
    prompt = f"Summarize the following document and highlight key insights:\n\n{document_text}"
    # return_full_text=False keeps the prompt itself out of the returned summary.
    summary = qwen_pipeline(prompt, max_new_tokens=1024, return_full_text=False)[0]['generated_text']
    return summary

def answer_question(summary, question):
    prompt = f"Based on the following summary:\n\n{summary}\n\nAnswer the question: {question}"
    answer = phi_pipeline(prompt, max_new_tokens=256, return_full_text=False)[0]['generated_text']
    return answer
def find_similar_chunks(original, output):
    # Split `output` into segments, marking spans that also appear verbatim
    # in `original` so they can be highlighted in the UI.
    matcher = SequenceMatcher(None, original, output)
    segments = []
    left = 0
    for _, j, n in matcher.get_matching_blocks():
        if n == 0:
            continue  # skip the zero-length sentinel block at the end
        if left < j:
            segments.append({'text': output[left:j], 'match': False})
        segments.append({'text': output[j:j + n], 'match': True})
        left = j + n
    if left < len(output):
        segments.append({'text': output[left:], 'match': False})
    return segments
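# NOTE: find_similar_chunks is defined above but never called in this app.
# Below is a minimal, hypothetical sketch (the name render_overlap and the
# <mark>-based highlighting are assumptions, not part of the original file)
# showing how its segments could be rendered in Streamlit to highlight the
# parts of a model output copied verbatim from the source document.
def render_overlap(original, output):
    from html import escape  # stdlib; imported locally to keep the sketch self-contained
    html_parts = []
    for seg in find_similar_chunks(original, output):
        text = escape(seg['text'])
        # Matched spans are wrapped in <mark> so Streamlit renders them highlighted.
        html_parts.append(f"<mark>{text}</mark>" if seg['match'] else text)
    st.markdown(''.join(html_parts), unsafe_allow_html=True)
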
# ------------------------------
# Streamlit App Layout
# ------------------------------
st.title("SmartDoc Analyzer")
st.markdown("Analyze Financial & Health Documents with AI")

# Tabs for different functionalities
tabs = st.tabs(["Document Summarization", "Interactive Q&A", "Visualization & Data Extraction"])
# -------- Document Summarization Tab --------
with tabs[0]:
    st.header("Document Summarization")
    document_text = st.text_area("Paste Document Text:", height=300)
    if st.button("Summarize Document"):
        if document_text:
            summary = summarize_document(document_text)
            st.subheader("Summary")
            st.write(summary)
            # Save summary in session for use in Q&A tab
            st.session_state['last_summary'] = summary
        else:
            st.warning("Please paste document text to summarize.")
# -------- Interactive Q&A Tab --------
with tabs[1]:
    st.header("Interactive Q&A")
    default_summary = st.session_state.get('last_summary', '')
    summary_context = st.text_area("Summary Context:", value=default_summary, height=150)
    question = st.text_input("Enter your question about the document:")
    if st.button("Get Answer"):
        if summary_context and question:
            answer = answer_question(summary_context, question)
            st.subheader("Answer")
            st.write(answer)
            # For session saving, one could store Q&A pairs in st.session_state
            # or a database (see the save_qa_pair sketch after this tab).
        else:
            st.warning("Please provide both a summary context and a question.")
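# A minimal, hypothetical sketch of the session saving mentioned above: the
# helper name save_qa_pair and the 'qa_history' key are assumptions, not part
# of the original app. It would be called right after st.write(answer) inside
# the "Get Answer" branch.
def save_qa_pair(question, answer):
    # Accumulate Q&A pairs for the current browser session only;
    # st.session_state is discarded when the session ends.
    if 'qa_history' not in st.session_state:
        st.session_state['qa_history'] = []
    st.session_state['qa_history'].append({'question': question, 'answer': answer})
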
# -------- Visualization & Data Extraction Tab --------
with tabs[2]:
    st.header("Visualization & Data Extraction")

    st.subheader("Visualization Placeholder")
    st.markdown("An interactive chart can be displayed here using Altair or Plotly.")

    # Example static Altair chart (replace with dynamic data extraction logic)
    data = pd.DataFrame({
        'Year': [2019, 2020, 2021, 2022],
        'Revenue': [150, 200, 250, 300]
    })
    chart = alt.Chart(data).mark_line(point=True).encode(
        x='Year:O',
        y='Revenue:Q',
        tooltip=['Year', 'Revenue']
    ).interactive()
    st.altair_chart(chart, use_container_width=True)

    st.subheader("Data Extraction Placeholder")
    st.markdown("Implement NLP techniques or model prompts to extract structured data here.")
    # File uploader example for future data extraction features
    uploaded_file = st.file_uploader("Upload a document file for extraction", type=["pdf", "docx", "txt"])
    if uploaded_file is not None:
        st.info("File uploaded successfully. Data extraction logic would process this file.")
        # Add logic to extract tables, key figures, etc. from the uploaded
        # file (a plain-text sketch follows after this tab).
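# A minimal, hypothetical sketch of the extraction step referenced above,
# limited to .txt uploads so it needs no extra packages. The helper name
# extract_key_figures and the currency regex are assumptions, not part of the
# original app; it would be called inside the uploaded_file branch above,
# and PDF/DOCX parsing would need additional libraries.
import re  # would normally live with the other imports at the top of the file

def extract_key_figures(uploaded_file):
    # Streamlit's UploadedFile is file-like: read the raw bytes and decode.
    text = uploaded_file.read().decode('utf-8', errors='ignore')
    # Pull out simple monetary amounts such as "$1,200.50" as candidate key figures.
    figures = re.findall(r'\$\s?\d[\d,]*(?:\.\d+)?', text)
    return pd.DataFrame({'figure': figures})
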
# ------------------------------
# Safety & Compliance Layer (Placeholder)
# ------------------------------
st.sidebar.markdown("### Safety & Compliance")
st.sidebar.info(
    "This tool provides AI-driven insights. "
    "Please note that summaries and answers are for informational purposes only and should not be "
    "considered professional financial or medical advice."
)
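# A minimal, hypothetical sketch of what this placeholder layer could do
# beyond the static sidebar notice: append the disclaimer to any model output
# that looks like direct financial or medical advice. The helper name
# add_compliance_notice and the keyword list are assumptions, not part of the
# original app; it would wrap the return values of summarize_document and
# answer_question.
ADVICE_KEYWORDS = ('you should invest', 'buy this stock', 'stop taking', 'dosage')

def add_compliance_notice(model_output):
    lowered = model_output.lower()
    if any(keyword in lowered for keyword in ADVICE_KEYWORDS):
        return model_output + "\n\n*This is not professional financial or medical advice.*"
    return model_output
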
# ------------------------------
# End of Application
# ------------------------------