mgbam committed on
Commit
05989bf
·
verified ·
1 Parent(s): 7017824

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -0
app.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import transformers
3
+ import altair as alt
4
+ import pandas as pd
5
+ import streamlit_authenticator as stauth
6
+ from difflib import SequenceMatcher
7
+
8
+ # ------------------------------
9
+ # User Authentication Setup
10
+ # ------------------------------
11
+ # Sample configuration for authentication
12
# NOTE(review): the demo password and the cookie signing key below are
# hard-coded in source. That is tolerable for a demo only; load them from
# secrets/environment configuration before deploying.
config = {
    'credentials': {
        'usernames': {
            'demo_user': {
                'name': 'Demo User',
                # The plain-text demo password is hashed on every script run.
                'password': stauth.Hasher(['password123']).generate()[0],
            }
        }
    },
    'cookie': {
        'expiry_days': 30,
        'key': 'some_signature_key',
        'name': 'some_cookie_name',
    },
    'preauthorized': {
        'emails': []
    },
}

authenticator = stauth.Authenticate(
    config['credentials'],
    config['cookie']['name'],
    config['cookie']['key'],
    config['cookie']['expiry_days'],
)

name, authentication_status, username = authenticator.login('Login', 'main')

# The login status is None until credentials have been submitted and False
# only when they were rejected. Treating None as a failure would show an
# error to every visitor who simply has not logged in yet.
if authentication_status is None:
    st.warning('Please enter your username and password.')
    st.stop()
if not authentication_status:
    st.error('Authentication failed. Please refresh and try again.')
    st.stop()

# Only authenticated sessions reach this point.
st.sidebar.write(f"Welcome *{name}*")
authenticator.logout('Logout', 'sidebar')
46
+
47
+ # ------------------------------
48
+ # Load Models
49
+ # ------------------------------
50
@st.cache_resource
def load_qwen():
    """Load the Qwen pipeline used for document summarization.

    Qwen2.5 is a decoder-only (causal) language model, so it has to be
    loaded with the "text-generation" task; "text2text-generation" only
    accepts encoder-decoder models and cannot load this checkpoint.

    ``return_full_text=False`` makes the pipeline return only the newly
    generated text, so callers reading ``generated_text`` get the summary
    rather than the prompt followed by the summary.

    Returns:
        The ``transformers`` pipeline object. ``st.cache_resource`` keeps a
        single instance so the model is not reloaded on every script rerun.
    """
    return transformers.pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-14B",
        device_map="auto",
        return_full_text=False,
    )
57
+
58
@st.cache_resource
def load_phi():
    """Build the phi-4 text-generation pipeline used for question answering.

    Returns:
        The ``transformers`` pipeline object, cached by ``st.cache_resource``.
    """
    phi_options = {
        "model": "microsoft/phi-4",
        "model_kwargs": {"torch_dtype": "auto"},
        "device_map": "auto",
    }
    return transformers.pipeline("text-generation", **phi_options)
66
+
67
# Module-level pipeline handles used by the utility functions below; both
# loaders are wrapped in st.cache_resource.
qwen_pipeline, phi_pipeline = load_qwen(), load_phi()
69
+
70
+ # ------------------------------
71
+ # Utility Functions
72
+ # ------------------------------
73
def summarize_document(document_text):
    """Ask the Qwen pipeline to summarize ``document_text``.

    Args:
        document_text: Text of the document to summarize.

    Returns:
        The ``generated_text`` field of the first result the pipeline returns.
    """
    request = f"Summarize the following document and highlight key insights:\n\n{document_text}"
    outputs = qwen_pipeline(request, max_new_tokens=1024)
    return outputs[0]['generated_text']
77
+
78
def answer_question(summary, question):
    """Answer ``question`` using ``summary`` as the only context.

    Args:
        summary: Summary text the answer should be based on.
        question: The user's question about the document.

    Returns:
        The text generated by the phi pipeline, without the prompt and with
        surrounding whitespace removed.
    """
    prompt = f"Based on the following summary:\n\n{summary}\n\nAnswer the question: {question}"
    # A text-generation pipeline returns prompt + continuation by default;
    # return_full_text=False keeps only the continuation so the displayed
    # answer does not repeat the whole summary and question.
    outputs = phi_pipeline(prompt, max_new_tokens=256, return_full_text=False)
    return outputs[0]['generated_text'].strip()
82
+
83
def find_similar_chunks(original, output):
    """Split ``output`` into segments flagged by whether they match ``original``.

    Matching is done with ``difflib.SequenceMatcher``. Concatenating the
    ``text`` of all returned segments reproduces ``output`` exactly.

    Args:
        original: Reference sequence (typically the source document text).
        output: Sequence to segment (typically model output).

    Returns:
        A list of ``{'text': ..., 'match': bool}`` dicts in ``output`` order.
        ``match`` is True for spans that belong to a matching block and False
        for the spans in between. Empty segments are never emitted, so an
        empty ``output`` yields an empty list.
    """
    matcher = SequenceMatcher(None, original, output)
    segments = []
    left = 0  # index in `output` up to which text has been emitted
    for _, start, size in matcher.get_matching_blocks():
        if size == 0:
            # get_matching_blocks() always ends with a zero-length sentinel;
            # emitting it would add a bogus empty "match" segment.
            continue
        if left < start:
            segments.append({'text': output[left:start], 'match': False})
        segments.append({'text': output[start:start + size], 'match': True})
        left = start + size
    # Flush whatever follows the last real matching block.
    if left < len(output):
        segments.append({'text': output[left:], 'match': False})
    return segments
93
+
94
+ # ------------------------------
95
+ # Streamlit App Layout
96
+ # ------------------------------
97
st.title("SmartDoc Analyzer")
st.markdown("Analyze Financial & Health Documents with AI")

# One tab per feature area; the tab sections index into `tabs`.
tab_labels = ["Document Summarization", "Interactive Q&A", "Visualization & Data Extraction"]
tabs = st.tabs(tab_labels)

# -------- Document Summarization Tab --------
with tabs[0]:
    st.header("Document Summarization")
    document_text = st.text_area("Paste Document Text:", height=300)
    summarize_clicked = st.button("Summarize Document")
    if summarize_clicked and not document_text:
        st.warning("Please paste document text to summarize.")
    elif summarize_clicked:
        summary = summarize_document(document_text)
        st.subheader("Summary")
        st.write(summary)
        # Keep the summary in session state so the Q&A tab can pre-fill it.
        st.session_state['last_summary'] = summary
116
+
117
+ # -------- Interactive Q&A Tab --------
118
with tabs[1]:
    st.header("Interactive Q&A")
    # Pre-fill the context with the most recent summary stored in session
    # state, falling back to an empty string.
    summary_context = st.text_area(
        "Summary Context:",
        value=st.session_state.get('last_summary', ''),
        height=150,
    )
    question = st.text_input("Enter your question about the document:")
    if st.button("Get Answer"):
        if not (summary_context and question):
            st.warning("Please provide both a summary context and a question.")
        else:
            answer = answer_question(summary_context, question)
            st.subheader("Answer")
            st.write(answer)
            # Q&A pairs are not persisted; they could be stored in
            # st.session_state or a database.
131
+
132
+ # -------- Visualization & Data Extraction Tab --------
133
with tabs[2]:
    st.header("Visualization & Data Extraction")

    st.subheader("Visualization Placeholder")
    st.markdown("An interactive chart can be displayed here using Altair or Plotly.")

    # Static sample figures; stand-in for data extracted from a document.
    data = pd.DataFrame({
        'Year': [2019, 2020, 2021, 2022],
        'Revenue': [150, 200, 250, 300],
    })
    revenue_line = alt.Chart(data).mark_line(point=True)
    chart = revenue_line.encode(
        x='Year:O',
        y='Revenue:Q',
        tooltip=['Year', 'Revenue'],
    ).interactive()
    st.altair_chart(chart, use_container_width=True)

    st.subheader("Data Extraction Placeholder")
    st.markdown("Implement NLP techniques or model prompts to extract structured data here.")

    # The uploader is a placeholder: the file is accepted but not processed.
    uploaded_file = st.file_uploader(
        "Upload a document file for extraction",
        type=["pdf", "docx", "txt"],
    )
    if uploaded_file is not None:
        st.info("File uploaded successfully. Data extraction logic would process this file.")
        # Extraction of tables, key figures, etc. would go here.
159
+
160
+ # ------------------------------
161
+ # Safety & Compliance Layer (Placeholder)
162
+ # ------------------------------
163
st.sidebar.markdown("### Safety & Compliance")
# Static disclaimer shown in the sidebar.
disclaimer = (
    "This tool provides AI-driven insights. "
    "Please note that summaries and answers are for informational purposes only and should not be "
    "considered professional financial or medical advice."
)
st.sidebar.info(disclaimer)
169
+
170
+ # ------------------------------
171
+ # End of Application
172
+ # ------------------------------