Navya-Sree committed on
Commit
1ad20cb
·
verified ·
1 Parent(s): 75eb84b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +540 -0
app.py ADDED
@@ -0,0 +1,540 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ PDF Chat Web Application - Hugging Face Spaces Version
4
+ ======================================================
5
+ A Streamlit web app for chatting with PDF documents using OpenAI.
6
+ Deployed on Hugging Face Spaces for public access.
7
+ """
8
+
9
import html
import json
import os
import tempfile
from io import BytesIO

import PyPDF2
import requests
import streamlit as st
16
+
17
+ # Page configuration
18
# Configure the browser tab (title and icon) and the default layout.
# The icon literal was mis-decoded text; it is restored to the page emoji.
st.set_page_config(
    page_title="PDF Chat Assistant",
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded",
)
24
+
25
+ # Custom CSS for modern dark theme
26
# Dark-theme stylesheet injected into the page. It styles the app shell, the
# header/feature cards, chat bubbles, status banners, native Streamlit inputs
# and buttons, the upload drop zone, and the stat cards.
_CUSTOM_CSS = """
<style>
    .stApp {
        background: linear-gradient(135deg, #1e1e1e 0%, #2d2d2d 100%);
        color: #ffffff;
    }

    .main-header {
        background: linear-gradient(135deg, #2d2d2d 0%, #3d3d3d 100%);
        padding: 2rem;
        border-radius: 15px;
        text-align: center;
        margin-bottom: 2rem;
        box-shadow: 0 4px 6px rgba(0,0,0,0.3);
        border: 1px solid #404040;
    }

    .feature-card {
        background: linear-gradient(135deg, #2d2d2d 0%, #3d3d3d 100%);
        padding: 1.5rem;
        border-radius: 10px;
        margin: 1rem 0;
        border: 1px solid #404040;
        box-shadow: 0 2px 4px rgba(0,0,0,0.2);
    }

    .user-message {
        background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
        color: white;
        padding: 1rem;
        border-radius: 15px;
        margin: 0.5rem 0;
        margin-left: 15%;
        box-shadow: 0 2px 4px rgba(0,123,255,0.3);
    }

    .ai-message {
        background: linear-gradient(135deg, #495057 0%, #343a40 100%);
        color: white;
        padding: 1rem;
        border-radius: 15px;
        margin: 0.5rem 0;
        margin-right: 15%;
        box-shadow: 0 2px 4px rgba(73,80,87,0.3);
    }

    .success-message {
        background: linear-gradient(135deg, #28a745 0%, #20c997 100%);
        color: white;
        padding: 1rem;
        border-radius: 10px;
        margin: 1rem 0;
        box-shadow: 0 2px 4px rgba(40,167,69,0.3);
    }

    .warning-message {
        background: linear-gradient(135deg, #ffc107 0%, #fd7e14 100%);
        color: #212529;
        padding: 1rem;
        border-radius: 10px;
        margin: 1rem 0;
        box-shadow: 0 2px 4px rgba(255,193,7,0.3);
    }

    .info-message {
        background: linear-gradient(135deg, #17a2b8 0%, #20c997 100%);
        color: white;
        padding: 1rem;
        border-radius: 10px;
        margin: 1rem 0;
        box-shadow: 0 2px 4px rgba(23,162,184,0.3);
    }

    .stTextInput > div > div > input {
        background-color: #3d3d3d;
        color: white;
        border: 2px solid #555;
        border-radius: 10px;
        padding: 0.5rem;
    }

    .stTextInput > div > div > input:focus {
        border-color: #007bff;
        box-shadow: 0 0 0 0.2rem rgba(0,123,255,0.25);
    }

    .stButton > button {
        background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
        color: white;
        border: none;
        border-radius: 10px;
        padding: 0.5rem 1rem;
        font-weight: 600;
        transition: all 0.3s ease;
    }

    .stButton > button:hover {
        background: linear-gradient(135deg, #0056b3 0%, #004085 100%);
        transform: translateY(-2px);
        box-shadow: 0 4px 8px rgba(0,123,255,0.3);
    }

    .upload-area {
        border: 2px dashed #007bff;
        border-radius: 15px;
        padding: 2rem;
        text-align: center;
        background: rgba(0,123,255,0.1);
        margin: 1rem 0;
    }

    .stats-container {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
        gap: 1rem;
        margin: 1rem 0;
    }

    .stat-card {
        background: linear-gradient(135deg, #343a40 0%, #495057 100%);
        padding: 1rem;
        border-radius: 10px;
        text-align: center;
        border: 1px solid #404040;
    }
</style>
"""

# unsafe_allow_html is required so the <style> element reaches the browser
# instead of being rendered as literal text.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
153
+
154
class PDFChatBot:
    """Hold the extracted text of one PDF and answer questions about it.

    The UI keeps one instance per session and reads the public attributes
    directly:

    * ``pdf_text`` - text extracted from the current document.
    * ``pdf_pages`` / ``pdf_chars`` - page and character counts for display.
    * ``conversation_history`` - list of ``{"question", "answer"}`` dicts,
      appended after every successful completion. It is only recorded here;
      earlier turns are not sent back to the model.
    """

    # Only the first CONTEXT_LIMIT characters of the document are sent with a
    # question, to keep the request within the model's token limits.
    CONTEXT_LIMIT = 3000
    API_URL = "https://api.openai.com/v1/chat/completions"
    MODEL = "gpt-3.5-turbo"
    # Seconds to wait for the completion request before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.pdf_text = ""
        self.conversation_history = []
        self.pdf_pages = 0
        self.pdf_chars = 0

    def extract_pdf_text(self, pdf_file):
        """Extract the text of every page of ``pdf_file`` using PyPDF2.

        Args:
            pdf_file: Object handed straight to ``PyPDF2.PdfReader`` (the app
                passes the Streamlit uploaded-file object).

        Returns:
            A ``(success, message)`` tuple. On success the instance's
            ``pdf_text``, ``pdf_pages`` and ``pdf_chars`` are replaced; on
            failure they are left untouched and ``message`` explains why.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            pages = pdf_reader.pages
            page_count = len(pages)
            # ``or ""`` keeps a page that yields no text from aborting the
            # whole document; join avoids repeated string reallocation.
            text = "".join((page.extract_text() or "") + "\n" for page in pages)
        except Exception as e:
            # PyPDF2 raises a variety of errors for damaged or encrypted
            # files; all of them are reported to the user the same way.
            return False, f"Error reading PDF: {str(e)}"

        if not text.strip():
            return False, "Could not extract text from PDF. The PDF might contain only images or be password protected."

        self.pdf_text = text
        self.pdf_pages = page_count
        self.pdf_chars = len(text)

        return True, f"Successfully extracted text from {page_count} pages ({len(text):,} characters)!"

    def ask_openai(self, question, api_key):
        """Ask the OpenAI chat-completions API a question about the document.

        Args:
            question: The user's question.
            api_key: OpenAI API key sent as a bearer token.

        Returns:
            The model's answer, or a user-facing message describing what went
            wrong. This method reports failures through its return value
            rather than by raising.
        """
        # Cheap local validation first: none of these cases can produce a
        # useful answer, so do not spend a network round trip on them.
        if not self.pdf_text.strip():
            return "⚠️ No document loaded. Please upload a PDF first."
        if not question or not question.strip():
            return "⚠️ Please enter a question."
        if not api_key:
            return "❌ Invalid API key. Please check your OpenAI API key and try again."

        try:
            # Limit context to prevent token limits
            context = (
                "Based on the following document content, please answer the question accurately and concisely."
                f"\n\nDocument:\n{self.pdf_text[:self.CONTEXT_LIMIT]}\n\nQuestion: {question}"
            )

            headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            }

            data = {
                "model": self.MODEL,
                "messages": [
                    {"role": "system", "content": "You are a helpful AI assistant that answers questions about documents. Be accurate, concise, and helpful. If you cannot find the answer in the document, say so clearly."},
                    {"role": "user", "content": context},
                ],
                "max_tokens": 1000,
                "temperature": 0.1,
            }

            response = requests.post(
                self.API_URL,
                headers=headers,
                json=data,
                timeout=self.REQUEST_TIMEOUT,
            )

            if response.status_code == 200:
                answer = response.json()["choices"][0]["message"]["content"]

                # Store in conversation history
                self.conversation_history.append({"question": question, "answer": answer})

                return answer
            if response.status_code == 401:
                return "❌ Invalid API key. Please check your OpenAI API key and try again."
            if response.status_code == 429:
                return "⏳ Rate limit exceeded. Please wait a moment and try again."
            return f"❌ API Error: {response.status_code} - Please check your API key and try again."

        except requests.exceptions.Timeout:
            return "⏳ Request timed out. Please try again."
        except requests.exceptions.ConnectionError:
            return "🌐 Connection error. Please check your internet connection."
        except Exception as e:
            # Includes a 200 response whose JSON lacks the expected fields.
            return f"❌ Error: {str(e)}"
233
+
234
# Example prompts offered as one-click buttons: (icon, question text).
_EXAMPLE_QUESTIONS = (
    ("📋", "What is this document about?"),
    ("📝", "Summarize the main points"),
    ("🔍", "What are the key details?"),
    ("📊", "Give me important information"),
    ("❓", "What questions can I ask?"),
)


def _init_session_state():
    """Create the per-session objects on the first script run only."""
    if "bot" not in st.session_state:
        st.session_state.bot = PDFChatBot()
    if "messages" not in st.session_state:
        st.session_state.messages = []
    if "pdf_processed" not in st.session_state:
        st.session_state.pdf_processed = False
    if "uploaded_file_name" not in st.session_state:
        st.session_state.uploaded_file_name = ""


def _chat_bubble_html(role, content):
    """Return the HTML for one chat bubble with ``content`` safely escaped."""
    # The text comes from the user or the model and is rendered with
    # unsafe_allow_html, so it must be escaped. Newlines become <br> so that
    # multi-paragraph answers stay inside the bubble.
    safe = html.escape(str(content)).replace("\n", "<br>")
    if role == "user":
        return f'<div class="user-message"><strong>🧑 You:</strong> {safe}</div>'
    return f'<div class="ai-message"><strong>🤖 AI:</strong> {safe}</div>'


def _render_header():
    """Render the title banner at the top of the page."""
    st.markdown("""
    <div class="main-header">
        <h1>📄 PDF Chat Assistant</h1>
        <p style="font-size: 1.2em; margin: 0.5rem 0;">Upload any PDF and start an intelligent conversation with your document!</p>
        <p style="color: #aaa; margin-top: 1rem;">Powered by OpenAI GPT • Built with ❤️ for Hugging Face Spaces</p>
    </div>
    """, unsafe_allow_html=True)


def _render_sidebar():
    """Render the sidebar and return the API key typed by the user."""
    with st.sidebar:
        st.markdown("### 🔑 Configuration")

        api_key = st.text_input(
            "OpenAI API Key",
            type="password",
            help="Enter your OpenAI API key to start chatting with PDFs",
            placeholder="sk-...",
        )

        if api_key:
            st.success("✅ API Key Provided")
        else:
            st.error("❌ API Key Required")

        st.markdown("---")

        st.markdown("### 📚 How to Use")
        st.markdown("""
        1. **🔑 Enter your OpenAI API key** above
        2. **📤 Upload a PDF file** using the uploader
        3. **⏳ Wait** for text extraction (few seconds)
        4. **💬 Ask questions** about your document
        5. **🧠 Get AI-powered answers** instantly!
        """)

        st.markdown("---")

        st.markdown("### 🎯 Features")
        st.markdown("""
        • 📄 **PDF Text Extraction**
        • 🤖 **AI-Powered Q&A**
        • 💾 **Conversation Memory**
        • 🎨 **Beautiful Interface**
        • 🚀 **Fast & Responsive**
        • 🔒 **Privacy Focused**
        """)

        st.markdown("---")

        if st.button("🗑️ Clear Chat History", use_container_width=True):
            # Resets the whole session: chat log, bot, and loaded document.
            st.session_state.messages = []
            st.session_state.bot = PDFChatBot()
            st.session_state.pdf_processed = False
            st.session_state.uploaded_file_name = ""
            st.success("✅ Chat history cleared!")
            st.rerun()

        # API Key Info
        with st.expander("ℹ️ Get OpenAI API Key"):
            st.markdown("""
            **How to get your API key:**

            1. Go to [OpenAI Platform](https://platform.openai.com)
            2. Sign up or log in to your account
            3. Navigate to **API Keys** section
            4. Click **"Create new secret key"**
            5. Copy the key (starts with `sk-`)
            6. Paste it in the field above

            **Note:** Your API key is only used for this session and is not stored anywhere.
            """)

        # Status
        st.markdown("---")
        st.markdown("### 📊 Status")

        if st.session_state.pdf_processed:
            st.success("✅ PDF Ready")
            st.info(f"📄 {st.session_state.uploaded_file_name}")

            # Display PDF stats
            bot = st.session_state.bot
            st.markdown(f"""
            **📈 Document Stats:**
            - Pages: {bot.pdf_pages}
            - Characters: {bot.pdf_chars:,}
            - Conversations: {len(bot.conversation_history)}
            """)
        else:
            st.warning("⏳ No PDF loaded")

        if api_key:
            st.success("✅ API Connected")
        else:
            st.error("❌ API Key Missing")

    return api_key


def _render_upload_column(api_key):
    """Render the uploader, extract a newly uploaded PDF, list example prompts."""
    st.markdown("### 📤 Upload Your PDF")

    uploaded_file = st.file_uploader(
        "Choose a PDF file",
        type="pdf",
        help="Upload any PDF document (max 200MB)",
        label_visibility="collapsed",
    )

    if not uploaded_file:
        st.markdown("""
        <div class="upload-area">
            <h3>📁 Drag & Drop Your PDF Here</h3>
            <p>Or click "Browse files" above to select a PDF</p>
            <br>
            <p><small>📋 Supported: PDF files up to 200MB<br>
            🔒 Your files are processed securely and not stored</small></p>
        </div>
        """, unsafe_allow_html=True)

    if uploaded_file and api_key:
        is_new_file = st.session_state.uploaded_file_name != uploaded_file.name
        if not st.session_state.pdf_processed or is_new_file:
            with st.spinner("📖 Extracting text from your PDF..."):
                success, message = st.session_state.bot.extract_pdf_text(uploaded_file)

            # The message can carry exception text and the name is chosen by
            # the user; escape both before embedding them in raw HTML.
            safe_message = html.escape(message)
            safe_name = html.escape(uploaded_file.name)

            if success:
                st.markdown(f"""
                <div class="success-message">
                    <h4>✅ PDF Processed Successfully!</h4>
                    <p>{safe_message}</p>
                </div>
                """, unsafe_allow_html=True)

                st.session_state.pdf_processed = True
                st.session_state.uploaded_file_name = uploaded_file.name

                # Show file details
                file_size = uploaded_file.size / 1024  # KB
                bot = st.session_state.bot

                st.markdown(f"""
                <div class="info-message">
                    <strong>📄 File:</strong> {safe_name}<br>
                    <strong>📊 Size:</strong> {file_size:.1f} KB<br>
                    <strong>📃 Pages:</strong> {bot.pdf_pages}<br>
                    <strong>📝 Characters:</strong> {bot.pdf_chars:,}<br>
                    <strong>🎯 Status:</strong> Ready for questions!
                </div>
                """, unsafe_allow_html=True)
            else:
                # Do not keep answering from a previously loaded document
                # while the uploader shows a file that failed to parse.
                st.session_state.pdf_processed = False
                st.session_state.uploaded_file_name = ""

                st.markdown(f"""
                <div class="warning-message">
                    <h4>⚠️ Processing Failed</h4>
                    <p>{safe_message}</p>
                </div>
                """, unsafe_allow_html=True)

    elif uploaded_file and not api_key:
        st.markdown("""
        <div class="warning-message">
            <h4>⚠️ API Key Required</h4>
            <p>Please enter your OpenAI API key in the sidebar to process the PDF.</p>
        </div>
        """, unsafe_allow_html=True)

    # Example questions
    if st.session_state.pdf_processed:
        st.markdown("### 💡 Try These Questions")

        for icon, text in _EXAMPLE_QUESTIONS:
            if st.button(f"{icon} {text}", key=f"example_{text}", use_container_width=True):
                # Hand the question to the chat column on the next run.
                st.session_state.pending_question = text
                st.rerun()


def _render_chat_column(api_key):
    """Render the conversation, the question input, and answer new questions."""
    st.markdown("### 💬 Chat with Your PDF")

    pdf_ready = st.session_state.pdf_processed

    # Chat container
    with st.container():
        if st.session_state.messages:
            for message in st.session_state.messages:
                st.markdown(
                    _chat_bubble_html(message["role"], message["content"]),
                    unsafe_allow_html=True,
                )
        elif pdf_ready:
            st.markdown("""
            <div class="ai-message">
                <strong>🤖 AI:</strong> Hello! I've analyzed your PDF document. What would you like to know about it? Feel free to ask any questions!
            </div>
            """, unsafe_allow_html=True)
        else:
            st.markdown("""
            <div class="feature-card" style="text-align: center; padding: 3rem;">
                <h3>👋 Welcome to PDF Chat Assistant!</h3>
                <p style="font-size: 1.1em; margin: 1rem 0;">Transform any PDF into an interactive conversation</p>
                <br>
                <div class="stats-container">
                    <div class="stat-card">
                        <h4>📄 Smart</h4>
                        <p>AI understands your documents</p>
                    </div>
                    <div class="stat-card">
                        <h4>⚡ Fast</h4>
                        <p>Instant answers to your questions</p>
                    </div>
                    <div class="stat-card">
                        <h4>🔒 Secure</h4>
                        <p>Your data stays private</p>
                    </div>
                </div>
                <br>
                <p><strong>Get started:</strong> Add your API key and upload a PDF!</p>
            </div>
            """, unsafe_allow_html=True)

    # Input area
    st.markdown("---")

    typed_question = st.text_input(
        "Ask a question about your PDF:",
        placeholder="e.g., What are the main topics discussed in this document?",
        disabled=not pdf_ready,
        key="user_input",
    )

    # A question queued by an example button takes priority over typed text.
    # Remember that it came from a button BEFORE removing it from the session;
    # testing the session again after the delete would always be False and
    # the example question would never be sent.
    from_example = "pending_question" in st.session_state
    if from_example:
        user_question = st.session_state["pending_question"]
        del st.session_state["pending_question"]
    else:
        user_question = typed_question

    col_btn1, col_btn2, _ = st.columns([2, 1, 1])

    with col_btn1:
        send_button = st.button("📤 Send Message", disabled=not pdf_ready, use_container_width=True)

    with col_btn2:
        if st.button("🔄 Refresh", disabled=not pdf_ready, use_container_width=True):
            st.rerun()

    # Process question
    wants_answer = send_button or from_example
    if wants_answer and user_question and pdf_ready and api_key:
        # Add user message
        st.session_state.messages.append({"role": "user", "content": user_question})

        # Get AI response
        with st.spinner("🤖 AI is analyzing your question..."):
            ai_response = st.session_state.bot.ask_openai(user_question, api_key)

        # Add AI response
        st.session_state.messages.append({"role": "assistant", "content": ai_response})

        # Rerun so the new messages are drawn in the chat container above.
        st.rerun()

    elif wants_answer and not pdf_ready:
        st.warning("⚠️ Please upload and process a PDF first!")

    elif wants_answer and not api_key:
        st.warning("⚠️ Please enter your OpenAI API key in the sidebar!")


def _render_footer():
    """Render the credits at the bottom of the page."""
    st.markdown("---")
    st.markdown("""
    <div style="text-align: center; color: #888; padding: 2rem;">
        <h4>🚀 PDF Chat Assistant</h4>
        <p>Made with ❤️ using Streamlit • Powered by OpenAI GPT-3.5 • Hosted on 🤗 Hugging Face Spaces</p>
        <p><small>📄 Upload PDFs • 💬 Ask Questions • 🧠 Get AI Answers • 🔒 Privacy First</small></p>
        <br>
        <p><small>⭐ Like this app? Give it a star on Hugging Face Spaces!</small></p>
    </div>
    """, unsafe_allow_html=True)


def main():
    """Render the whole PDF Chat Assistant page for one Streamlit script run.

    Layout, top to bottom: header banner, sidebar (API key, help, status),
    a narrow upload column next to a wide chat column, then the footer.
    """
    _init_session_state()
    _render_header()

    api_key = _render_sidebar()

    # Main content
    upload_col, chat_col = st.columns([1, 2])
    with upload_col:
        _render_upload_column(api_key)
    with chat_col:
        _render_chat_column(api_key)

    _render_footer()


if __name__ == "__main__":
    main()