shukdevdatta123 committed on
Commit
55299b7
·
verified ·
1 Parent(s): ffa9bdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +377 -71
app.py CHANGED
@@ -1,10 +1,14 @@
1
  import gradio as gr
2
  import PyPDF2
 
3
  import io
4
  import os
5
  from groq import Groq
6
  import tempfile
7
  import traceback
 
 
 
8
 
9
  def extract_text_from_pdf(pdf_file):
10
  """Extract text from uploaded PDF file"""
@@ -30,6 +34,74 @@ def extract_text_from_pdf(pdf_file):
30
  except Exception as e:
31
  return None, f"Error reading PDF: {str(e)}"
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def summarize_with_groq(api_key, text):
34
  """Generate summary using Groq API"""
35
  try:
@@ -44,11 +116,8 @@ def summarize_with_groq(api_key, text):
44
 
45
  # System prompt for summarization
46
  system_prompt = """You are a highly capable language model specialized in document summarization. Your task is to read and understand the full content of a multi-page PDF document and generate a clear, accurate, and detailed summary of the entire document.
47
-
48
  Focus on capturing all main ideas, key points, arguments, findings, and conclusions presented throughout the document. If the document is technical, legal, academic, or contains structured sections (e.g., introduction, methods, results, discussion), maintain the logical flow and structure while expressing the content in a comprehensive and accessible manner.
49
-
50
  Avoid unnecessary simplification. Include important details, supporting evidence, and nuanced insights that reflect the depth of the original material. Do not copy the text verbatim.
51
-
52
  Output only the summary. Do not explain your process. Use a neutral, professional, and informative tone. The summary should provide a full understanding of the document to someone who has not read it."""
53
 
54
  # Create completion with optimal hyperparameters
@@ -80,8 +149,83 @@ Output only the summary. Do not explain your process. Use a neutral, professiona
80
  error_msg += "\n\nPlease check your Groq API key and ensure it's valid."
81
  return error_msg
82
 
83
- def process_pdf_and_summarize(api_key, pdf_file, progress=gr.Progress()):
84
- """Main function to process PDF and generate summary"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  try:
86
  if not api_key or not api_key.strip():
87
  return "❌ Please enter your Groq API key", "", ""
@@ -114,14 +258,78 @@ def process_pdf_and_summarize(api_key, pdf_file, progress=gr.Progress()):
114
  error_traceback = traceback.format_exc()
115
  return f"❌ Unexpected error: {str(e)}\n\nTraceback:\n{error_traceback}", "", ""
116
 
117
- def clear_all():
118
- """Clear all fields"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  return "", None, "", "", ""
120
 
 
 
 
 
121
  # Custom CSS for better styling
122
  css = """
123
  .gradio-container {
124
- max-width: 1200px !important;
125
  margin: auto !important;
126
  }
127
  .main-header {
@@ -141,19 +349,51 @@ css = """
141
  border-left: 4px solid #007bff;
142
  margin: 1rem 0;
143
  }
 
 
 
 
 
 
 
144
  """
145
 
146
  # Create Gradio interface
147
- with gr.Blocks(css=css, title="PDF Summarizer with Groq AI") as demo:
148
  # Header
149
  gr.HTML("""
150
  <div class="main-header">
151
- <h1>πŸ“„ PDF Summarizer with Groq AI</h1>
152
- <p>Upload any PDF document and get an AI-powered summary using Groq's Llama model</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  </div>
154
  """)
155
 
156
- # Info box
157
  gr.HTML("""
158
  <div class="info-box">
159
  <strong>πŸ”‘ How to get your Groq API Key:</strong><br>
@@ -165,62 +405,115 @@ with gr.Blocks(css=css, title="PDF Summarizer with Groq AI") as demo:
165
  </div>
166
  """)
167
 
168
- with gr.Row():
169
- with gr.Column(scale=1):
170
- # Input section
171
- gr.Markdown("## πŸ”§ Configuration")
172
- api_key_input = gr.Textbox(
173
- label="Groq API Key",
174
- placeholder="Enter your Groq API key here...",
175
- type="password"
176
- )
177
- gr.Markdown("*Your API key is not stored and only used for this session*")
178
-
179
- pdf_file_input = gr.File(
180
- label="Upload PDF Document",
181
- file_types=[".pdf"]
182
- )
183
- gr.Markdown("*Upload any PDF file to summarize*")
184
-
185
  with gr.Row():
186
- summarize_btn = gr.Button("πŸ“‹ Generate Summary", variant="primary", size="lg")
187
- clear_btn = gr.Button("πŸ—‘οΈ Clear All", variant="secondary")
188
-
189
- with gr.Column(scale=2):
190
- # Output section
191
- gr.Markdown("## πŸ“Š Results")
192
- status_output = gr.Textbox(
193
- label="Status",
194
- interactive=False,
195
- show_label=True
196
- )
197
-
198
- with gr.Tabs():
199
- with gr.TabItem("πŸ“ Summary"):
200
- summary_output = gr.Textbox(
201
- label="AI Generated Summary",
202
- lines=15,
203
- interactive=False,
204
- placeholder="Your PDF summary will appear here..."
205
  )
 
 
 
 
206
 
207
- with gr.TabItem("πŸ“„ Extracted Text Preview"):
208
- text_preview_output = gr.Textbox(
209
- label="Extracted Text (First 500 characters)",
210
- lines=10,
211
  interactive=False,
212
- placeholder="Preview of extracted text will appear here..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  )
214
 
215
  # Usage instructions
216
  gr.HTML("""
217
  <div class="info-box">
218
  <strong>πŸ“‹ Usage Instructions:</strong><br>
219
- 1. Enter your Groq API key in the field above<br>
220
- 2. Upload a PDF document (any size, any content)<br>
221
- 3. Click "Generate Summary" to process your document<br>
222
- 4. View the AI-generated summary and extracted text preview<br>
223
- 5. Use "Clear All" to reset all fields
 
 
 
 
 
 
 
 
 
 
 
 
224
  </div>
225
  """)
226
 
@@ -228,31 +521,44 @@ with gr.Blocks(css=css, title="PDF Summarizer with Groq AI") as demo:
228
  gr.HTML("""
229
  <div style="margin-top: 2rem; padding: 1rem; background-color: #e9ecef; border-radius: 0.5rem;">
230
  <strong>πŸ€– Model Information:</strong><br>
231
- β€’ Model: Llama-3.3-70B-Versatile (via Groq)<br>
232
- β€’ Temperature: 0.3 (focused, factual summaries)<br>
233
- β€’ Max Tokens: 2048 (comprehensive summaries)<br>
234
- β€’ Top-p: 0.9 (balanced creativity and accuracy)
235
  </div>
236
  """)
237
 
238
- # Event handlers
239
  summarize_btn.click(
240
- fn=process_pdf_and_summarize,
241
- inputs=[api_key_input, pdf_file_input],
242
- outputs=[status_output, text_preview_output, summary_output],
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  show_progress=True
244
  )
245
 
246
- clear_btn.click(
247
- fn=clear_all,
248
- outputs=[api_key_input, pdf_file_input, status_output, text_preview_output, summary_output]
249
  )
250
 
251
  # Launch the app
252
  if __name__ == "__main__":
253
- print("πŸš€ Starting PDF Summarizer App...")
254
  print("πŸ“‹ Make sure you have the required packages installed:")
255
- print(" pip install gradio groq PyPDF2")
256
  print("\nπŸ”‘ Don't forget to get your Groq API key from: https://console.groq.com/")
257
 
258
  demo.launch(
 
1
  import gradio as gr
2
  import PyPDF2
3
+ import fitz # PyMuPDF
4
  import io
5
  import os
6
  from groq import Groq
7
  import tempfile
8
  import traceback
9
+ import base64
10
+ from io import BytesIO
11
+ from PIL import Image
12
 
13
  def extract_text_from_pdf(pdf_file):
14
  """Extract text from uploaded PDF file"""
 
34
  except Exception as e:
35
  return None, f"Error reading PDF: {str(e)}"
36
 
37
def extract_images_from_pdf(pdf_file_path):
    """Extract all images embedded in a PDF and return them as PIL images.

    Args:
        pdf_file_path: Path of the PDF file; passed straight to ``fitz.open``.

    Returns:
        A list with one dict per embedded image, in page order, holding:
        ``'image'`` (the PIL image), ``'page'`` (1-based page number),
        ``'index'`` (1-based position in that page's image list) and
        ``'format'`` (the ``"ext"`` value reported by ``extract_image``).

    Raises:
        Whatever ``fitz`` or ``PIL.Image.open`` raise for an unreadable PDF
        or image stream; the document is closed before the error propagates.
    """
    images = []

    pdf_document = fitz.open(pdf_file_path)
    try:
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)

            # full=True asks PyMuPDF for the complete image entries; the
            # first item of each entry is the image's xref.
            for img_index, img in enumerate(page.get_images(full=True), start=1):
                xref = img[0]

                # Pull the raw image bytes out of the PDF and decode them.
                base_image = pdf_document.extract_image(xref)
                pil_image = Image.open(BytesIO(base_image["image"]))

                images.append({
                    'image': pil_image,
                    'page': page_num + 1,
                    'index': img_index,
                    'format': base_image["ext"],
                })
    finally:
        # Release the document even when extraction or decoding fails.
        pdf_document.close()

    return images
70
+
71
def convert_pdf_pages_to_images(pdf_file_path, dpi=150):
    """Render every page of a PDF to a PIL image.

    Args:
        pdf_file_path: Path of the PDF file; passed straight to ``fitz.open``.
        dpi: Target resolution. Pages are scaled by ``dpi / 72`` on both axes.

    Returns:
        A list with one dict per page, in page order, holding ``'image'``
        (the rendered PIL image), ``'page'`` (1-based page number) and
        ``'type'`` (always ``'full_page'``).

    Raises:
        Whatever ``fitz`` or ``PIL.Image.open`` raise while opening or
        rendering; the document is closed before the error propagates.
    """
    images = []

    # The zoom matrix depends only on dpi, so build it once for all pages.
    zoom = dpi / 72
    mat = fitz.Matrix(zoom, zoom)

    pdf_document = fitz.open(pdf_file_path)
    try:
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)

            # Rasterize the page, then round-trip through PNG bytes to get
            # a PIL image that no longer depends on the open document.
            pix = page.get_pixmap(matrix=mat)
            pil_image = Image.open(BytesIO(pix.tobytes("png")))

            images.append({
                'image': pil_image,
                'page': page_num + 1,
                'type': 'full_page',
            })
    finally:
        # Release the document even when rendering fails part-way through.
        pdf_document.close()

    return images
97
+
98
def encode_image_to_base64(pil_image):
    """Return the image, saved as PNG, as a base64 ``data:image/png`` URL."""
    png_buffer = BytesIO()
    pil_image.save(png_buffer, format="PNG")

    # Base64 output is ASCII, so the default decode() yields a plain str.
    encoded_png = base64.b64encode(png_buffer.getvalue()).decode()
    return "data:image/png;base64," + encoded_png
104
+
105
  def summarize_with_groq(api_key, text):
106
  """Generate summary using Groq API"""
107
  try:
 
116
 
117
  # System prompt for summarization
118
  system_prompt = """You are a highly capable language model specialized in document summarization. Your task is to read and understand the full content of a multi-page PDF document and generate a clear, accurate, and detailed summary of the entire document.
 
119
  Focus on capturing all main ideas, key points, arguments, findings, and conclusions presented throughout the document. If the document is technical, legal, academic, or contains structured sections (e.g., introduction, methods, results, discussion), maintain the logical flow and structure while expressing the content in a comprehensive and accessible manner.
 
120
  Avoid unnecessary simplification. Include important details, supporting evidence, and nuanced insights that reflect the depth of the original material. Do not copy the text verbatim.
 
121
  Output only the summary. Do not explain your process. Use a neutral, professional, and informative tone. The summary should provide a full understanding of the document to someone who has not read it."""
122
 
123
  # Create completion with optimal hyperparameters
 
149
  error_msg += "\n\nPlease check your Groq API key and ensure it's valid."
150
  return error_msg
151
 
152
def analyze_images_with_groq(api_key, images, model="meta-llama/llama-4-scout-17b-16e-instruct"):
    """Send each image to the Groq chat API and collect the analyses.

    Args:
        api_key: Groq API key. Empty or whitespace-only keys are rejected
            before any request is made.
        images: Iterable of dicts with an ``'image'`` PIL image and optional
            ``'page'``, ``'index'`` and ``'type'`` entries, used only to
            label the output.
        model: Groq model id used for the requests. Image payloads need a
            vision-capable model; the previous hard-coded
            ``llama-3.3-70b-versatile`` is a text model.
            NOTE(review): confirm the default against Groq's current list
            of vision models.

    Returns:
        A Markdown string with one section per image, or a message string
        describing a missing key, an empty image list, or an API error.
        This function reports failures in its return value instead of
        raising.
    """
    # Same emptiness rule as the text-summary path: blank keys are invalid.
    if not api_key or not api_key.strip():
        return "❌ Please enter your Groq API key."

    # The instructions are identical for every image, so build them once.
    system_prompt = (
        "You are an advanced language model with strong capabilities in visual and textual understanding. Your task is to analyze all images, diagrams, and flowcharts within a PDF document. This includes:\n"
        "\n"
        "1. Extracting and interpreting text from images and flowcharts.\n"
        "2. Understanding the visual structure, logic, and relationships depicted in diagrams.\n"
        "3. Summarizing the content and purpose of each visual element in a clear and informative manner.\n"
        "\n"
        "After processing, be ready to answer user questions about any of the images or flowcharts, including their meaning, structure, data, process flows, or relationships shown.\n"
        "\n"
        "Be accurate, concise, and visually aware. Clearly explain visual content in text form. Do not guess if visual data is unclear or ambiguous β€” instead, state what is observable.\n"
        "\n"
        "Use a neutral, helpful tone. Do not include irrelevant information or commentary unrelated to the visual content. When summarizing or answering questions, assume the user may not have access to the original image or diagram."
    )

    try:
        client = Groq(api_key=api_key)

        results = []

        for img_data in images:
            # The API receives the picture inline as a base64 data URL.
            base64_image = encode_image_to_base64(img_data['image'])

            messages = [
                {
                    "role": "system",
                    "content": system_prompt,
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"Please analyze this image from page {img_data.get('page', 'unknown')} of the PDF document. Provide a detailed analysis of all visual elements, text, diagrams, flowcharts, and their relationships.",
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": base64_image,
                            },
                        },
                    ],
                },
            ]

            completion = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.2,
                max_completion_tokens=2048,
                top_p=0.85,
                stream=False,
            )

            analysis = completion.choices[0].message.content

            # Label the section: embedded images carry an index, rendered
            # pages carry type == 'full_page'.
            page_info = f"Page {img_data.get('page', 'N/A')}"
            if 'index' in img_data:
                page_info += f", Image {img_data['index']}"
            elif img_data.get('type') == 'full_page':
                page_info += " (Full Page)"

            results.append(f"## πŸ“„ {page_info}\n\n{analysis}\n\n---\n")

        if not results:
            return "⚠️ No images found in the PDF document."

        return "\n".join(results)

    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing.
        return f"❌ Error analyzing images: {e}"
226
+
227
+ def process_pdf_text_summary(api_key, pdf_file, progress=gr.Progress()):
228
+ """Process PDF and generate text summary"""
229
  try:
230
  if not api_key or not api_key.strip():
231
  return "❌ Please enter your Groq API key", "", ""
 
258
  error_traceback = traceback.format_exc()
259
  return f"❌ Unexpected error: {str(e)}\n\nTraceback:\n{error_traceback}", "", ""
260
 
261
def process_pdf_image_analysis(api_key, pdf_file, analysis_method, progress=gr.Progress()):
    """Copy an uploaded PDF to a temp file, collect its images and analyze them.

    Args:
        api_key: Groq API key, forwarded to ``analyze_images_with_groq``.
        pdf_file: The uploaded PDF. May be an object with ``read()``, an
            object whose ``name`` attribute is a path, or a plain path
            string.
        analysis_method: ``"Extract embedded images only"``,
            ``"Full page analysis"``; any other value runs both methods.
        progress: Progress callback, called with a fraction and ``desc``.

    Returns:
        The analysis text, or a message string describing missing input,
        missing visual content, or a processing error. Failures are reported
        in the return value instead of being raised.
    """
    if pdf_file is None:
        return "⚠️ Please upload a PDF file."

    if not api_key or api_key.strip() == "":
        return "⚠️ Please enter your Groq API key."

    try:
        progress(0.1, desc="Processing PDF file...")

        tmp_file_path = None
        try:
            # delete=False: the file must outlive this ``with`` so the
            # extraction helpers can reopen it by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                tmp_file_path = tmp_file.name
                if hasattr(pdf_file, 'read'):
                    tmp_file.write(pdf_file.read())
                else:
                    # Accept both an object exposing ``.name`` and a bare
                    # path string.
                    source_path = getattr(pdf_file, 'name', pdf_file)
                    with open(source_path, 'rb') as f:
                        tmp_file.write(f.read())

            progress(0.3, desc="Extracting images...")

            if analysis_method == "Extract embedded images only":
                images_to_analyze = extract_images_from_pdf(tmp_file_path)
                if not images_to_analyze:
                    return "⚠️ No embedded images found in the PDF. Try 'Full page analysis' to analyze the entire content."

            elif analysis_method == "Full page analysis":
                images_to_analyze = convert_pdf_pages_to_images(tmp_file_path)

            else:  # Both methods: embedded images first, then page renders
                embedded_images = extract_images_from_pdf(tmp_file_path)
                page_images = convert_pdf_pages_to_images(tmp_file_path)
                images_to_analyze = embedded_images + page_images
        finally:
            # Remove the temp copy on every exit path (early return or
            # error), and before the slow API calls begin.
            if tmp_file_path is not None:
                os.unlink(tmp_file_path)

        if not images_to_analyze:
            return "⚠️ No visual content found in the PDF document."

        progress(0.7, desc="Analyzing images with AI...")

        analysis_result = analyze_images_with_groq(api_key, images_to_analyze)

        progress(1.0, desc="Analysis complete!")

        return analysis_result

    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing.
        return f"❌ Error processing PDF: {e}"
320
+
321
def clear_all_text():
    """Return reset values for the text tab: four empty strings and a ``None`` in second position."""
    blank = ""
    # Order matters: the second slot is None, the remaining four are empty strings.
    return (blank, None, blank, blank, blank)
324
 
325
def clear_all_image():
    """Return reset values for the image tab, restoring the "Full page analysis" method."""
    blank = ""
    default_method = "Full page analysis"
    # Order matters: empty string, None, the default method, empty string.
    return (blank, None, default_method, blank)
328
+
329
  # Custom CSS for better styling
330
  css = """
331
  .gradio-container {
332
+ max-width: 1400px !important;
333
  margin: auto !important;
334
  }
335
  .main-header {
 
349
  border-left: 4px solid #007bff;
350
  margin: 1rem 0;
351
  }
352
+ .feature-box {
353
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
354
+ padding: 1.5rem;
355
+ border-radius: 1rem;
356
+ color: white;
357
+ margin: 1rem 0;
358
+ }
359
  """
360
 
361
  # Create Gradio interface
362
+ with gr.Blocks(css=css, title="Advanced PDF Analyzer with Groq AI") as demo:
363
  # Header
364
  gr.HTML("""
365
  <div class="main-header">
366
+ <h1>πŸš€ Advanced PDF Analyzer with Groq AI</h1>
367
+ <p>Comprehensive PDF analysis tool - Extract text summaries and analyze images/diagrams using state-of-the-art AI models</p>
368
+ </div>
369
+ """)
370
+
371
+ # Feature overview
372
+ gr.HTML("""
373
+ <div class="feature-box">
374
+ <h3>✨ What this tool can do:</h3>
375
+ <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin-top: 1rem;">
376
+ <div>
377
+ <h4>πŸ“ Text Analysis:</h4>
378
+ <ul>
379
+ <li>Extract and summarize text content</li>
380
+ <li>Generate comprehensive document summaries</li>
381
+ <li>Maintain logical structure and key insights</li>
382
+ </ul>
383
+ </div>
384
+ <div>
385
+ <h4>πŸ–ΌοΈ Visual Analysis:</h4>
386
+ <ul>
387
+ <li>Analyze embedded images and diagrams</li>
388
+ <li>Process flowcharts and technical drawings</li>
389
+ <li>Extract text from images (OCR)</li>
390
+ </ul>
391
+ </div>
392
+ </div>
393
  </div>
394
  """)
395
 
396
+ # API Key section
397
  gr.HTML("""
398
  <div class="info-box">
399
  <strong>πŸ”‘ How to get your Groq API Key:</strong><br>
 
405
  </div>
406
  """)
407
 
408
+ # Global API key input
409
+ api_key_input = gr.Textbox(
410
+ label="πŸ”‘ Groq API Key",
411
+ placeholder="Enter your Groq API key here (used for both text and image analysis)...",
412
+ type="password",
413
+ info="Your API key is not stored and only used for this session"
414
+ )
415
+
416
+ # Tabs for different functionalities
417
+ with gr.Tabs():
418
+ # Text Summary Tab
419
+ with gr.TabItem("πŸ“ Text Summary", elem_id="text-tab"):
 
 
 
 
 
420
  with gr.Row():
421
+ with gr.Column(scale=1):
422
+ gr.Markdown("## πŸ“„ Text Analysis")
423
+
424
+ pdf_file_text = gr.File(
425
+ label="Upload PDF Document",
426
+ file_types=[".pdf"]
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  )
428
+
429
+ with gr.Row():
430
+ summarize_btn = gr.Button("πŸ“‹ Generate Text Summary", variant="primary", size="lg")
431
+ clear_text_btn = gr.Button("πŸ—‘οΈ Clear All", variant="secondary")
432
 
433
+ with gr.Column(scale=2):
434
+ gr.Markdown("## πŸ“Š Text Analysis Results")
435
+ status_text_output = gr.Textbox(
436
+ label="Status",
437
  interactive=False,
438
+ show_label=True
439
+ )
440
+
441
+ with gr.Tabs():
442
+ with gr.TabItem("πŸ“ Summary"):
443
+ summary_output = gr.Textbox(
444
+ label="AI Generated Summary",
445
+ lines=15,
446
+ interactive=False,
447
+ placeholder="Your PDF summary will appear here...",
448
+ show_copy_button=True
449
+ )
450
+
451
+ with gr.TabItem("πŸ“„ Extracted Text Preview"):
452
+ text_preview_output = gr.Textbox(
453
+ label="Extracted Text (First 500 characters)",
454
+ lines=10,
455
+ interactive=False,
456
+ placeholder="Preview of extracted text will appear here...",
457
+ show_copy_button=True
458
+ )
459
+
460
+ # Image Analysis Tab
461
+ with gr.TabItem("πŸ–ΌοΈ Image Analysis", elem_id="image-tab"):
462
+ with gr.Row():
463
+ with gr.Column(scale=1):
464
+ gr.Markdown("## πŸ” Visual Analysis")
465
+
466
+ pdf_file_image = gr.File(
467
+ label="Upload PDF Document",
468
+ file_types=[".pdf"]
469
+ )
470
+
471
+ analysis_method = gr.Radio(
472
+ choices=[
473
+ "Extract embedded images only",
474
+ "Full page analysis",
475
+ "Both (embedded + full pages)"
476
+ ],
477
+ value="Full page analysis",
478
+ label="Analysis Method",
479
+ info="Choose how to analyze the PDF content"
480
+ )
481
+
482
+ with gr.Row():
483
+ analyze_images_btn = gr.Button("πŸ” Analyze Images", variant="primary", size="lg")
484
+ clear_image_btn = gr.Button("πŸ—‘οΈ Clear All", variant="secondary")
485
+
486
+ with gr.Column(scale=2):
487
+ gr.Markdown("## πŸ“Š Image Analysis Results")
488
+ image_analysis_output = gr.Textbox(
489
+ label="Visual Analysis Results",
490
+ lines=20,
491
+ max_lines=50,
492
+ show_copy_button=True,
493
+ placeholder="Image analysis results will appear here..."
494
  )
495
 
496
  # Usage instructions
497
  gr.HTML("""
498
  <div class="info-box">
499
  <strong>πŸ“‹ Usage Instructions:</strong><br>
500
+ <h4>For Text Summary:</h4>
501
+ 1. Enter your Groq API key above<br>
502
+ 2. Go to "Text Summary" tab<br>
503
+ 3. Upload a PDF document<br>
504
+ 4. Click "Generate Text Summary"<br>
505
+
506
+ <h4>For Image Analysis:</h4>
507
+ 1. Enter your Groq API key above<br>
508
+ 2. Go to "Image Analysis" tab<br>
509
+ 3. Upload a PDF document<br>
510
+ 4. Choose analysis method<br>
511
+ 5. Click "Analyze Images"<br>
512
+
513
+ <h4>Analysis Methods:</h4>
514
+ β€’ <strong>Extract embedded images:</strong> Analyzes only images embedded within the PDF<br>
515
+ β€’ <strong>Full page analysis:</strong> Converts each page to image for comprehensive analysis (recommended)<br>
516
+ β€’ <strong>Both:</strong> Combines both methods for maximum coverage
517
  </div>
518
  """)
519
 
 
521
  gr.HTML("""
522
  <div style="margin-top: 2rem; padding: 1rem; background-color: #e9ecef; border-radius: 0.5rem;">
523
  <strong>πŸ€– Model Information:</strong><br>
524
+ β€’ <strong>Text Analysis:</strong> Llama-3.3-70B-Versatile (optimized for summarization)<br>
525
+ β€’ <strong>Image Analysis:</strong> Llama-3.3-70B-Versatile (with vision capabilities)<br>
526
+ β€’ <strong>Temperature:</strong> 0.2-0.3 (focused, factual analysis)<br>
527
+ β€’ <strong>Max Tokens:</strong> 2048 (comprehensive outputs)
528
  </div>
529
  """)
530
 
531
+ # Event handlers for text summary
532
  summarize_btn.click(
533
+ fn=process_pdf_text_summary,
534
+ inputs=[api_key_input, pdf_file_text],
535
+ outputs=[status_text_output, text_preview_output, summary_output],
536
+ show_progress=True
537
+ )
538
+
539
+ clear_text_btn.click(
540
+ fn=clear_all_text,
541
+ outputs=[api_key_input, pdf_file_text, status_text_output, text_preview_output, summary_output]
542
+ )
543
+
544
+ # Event handlers for image analysis
545
+ analyze_images_btn.click(
546
+ fn=process_pdf_image_analysis,
547
+ inputs=[api_key_input, pdf_file_image, analysis_method],
548
+ outputs=[image_analysis_output],
549
  show_progress=True
550
  )
551
 
552
+ clear_image_btn.click(
553
+ fn=clear_all_image,
554
+ outputs=[api_key_input, pdf_file_image, analysis_method, image_analysis_output]
555
  )
556
 
557
  # Launch the app
558
  if __name__ == "__main__":
559
+ print("πŸš€ Starting Advanced PDF Analyzer App...")
560
  print("πŸ“‹ Make sure you have the required packages installed:")
561
+ print(" pip install -r requirements.txt")
562
  print("\nπŸ”‘ Don't forget to get your Groq API key from: https://console.groq.com/")
563
 
564
  demo.launch(