sachin committed on
Commit
6ddaa39
·
1 Parent(s): 86c296c
Files changed (1) hide show
  1. app.py +23 -9
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
  import requests
3
  from PyPDF2 import PdfReader
4
- import io
5
  import os
6
 
7
  # Function to validate PDF file
@@ -20,11 +19,11 @@ def is_valid_pdf(file_path):
20
  return False, f"Invalid PDF: {str(e)}"
21
 
22
  # Function to send the POST request to the API
23
- def extract_text_from_pdf(pdf_file, page_number):
24
  if not pdf_file:
25
  return "Error: No file uploaded. Please upload a PDF file."
26
 
27
- # Validate the PDF using the file path
28
  valid, message = is_valid_pdf(pdf_file)
29
  if not valid:
30
  return f"Error: {message}. Please upload a valid PDF file or repair the current one."
@@ -39,9 +38,9 @@ def extract_text_from_pdf(pdf_file, page_number):
39
  }
40
  data = {
41
  "page_number": str(page_number),
42
- "src_lang": "eng_Latn",
43
- "tgt_lang": "eng_Latn",
44
- "prompt": "describe the image"
45
  }
46
 
47
  # Headers
@@ -68,14 +67,29 @@ with gr.Blocks(title="PDF Content Description") as demo:
68
  gr.Markdown("# PDF Content Description Extractor")
69
  gr.Markdown(
70
  """
71
- Upload a PDF file (e.g., Dhwani-AI-Pitch-Europe.pdf) and specify a page number to extract a description of its content.
72
- The API will analyze the page and return a textual description, such as details about images, text, or layout.
73
  """
74
  )
75
 
76
  # Input components
77
  pdf_input = gr.File(label="Upload PDF File", file_types=[".pdf"], type="filepath")
78
  page_number_input = gr.Number(label="Page Number", value=1, precision=0, minimum=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  # Submit button
81
  submit_button = gr.Button("Extract Description")
@@ -90,7 +104,7 @@ with gr.Blocks(title="PDF Content Description") as demo:
90
  # Connect the button to the function
91
  submit_button.click(
92
  fn=extract_text_from_pdf,
93
- inputs=[pdf_input, page_number_input],
94
  outputs=output_text
95
  )
96
 
 
1
  import gradio as gr
2
  import requests
3
  from PyPDF2 import PdfReader
 
4
  import os
5
 
6
  # Function to validate PDF file
 
19
  return False, f"Invalid PDF: {str(e)}"
20
 
21
  # Function to send the POST request to the API
22
+ def extract_text_from_pdf(pdf_file, page_number, src_lang, tgt_lang, prompt):
23
  if not pdf_file:
24
  return "Error: No file uploaded. Please upload a PDF file."
25
 
26
+ # Validate the PDF
27
  valid, message = is_valid_pdf(pdf_file)
28
  if not valid:
29
  return f"Error: {message}. Please upload a valid PDF file or repair the current one."
 
38
  }
39
  data = {
40
  "page_number": str(page_number),
41
+ "src_lang": src_lang,
42
+ "tgt_lang": tgt_lang,
43
+ "prompt": prompt
44
  }
45
 
46
  # Headers
 
67
  gr.Markdown("# PDF Content Description Extractor")
68
  gr.Markdown(
69
  """
70
+ Upload a PDF file (e.g., Dhwani-AI-Pitch-Europe.pdf) and specify parameters to extract a description of its content.
71
+ The API will analyze the page and return a textual description based on the provided prompt and languages.
72
  """
73
  )
74
 
75
  # Input components
76
  pdf_input = gr.File(label="Upload PDF File", file_types=[".pdf"], type="filepath")
77
  page_number_input = gr.Number(label="Page Number", value=1, precision=0, minimum=1)
78
+ src_lang_input = gr.Textbox(
79
+ label="Source Language",
80
+ value="eng_Latn",
81
+ placeholder="Enter source language (e.g., eng_Latn)"
82
+ )
83
+ tgt_lang_input = gr.Textbox(
84
+ label="Target Language",
85
+ value="eng_Latn",
86
+ placeholder="Enter target language (e.g., eng_Latn)"
87
+ )
88
+ prompt_input = gr.Textbox(
89
+ label="Prompt",
90
+ value="describe the image",
91
+ placeholder="Enter prompt (e.g., describe the image)"
92
+ )
93
 
94
  # Submit button
95
  submit_button = gr.Button("Extract Description")
 
104
  # Connect the button to the function
105
  submit_button.click(
106
  fn=extract_text_from_pdf,
107
+ inputs=[pdf_input, page_number_input, src_lang_input, tgt_lang_input, prompt_input],
108
  outputs=output_text
109
  )
110