Spaces:
Running
Running
sachin
committed on
Commit
·
6ea2bcc
1
Parent(s):
9d2f56c
add-pdf-chat
Browse files
app.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import requests
|
3 |
+
|
4 |
+
# Function to send the POST request to the API
|
5 |
+
def extract_text_from_pdf(pdf_file, page_number):
    """Send one page of an uploaded PDF to the extraction API and return its reply.

    Args:
        pdf_file: Value produced by the ``gr.File`` input: an object exposing
            the upload's path as ``.name``, a plain path string, or ``None``
            when nothing has been uploaded.
        page_number: Page to process, as produced by the ``gr.Number`` input.

    Returns:
        The ``"result"`` field of the API's JSON reply (or the placeholder
        ``"No result returned from API"`` when that field is absent). On any
        failure, a human-readable string starting with ``"Error:"`` is
        returned instead of raising, so the message lands in the output box.
    """
    # API endpoint
    url = "http://209.20.158.215:7861/extract-text-eng/"

    # (connect, read) timeouts in seconds, so a stalled backend cannot hang
    # the UI worker forever. The read timeout is generous because the
    # endpoint is given a free-form prompt to answer.
    request_timeout = (10, 120)

    # The button can be clicked before any file has been uploaded.
    if pdf_file is None:
        return "Error: Please upload a PDF file."

    # A cleared number field arrives as None; normalise to an int so the API
    # never receives "None" or "1.0".
    try:
        page = int(page_number)
    except (TypeError, ValueError, OverflowError):
        return "Error: Page number must be a whole number."

    # NOTE(review): depending on the Gradio version the upload may be a
    # path-like string rather than an open binary handle -- confirm against
    # the installed release. Re-opening by path sends the real PDF bytes in
    # either case.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    try:
        pdf_handle = open(pdf_path, "rb")
    except (OSError, TypeError) as exc:
        return f"Error: Could not read the uploaded file - {exc}"

    # Prepare the payload
    data = {
        "page_number": str(page),
        "src_lang": "eng_Latn",
        "tgt_lang": "eng_Latn",
        "prompt": "describe the image",
    }

    # Headers
    headers = {"accept": "application/json"}

    with pdf_handle:
        files = {"file": (pdf_path, pdf_handle, "application/pdf")}
        try:
            # Send the POST request
            response = requests.post(
                url,
                files=files,
                data=data,
                headers=headers,
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            return f"Error: Failed to connect to the API - {str(exc)}"

    # Check if the request was successful
    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"

    try:
        payload = response.json()
    except ValueError as exc:
        # A 200 with a non-JSON body is a server-side problem, not a
        # connection failure; report it as such.
        return f"Error: API returned a non-JSON response - {exc}"

    if not isinstance(payload, dict):
        return "No result returned from API"
    return payload.get("result", "No result returned from API")
|
36 |
+
|
37 |
+
# Gradio interface
|
38 |
+
# Build the UI at import time so `demo` is available as a module attribute.
with gr.Blocks(title="PDF Text Extraction") as demo:
    gr.Markdown("# Extract Text from PDF and Describe Content")

    # Input components
    pdf_input = gr.File(label="Upload PDF File", file_types=[".pdf"])
    page_number_input = gr.Number(label="Page Number", value=1, precision=0, minimum=1)

    # Submit button
    submit_button = gr.Button("Extract and Describe")

    # Output component
    output_text = gr.Textbox(label="API Response", lines=10)

    # Connect the button to the function; this must happen inside the
    # Blocks context so the event is registered on `demo`.
    submit_button.click(
        fn=extract_text_from_pdf,
        inputs=[pdf_input, page_number_input],
        outputs=output_text,
    )

# Launch the Gradio app only when executed as a script, so importing this
# module does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()
|