merterbak committed on
Commit
842fe9d
·
verified ·
1 Parent(s): 8858f7f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -0
app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ from io import BytesIO
4
+ import gradio as gr
5
+ from mistralai import Mistral
6
+ from PIL import Image
7
+ from pathlib import Path
8
+
9
# Credentials: the API key comes from the MISTRAL environment variable
# (None when the variable is unset) and is handed to the Mistral client.
api_key = os.getenv("MISTRAL")
client = Mistral(api_key=api_key)

# Config: lowercase file extensions (with leading dot) accepted per input kind.
VALID_DOCUMENT_EXTENSIONS = {".pdf"}
VALID_IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png"}
15
+
16
def upload_pdf(content, filename):
    """Upload a file to Mistral for OCR and return a signed URL for it.

    Args:
        content: File content forwarded to ``client.files.upload``.
        filename: Name under which the upload is registered.

    Returns:
        The ``url`` attribute of the signed-URL response for the upload.
    """
    file_payload = {"file_name": filename, "content": content}
    uploaded = client.files.upload(file=file_payload, purpose="ocr")
    signed = client.files.get_signed_url(file_id=uploaded.id)
    return signed.url
23
def process_ocr(document_source):
    """Send a document descriptor to the Mistral OCR endpoint.

    Args:
        document_source: Descriptor forwarded as the ``document`` argument
            of ``client.ocr.process``.

    Returns:
        The response of ``client.ocr.process``, called with the
        ``mistral-ocr-latest`` model and ``include_image_base64=True``.
    """
    ocr_options = {
        "model": "mistral-ocr-latest",
        "document": document_source,
        "include_image_base64": True,
    }
    return client.ocr.process(**ocr_options)
29
def do_ocr(input_type, url=None, file=None):
    """Run OCR on a URL or an uploaded file and prepare the results for display.

    Args:
        input_type: ``"URL"`` or ``"Upload file"``; selects which of the other
            two arguments is used.
        url: Document or image URL, used when ``input_type`` is ``"URL"``.
        file: Uploaded file object, used when ``input_type`` is
            ``"Upload file"``. Only its ``name`` attribute (a filesystem path)
            is read.

    Returns:
        A ``(extracted_text, rendered_markdown, images)`` tuple:
        ``extracted_text`` is the per-page markdown joined by blank lines,
        ``rendered_markdown`` is the same text with image references replaced
        by inline PNG data URLs, and ``images`` is a list of PIL images.
        On invalid input the tuple is ``(error_message, "", [])``.
    """
    if input_type == "URL":
        cleaned_url = (url or "").strip()
        if not cleaned_url:
            return "Please provide a valid URL.", "", []
        # Classify on the stripped URL without its query string or fragment,
        # so "…/scan.png?token=abc" or a trailing space still counts as an image.
        url_path = cleaned_url.split("#", 1)[0].split("?", 1)[0].lower()
        if url_path.endswith(tuple(VALID_IMAGE_EXTENSIONS)):
            document_source = {"type": "image_url", "image_url": cleaned_url}
        else:
            document_source = {"type": "document_url", "document_url": cleaned_url}

    elif input_type == "Upload file":
        if not file:
            return "Please upload a file.", "", []
        file_path = file.name
        # Lowercase only the extension used for matching; the name sent to the
        # API keeps its original case.
        file_extension = os.path.splitext(file_path)[1].lower()
        if file_extension in VALID_DOCUMENT_EXTENSIONS:
            with open(file_path, "rb") as f:
                content = f.read()
            signed_url = upload_pdf(content, os.path.basename(file_path))
            document_source = {"type": "document_url", "document_url": signed_url}
        elif file_extension in VALID_IMAGE_EXTENSIONS:
            # Normalise every uploaded image to PNG and send it inline as a
            # data URL; the context manager releases the file handle.
            buffered = BytesIO()
            with Image.open(file_path) as img:
                img.save(buffered, format="PNG")
            img_str = base64.b64encode(buffered.getvalue()).decode()
            document_source = {"type": "image_url", "image_url": f"data:image/png;base64,{img_str}"}
        else:
            # Sorted so the message is stable across runs (set order is not).
            supported = ", ".join(sorted(VALID_DOCUMENT_EXTENSIONS | VALID_IMAGE_EXTENSIONS))
            return f"Error: Unsupported file type. Supported types: {supported}", "", []

    else:
        return "Invalid input type ", "", []

    ocr_response = process_ocr(document_source)
    markdown_text = "\n\n".join(page.markdown for page in ocr_response.pages)
    extracted_text = markdown_text
    rendered_markdown = markdown_text
    images = []

    for page in ocr_response.pages:
        for img in page.images:
            if not img.image_base64:
                rendered_markdown += f"\n\n[Image Warning: No base64 data for {img.id}]"
                continue
            # Drop a leading "data:<mime>;base64," prefix when one is present.
            base64_str = img.image_base64.split(",", 1)[-1]
            img_pil = Image.open(BytesIO(base64.b64decode(base64_str)))
            images.append(img_pil)
            # Re-encode as PNG so the inline data URL always has a known MIME type.
            img_buffer = BytesIO()
            img_pil.save(img_buffer, format="PNG")
            img_base64 = base64.b64encode(img_buffer.getvalue()).decode()
            data_url = f"data:image/png;base64,{img_base64}"
            # The OCR markdown references each image by its id as both alt text
            # and target; point the target at the inline data instead.
            rendered_markdown = rendered_markdown.replace(
                f"![{img.id}]({img.id})", f"![{img.id}]({data_url})"
            )

    return extracted_text.strip(), rendered_markdown.strip(), images
89
+
90
# Extra CSS passed to gr.Blocks(css=...). The first rule originally read
# "body {font-family: body {font-family: ...}", a duplicated selector that left
# the braces unbalanced; it is now a single well-formed rule.
custom_css = """
body {font-family: 'Helvetica Neue', Helvetica;}
.gr-button {background-color: #4CAF50; color: white; border: none; padding: 10px 20px; border-radius: 5px;}
.gr-button:hover {background-color: #45a049;}
.gr-textbox {margin-bottom: 15px;}
.example-button {background-color: #1E90FF; color: white; border: none; padding: 8px 15px; border-radius: 5px; margin: 5px;}
.example-button:hover {background-color: #FF4500;}
.tall-radio .gr-radio-item {padding: 15px 0; min-height: 50px; display: flex; align-items: center;}
.tall-radio label {font-size: 16px;}
"""
100
# Build the Gradio interface: inputs and example buttons in the left column,
# OCR results in the right column, then wire up the event handlers.
# NOTE(review): nesting below is reconstructed from a paste that lost its
# indentation — confirm against the deployed layout.
with gr.Blocks(
    title="Mistral OCR Demo",
    css=custom_css,
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown("<h1 style='text-align: center; color: #333;'>Mistral OCR Demo</h1>")
    gr.Markdown("<p style='text-align: center; color: #666;'>Extract text and images from PDFs or images using Mistral's latest OCR model. You can also see markdown live.</p>")

    with gr.Row():
        # Left column: input selection, submit button and example shortcuts.
        with gr.Column(scale=1):
            input_type = gr.Radio(
                choices=["URL", "Upload file"],
                label="Input Type",
                value="URL",
                elem_classes="tall-radio",
            )
            url_input = gr.Textbox(
                label="Document or Image URL",
                placeholder="e.g., https://arxiv.org/pdf/2501.12948",
                visible=True,
                lines=1,
            )
            # Hidden initially because the default input type is "URL".
            file_input = gr.File(
                label="Upload PDF or Image",
                file_types=[".pdf", ".jpg", ".jpeg", ".png"],
                visible=False,
            )
            submit_btn = gr.Button("Extract Text and Images")
            gr.Markdown("### Try These Examples")
            pdf_example = gr.Button("PDF", elem_classes="example-button")
            img_example = gr.Button("Image", elem_classes="example-button")

        # Right column: the three outputs returned by do_ocr.
        with gr.Column(scale=2):
            cleaned_output = gr.Textbox(label="Extracted Plain Text", lines=10, show_copy_button=True)
            markdown_output = gr.Markdown(label="Rendered Markdown Text")
            image_output = gr.Gallery(label="OCR Extracted Images", columns=2, height="auto")

    def update_visibility(choice):
        """Show only the input widget that matches the selected input type."""
        show_url = choice == "URL"
        show_file = choice == "Upload file"
        return gr.update(visible=show_url), gr.update(visible=show_file)

    input_type.change(fn=update_visibility, inputs=input_type, outputs=[url_input, file_input])

    def set_url_and_type(url):
        """Return the values for the URL textbox and the input-type radio."""
        return url, "URL"

    # Example buttons fill in a sample URL and select the "URL" input type.
    pdf_example.click(
        fn=lambda: set_url_and_type("https://arxiv.org/pdf/2501.12948"),
        outputs=[url_input, input_type],
    )
    img_example.click(
        fn=lambda: set_url_and_type("https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/unit0/recommended-pace.jpg"),
        outputs=[url_input, input_type],
    )

    submit_btn.click(
        fn=do_ocr,
        inputs=[input_type, url_input, file_input],
        outputs=[cleaned_output, markdown_output, image_output],
    )

if __name__ == "__main__":
    demo.launch()