Daniil Larionov committed on
Commit
4d86d72
·
1 Parent(s): 48cd443
Files changed (3) hide show
  1. .gitignore +10 -0
  2. app.py +21 -0
  3. requirements.txt +60 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore Python cache files
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+
7
+ # Ignore flagged folder
8
+ flagged/
9
+
10
+ # Add other files or directories you want to ignore below
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import pymupdf4llm as pdf

# Single-page UI: a file upload, a button, and a code pane that shows the
# Markdown produced from the uploaded document.
with gr.Blocks(theme="soft", analytics_enabled=True, title="PDF to Text") as demo:
    gr.Markdown("""
    # PDF to Text
    Convert your PDF files to text with ease.
    """)

    pdf_file = gr.File(label="Upload your PDF file")
    text_output = gr.Code(label="Extracted text", language="markdown")

    button = gr.Button("Extract text")

    def extract_text(file):
        """Convert the uploaded document to Markdown text.

        Args:
            file: The value supplied by the ``pdf_file`` input. It is
                ``None`` when the button is clicked before anything has
                been uploaded. Otherwise it is presumably a filesystem
                path (gr.File's default) -- confirm against the Gradio
                version in use.

        Returns:
            The result of ``pymupdf4llm.to_markdown`` for the upload.

        Raises:
            gr.Error: If no file has been uploaded.
        """
        # Guard against an empty input so the user sees a clear message
        # instead of a failure from inside the converter.
        if file is None:
            raise gr.Error("Please upload a PDF file first.")
        return pdf.to_markdown(file)

    button.click(extract_text, inputs=pdf_file, outputs=text_output)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ annotated-types==0.7.0
3
+ anyio==4.6.0
4
+ certifi==2024.8.30
5
+ charset-normalizer==3.3.2
6
+ click==8.1.7
7
+ contourpy==1.3.0
8
+ cycler==0.12.1
9
+ fastapi==0.115.0
10
+ ffmpy==0.4.0
11
+ filelock==3.16.1
12
+ fonttools==4.54.1
13
+ fsspec==2024.9.0
14
+ gradio==4.44.1
15
+ gradio_client==1.3.0
16
+ h11==0.14.0
17
+ httpcore==1.0.6
18
+ httpx==0.27.2
19
+ huggingface-hub==0.25.1
20
+ idna==3.10
21
+ importlib_resources==6.4.5
22
+ Jinja2==3.1.4
23
+ kiwisolver==1.4.7
24
+ markdown-it-py==3.0.0
25
+ MarkupSafe==2.1.5
26
+ matplotlib==3.9.2
27
+ mdurl==0.1.2
28
+ numpy==2.1.1
29
+ orjson==3.10.7
30
+ packaging==24.1
31
+ pandas==2.2.3
32
+ pillow==10.4.0
33
+ pydantic==2.9.2
34
+ pydantic_core==2.23.4
35
+ pydub==0.25.1
36
+ Pygments==2.18.0
37
+ PyMuPDF==1.24.10
38
+ pymupdf4llm==0.0.17
39
+ PyMuPDFb==1.24.10
40
+ pyparsing==3.1.4
41
+ python-dateutil==2.9.0.post0
42
+ python-multipart==0.0.12
43
+ pytz==2024.2
44
+ PyYAML==6.0.2
45
+ requests==2.32.3
46
+ rich==13.9.1
47
+ ruff==0.6.8
48
+ semantic-version==2.10.0
49
+ shellingham==1.5.4
50
+ six==1.16.0
51
+ sniffio==1.3.1
52
+ starlette==0.38.6
53
+ tomlkit==0.12.0
54
+ tqdm==4.66.5
55
+ typer==0.12.5
56
+ typing_extensions==4.12.2
57
+ tzdata==2024.2
58
+ urllib3==2.2.3
59
+ uvicorn==0.31.0
60
+ websockets==12.0