Spaces:
Running
on
Zero
Running
on
Zero
app.py
CHANGED
@@ -10,9 +10,20 @@ from marker.pdf.extract_text import get_length_of_text
|
|
10 |
from marker.models import load_all_models
|
11 |
from marker.settings import settings
|
12 |
from marker.logger import configure_logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
model_name = "maxidl/arena-test"
|
15 |
|
|
|
16 |
model = AutoModelForCausalLM.from_pretrained(
|
17 |
model_name,
|
18 |
torch_dtype=torch.bfloat16,
|
@@ -25,8 +36,33 @@ title = "# Placeholder Title"
|
|
25 |
steps = """Placeholder Description"""
|
26 |
# steps = """1. Converts uploaded pdf file to markdown. You can edit the intermediate markdown output.\n2. Generates a review for the paper"""
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
def process_file(file):
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
|
32 |
@spaces.GPU(duration=60)
|
|
|
10 |
from marker.models import load_all_models
|
11 |
from marker.settings import settings
|
12 |
from marker.logger import configure_logging
|
13 |
+
import traceback
|
14 |
+
|
15 |
+
|
16 |
+
# Limits applied to every uploaded document.
MAX_PAGES = 20    # passed to convert_single_pdf as max_pages
MIN_LENGTH = 200  # threshold used by process_file for both pre- and post-conversion text length

configure_logging()

# Adjust marker's settings before the models are loaded.
settings.EXTRACT_IMAGES = False
settings.DEBUG = False

# Loaded once at import time and reused by every conversion call.
model_refs = load_all_models()

# Shared metadata mapping handed to convert_single_pdf on each call.
metadata = {}
|
24 |
|
|
|
25 |
|
26 |
+
model_name = "maxidl/arena-test"
|
27 |
model = AutoModelForCausalLM.from_pretrained(
|
28 |
model_name,
|
29 |
torch_dtype=torch.bfloat16,
|
|
|
36 |
steps = """Placeholder Description"""
|
37 |
# steps = """1. Converts uploaded pdf file to markdown. You can edit the intermediate markdown output.\n2. Generates a review for the paper"""
|
38 |
|
39 |
+
@spaces.GPU(duration=60)
def convert_file(filepath):
    """Run marker's ``convert_single_pdf`` on *filepath* and return its text.

    The call uses the module-level ``model_refs`` and ``metadata`` and is
    capped at ``MAX_PAGES`` pages. Only the first element of the returned
    triple (the full text) is used; it is returned with surrounding
    whitespace stripped.
    """
    # The second and third results are not needed here.
    converted_text, _images, _out_metadata = convert_single_pdf(
        filepath,
        model_refs,
        metadata=metadata,
        max_pages=MAX_PAGES,
    )
    return converted_text.strip()
|
45 |
+
|
46 |
def process_file(file):
    """Validate an uploaded file and convert it to text.

    Args:
        file: Uploaded file object whose ``name`` attribute is the path on
            disk (presumably a Gradio upload -- confirm against the caller).
            ``None`` is treated as a failed upload.

    Returns:
        The converted text from ``convert_file``, or the literal string
        ``"Error processing pdf"`` when the file is missing, has an
        unsupported type, contains too little text, or conversion raises.
    """
    if file is None:
        # Nothing to read a path from; report it like any other failure
        # instead of raising AttributeError on ``file.name``.
        print("Error converting upload: no file provided")
        return "Error processing pdf"

    print(file.name)
    filepath = file.name
    try:
        if MIN_LENGTH:
            # Cheap checks first, so files that are not convertible or
            # carry too little embedded text never reach the conversion.
            filetype = find_filetype(filepath)
            if filetype == "other":
                raise ValueError("unsupported file type")

            length = get_length_of_text(filepath)
            if length < MIN_LENGTH:
                raise ValueError(
                    f"embedded text too short ({length} < {MIN_LENGTH})"
                )

        paper_text = convert_file(filepath)
        if len(paper_text) <= MIN_LENGTH:
            raise ValueError(
                f"converted text too short ({len(paper_text)} <= {MIN_LENGTH})"
            )
    except Exception as e:
        # Top-level boundary for the UI: log the full traceback and return
        # a fixed message rather than propagating the exception.
        print(traceback.format_exc())
        print(f"Error converting {filepath}: {e}")
        return "Error processing pdf"
    return paper_text
|
66 |
|
67 |
|
68 |
@spaces.GPU(duration=60)
|