Spaces: Running on Zero
File size: 3,858 Bytes
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from marker.convert import convert_single_pdf
from marker.output import markdown_exists, save_markdown, get_markdown_filepath
from marker.pdf.utils import find_filetype
from marker.pdf.extract_text import get_length_of_text
from marker.models import load_all_models
from marker.settings import settings
from marker.logger import configure_logging
import traceback
configure_logging()
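# Limits and settings for the marker PDF-to-markdown conversion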
MAX_PAGES = 20
MIN_LENGTH = 200
settings.EXTRACT_IMAGES = False
settings.DEBUG = False
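# Load marker's conversion models once at startup; they are reused for every request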
model_refs = load_all_models()
metadata = {}
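# Language model used to generate the review, loaded in bfloat16 with automatic device placement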
model_name = "maxidl/arena-test"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
title = "# Placeholder Title"
steps = """Placeholder Description"""
# steps = """1. Converts uploaded pdf file to markdown. You can edit the intermediate markdown output.\n2. Generates a review for the paper"""
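# @spaces.GPU requests a ZeroGPU slot for the decorated call (here capped at 60 seconds)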
@spaces.GPU(duration=60)
def convert_file(filepath):
    full_text, images, out_metadata = convert_single_pdf(
        filepath, model_refs, metadata=metadata, max_pages=MAX_PAGES
    )
    return full_text.strip()
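# Validate the uploaded file, convert it to markdown, and return the text (or an error message)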
def process_file(file):
    print(file.name)
    filepath = file.name
    try:
        if MIN_LENGTH:
            # Skip files without enough embedded text (e.g. scanned PDFs without an OCR layer)
            filetype = find_filetype(filepath)
            if filetype == "other":
                raise ValueError(f"Unsupported file type: {filetype}")
            length = get_length_of_text(filepath)
            if length < MIN_LENGTH:
                raise ValueError(f"Not enough embedded text: {length} < {MIN_LENGTH} characters")
        paper_text = convert_file(filepath)
        if len(paper_text) <= MIN_LENGTH:
            raise ValueError("Converted markdown is too short")
    except Exception as e:
        print(traceback.format_exc())
        print(f"Error converting {filepath}: {e}")
        return "Error processing PDF"
    return paper_text
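# Build a chat prompt from the paper markdown and decode up to 256 new tokens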
@spaces.GPU(duration=60)
def generate(paper_text):
    messages = [
        {"role": "system", "content": "You are a pirate."},
        {"role": "user", "content": paper_text}
    ]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors='pt'
    ).to(model.device)
    generated_ids = model.generate(
        input_ids=input_ids,
        max_new_tokens=256
    )
    # Keep only the newly generated tokens, dropping the prompt from each sequence
    generated_ids = [
        output_ids[len(prompt_ids):] for prompt_ids, output_ids in zip(input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
    # return "Success"
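# Gradio UI: upload a PDF, inspect/edit the extracted markdown, then generate a review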
with gr.Blocks() as demo:
    title_md = gr.Markdown(title)
    steps_md = gr.Markdown(steps)
    instr = gr.Markdown("## Upload your paper as a PDF file")
    file_input = gr.File(file_types=[".pdf"], file_count="single")
    markdown_field = gr.Textbox(label="Markdown", max_lines=20, autoscroll=False)
    # generate_button = gr.Button("Generate Review", interactive=not markdown_field)
    generate_button = gr.Button("Generate Review")
    file_input.upload(process_file, file_input, markdown_field)
    # markdown_field.change(lambda text: gr.update(interactive=True) if len(text) > 1000 else gr.update(interactive=False), markdown_field, generate_button)
    review_field = gr.Markdown(label="Review")
    # Disable the button while a review is generated, then re-enable it afterwards
    generate_button.click(
        fn=lambda: gr.update(interactive=False), inputs=None, outputs=generate_button
    ).then(
        generate, markdown_field, review_field
    ).then(
        fn=lambda: gr.update(interactive=True), inputs=None, outputs=generate_button
    )
demo.title = "Paper Review Generator"
if __name__ == "__main__":
    demo.launch()