File size: 3,858 Bytes
7da5dc8
f5e862d
 
 
 
c7d8cb8
 
 
 
 
 
 
3879030
f5e862d
 
3879030
 
 
 
 
 
 
 
 
 
 
f5e862d
 
 
 
 
 
 
7da5dc8
abced76
 
 
7da5dc8
3879030
 
 
 
 
 
 
a23b73c
3879030
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a23b73c
 
abced76
4eb0203
abced76
 
 
 
137c997
abced76
4d564f8
 
abced76
 
 
137c997
abced76
 
 
137c997
abced76
 
 
 
 
7da5dc8
 
 
4eb0203
 
 
 
 
 
 
 
 
 
7da5dc8
4eb0203
 
 
 
7da5dc8
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

from marker.convert import convert_single_pdf
from marker.output import markdown_exists, save_markdown, get_markdown_filepath
from marker.pdf.utils import find_filetype
from marker.pdf.extract_text import get_length_of_text
from marker.models import load_all_models
from marker.settings import settings
from marker.logger import configure_logging
import traceback


# Module-level setup: everything below runs once, at import time.
configure_logging()
# Page cap forwarded to convert_single_pdf as ``max_pages`` (see convert_file).
MAX_PAGES = 20
# Threshold used twice in process_file: against get_length_of_text(filepath)
# before conversion, and against the length of the converted text afterwards.
MIN_LENGTH=200
# marker settings are adjusted before the models are loaded.
settings.EXTRACT_IMAGES = False
settings.DEBUG = False

# Loaded once and passed to every convert_single_pdf call.
model_refs = load_all_models()
# Forwarded unchanged to convert_single_pdf as its ``metadata`` argument.
metadata = {}


# Causal LM checkpoint (and matching tokenizer) used by generate().
model_name = "maxidl/arena-test"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)


# Placeholder UI copy. NOTE: both names are rebound to gr.Markdown components
# inside the ``with gr.Blocks()`` context further down.
title = "# Placeholder Title"
steps = """Placeholder Description"""
# steps = """1. Converts uploaded pdf file to markdown. You can edit the intermediate markdown output.\n2. Generates a review for the paper"""

@spaces.GPU(duration=60)
def convert_file(filepath):
    """Convert the PDF at ``filepath`` and return its text, stripped.

    Calls ``convert_single_pdf`` with the module-level ``model_refs`` and
    ``metadata`` and with ``max_pages=MAX_PAGES``. The call yields three
    values; only the first (the full text) is returned, with leading and
    trailing whitespace removed.
    """
    conversion = convert_single_pdf(
        filepath,
        model_refs,
        metadata=metadata,
        max_pages=MAX_PAGES,
    )
    # The second and third results are not needed by the UI.
    markdown_text, _images, _out_metadata = conversion
    return markdown_text.strip()

def process_file(file):
    """Validate an uploaded PDF and convert it to text for the UI.

    Args:
        file: The value delivered by the ``gr.File`` input. Either an object
            exposing the on-disk path as ``.name`` or a plain path string.
            ``None`` (no file) is reported as an error instead of crashing.

    Returns:
        The converted text on success, or the literal string
        ``"Error processing pdf"`` when no file was given, the file type is
        not recognised, the text is too short, or conversion raises.
    """
    if file is None:
        print("Error converting upload: no file provided")
        return "Error processing pdf"

    # Accept both file-like wrappers (path in ``.name``) and bare path strings.
    filepath = getattr(file, "name", file)
    print(filepath)
    try:
        if MIN_LENGTH:
            # Pre-checks run before the conversion call so unusable files are
            # rejected early.
            filetype = find_filetype(filepath)
            if filetype == "other":
                raise ValueError("unsupported file type")

            length = get_length_of_text(filepath)
            if length < MIN_LENGTH:
                raise ValueError(
                    f"text length {length} is below the minimum of {MIN_LENGTH}"
                )
        paper_text = convert_file(filepath)
        if len(paper_text) <= MIN_LENGTH:
            raise ValueError(
                f"converted text length {len(paper_text)} does not exceed "
                f"the minimum of {MIN_LENGTH}"
            )
    except Exception as e:
        # UI boundary: log the full traceback and the reason, but hand the
        # caller a fixed message rather than propagating the exception.
        print(traceback.format_exc())
        print(f"Error converting {filepath}: {e}")
        return "Error processing pdf"
    return paper_text


@spaces.GPU(duration=60)
def generate(paper_text):
    """Generate a model response for ``paper_text`` and return it as a string.

    The text is sent as the user turn of a two-message chat (system + user),
    rendered with the tokenizer's chat template, and passed to
    ``model.generate`` with ``max_new_tokens=256``. Only the newly generated
    tokens are decoded; special tokens are skipped.
    """
    chat = [
        {"role": "system", "content": "You are a pirate."},
        {"role": "user", "content": paper_text},
    ]
    prompt_ids = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors='pt',
    )
    prompt_ids = prompt_ids.to(model.device)

    output_ids = model.generate(input_ids=prompt_ids, max_new_tokens=256)

    # Each output row starts with its prompt; slice that prefix off so only
    # the continuation is decoded.
    completions = []
    for prompt_row, output_row in zip(prompt_ids, output_ids):
        completions.append(output_row[len(prompt_row):])

    decoded = tokenizer.batch_decode(completions, skip_special_tokens=True)
    return decoded[0]



# UI definition. Components are created in the order they appear below.
with gr.Blocks() as demo:
    # NOTE: these two assignments rebind the module-level ``title`` and
    # ``steps`` strings to the Markdown components built from them.
    title = gr.Markdown(title)
    steps = gr.Markdown(steps)
    instr = gr.Markdown("## Upload your paper as a pdf file")
    file_input = gr.File(file_types=[".pdf"], file_count="single")
    # Receives the output of process_file and is the input to generate.
    markdown_field = gr.Textbox(label="Markdown", max_lines=20, autoscroll=False)
    # Disabled idea: start with the button non-interactive until text exists.
    # generate_button = gr.Button("Generate Review", interactive=not markdown_field)
    generate_button = gr.Button("Generate Review")
    # On upload, run process_file on the file and write the result to the textbox.
    file_input.upload(process_file, file_input, markdown_field)
    # Disabled idea: only enable the button once the text exceeds 1000 characters.
    # markdown_field.change(lambda text: gr.update(interactive=True) if len(text) > 1000 else gr.update(interactive=False), markdown_field, generate_button)

    review_field = gr.Markdown(label="Review")
    # Click chain: disable the button, run generate on the textbox contents and
    # write the result to review_field, then re-enable the button.
    generate_button.click(fn=lambda: gr.update(interactive=False), inputs=None, outputs=generate_button).then(generate, markdown_field, review_field).then(fn=lambda: gr.update(interactive=True), inputs=None, outputs=generate_button)
    # Title is assigned on the Blocks object after construction.
    demo.title = "Paper Review Generator"



# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()