File size: 6,437 Bytes
f8ccd9c
edd57c3
9634f2d
d391eaa
9634f2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edd57c3
cab36bf
 
 
 
 
de8d8bf
fd188ea
9634f2d
fd188ea
 
9634f2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455edde
 
 
 
9652e54
455edde
 
 
 
1f0d187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455edde
 
 
 
9634f2d
1c7e415
 
 
bc59001
 
 
 
d391eaa
9634f2d
d391eaa
 
 
9634f2d
 
 
1c7e415
fd188ea
9634f2d
 
 
 
 
 
 
 
 
8da6c61
9634f2d
8da6c61
 
455edde
5054699
55bc6eb
5054699
 
 
 
15fc5af
770532b
 
 
bc59001
55bc6eb
d7f18f1
55bc6eb
 
 
15f60af
 
 
5054699
 
cab36bf
de8d8bf
cab36bf
455edde
 
15fc5af
cab36bf
15fc5af
 
59fb81e
 
8da6c61
59fb81e
d9efbf0
 
f8ccd9c
fd188ea
1c7e415
f8ccd9c
8da6c61
cab36bf
fd188ea
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from gradio_client import Client
import gradio as gr
import os, uuid, json, random, time
import datetime
from huggingface_hub import hf_api, CommitScheduler, HfApi
from pathlib import Path

# Local folder holding this process's generation log. The log file name embeds
# a random UUID, so every start of the app writes to a fresh JSON-lines file.
feedback_folder = Path("output_data")
feedback_file = feedback_folder / f"output_{uuid.uuid4()}.json"

# Background uploader: periodically commits the contents of `feedback_folder`
# to the `output_data` directory of the dataset repo. `every` is the interval
# between commits (minutes, per the huggingface_hub documentation).
scheduler = CommitScheduler(
    folder_path=feedback_folder,
    path_in_repo="output_data",
    repo_id="eth-zurich-cle/deckify-dataset",
    repo_type="dataset",
    every=10,
)

# NOTE: a second CommitScheduler targeting `input_data` was sketched here and
# left disabled; uploaded inputs are instead pushed one by one with
# `api.upload_file` inside `func`.

# Hub client used for the direct input-file uploads.
api = HfApi()

def check_password(username, password):
    """Return True when *password* matches the ``ACCESS`` environment secret.

    The signature is the ``(username, password) -> bool`` shape of an auth
    callback; *username* is accepted but not inspected.

    The comparison is constant-time so the response time does not reveal how
    many leading characters were correct. If ``ACCESS`` is not configured, or
    *password* is not a string, access is denied instead of raising.
    """
    import hmac  # local import keeps this block self-contained

    expected = os.environ.get("ACCESS")
    if expected is None or not isinstance(password, str):
        return False
    # compare_digest only accepts ASCII str or bytes, so compare encoded bytes.
    return hmac.compare_digest(password.encode("utf-8"), expected.encode("utf-8"))

def _secret_matches(secret):
    """Return True when *secret* equals the ``ACCESS`` environment variable (constant-time)."""
    import hmac  # local import keeps this block self-contained

    expected = os.environ.get("ACCESS")
    if expected is None or not isinstance(secret, str):
        # Fail closed when the secret is not configured or the input is unusable.
        return False
    return hmac.compare_digest(secret.encode("utf-8"), expected.encode("utf-8"))


def _build_unique_filename(file, date_string):
    """Return ``<stem>_<date_string><suffix>`` using only the base name of *file*."""
    source = Path(str(file))
    # stem/suffix split on the LAST dot of the base name, so neither the
    # directory part of the path nor earlier dots in the name are lost.
    return f"{source.stem}_{date_string}{source.suffix}"


def _ensure_space_running(space_id, token, max_retries=20, retry_delay=10):
    """Restart *space_id* if it is not running, then poll until it is.

    Returns True as soon as the Space reports stage ``"RUNNING"``, or False if
    it still does not after *max_retries* polls spaced *retry_delay* seconds
    apart.
    """
    space_runtime = hf_api.get_space_runtime(space_id, token=token)
    print(f"Space runtime: {space_runtime}")
    # NOTE(review): the original author was unsure about the stage's casing;
    # the comparison is kept against the upper-case "RUNNING" as before.
    if space_runtime.stage == "RUNNING":
        return True

    space_runtime_after_restart = hf_api.restart_space(space_id, token=token)
    print(f"Space runtime after restart: {space_runtime_after_restart}")

    for attempt in range(max_retries):
        space_runtime = hf_api.get_space_runtime(space_id, token=token)
        print(f"Space runtime: {space_runtime}")
        if space_runtime.stage == "RUNNING":
            return True
        # No point in sleeping after the final poll: we are about to give up.
        if attempt < max_retries - 1:
            time.sleep(retry_delay)
    return False


def func(file, number_of_pages, secret):
    """Generate Beamer slides for an uploaded PDF via the private Space.

    Steps: verify the password, archive the uploaded file in the dataset repo,
    make sure the private Space is running (restarting it if needed), call it
    through ``gradio_client``, prepend a provenance header to the result and
    log the result with ``save_output``.

    Args:
        file: Path of the uploaded file as delivered by the upload button
            (assumed to be a str-like filesystem path).
        number_of_pages: Page limit forwarded unchanged to the private Space.
        secret: Password typed by the user.

    Returns:
        The generated LaTeX (with header) on success, otherwise a
        human-readable message: wrong password, missing file, Space start-up
        timeout, or the Space's own output when it contains ``"Error"``.
    """
    if not _secret_matches(secret):
        return "Wrong password, please try again"

    if file is None:
        # Nothing was uploaded; previously this crashed on `file.split`.
        return "Please upload a PDF file before generating slides."

    space_id = "ByMatthew/deckify_private"
    date_string = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

    print(file)
    unique_filename = _build_unique_filename(file, date_string)
    print(unique_filename)

    # Keep a copy of the input next to the generated outputs in the dataset repo.
    api.upload_file(
        path_or_fileobj=file,
        path_in_repo=f"input_files/{unique_filename}",
        repo_id="eth-zurich-cle/deckify-dataset",
        repo_type="dataset",
    )

    if not _ensure_space_running(space_id, read_key):
        return "Failed to start the private space in time. Please try again later."

    client = Client(space_id, hf_token=read_key)
    print(f"Client: {client}")

    output = client.predict(file, number_of_pages)
    if "Error" in output:
        # Pass the Space's error text straight through; do not log it as a result.
        return output

    header = (
        "% The following slides are generated with [[SCIDECK]](https://huggingface.co/spaces/ByMatthew/Scideck)\n"
        f"% Generated on {date_string}\n"
        f"%{'-' * 100}\n"
    )
    output = header + output

    save_output(unique_filename, output, number_of_pages, date_string)

    return output

def save_output(unique_filename: str, output: str, num_pages: int, date_string: str) -> None:
    """Append one generation record to the local feedback file as a JSON line.

    The record holds the stored input name, the generated output, the page
    count and the timestamp string. The write happens while holding the
    commit scheduler's lock so that appends from different users do not
    interleave with each other.
    """
    record = {
        "input_name": unique_filename,
        "output": output,
        "num_pages": num_pages,
        "timestamp": date_string,
    }
    with scheduler.lock, feedback_file.open("a") as log:
        log.write(json.dumps(record) + "\n")


def upload_file(file):
    """Log the uploaded file object and return its ``name`` attribute.

    Used as the upload callback so the chosen file is echoed into the
    file-preview component.
    """
    print(file)
    uploaded_name = file.name
    return uploaded_name

# Markdown/HTML introduction shown in the UI (passed to gr.Markdown in the
# __main__ block). It is a raw string, so backslashes are kept literally.
# The commented-out line directly below is a contact paragraph that is
# currently not part of the displayed text.
# 📝 If you get an error message, you can send me email with the PDF file attached to this email address: <b>nkoisheke [at] ethz [dot] ch</b>, and I will generate the slides for you. If there are any other issues or questions, please do not hesitate to contact me 🤗 <br>
description = r"""
<h3> SCIDECK is a tool that allows you to convert your PDF files into a presentation deck.</h3>
<br>

❗️❗️❗️[<b>Important</b>] Instructions:<br>
1️⃣ <b>Upload the PDF document</b>: Select the PDF file you want to convert into slides.<br>
2️⃣ <b>Specify the number of pages</b>: Indicate the range of pages you'd like to include in the slide generation. <b>Set it to 0</b> if you want to include all pages. <br>
3️⃣ <b>Enter the password provided in the invite email.</b><br>
4️⃣ <b>Click the Generate button</b>: Initiate the slide generation process by clicking the designated "Generate" button.<br>
5️⃣ <b>Be patient 🙂</b>: Generating the slides could take between 1 minute and 5 minutes.<br>
 

🖼️ Some examples of slides generated using <b>SCIDECK</b> are shown below: <br>
1. Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift [[Paper]](https://arxiv.org/pdf/1502.03167.pdf) [[Slides]](https://drive.google.com/file/d/1Zt5FFH0nKxut-LyEr9pNAIdtgR_lBtIj/view?usp=sharing) <br>
2. Attention Is All You Need [[Paper]](https://arxiv.org/pdf/1706.03762.pdf) [[Slides]](https://drive.google.com/file/d/1xKgohh_QKV9pD_XjDuXR566h0VJ1S7WI/view?usp=sharing) <br>
3. Denoising Diffusion Probabilistic Models [[Paper]](https://arxiv.org/pdf/2006.11239.pdf) [[Slides]](https://drive.google.com/file/d/1D2ZfoJpHR3kP0JdsYyjxUq-vjVMV-KTO/view?usp=sharing) <br>


ver 0.1
"""

# Hugging Face token used when talking to the private Space; None when the
# HF_TOKEN environment variable is not set.
read_key = os.environ.get("HF_TOKEN")

if __name__ == "__main__":
    page_title = "<h1> SCIDECK: Generate slides (LaTeX Beamer) from PDF</h1>"

    with gr.Blocks() as demo:
        # Static header: title, screenshot and usage instructions.
        gr.Markdown(page_title)
        gr.Image("demo.png", width=600, show_download_button=False, show_label=False)
        gr.Markdown(description)

        # Upload widget; the chosen file is echoed into the File component.
        # NOTE(review): file_types=["file"] does not restrict uploads to PDFs
        # even though the label asks for one - confirm whether that is intended.
        uploaded_preview = gr.File()
        pdf_upload = gr.UploadButton(
            "Click to Upload a PDF File",
            file_types=["file"],
            file_count="single",
            size="sm",
        )
        pdf_upload.upload(upload_file, pdf_upload, uploaded_preview)

        # Generation form: page limit, password, result box and trigger button.
        page_count = gr.Number(label="Number of pages")
        password_box = gr.Textbox(label="Password", type="password")
        result_box = gr.Textbox(label="Output", show_copy_button=True, interactive=False)
        generate_button = gr.Button("Generate slides")
        generate_button.click(
            fn=func,
            inputs=[pdf_upload, page_count, password_box],
            outputs=result_box,
            api_name="greet",
        )

    # At most five requests may wait in the queue at once.
    demo.queue(max_size=5)
    demo.launch()