|
import datetime
import hmac
import json
import os
import random
import time
import uuid
from pathlib import Path

import gradio as gr
from gradio_client import Client
from huggingface_hub import CommitScheduler, HfApi, hf_api
|
|
|
# Local folder holding the generation records, and this process's own record
# file inside it (the UUID gives every process start a fresh file name).
feedback_folder = Path("output_data")
feedback_file = feedback_folder / f"output_{uuid.uuid4()}.json"

# Background uploader that commits the local folder to the dataset repository
# on a fixed interval. Its ``lock`` is held by ``save_output`` while writing.
scheduler = CommitScheduler(
    folder_path=feedback_folder,
    path_in_repo="output_data",
    repo_id="eth-zurich-cle/deckify-dataset",
    repo_type="dataset",
    every=10,
)

# Hub client used by ``func`` to upload the submitted input files.
api = HfApi()
|
|
|
def check_password(username, password):
    """Return whether *password* matches the shared access password.

    The expected value is read from the ``ACCESS`` environment variable on
    every call.

    Args:
        username: Accepted for the ``(username, password)`` call shape; it is
            not inspected.
        password: Candidate password supplied by the user.

    Returns:
        ``True`` when *password* equals the configured value, else ``False``.

    Raises:
        KeyError: If the ``ACCESS`` environment variable is not set.
    """
    expected = os.environ["ACCESS"]
    if not isinstance(password, str):
        # Non-string input can never match (same outcome as a plain ``==``).
        return False
    # Compare in constant time so response timing does not leak how much of
    # the password was correct. Bytes are compared because compare_digest
    # only accepts ASCII-only ``str`` arguments.
    return hmac.compare_digest(
        password.encode("utf-8", "surrogatepass"),
        expected.encode("utf-8", "surrogatepass"),
    )
|
|
|
# Hub repositories this handler talks to.
_DATASET_REPO_ID = "eth-zurich-cle/deckify-dataset"
_PRIVATE_SPACE_ID = "ByMatthew/deckify_private"


def _secret_is_valid(secret):
    """Return True when *secret* equals the ``ACCESS`` environment variable."""
    expected = os.environ["ACCESS"]
    if not isinstance(secret, str):
        return False
    # Constant-time comparison; bytes are used because compare_digest only
    # accepts ASCII-only ``str`` arguments.
    return hmac.compare_digest(
        secret.encode("utf-8", "surrogatepass"),
        expected.encode("utf-8", "surrogatepass"),
    )


def _make_unique_filename(file, date_string):
    """Build ``<stem>_<date_string><suffix>`` from the base name of *file*."""
    # Only the base name is used: the directory part of the upload path must
    # not end up in the repository path, and dots inside the stem are kept.
    source = Path(file)
    return f"{source.stem}_{date_string}{source.suffix}"


def _ensure_private_space_running(max_retries=20, retry_delay=10):
    """Restart the private Space if needed and poll until its stage is RUNNING.

    Returns True once the Space reports ``RUNNING``; False if it still does
    not after ``max_retries`` polls spaced ``retry_delay`` seconds apart.
    """
    space_runtime = hf_api.get_space_runtime(_PRIVATE_SPACE_ID, token=read_key)
    print(f"Space runtime: {space_runtime}")
    if space_runtime.stage == "RUNNING":
        return True

    space_runtime_after_restart = hf_api.restart_space(_PRIVATE_SPACE_ID, token=read_key)
    print(f"Space runtime after restart: {space_runtime_after_restart}")

    for _ in range(max_retries):
        space_runtime = hf_api.get_space_runtime(_PRIVATE_SPACE_ID, token=read_key)
        print(f"Space runtime: {space_runtime}")
        if space_runtime.stage == "RUNNING":
            return True
        time.sleep(retry_delay)
    return False


def _prepend_header(slides, date_string):
    """Prefix *slides* with the ``%``-commented attribution/date header."""
    header = (
        "% The following slides are generated with [[SCIDECK]](https://huggingface.co/spaces/ByMatthew/Scideck)\n"
        f"% Generated on {date_string}\n"
        f"%{'-' * 100}\n"
    )
    return header + slides


def func(file, number_of_pages, secret):
    """Generate slides for an uploaded document through the private Space.

    Steps: validate the password, archive the input file in the dataset
    repository, make sure the private Space is running, call it, prepend a
    header to the result and record the result via ``save_output``.

    Args:
        file: Path of the uploaded file (a string). ``None`` or empty when
            nothing has been uploaded.
        number_of_pages: Page count forwarded unchanged to the private Space.
        secret: Password typed by the user; must equal the ``ACCESS``
            environment variable.

    Returns:
        The generated text with the header prepended, or a human-readable
        message when the password is wrong, no file was uploaded, the Space
        did not start in time, or the Space returned text containing
        ``"Error"`` (which is passed through as-is).

    Raises:
        KeyError: If the ``ACCESS`` environment variable is not set.
    """
    if not _secret_is_valid(secret):
        return "Wrong password, please try again"

    if not file:
        # Nothing uploaded yet; without this the filename handling below
        # would fail with an AttributeError/TypeError.
        return "Please upload a PDF file before generating slides."

    date_string = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

    print(file)
    unique_filename = _make_unique_filename(file, date_string)
    print(unique_filename)

    # Keep a copy of the submitted document next to the generated outputs.
    api.upload_file(
        path_or_fileobj=file,
        path_in_repo=f"input_files/{unique_filename}",
        repo_id=_DATASET_REPO_ID,
        repo_type="dataset",
    )

    if not _ensure_private_space_running():
        return "Failed to start the private space in time. Please try again later."

    client = Client(_PRIVATE_SPACE_ID, hf_token=read_key)
    print(f"Client: {client}")

    output = client.predict(file, number_of_pages)
    if "Error" in output:
        # Error text from the Space is shown to the user and not recorded.
        return output

    output = _prepend_header(output, date_string)
    save_output(unique_filename, output, number_of_pages, date_string)
    return output
|
|
|
def save_output(unique_filename: str, output: str, num_pages: int, date_string: str) -> None:
    """Append one generation record to the local feedback file.

    The record is written as a single JSON object followed by a newline, so
    the file accumulates one JSON document per line.

    Args:
        unique_filename: Name under which the input file was stored.
        output: Generated text returned to the user.
        num_pages: Page count the user requested.
        date_string: Timestamp string of the request.
    """
    record = {
        "input_name": unique_filename,
        "output": output,
        "num_pages": num_pages,
        "timestamp": date_string,
    }
    # Write while holding the commit scheduler's lock.
    with scheduler.lock, feedback_file.open("a") as sink:
        sink.write(json.dumps(record) + "\n")
|
|
|
|
|
def upload_file(file):
    """Log the uploaded file object and return its ``name`` attribute.

    Wired to the upload button's ``upload`` event; the returned value is
    what gets shown in the ``gr.File`` output component.
    """
    print(file)
    stored_name = file.name
    return stored_name
|
|
|
|
|
# Markdown/HTML help text rendered in the UI via ``gr.Markdown(description)``.
description = r"""
<h3> SCIDECK is a tool that allows you to convert your PDF files into a presentation deck.</h3>
<br>

❗️❗️❗️[<b>Important</b>] Instructions:<br>
1️⃣ <b>Upload the PDF document</b>: Select the PDF file you want to convert into slides.<br>
2️⃣ <b>Specify the number of pages</b>: Indicate the range of pages you'd like to include in the slide generation. <b>Set it to 0</b> if you want to include all pages. <br>
3️⃣ <b>Enter the password provided in the invite email.</b><br>
4️⃣ <b>Click the Generate button</b>: Initiate the slide generation process by clicking the designated "Generate" button.<br>
5️⃣ <b>Be patient 🙂</b>: Generating the slides could take between 1 minute and 5 minutes.<br>


🖼️ Some examples of slides generated using <b>SCIDECK</b> are shown below: <br>
1. Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift [[Paper]](https://arxiv.org/pdf/1502.03167.pdf) [[Slides]](https://drive.google.com/file/d/1Zt5FFH0nKxut-LyEr9pNAIdtgR_lBtIj/view?usp=sharing) <br>
2. Attention Is All You Need [[Paper]](https://arxiv.org/pdf/1706.03762.pdf) [[Slides]](https://drive.google.com/file/d/1xKgohh_QKV9pD_XjDuXR566h0VJ1S7WI/view?usp=sharing) <br>
3. Denoising Diffusion Probabilistic Models [[Paper]](https://arxiv.org/pdf/2006.11239.pdf) [[Slides]](https://drive.google.com/file/d/1D2ZfoJpHR3kP0JdsYyjxUq-vjVMV-KTO/view?usp=sharing) <br>


ver 0.1
"""

# Hugging Face token taken from the HF_TOKEN environment variable (None when
# unset); ``func`` passes it to the private-Space calls.
read_key = os.getenv("HF_TOKEN")
|
|
|
if __name__ == "__main__":
    # Page heading, rendered through gr.Markdown.
    title_html = "<h1> SCIDECK: Generate slides (LaTeX Beamer) from PDF</h1>"

    with gr.Blocks() as demo:
        gr.Markdown(title_html)
        gr.Image("demo.png", width=600, show_download_button=False, show_label=False)
        gr.Markdown(description)

        # Upload widget: the button receives the file, the File component
        # echoes back what was uploaded.
        file_output = gr.File()
        upload_button = gr.UploadButton(
            "Click to Upload a PDF File",
            # Restrict the picker to PDFs, matching the label and instructions
            # (the generic "file" type accepted any file).
            file_types=[".pdf"],
            file_count="single",
            size="sm",
        )
        upload_button.upload(upload_file, upload_button, file_output)

        # Default of 0 matches the documented "all pages" setting, and
        # precision=0 makes the component yield an int rather than a float.
        number_of_pages = gr.Number(label="Number of pages", value=0, precision=0)
        secret = gr.Textbox(label="Password", type="password")
        output = gr.Textbox(label="Output", show_copy_button=True, interactive=False)

        generate_btn = gr.Button("Generate slides")
        generate_btn.click(
            fn=func,
            inputs=[upload_button, number_of_pages, secret],
            outputs=output,
            api_name="greet",
        )

    # Cap the request queue at five pending jobs, then start the server.
    demo.queue(max_size=5)
    demo.launch()