feat: upload certificate pdf to dataset

#4
by wseo - opened
Files changed (1) hide show
  1. app.py +65 -26
app.py CHANGED
@@ -11,13 +11,14 @@ import os
11
  import sys
12
  import pandas as pd
13
  import json
 
14
 
15
  api = HfApi()
16
  HF_TOKEN = os.environ.get("HF_TOKEN")
17
 
18
  # Public dataset repo containing the pdfs of already certified users
19
- DATASET_REPO_URL = "https://huggingface.co/datasets/wseo/huggingface-krew-hackathon23"
20
- CERTIFIED_USERS_FILENAME = "usernames.csv"
21
 
22
  ORGANIZATION = "pseudolab"
23
 
@@ -101,10 +102,10 @@ def generate_certificate(certificate_template, first_name, last_name, hf_usernam
101
  d.text((538, 419), name, fill=(87,87,87), anchor="mm", font=name_font)
102
 
103
  # Debug line id
104
- #d.line(((863, 0), (863, 1400)), "gray")
105
 
106
  # Date of certification
107
- d.text((863, 336), f"HKH23-{username}", fill=(117,117,117), anchor="mm", font=username_font)
108
 
109
  pdf = im.convert('RGB')
110
  pdf.save('certificate.pdf')
@@ -112,27 +113,65 @@ def generate_certificate(certificate_template, first_name, last_name, hf_usernam
112
  return im, "./certificate.pdf"
113
 
114
 
 
 
 
 
 
 
 
 
 
 
115
  def add_certified_user(hf_username, first_name, last_name, certificate_type):
116
- """
117
- Add the certified user to the database
118
- """
119
-
120
- print("ADD CERTIFIED USER")
121
- repo = Repository(local_dir="usernames", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
122
- repo.git_pull()
123
 
124
- history = pd.read_csv(os.path.join("usernames", CERTIFIED_USERS_FILENAME))
125
 
126
- # Check if this hf_username is already in our dataset:
127
- check = history.loc[history['hf_username'] == hf_username]
128
- if not check.empty:
129
- history = history.drop(labels=check.index[0], axis=0)
130
-
131
- new_row = pd.DataFrame({'hf_username': hf_username, 'first_name': first_name, 'last_name': last_name, 'certificate_type': certificate_type, 'datetime': time.time()}, index=[0])
132
- history = pd.concat([new_row, history[:]]).reset_index(drop=True)
 
 
 
 
 
 
 
 
 
 
 
133
 
134
- history.to_csv(os.path.join("usernames", CERTIFIED_USERS_FILENAME), index=False)
135
- repo.push_to_hub(commit_message="Update certified users list")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
 
138
  def create_certificate(passed, certificate_type, hf_username, first_name, last_name):
@@ -149,7 +188,7 @@ def create_certificate(passed, certificate_type, hf_username, first_name, last_n
149
  # Generate a certificate of excellence
150
  certificate, pdf = generate_certificate("./certificate-excellence.png", first_name, last_name, hf_username)
151
  # Add this user to our database
152
- # add_certified_user(hf_username, first_name, last_name, certificate_type)
153
  # Add a message
154
  message = """
155
  Congratulations, you successfully completed the 2023 Hackathon 🎉! \n
@@ -161,7 +200,7 @@ def create_certificate(passed, certificate_type, hf_username, first_name, last_n
161
  # Generate a certificate of completion
162
  certificate, pdf = generate_certificate("./certificate-completion.png", first_name, last_name, hf_username)
163
  # Add this user to our database
164
- # add_certified_user(hf_username, first_name, last_name, certificate_type)
165
  # Add a message
166
  message = """
167
  Congratulations, you successfully completed the 2023 Hackathon 🎉! \n
@@ -177,7 +216,7 @@ def create_certificate(passed, certificate_type, hf_username, first_name, last_n
177
  # Add a message
178
  message = """
179
  You didn't pass the minimum of one contribution to get a certificate of completion.
180
- For more information about the certification process, refer to the submit page.
181
  If the results here differ from your contributions, make sure you moved your space to the pseudolab organization.
182
  """
183
  return certificate, message, pdf
@@ -200,11 +239,11 @@ with gr.Blocks() as demo:
200
  # Get your 2023 Hackathon Certificate 🎓
201
  The certification process is completely free:
202
  - To get a *certificate of completion*: you need to **contribute to at least one model, dataset, or space**.
203
- - To get a *certificate of excellence*: you need to **contribute to models, datasets, and spaces**.
204
 
205
  For more information about the certification process [check the hackathon page on certification](https://pseudo-lab.github.io/huggingface-hackathon23/submit.html#certification).
206
 
207
- Don't hesitate to share your certificate on Twitter (tag me [@wonhseo](https://twitter.com/wonhseo) and [@huggingface](https://twitter.com/huggingface)) and on LinkedIn.
208
  """)
209
 
210
  hf_username = gr.Textbox(placeholder="wseo", label="Your Hugging Face Username (case sensitive)")
 
11
  import sys
12
  import pandas as pd
13
  import json
14
+ import shutil
15
 
16
  api = HfApi()
17
  HF_TOKEN = os.environ.get("HF_TOKEN")
18
 
19
  # Public dataset repo containing the pdfs of already certified users
20
+ DATASET_REPO_URL = f"https://wseo:{HF_TOKEN}@huggingface.co/datasets/pseudolab/huggingface-krew-hackathon2023"
21
+ CERTIFIED_USERS_FILENAME = "certified.csv"
22
 
23
  ORGANIZATION = "pseudolab"
24
 
 
102
  d.text((538, 419), name, fill=(87,87,87), anchor="mm", font=name_font)
103
 
104
  # Debug line id
105
+ #d.line(((815, 0), (815, 1400)), "gray")
106
 
107
  # Date of certification
108
+ d.text((815, 327), f"HKH23-{hf_username}", fill=(117,117,117), font=username_font)
109
 
110
  pdf = im.convert('RGB')
111
  pdf.save('certificate.pdf')
 
113
  return im, "./certificate.pdf"
114
 
115
 
116
def create_initial_csv(path):
    """Create an empty certified-users CSV at *path* if it doesn't exist.

    The file contains only the header row. An existing file is left
    untouched, so calling this on a populated list never discards records.

    Args:
        path: Destination of the CSV file. Its parent directory must exist.
    """
    # Column order of the certified-users list.
    headers = ['hf_username', 'first_name', 'last_name', 'certificate_type', 'datetime', 'pdf_path']
    try:
        # Mode "x" fails if the file is already there, which makes the
        # "only if missing" guarantee atomic instead of check-then-write.
        with open(path, "x", newline="", encoding="utf-8") as handle:
            pd.DataFrame(columns=headers).to_csv(handle, index=False)
    except FileExistsError:
        # Deliberate no-op: the list already exists and must be preserved.
        pass
124
+
125
+
126
def add_certified_user(hf_username, first_name, last_name, certificate_type,
                       pdf_path="./certificate.pdf"):
    """
    Add the certified user to the dataset and include their certificate PDF.

    Clones/pulls the dataset repo into ``data/``, copies the certificate PDF
    to ``data/pdfs/<hf_username>.pdf``, puts a row for the user at the top of
    the certified-users CSV (replacing any earlier rows for the same
    username), then commits and pushes the changes.

    Args:
        hf_username: Hugging Face username; also used as the PDF file name.
        first_name: First name stored in the CSV.
        last_name: Last name stored in the CSV.
        certificate_type: Stored as-is in the ``certificate_type`` column.
        pdf_path: Certificate file to upload. Defaults to
            ``./certificate.pdf``, the path ``generate_certificate`` saves to.

    Raises:
        ValueError: If ``hf_username`` is empty or is not a plain file name.
    """
    # The username becomes a file name inside the repo clone, so refuse
    # anything that could escape the pdfs/ directory.
    if (
        not hf_username
        or hf_username in (".", "..")
        or "/" in hf_username
        or "\\" in hf_username
    ):
        raise ValueError(f"Invalid Hugging Face username: {hf_username!r}")

    print("ADD CERTIFIED USER")
    local_dir = "data"
    # NOTE(review): DATASET_REPO_URL carries HF_TOKEN inside the URL, which
    # ends up in the clone's git config; consider token-based auth instead.
    repo = Repository(local_dir=local_dir, clone_from=DATASET_REPO_URL)
    repo.git_pull()

    csv_full_path = os.path.join(local_dir, CERTIFIED_USERS_FILENAME)
    if not os.path.isfile(csv_full_path):
        create_initial_csv(csv_full_path)
    history = pd.read_csv(csv_full_path)

    # Drop every earlier row for this user (not just the first match) so the
    # list keeps exactly one, up-to-date entry per username.
    history = history.loc[history['hf_username'] != hf_username]

    # Path of the PDF relative to the repo root (always "/"-separated), and
    # the matching location inside the local clone.
    pdf_filename = f"{hf_username}.pdf"
    pdf_repo_path = f"pdfs/{pdf_filename}"
    pdf_local_path = os.path.join(local_dir, "pdfs", pdf_filename)

    # Create the pdfs directory if it doesn't exist, then copy the file in.
    os.makedirs(os.path.dirname(pdf_local_path), exist_ok=True)
    shutil.copy(pdf_path, pdf_local_path)

    # New entry for this user and their PDF; newest entries go first.
    new_row = pd.DataFrame({
        'hf_username': hf_username,
        'first_name': first_name,
        'last_name': last_name,
        'certificate_type': certificate_type,
        'datetime': time.time(),  # Seconds since the epoch at registration
        'pdf_path': pdf_repo_path,
    }, index=[0])
    history = pd.concat([new_row, history], ignore_index=True)

    # Save the updated CSV
    history.to_csv(csv_full_path, index=False)

    # Add the PDF and CSV changes to the repo and push
    repo.git_add()
    repo.push_to_hub(commit_message="Update certified users list and add PDF")
175
 
176
 
177
  def create_certificate(passed, certificate_type, hf_username, first_name, last_name):
 
188
  # Generate a certificate of excellence
189
  certificate, pdf = generate_certificate("./certificate-excellence.png", first_name, last_name, hf_username)
190
  # Add this user to our database
191
+ add_certified_user(hf_username, first_name, last_name, certificate_type)
192
  # Add a message
193
  message = """
194
  Congratulations, you successfully completed the 2023 Hackathon 🎉! \n
 
200
  # Generate a certificate of completion
201
  certificate, pdf = generate_certificate("./certificate-completion.png", first_name, last_name, hf_username)
202
  # Add this user to our database
203
+ add_certified_user(hf_username, first_name, last_name, certificate_type)
204
  # Add a message
205
  message = """
206
  Congratulations, you successfully completed the 2023 Hackathon 🎉! \n
 
216
  # Add a message
217
  message = """
218
  You didn't pass the minimum of one contribution to get a certificate of completion.
219
+ For more information about the certification process, refer to the hackathon page.
220
  If the results here differ from your contributions, make sure you moved your space to the pseudolab organization.
221
  """
222
  return certificate, message, pdf
 
239
  # Get your 2023 Hackathon Certificate 🎓
240
  The certification process is completely free:
241
  - To get a *certificate of completion*: you need to **contribute to at least one model, dataset, or space**.
242
+ - To get a *certificate of excellence*: you need to **contribute to models, datasets, and spaces**. *(Yes, all three!)*
243
 
244
  For more information about the certification process [check the hackathon page on certification](https://pseudo-lab.github.io/huggingface-hackathon23/submit.html#certification).
245
 
246
+ Don't hesitate to share your certificate on Twitter (tag me [@wonhseo](https://twitter.com/wonhseo), [@pseudolab](https://twitter.com/pseudolab), and [@huggingface](https://twitter.com/huggingface)) and on LinkedIn.
247
  """)
248
 
249
  hf_username = gr.Textbox(placeholder="wseo", label="Your Hugging Face Username (case sensitive)")