Wauplin HF Staff committed on
Commit
f535fc4
·
1 Parent(s): 2304e08

Better handling of PR updates

Browse files
Files changed (1) hide show
  1. app.py +72 -27
app.py CHANGED
@@ -11,8 +11,11 @@ from huggingface_hub import (
11
  create_commit,
12
  create_repo,
13
  delete_repo,
 
14
  snapshot_download,
 
15
  )
 
16
  from pydantic import BaseModel
17
  from requests import HTTPError
18
 
@@ -29,6 +32,7 @@ class WebhookPayloadRepo(BaseModel):
29
  type: Literal["dataset", "model", "space"]
30
  name: str
31
  private: bool
 
32
 
33
 
34
  class WebhookPayloadDiscussion(BaseModel):
@@ -65,32 +69,61 @@ async def post_webhook(
65
  if payload.repo.type != "space":
66
  raise HTTPException(400, f"Must be a Space, not {payload.repo.type}")
67
 
68
- if not payload.event.scope.startswith("discussion"):
69
- return "Not a discussion"
70
 
71
- if payload.discussion is None:
72
- return "Couldn't parse 'payload.discussion'"
73
-
74
- if not payload.discussion.isPullRequest:
75
- return "Not a Pull Request"
76
-
77
- if payload.event.action == "create" or payload.event.action == "update":
78
  task_queue.add_task(
79
  sync_ci_space,
80
- space_id=payload.repo.name,
81
  pr_num=payload.discussion.num,
82
  private=payload.repo.private,
83
  )
84
- elif payload.event.action == "delete":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  task_queue.add_task(
86
  delete_ci_space,
87
  space_id=payload.repo.name,
88
  pr_num=payload.discussion.num,
89
  )
90
- else:
91
- return f"Couldn't handle action {payload.event.action}"
92
 
93
- return "Processed"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
 
96
  def sync_ci_space(space_id: str, pr_num: int, private: bool) -> None:
@@ -113,25 +146,37 @@ def sync_ci_space(space_id: str, pr_num: int, private: bool) -> None:
113
  raise
114
 
115
  # Download space codebase from PR revision
116
- snapshot_path = snapshot_download(
117
- repo_id=space_id,
118
- revision=f"refs/pr/{pr_num}",
119
- repo_type="space",
120
- token=HF_TOKEN,
 
 
121
  )
122
 
123
  # Sync space codebase with PR revision
124
  operations = [ # little aggressive but works
125
  CommitOperationDelete(".", is_folder=True)
126
  ]
127
- operations += [
128
- CommitOperationAdd(
129
- path_in_repo=str(filepath.relative_to(snapshot_path)),
130
- path_or_fileobj=filepath,
131
- )
132
- for filepath in Path(snapshot_path).glob("**/*")
133
- if filepath.is_file()
134
- ]
 
 
 
 
 
 
 
 
 
 
135
  create_commit(
136
  repo_id=ci_space_id,
137
  repo_type="space",
 
11
  create_commit,
12
  create_repo,
13
  delete_repo,
14
+ get_repo_discussions,
15
  snapshot_download,
16
+ space_info,
17
  )
18
+ from huggingface_hub.repocard import RepoCard
19
  from pydantic import BaseModel
20
  from requests import HTTPError
21
 
 
32
  type: Literal["dataset", "model", "space"]
33
  name: str
34
  private: bool
35
+ headSha: str
36
 
37
 
38
  class WebhookPayloadDiscussion(BaseModel):
 
69
  if payload.repo.type != "space":
70
  raise HTTPException(400, f"Must be a Space, not {payload.repo.type}")
71
 
72
+ space_id = payload.repo.name
 
73
 
74
+ if (
75
+ payload.event.scope.startswith("discussion")
76
+ and payload.discussion is not None
77
+ and payload.discussion.isPullRequest
78
+ and payload.event.action == "create"
79
+ ):
80
+ # New PR!
81
  task_queue.add_task(
82
  sync_ci_space,
83
+ space_id=space_id,
84
  pr_num=payload.discussion.num,
85
  private=payload.repo.private,
86
  )
87
+ elif (
88
+ payload.event.scope.startswith("repo.content")
89
+ and payload.event.action == "update"
90
+ ):
91
+ # New repo change. Is it a commit on a PR?
92
+ # => loop through all PRs and check if new changes happened
93
+ for discussion in get_repo_discussions(
94
+ repo_id=space_id, repo_type="space", token=HF_TOKEN
95
+ ):
96
+ if discussion.is_pull_request and discussion.status == "open":
97
+ if not is_pr_synced(space_id=space_id, pr_num=discussion.num):
98
+ task_queue.add_task(
99
+ sync_ci_space,
100
+ space_id=space_id,
101
+ pr_num=discussion.num,
102
+ private=payload.repo.private,
103
+ )
104
+ elif False:
105
+ # PR merged!
106
  task_queue.add_task(
107
  delete_ci_space,
108
  space_id=payload.repo.name,
109
  pr_num=payload.discussion.num,
110
  )
 
 
111
 
112
+ return "Processed."
113
+
114
+
115
def is_pr_synced(space_id: str, pr_num: int) -> bool:
    """Return whether the CI Space already mirrors the latest commit of a PR.

    The sha recorded as ``synced_sha`` in the CI Space's repo card metadata is
    compared with the current head sha of ``refs/pr/{pr_num}`` on the source
    Space.

    Args:
        space_id: Id of the source Space the pull request belongs to.
        pr_num: Number of the pull request to check.

    Returns:
        ``True`` if the recorded sha equals the PR head sha, ``False``
        otherwise (including when no ``synced_sha`` has been recorded).
    """
    # What is the last synced commit for this PR?
    ci_space_id = _get_ci_space_id(space_id=space_id, pr_num=pr_num)
    card = RepoCard.load(
        repo_id_or_path=ci_space_id, repo_type="space", token=HF_TOKEN
    )
    # A card without the key yields None, which never equals a real sha.
    last_synced_sha = getattr(card.data, "synced_sha", None)

    # What is the last commit id for this PR?
    # Pass the token like every other Hub call in this file, so that the
    # lookup also works when the source Space is private.
    info = space_info(
        repo_id=space_id, revision=f"refs/pr/{pr_num}", token=HF_TOKEN
    )
    last_pr_sha = info.sha

    # Is it up to date?
    return last_synced_sha == last_pr_sha
127
 
128
 
129
  def sync_ci_space(space_id: str, pr_num: int, private: bool) -> None:
 
146
  raise
147
 
148
  # Download space codebase from PR revision
149
+ snapshot_path = Path(
150
+ snapshot_download(
151
+ repo_id=space_id,
152
+ revision=f"refs/pr/{pr_num}",
153
+ repo_type="space",
154
+ token=HF_TOKEN,
155
+ )
156
  )
157
 
158
  # Sync space codebase with PR revision
159
  operations = [ # little aggressive but works
160
  CommitOperationDelete(".", is_folder=True)
161
  ]
162
+ for filepath in snapshot_path.glob("**/*"):
163
+ if filepath.is_file():
164
+ path_in_repo = str(filepath.relative_to(snapshot_path))
165
+
166
+ # Upload all files without changes except for the README file
167
+ if path_in_repo == "README.md":
168
+ card = RepoCard.load(filepath)
169
+ setattr(card.data, "synced_sha", snapshot_path.name) # latest sha
170
+ path_or_fileobj = str(card).encode()
171
+ else:
172
+ path_or_fileobj = filepath
173
+
174
+ operations.append(
175
+ CommitOperationAdd(
176
+ path_in_repo=path_in_repo, path_or_fileobj=path_or_fileobj
177
+ )
178
+ )
179
+
180
  create_commit(
181
  repo_id=ci_space_id,
182
  repo_type="space",