zhiminy committed on
Commit
ec86ed3
·
1 Parent(s): 03acfce

add repochat functionality

Browse files
Files changed (3) hide show
  1. app.py +187 -11
  2. context_window.json +0 -3
  3. requirements.txt +2 -1
app.py CHANGED
@@ -1,17 +1,20 @@
1
  import dotenv
2
  import evalica
 
3
  import io
4
  import json
5
  import os
6
  import random
 
7
  import threading
8
 
9
  import gradio as gr
10
  import pandas as pd
11
 
12
- from huggingface_hub import upload_file, hf_hub_download, HfFolder, HfApi
13
  from datetime import datetime
 
14
  from gradio_leaderboard import Leaderboard
 
15
  from openai import OpenAI
16
 
17
  # Load environment variables
@@ -45,6 +48,149 @@ models_state = {}
45
  conversation_state = {}
46
 
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  # Truncate prompt
49
  def truncate_prompt(user_input, model_alias, models, conversation_state):
50
  """
@@ -334,9 +480,9 @@ with gr.Blocks() as app:
334
  # Add title and description as a Markdown component
335
  leaderboard_intro = gr.Markdown(
336
  """
337
- # 🏆 Software Engineering Arena Leaderboard: Community-Driven Evaluation of Top SE Chatbots
338
 
339
- The Software Engineering (SE) Arena is an open-source platform designed to evaluate language models through human preference, fostering transparency and collaboration. Developed by researchers at [Software Analysis and Intelligence Lab (SAIL)](https://sail.cs.queensu.ca), the platform empowers the community to assess and compare the performance of leading foundation models in SE tasks. For technical details, check out our [paper](TODO).
340
  """,
341
  elem_classes="leaderboard-intro",
342
  )
@@ -375,7 +521,7 @@ with gr.Blocks() as app:
375
  # ⚔️ Software Engineering (SE) Arena: Explore and Test the Best SE Chatbots with Long-Context Interactions
376
 
377
  ## 📜How It Works
378
- - **Blind Comparison**: Submit a software engineering-related query to two anonymous chatbots randomly selected from up to {len(available_models)} top models, including ChatGPT, Gemini, Claude, Llama, and others.
379
  - **Interactive Voting**: Engage in multi-turn dialogues with both chatbots and compare their responses. You can continue the conversation until you confidently choose the better model.
380
  - **Fair Play Rules**: Votes are counted only if chatbot identities remain anonymous. Revealing a chatbot's identity disqualifies the session.
381
 
@@ -393,10 +539,19 @@ with gr.Blocks() as app:
393
  login_button = gr.Button(
394
  "Sign in with Hugging Face", elem_id="oauth-button"
395
  )
396
-
 
 
 
 
 
 
 
 
397
  # Components with initial non-interactive state
398
  shared_input = gr.Textbox(
399
- label="Enter your prompt for both models",
 
400
  lines=2,
401
  interactive=False, # Initially non-interactive
402
  )
@@ -441,6 +596,9 @@ with gr.Blocks() as app:
441
  model_b_input_state = gr.update(interactive=True)
442
  model_b_send_state = toggle_submit_button(model_b_input.value)
443
 
 
 
 
444
  return (
445
  gr.update(visible=False), # Hide the timeout popup
446
  shared_input_state, # Update shared_input
@@ -449,6 +607,7 @@ with gr.Blocks() as app:
449
  model_a_send_state, # Update model_a_send button
450
  model_b_input_state, # Update model_b_input
451
  model_b_send_state, # Update model_b_send button
 
452
  )
453
 
454
  # Multi-round inputs, initially hidden
@@ -483,13 +642,17 @@ with gr.Blocks() as app:
483
  model_a_send,
484
  model_b_input,
485
  model_b_send,
 
486
  ],
487
  )
488
 
489
  # Function to update model titles and responses
490
  def update_model_titles_and_responses(
491
- user_input, models_state, conversation_state
492
  ):
 
 
 
493
  # Dynamically select two random models
494
  if len(available_models) < 2:
495
  raise ValueError(
@@ -506,10 +669,10 @@ with gr.Blocks() as app:
506
 
507
  try:
508
  response_a = chat_with_models(
509
- user_input, "Model A", models_state, conversation_state
510
  )
511
  response_b = chat_with_models(
512
- user_input, "Model B", models_state, conversation_state
513
  )
514
  except TimeoutError as e:
515
  # Handle the timeout by resetting components, showing a popup, and disabling inputs
@@ -517,6 +680,9 @@ with gr.Blocks() as app:
517
  gr.update(
518
  value="", interactive=False, visible=True
519
  ), # Disable shared_input
 
 
 
520
  gr.update(value="", visible=False), # Hide user_prompt_md
521
  gr.update(value="", visible=False), # Hide Model A title
522
  gr.update(value="", visible=False), # Hide Model B title
@@ -539,8 +705,9 @@ with gr.Blocks() as app:
539
 
540
  return (
541
  gr.update(visible=False), # Hide shared_input
 
542
  gr.update(
543
- value=f"**Your Prompt:**\n\n{user_input}", visible=True
544
  ), # Show user_prompt_md
545
  gr.update(value=f"### Model A:", visible=True),
546
  gr.update(value=f"### Model B:", visible=True),
@@ -597,6 +764,7 @@ with gr.Blocks() as app:
597
  # If token is successfully retrieved, update the interface state
598
  return (
599
  gr.update(visible=False), # Hide the login button
 
600
  gr.update(interactive=True), # Enable shared_input
601
  gr.update(
602
  interactive=False
@@ -610,6 +778,7 @@ with gr.Blocks() as app:
610
  print(f"Login failed: {e}")
611
  return (
612
  gr.update(visible=True), # Keep the login button visible
 
613
  gr.update(interactive=False), # Keep shared_input disabled
614
  gr.update(interactive=False), # Keep send_first disabled
615
  gr.update(
@@ -625,6 +794,7 @@ with gr.Blocks() as app:
625
  inputs=[],
626
  outputs=[
627
  login_button, # Hide the login button after successful login
 
628
  shared_input, # Enable shared_input
629
  send_first, # Enable send_first button
630
  feedback, # Enable feedback radio buttons
@@ -638,9 +808,10 @@ with gr.Blocks() as app:
638
  fn=hide_thanks_message, inputs=[], outputs=[thanks_message]
639
  ).then(
640
  fn=update_model_titles_and_responses,
641
- inputs=[shared_input, models_state, conversation_state],
642
  outputs=[
643
  shared_input,
 
644
  user_prompt_md,
645
  response_a_title,
646
  response_b_title,
@@ -776,6 +947,9 @@ with gr.Blocks() as app:
776
  gr.update(
777
  value="", interactive=True, visible=True
778
  ), # Clear shared_input
 
 
 
779
  gr.update(value="", visible=False), # Hide user_prompt_md
780
  gr.update(value="", visible=False), # Hide response_a_title
781
  gr.update(value="", visible=False), # Hide response_b_title
@@ -791,6 +965,7 @@ with gr.Blocks() as app:
791
  ), # Reset feedback selection
792
  leaderboard_data, # Updated leaderboard data
793
  gr.update(visible=True), # Show the thanks message
 
794
  )
795
 
796
  # Update the click event for the submit feedback button
@@ -799,6 +974,7 @@ with gr.Blocks() as app:
799
  inputs=[feedback, models_state, conversation_state],
800
  outputs=[
801
  shared_input, # Reset shared_input
 
802
  user_prompt_md, # Hide user_prompt_md
803
  response_a_title, # Hide Model A title
804
  response_b_title, # Hide Model B title
 
1
  import dotenv
2
  import evalica
3
+ import gitlab
4
  import io
5
  import json
6
  import os
7
  import random
8
+ import re
9
  import threading
10
 
11
  import gradio as gr
12
  import pandas as pd
13
 
 
14
  from datetime import datetime
15
+ from github import Github
16
  from gradio_leaderboard import Leaderboard
17
+ from huggingface_hub import upload_file, hf_hub_download, HfFolder, HfApi
18
  from openai import OpenAI
19
 
20
  # Load environment variables
 
48
  conversation_state = {}
49
 
50
 
51
def fetch_github_content(url):
    """Fetch detailed content from a GitHub URL using PyGithub.

    Supported URL shapes (``http`` or ``https``, optional ``www.``):

    * ``github.com/<owner>/<repo>/commit/<sha>``    -> commit payload (dict)
    * ``github.com/<owner>/<repo>/pull/<number>``   -> pull-request payload (dict)
    * ``github.com/<owner>/<repo>/issues/<number>`` -> issue payload (dict)
    * any other ``github.com/<owner>/<repo>...`` URL -> README text, or the
      repository description when the README cannot be fetched

    Args:
        url: The GitHub URL supplied by the user.

    Returns:
        A ``str`` (README or description), a ``dict`` (the JSON payload
        returned by the GitHub API for a commit, pull request or issue), or
        ``None`` when ``GITHUB_TOKEN`` is unset, the URL is not recognised,
        the category is not supported (discussions), or the API call fails.
    """
    token = os.getenv("GITHUB_TOKEN")
    if not token:
        print("GITHUB_TOKEN not set.")
        return None

    g = Github(token)

    try:
        match = re.match(
            r"https?://(?:www\.)?github\.com/([^/?#]+)/([^/?#]+)/"
            r"(commit|pull|issues|discussions)/([a-z0-9]+)",
            url,
        )

        if not match:
            # Not a commit/PR/issue link: fall back to repository-level info.
            repo_part = re.match(
                r"https?://(?:www\.)?github\.com/([^/?#]+)/([^/?#]+)", url
            )
            if not repo_part:
                return None

            owner, repo_name = repo_part.groups()
            # Clone-style URLs end in ".git", which is not part of the name.
            if repo_name.endswith(".git"):
                repo_name = repo_name[: -len(".git")]

            repo = g.get_repo(f"{owner}/{repo_name}")
            try:
                return repo.get_readme().decoded_content.decode("utf-8")
            except Exception:
                # Best effort: a missing or undecodable README falls back to
                # the description. Unlike a bare ``except``, this does not
                # swallow KeyboardInterrupt/SystemExit.
                return repo.description

        owner, repo_name, category, identifier = match.groups()

        if category == "discussions":
            # The pattern recognises discussion links so they are not
            # mistaken for plain repository links, but no fetcher exists.
            print("GitHub discussions are not supported.")
            return None

        repo = g.get_repo(f"{owner}/{repo_name}")

        # ``raw_data`` is the JSON payload from the API; ``__dict__`` would
        # expose PyGithub's private, lazily populated attribute wrappers.
        if category == "commit":
            return repo.get_commit(identifier).raw_data
        if category == "pull":
            return repo.get_pull(int(identifier)).raw_data
        return repo.get_issue(int(identifier)).raw_data

    except Exception as e:
        print(f"GitHub API error: {e}")
        return None
96
+
97
+
98
def fetch_gitlab_content(url):
    """Fetch content from a GitLab URL using python-gitlab.

    Supported URL shapes (the ``/-/`` separator is optional):

    * ``gitlab.com/<owner>/<repo>/-/commit/<sha>``            -> commit attributes
    * ``gitlab.com/<owner>/<repo>/-/merge_requests/<number>`` -> MR attributes
    * ``gitlab.com/<owner>/<repo>/-/issues/<number>``         -> issue attributes
    * any other ``gitlab.com/<owner>/<repo>...`` URL -> ``README.md`` text from
      the project's default branch, or the project description when that
      file cannot be fetched

    Only two-segment ``<owner>/<repo>`` project paths are recognised; projects
    nested in sub-groups are not resolved.

    Args:
        url: The GitLab URL supplied by the user.

    Returns:
        A ``str`` (README or description), a ``dict`` of API attributes, or
        ``None`` when ``GITLAB_TOKEN`` is unset, the URL is not recognised,
        or the API call fails.
    """
    token = os.getenv("GITLAB_TOKEN")
    if not token:
        print("GITLAB_TOKEN not set.")
        return None
    gl = gitlab.Gitlab(private_token=token)

    try:
        match = re.match(
            r"https?://gitlab\.com/([^/?#]+)/([^/?#]+)/(?:-/)?"
            r"(commit|merge_requests|issues)/([^/?#]+)",
            url,
        )
        if not match:
            # Not a commit/MR/issue link: fall back to project-level info.
            repo_part = re.match(
                r"https?://gitlab\.com/([^/?#]+)/([^/?#]+)", url
            )
            if not repo_part:
                return None

            owner, repo_name = repo_part.groups()
            project = gl.projects.get(f"{owner}/{repo_name}")
            # Read from the project's actual default branch; fall back to
            # "master" only when the API does not report one.
            ref = getattr(project, "default_branch", None) or "master"
            try:
                readme = project.files.get(file_path="README.md", ref=ref)
                # ProjectFile.decode() yields bytes; convert to text so the
                # caller does not embed a b'...' literal in the prompt.
                return readme.decode().decode("utf-8", errors="replace")
            except gitlab.exceptions.GitlabGetError:
                return project.description

        owner, repo_name, category, identifier = match.groups()
        project = gl.projects.get(f"{owner}/{repo_name}")

        # ``attributes`` is the API payload; ``__dict__`` would expose
        # python-gitlab's internal manager/bookkeeping fields.
        if category == "commit":
            return project.commits.get(identifier).attributes
        if category == "merge_requests":
            return project.mergerequests.get(int(identifier)).attributes
        return project.issues.get(int(identifier)).attributes

    except Exception as e:
        print(f"GitLab API error: {e}")
        return None
141
+
142
+
143
def fetch_huggingface_content(url):
    """Fetch detailed content from a Hugging Face URL using huggingface_hub API.

    Supported URL shapes (an optional ``datasets/`` or ``spaces/`` prefix
    selects the repository type; otherwise a model repository is assumed):

    * ``huggingface.co/<repo_id>/commit/<sha>``         -> commit info
    * ``huggingface.co/<repo_id>/discussions/<number>`` -> discussion details
    * ``huggingface.co/<repo_id>``                      -> repository info

    Query strings, fragments and trailing slashes are ignored.

    Args:
        url: The Hugging Face URL supplied by the user.

    Returns:
        The ``__dict__`` of the object returned by the Hub API, or ``None``
        when ``HF_TOKEN`` is unset, the URL carries no repository path, the
        commit cannot be found, or the API call fails.
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        print("HF_TOKEN not set.")
        return None

    api = HfApi(token=token)

    try:
        # Reduce the URL to its path once so every branch parses it the same
        # way (previously the branches split on different host markers).
        path = url.split("huggingface.co/", 1)[-1]
        path = path.split("?", 1)[0].split("#", 1)[0].strip("/")
        if not path:
            return None

        # Dataset and Space URLs carry a type prefix that is not part of the
        # repo id and must be passed to the API as ``repo_type`` instead.
        repo_type = None
        for prefix, kind in (("datasets/", "dataset"), ("spaces/", "space")):
            if path.startswith(prefix):
                repo_type = kind
                path = path[len(prefix):]
                break

        if "/commit/" in path:
            repo_id, _, rest = path.partition("/commit/")
            commit_hash = rest.split("/", 1)[0]
            commits = api.list_repo_commits(
                repo_id=repo_id, repo_type=repo_type, revision=commit_hash
            )
            # History is listed starting at the requested revision, so the
            # first entry is the commit itself.
            return commits[0].__dict__ if commits else None

        if "/discussions/" in path:
            repo_id, _, rest = path.partition("/discussions/")
            discussion_num = int(rest.split("/", 1)[0])
            discussion = api.get_discussion_details(
                repo_id=repo_id,
                discussion_num=discussion_num,
                repo_type=repo_type,
            )
            return discussion.__dict__

        repo_info = api.repo_info(repo_id=path, repo_type=repo_type)
        return repo_info.__dict__

    except Exception as e:
        print(f"Hugging Face API error: {e}")
        return None
178
+
179
+
180
def fetch_url_content(url):
    """Dispatch *url* to the fetcher of the platform named in it.

    The platform is chosen by substring match, checked in the order
    ``github.com``, ``gitlab.com``, ``huggingface.co``. The result of the
    selected fetcher is returned unchanged. ``None`` is returned when no
    platform matches or when anything raises along the way (the error is
    printed).
    """
    content = None
    try:
        if "github.com" in url:
            content = fetch_github_content(url)
        elif "gitlab.com" in url:
            content = fetch_gitlab_content(url)
        elif "huggingface.co" in url:
            content = fetch_huggingface_content(url)
    except Exception as e:
        print(f"Error fetching URL content: {e}")
        content = None
    return content
192
+
193
+
194
  # Truncate prompt
195
  def truncate_prompt(user_input, model_alias, models, conversation_state):
196
  """
 
480
  # Add title and description as a Markdown component
481
  leaderboard_intro = gr.Markdown(
482
  """
483
+ # 🏆 Software Engineering (SE) Chatbot Leaderboard: Community-Driven Evaluation of Top SE Chatbots
484
 
485
+ The SE Arena is an open-source platform designed to evaluate language models through human preference, fostering transparency and collaboration. Developed by researchers at [Software Analysis and Intelligence Lab (SAIL)](https://sail.cs.queensu.ca), the platform empowers the community to assess and compare the performance of leading foundation models in SE tasks. For technical details, check out our [paper](TODO).
486
  """,
487
  elem_classes="leaderboard-intro",
488
  )
 
521
  # ⚔️ Software Engineering (SE) Arena: Explore and Test the Best SE Chatbots with Long-Context Interactions
522
 
523
  ## 📜How It Works
524
+ - **Blind Comparison**: Submit a SE-related query to two anonymous chatbots randomly selected from up to {len(available_models)} top models, including ChatGPT, Gemini, Claude, Llama, and others.
525
  - **Interactive Voting**: Engage in multi-turn dialogues with both chatbots and compare their responses. You can continue the conversation until you confidently choose the better model.
526
  - **Fair Play Rules**: Votes are counted only if chatbot identities remain anonymous. Revealing a chatbot's identity disqualifies the session.
527
 
 
539
  login_button = gr.Button(
540
  "Sign in with Hugging Face", elem_id="oauth-button"
541
  )
542
+
543
+ # NEW: Add a textbox for the repository URL above the user prompt
544
+ repo_url = gr.Textbox(
545
+ show_label=False,
546
+ placeholder="Enter the repo-related URL here (optional)",
547
+ lines=1,
548
+ interactive=False,
549
+ )
550
+
551
  # Components with initial non-interactive state
552
  shared_input = gr.Textbox(
553
+ show_label=False,
554
+ placeholder="Enter your query for both models here",
555
  lines=2,
556
  interactive=False, # Initially non-interactive
557
  )
 
596
  model_b_input_state = gr.update(interactive=True)
597
  model_b_send_state = toggle_submit_button(model_b_input.value)
598
 
599
+ # Keep repo_url in sync with shared_input
600
+ repo_url_state = gr.update(interactive=True)
601
+
602
  return (
603
  gr.update(visible=False), # Hide the timeout popup
604
  shared_input_state, # Update shared_input
 
607
  model_a_send_state, # Update model_a_send button
608
  model_b_input_state, # Update model_b_input
609
  model_b_send_state, # Update model_b_send button
610
+ repo_url_state, # Update repo_url button
611
  )
612
 
613
  # Multi-round inputs, initially hidden
 
642
  model_a_send,
643
  model_b_input,
644
  model_b_send,
645
+ repo_url,
646
  ],
647
  )
648
 
649
  # Function to update model titles and responses
650
  def update_model_titles_and_responses(
651
+ repo_info, user_input, models_state, conversation_state
652
  ):
653
+ # Combine repo-related information (if any) and user query into one prompt.
654
+ combined_user_input = f"Repo-related Information: {fetch_url_content(repo_info)}\n\n{user_input}" if repo_info else user_input
655
+
656
  # Dynamically select two random models
657
  if len(available_models) < 2:
658
  raise ValueError(
 
669
 
670
  try:
671
  response_a = chat_with_models(
672
+ combined_user_input, "Model A", models_state, conversation_state
673
  )
674
  response_b = chat_with_models(
675
+ combined_user_input, "Model B", models_state, conversation_state
676
  )
677
  except TimeoutError as e:
678
  # Handle the timeout by resetting components, showing a popup, and disabling inputs
 
680
  gr.update(
681
  value="", interactive=False, visible=True
682
  ), # Disable shared_input
683
+ gr.update(
684
+ value="", interactive=False, visible=True
685
+ ), # Disable repo_url
686
  gr.update(value="", visible=False), # Hide user_prompt_md
687
  gr.update(value="", visible=False), # Hide Model A title
688
  gr.update(value="", visible=False), # Hide Model B title
 
705
 
706
  return (
707
  gr.update(visible=False), # Hide shared_input
708
+ gr.update(visible=False), # Hide repo_url the same way
709
  gr.update(
710
+ value=f"**Your Query:**\n\n{user_input}", visible=True
711
  ), # Show user_prompt_md
712
  gr.update(value=f"### Model A:", visible=True),
713
  gr.update(value=f"### Model B:", visible=True),
 
764
  # If token is successfully retrieved, update the interface state
765
  return (
766
  gr.update(visible=False), # Hide the login button
767
+ gr.update(interactive=True), # repo_url -> Enable in sync
768
  gr.update(interactive=True), # Enable shared_input
769
  gr.update(
770
  interactive=False
 
778
  print(f"Login failed: {e}")
779
  return (
780
  gr.update(visible=True), # Keep the login button visible
781
+ gr.update(interactive=False), # repo_url -> disable if login failed
782
  gr.update(interactive=False), # Keep shared_input disabled
783
  gr.update(interactive=False), # Keep send_first disabled
784
  gr.update(
 
794
  inputs=[],
795
  outputs=[
796
  login_button, # Hide the login button after successful login
797
+ repo_url, # Keep this in sync with shared_input
798
  shared_input, # Enable shared_input
799
  send_first, # Enable send_first button
800
  feedback, # Enable feedback radio buttons
 
808
  fn=hide_thanks_message, inputs=[], outputs=[thanks_message]
809
  ).then(
810
  fn=update_model_titles_and_responses,
811
+ inputs=[repo_url, shared_input, models_state, conversation_state],
812
  outputs=[
813
  shared_input,
814
+ repo_url,
815
  user_prompt_md,
816
  response_a_title,
817
  response_b_title,
 
947
  gr.update(
948
  value="", interactive=True, visible=True
949
  ), # Clear shared_input
950
+ gr.update(
951
+ value="", interactive=True, visible=True
952
+ ), # Clear repo_url
953
  gr.update(value="", visible=False), # Hide user_prompt_md
954
  gr.update(value="", visible=False), # Hide response_a_title
955
  gr.update(value="", visible=False), # Hide response_b_title
 
965
  ), # Reset feedback selection
966
  leaderboard_data, # Updated leaderboard data
967
  gr.update(visible=True), # Show the thanks message
968
+ gr.update(value="", interactive=True, visible=True), # Show the repo-related url message
969
  )
970
 
971
  # Update the click event for the submit feedback button
 
974
  inputs=[feedback, models_state, conversation_state],
975
  outputs=[
976
  shared_input, # Reset shared_input
977
+ repo_url, # Show the repo-related URL message
978
  user_prompt_md, # Hide user_prompt_md
979
  response_a_title, # Hide Model A title
980
  response_b_title, # Hide Model B title
context_window.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "gpt-3.5-turbo": 16000,
3
  "gpt-3.5-turbo-16k": 16000,
4
- "gpt-4": 8192,
5
  "gpt-4-32k": 32000,
6
  "gpt-4-turbo": 128000,
7
  "gpt-4o": 128000,
@@ -14,9 +13,7 @@
14
  "llama-3-70b": 128000,
15
  "llama-3.1-405b": 128000,
16
  "llama-3.1-70b": 128000,
17
- "llama-3.1-8b": 128000,
18
  "llama-3.3-70b": 128000,
19
- "llama-v3.2-3b": 128000,
20
  "o1-all": 128000,
21
  "o1-mini-all": 128000,
22
  "Qwen2-72B-Instruct": 131072,
 
1
  {
2
  "gpt-3.5-turbo": 16000,
3
  "gpt-3.5-turbo-16k": 16000,
 
4
  "gpt-4-32k": 32000,
5
  "gpt-4-turbo": 128000,
6
  "gpt-4o": 128000,
 
13
  "llama-3-70b": 128000,
14
  "llama-3.1-405b": 128000,
15
  "llama-3.1-70b": 128000,
 
16
  "llama-3.3-70b": 128000,
 
17
  "o1-all": 128000,
18
  "o1-mini-all": 128000,
19
  "Qwen2-72B-Instruct": 131072,
requirements.txt CHANGED
@@ -1,7 +1,8 @@
1
- aisuite[all]
2
  evalica
3
  gradio[oauth]
4
  gradio_leaderboard
5
  huggingface_hub
 
6
  python-dotenv
 
7
  vertexai
 
 
1
  evalica
2
  gradio[oauth]
3
  gradio_leaderboard
4
  huggingface_hub
5
+ PyGithub
6
  python-dotenv
7
+ python-gitlab
8
  vertexai