halfacupoftea committed on
Commit
3960366
·
1 Parent(s): f03baa2

Make API calls asynchronous

Browse files
Files changed (4) hide show
  1. agent/core.py +1 -1
  2. tools/code_index.py +50 -17
  3. tools/github_tools.py +1 -1
  4. tools/utils.py +10 -7
agent/core.py CHANGED
@@ -22,7 +22,7 @@ api_key = MISTRAL_API_KEY
22
  model = "devstral-small-latest"
23
  client = Mistral(api_key=api_key)
24
 
25
- def run_agent(issue_url: str, branch_name: str = "main") -> str:
26
  """
27
  Run the agent workflow on a given GitHub issue URL.
28
  """
 
22
  model = "devstral-small-latest"
23
  client = Mistral(api_key=api_key)
24
 
25
+ async def run_agent(issue_url: str, branch_name: str = "main") -> str:
26
  """
27
  Run the agent workflow on a given GitHub issue URL.
28
  """
tools/code_index.py CHANGED
@@ -1,6 +1,6 @@
 
1
  import numpy as np
2
  import os
3
- import re
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import time
6
  from typing import List, Dict
@@ -74,33 +74,60 @@ def select_relevant_files_semantic(issue_description: str, file_paths: List[str]
74
  # ''',
75
  # ['.github/FUNDING.yml', '.github/workflows/process_challenge.yml', '.gitignore', 'README.md', 'annotations/test_annotations_devsplit.json', 'annotations/test_annotations_testsplit.json', 'challenge_config.yaml', 'challenge_data/__init__.py', 'challenge_data/challenge_1/__init__.py', 'challenge_data/challenge_1/main.py', 'evaluation_script/__init__.py', 'evaluation_script/main.py', 'github/challenge_processing_script.py', 'github/config.py', 'github/host_config.json', 'github/requirements.txt', 'github/utils.py', 'logo.jpg', 'remote_challenge_evaluation/README.md', 'remote_challenge_evaluation/eval_ai_interface.py', 'remote_challenge_evaluation/evaluate.py', 'remote_challenge_evaluation/main.py', 'remote_challenge_evaluation/requirements.txt', 'run.sh', 'submission.json', 'templates/challenge_phase_1_description.html', 'templates/challenge_phase_2_description.html', 'templates/description.html', 'templates/evaluation_details.html', 'templates/submission_guidelines.html', 'templates/terms_and_conditions.html', 'worker/__init__.py', 'worker/run.py']))
76
 
77
- def build_repo_index(owner: str, repo: str, ref: str = "main", issue_description: str = "") -> VectorStoreIndex:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  model_name = "codestral-embed"
79
  embed_model = MistralAIEmbedding(model_name=model_name, api_key=MISTRAL_API_KEY)
80
  print(f"[Indexing] Starting to index repository: {owner}/{repo} at ref {ref}...")
81
- file_paths = fetch_repo_files(owner, repo, ref)
 
82
 
83
  if issue_description:
84
- file_paths = select_relevant_files_semantic(issue_description, file_paths)
85
 
86
  documents = []
 
87
  for path in file_paths:
88
  _, ext = os.path.splitext(path)
89
  if ext.lower() not in INCLUDE_FILE_EXTENSIONS:
90
  continue
91
 
92
  try:
93
- content = fetch_file_content(owner, repo, path, ref)
94
  documents.append(Document(text=content, metadata={"file_path": path}))
95
  print(f"[Indexing] Added file: {path}")
96
- time.sleep(0.1)
97
  except Exception as e:
98
  print(f"[Warning] Skipping file {path} due to error: {e}")
99
 
100
- index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
 
 
 
 
 
101
  print(f"[Indexing] Finished indexing {len(documents)} files.")
102
  return index
103
 
 
104
  # print(build_repo_index("aditi-dsi", "EvalAI-Starters", "master",
105
  # '''
106
  # 🛠️ Configuration Error: Placeholder values detected in host_config.json
@@ -113,11 +140,12 @@ def build_repo_index(owner: str, repo: str, ref: str = "main", issue_description
113
  # '''))
114
 
115
 
116
- def retrieve_context(owner: str, repo: str, ref: str, issue_description: str) -> List[str]:
117
  print("Issue Description:", issue_description)
118
- index = build_repo_index(owner, repo, ref, issue_description)
119
  Settings.llm = MistralAI(model="codestral-latest", api_key=MISTRAL_API_KEY)
120
  Settings.embed_model = MistralAIEmbedding(model_name="codestral-embed", api_key=MISTRAL_API_KEY)
 
121
  retriever = index.as_retriever(similarity_top_k=3)
122
 
123
  query_engine = RetrieverQueryEngine(
@@ -127,16 +155,21 @@ def retrieve_context(owner: str, repo: str, ref: str, issue_description: str) ->
127
  SimilarityPostprocessor(similarity_top_k=3, similarity_cutoff=0.75)
128
  ],
129
  )
 
130
  query = (
131
- f"Please give relevant information from the codebase that highly matches the keywords of this issue and useful for solving or understanding this issue:{issue_description}"
132
- "STRICT RULES:\n"
133
- "- ONLY use information available in the retriever context.\n"
134
- "- DO NOT generate or assume any information outside the given context.\n"
135
- f"- ONLY include context that is highly relevant and clearly useful for understanding or solving this issue: {issue_description}\n"
136
- "- DO NOT include generic, loosely related, or unrelated content.\n"
137
  )
138
- print("query", query)
139
- response = query_engine.query(query)
 
 
 
 
140
  print(response)
141
  return response
142
 
 
1
+ import asyncio
2
  import numpy as np
3
  import os
 
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import time
6
  from typing import List, Dict
 
74
  # ''',
75
  # ['.github/FUNDING.yml', '.github/workflows/process_challenge.yml', '.gitignore', 'README.md', 'annotations/test_annotations_devsplit.json', 'annotations/test_annotations_testsplit.json', 'challenge_config.yaml', 'challenge_data/__init__.py', 'challenge_data/challenge_1/__init__.py', 'challenge_data/challenge_1/main.py', 'evaluation_script/__init__.py', 'evaluation_script/main.py', 'github/challenge_processing_script.py', 'github/config.py', 'github/host_config.json', 'github/requirements.txt', 'github/utils.py', 'logo.jpg', 'remote_challenge_evaluation/README.md', 'remote_challenge_evaluation/eval_ai_interface.py', 'remote_challenge_evaluation/evaluate.py', 'remote_challenge_evaluation/main.py', 'remote_challenge_evaluation/requirements.txt', 'run.sh', 'submission.json', 'templates/challenge_phase_1_description.html', 'templates/challenge_phase_2_description.html', 'templates/description.html', 'templates/evaluation_details.html', 'templates/submission_guidelines.html', 'templates/terms_and_conditions.html', 'worker/__init__.py', 'worker/run.py']))
76
 
77
+
78
+ # Assuming these are async now or wrapped appropriately
79
+ # async def fetch_repo_files(...)
80
+ # async def fetch_file_content(...)
81
+ # async def VectorStoreIndex.from_documents(...)
82
+
83
+ async def async_retry_on_429(func, *args, max_retries=3, delay=1, **kwargs):
84
+ for attempt in range(max_retries):
85
+ try:
86
+ return await func(*args, **kwargs)
87
+ except Exception as e:
88
+ status = getattr(e, 'response', None) and getattr(e.response, 'status_code', None)
89
+ if status == 429:
90
+ print(f"[Retry] Rate limit hit while calling {func.__name__}. Attempt {attempt+1}/{max_retries}. Retrying in {delay} seconds...")
91
+ await asyncio.sleep(delay)
92
+ delay *= 2
93
+ else:
94
+ raise
95
+
96
+ async def build_repo_index(owner: str, repo: str, ref: str = "main", issue_description: str = "") -> VectorStoreIndex:
97
  model_name = "codestral-embed"
98
  embed_model = MistralAIEmbedding(model_name=model_name, api_key=MISTRAL_API_KEY)
99
  print(f"[Indexing] Starting to index repository: {owner}/{repo} at ref {ref}...")
100
+
101
+ file_paths = await async_retry_on_429(fetch_repo_files, owner, repo, ref)
102
 
103
  if issue_description:
104
+ file_paths = select_relevant_files_semantic(issue_description, file_paths) # stays sync unless heavy
105
 
106
  documents = []
107
+
108
  for path in file_paths:
109
  _, ext = os.path.splitext(path)
110
  if ext.lower() not in INCLUDE_FILE_EXTENSIONS:
111
  continue
112
 
113
  try:
114
+ content = await async_retry_on_429(fetch_file_content, owner, repo, path, ref)
115
  documents.append(Document(text=content, metadata={"file_path": path}))
116
  print(f"[Indexing] Added file: {path}")
117
+ await asyncio.sleep(0.1)
118
  except Exception as e:
119
  print(f"[Warning] Skipping file {path} due to error: {e}")
120
 
121
+ try:
122
+ index = await async_retry_on_429(VectorStoreIndex.from_documents, documents, embed_model=embed_model)
123
+ except Exception as e:
124
+ print(f"[Error] Failed to build index due to: {e}")
125
+ raise
126
+
127
  print(f"[Indexing] Finished indexing {len(documents)} files.")
128
  return index
129
 
130
+
131
  # print(build_repo_index("aditi-dsi", "EvalAI-Starters", "master",
132
  # '''
133
  # 🛠️ Configuration Error: Placeholder values detected in host_config.json
 
140
  # '''))
141
 
142
 
143
+ async def retrieve_context(owner: str, repo: str, ref: str, issue_description: str) -> List[str]:
144
  print("Issue Description:", issue_description)
145
+ index = await build_repo_index(owner, repo, ref, issue_description)
146
  Settings.llm = MistralAI(model="codestral-latest", api_key=MISTRAL_API_KEY)
147
  Settings.embed_model = MistralAIEmbedding(model_name="codestral-embed", api_key=MISTRAL_API_KEY)
148
+
149
  retriever = index.as_retriever(similarity_top_k=3)
150
 
151
  query_engine = RetrieverQueryEngine(
 
155
  SimilarityPostprocessor(similarity_top_k=3, similarity_cutoff=0.75)
156
  ],
157
  )
158
+
159
  query = (
160
+ f"Please give relevant information from the codebase that highly matches the keywords of this issue and is useful for solving or understanding this issue: {issue_description}\n"
161
+ "STRICT RULES:\n"
162
+ "- ONLY use information available in the retriever context.\n"
163
+ "- DO NOT generate or assume any information outside the given context.\n"
164
+ f"- ONLY include context that is highly relevant and clearly useful for understanding or solving this issue: {issue_description}\n"
165
+ "- DO NOT include generic, loosely related, or unrelated content.\n"
166
  )
167
+
168
+ print("Query:", query)
169
+
170
+ # If query_engine.query is sync, wrap it in a thread
171
+ response = await asyncio.to_thread(query_engine.query, query)
172
+
173
  print(response)
174
  return response
175
 
tools/github_tools.py CHANGED
@@ -44,4 +44,4 @@ def post_comment(owner, repo, issue_num, comment_body):
44
  else:
45
  raise Exception(f"Failed to post comment: {response.status_code} {response.text}")
46
 
47
- # print(post_comment("aditi-dsi", "testing-cryptope", "3", "This is a test comment from OpenSorus."))
 
44
  else:
45
  raise Exception(f"Failed to post comment: {response.status_code} {response.text}")
46
 
47
+ # print(post_comment("aditi-dsi", "testing-cryptope", "3", "This is a test comment from OpenSorus."))
tools/utils.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import base64
2
  from datetime import datetime, timezone, timedelta
3
  import jwt
@@ -93,7 +94,7 @@ def get_installation_token(installation_id):
93
 
94
  # print(get_installation_token(69452220))
95
 
96
- def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
97
  """
98
  Lists all files in the repository by recursively fetching the Git tree from GitHub API.
99
  Returns a list of file paths.
@@ -105,7 +106,8 @@ def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
105
  "Authorization": f"Bearer {token}",
106
  "Accept": "application/vnd.github.v3+json"
107
  }
108
- response = github_request("GET", url, headers=headers)
 
109
  if response.status_code != 200:
110
  raise Exception(f"Failed to list repository files: {response.status_code} {response.text}")
111
 
@@ -115,18 +117,20 @@ def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
115
 
116
  # print(fetch_repo_files("aditi-dsi", "EvalAI-Starters", "master"))
117
 
118
- def fetch_file_content(owner: str, repo: str, path: str, ref: str = "main") -> str:
119
  """
120
  Fetches the content of a file from the GitHub repository.
121
  """
122
  installation_id = get_installation_id(owner, repo)
123
- token = get_installation_token(installation_id)
 
124
  url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref}"
125
  headers = {
126
  "Authorization": f"Bearer {token}",
127
  "Accept": "application/vnd.github.v3+json"
128
  }
129
- response = github_request("GET", url, headers=headers)
 
130
  if response.status_code != 200:
131
  raise Exception(f"Failed to fetch file content {path}: {response.status_code} {response.text}")
132
 
@@ -134,5 +138,4 @@ def fetch_file_content(owner: str, repo: str, path: str, ref: str = "main") -> s
134
  content = base64.b64decode(content_json["content"]).decode("utf-8", errors="ignore")
135
  return content
136
 
137
- # print(fetch_file_content("aditi-dsi", "testing-cryptope", "frontend/src/lib/buildSwap.ts", "main"))
138
-
 
1
+ import asyncio
2
  import base64
3
  from datetime import datetime, timezone, timedelta
4
  import jwt
 
94
 
95
  # print(get_installation_token(69452220))
96
 
97
+ async def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
98
  """
99
  Lists all files in the repository by recursively fetching the Git tree from GitHub API.
100
  Returns a list of file paths.
 
106
  "Authorization": f"Bearer {token}",
107
  "Accept": "application/vnd.github.v3+json"
108
  }
109
+
110
+ response = await asyncio.to_thread(github_request, "GET", url, headers=headers)
111
  if response.status_code != 200:
112
  raise Exception(f"Failed to list repository files: {response.status_code} {response.text}")
113
 
 
117
 
118
  # print(fetch_repo_files("aditi-dsi", "EvalAI-Starters", "master"))
119
 
120
+ async def fetch_file_content(owner: str, repo: str, path: str, ref: str = "main") -> str:
121
  """
122
  Fetches the content of a file from the GitHub repository.
123
  """
124
  installation_id = get_installation_id(owner, repo)
125
+ token = await asyncio.to_thread(get_installation_token, installation_id)
126
+
127
  url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref}"
128
  headers = {
129
  "Authorization": f"Bearer {token}",
130
  "Accept": "application/vnd.github.v3+json"
131
  }
132
+
133
+ response = await asyncio.to_thread(github_request, "GET", url, headers=headers)
134
  if response.status_code != 200:
135
  raise Exception(f"Failed to fetch file content {path}: {response.status_code} {response.text}")
136
 
 
138
  content = base64.b64decode(content_json["content"]).decode("utf-8", errors="ignore")
139
  return content
140
 
141
+ # print(fetch_file_content("aditi-dsi", "testing-cryptope", "frontend/src/lib/buildSwap.ts", "main"))