Spaces:

Agents-MCP-Hackathon
/

OpenSorus

Running

App Files Files Community

halfacupoftea committed on Jun 6

Commit

3960366

1 Parent(s): f03baa2

Make API calls asynchronous

Browse files

Files changed (4) hide show

agent/core.py +1 -1
tools/code_index.py +50 -17
tools/github_tools.py +1 -1
tools/utils.py +10 -7

agent/core.py CHANGED Viewed

@@ -22,7 +22,7 @@ api_key = MISTRAL_API_KEY
 model = "devstral-small-latest"
 client = Mistral(api_key=api_key)
-def run_agent(issue_url: str, branch_name: str = "main") -> str:
     """
     Run the agent workflow on a given GitHub issue URL.
     """

 model = "devstral-small-latest"
 client = Mistral(api_key=api_key)
+async def run_agent(issue_url: str, branch_name: str = "main") -> str:
     """
     Run the agent workflow on a given GitHub issue URL.
     """

tools/code_index.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import numpy as np
 import os
-import re
 from sklearn.metrics.pairwise import cosine_similarity
 import time
 from typing import List, Dict
@@ -74,33 +74,60 @@ def select_relevant_files_semantic(issue_description: str, file_paths: List[str]
 # ''',
 # ['.github/FUNDING.yml', '.github/workflows/process_challenge.yml', '.gitignore', 'README.md', 'annotations/test_annotations_devsplit.json', 'annotations/test_annotations_testsplit.json', 'challenge_config.yaml', 'challenge_data/__init__.py', 'challenge_data/challenge_1/__init__.py', 'challenge_data/challenge_1/main.py', 'evaluation_script/__init__.py', 'evaluation_script/main.py', 'github/challenge_processing_script.py', 'github/config.py', 'github/host_config.json', 'github/requirements.txt', 'github/utils.py', 'logo.jpg', 'remote_challenge_evaluation/README.md', 'remote_challenge_evaluation/eval_ai_interface.py', 'remote_challenge_evaluation/evaluate.py', 'remote_challenge_evaluation/main.py', 'remote_challenge_evaluation/requirements.txt', 'run.sh', 'submission.json', 'templates/challenge_phase_1_description.html', 'templates/challenge_phase_2_description.html', 'templates/description.html', 'templates/evaluation_details.html', 'templates/submission_guidelines.html', 'templates/terms_and_conditions.html', 'worker/__init__.py', 'worker/run.py']))
-def build_repo_index(owner: str, repo: str, ref: str = "main", issue_description: str = "") -> VectorStoreIndex:
     model_name = "codestral-embed"
     embed_model = MistralAIEmbedding(model_name=model_name, api_key=MISTRAL_API_KEY)
     print(f"[Indexing] Starting to index repository: {owner}/{repo} at ref {ref}...")
-    file_paths = fetch_repo_files(owner, repo, ref)
     if issue_description:
-        file_paths = select_relevant_files_semantic(issue_description, file_paths)
     documents = []
     for path in file_paths:
         _, ext = os.path.splitext(path)
         if ext.lower() not in INCLUDE_FILE_EXTENSIONS:
             continue
         try:
-            content = fetch_file_content(owner, repo, path, ref)
             documents.append(Document(text=content, metadata={"file_path": path}))
             print(f"[Indexing] Added file: {path}")
-            time.sleep(0.1)
         except Exception as e:
             print(f"[Warning] Skipping file {path} due to error: {e}")
-    index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
     print(f"[Indexing] Finished indexing {len(documents)} files.")
     return index
 # print(build_repo_index("aditi-dsi", "EvalAI-Starters", "master",
     # '''
     # 🛠️ Configuration Error: Placeholder values detected in host_config.json
@@ -113,11 +140,12 @@ def build_repo_index(owner: str, repo: str, ref: str = "main", issue_description
     # '''))
-def retrieve_context(owner: str, repo: str, ref: str, issue_description: str) -> List[str]:
     print("Issue Description:", issue_description)
-    index = build_repo_index(owner, repo, ref, issue_description)
     Settings.llm = MistralAI(model="codestral-latest", api_key=MISTRAL_API_KEY)
     Settings.embed_model = MistralAIEmbedding(model_name="codestral-embed", api_key=MISTRAL_API_KEY)
     retriever = index.as_retriever(similarity_top_k=3)
     query_engine = RetrieverQueryEngine(
@@ -127,16 +155,21 @@ def retrieve_context(owner: str, repo: str, ref: str, issue_description: str) ->
             SimilarityPostprocessor(similarity_top_k=3, similarity_cutoff=0.75)
         ],
     )
     query = (
-    f"Please give relevant information from the codebase that highly matches the keywords of this issue and useful for solving or understanding this issue:{issue_description}"
-    "STRICT RULES:\n"
-    "- ONLY use information available in the retriever context.\n"
-    "- DO NOT generate or assume any information outside the given context.\n"
-    f"- ONLY include context that is highly relevant and clearly useful for understanding or solving this issue: {issue_description}\n"
-    "- DO NOT include generic, loosely related, or unrelated content.\n"
     )
-    print("query", query)
-    response = query_engine.query(query)
     print(response)
     return response

+import asyncio
 import numpy as np
 import os
 from sklearn.metrics.pairwise import cosine_similarity
 import time
 from typing import List, Dict
 # ''',
 # ['.github/FUNDING.yml', '.github/workflows/process_challenge.yml', '.gitignore', 'README.md', 'annotations/test_annotations_devsplit.json', 'annotations/test_annotations_testsplit.json', 'challenge_config.yaml', 'challenge_data/__init__.py', 'challenge_data/challenge_1/__init__.py', 'challenge_data/challenge_1/main.py', 'evaluation_script/__init__.py', 'evaluation_script/main.py', 'github/challenge_processing_script.py', 'github/config.py', 'github/host_config.json', 'github/requirements.txt', 'github/utils.py', 'logo.jpg', 'remote_challenge_evaluation/README.md', 'remote_challenge_evaluation/eval_ai_interface.py', 'remote_challenge_evaluation/evaluate.py', 'remote_challenge_evaluation/main.py', 'remote_challenge_evaluation/requirements.txt', 'run.sh', 'submission.json', 'templates/challenge_phase_1_description.html', 'templates/challenge_phase_2_description.html', 'templates/description.html', 'templates/evaluation_details.html', 'templates/submission_guidelines.html', 'templates/terms_and_conditions.html', 'worker/__init__.py', 'worker/run.py']))
# fetch_repo_files and fetch_file_content are coroutines (tools/utils.py).
# VectorStoreIndex.from_documents is a plain synchronous classmethod, so the
# retry helper below accepts both coroutine functions and regular callables.
async def async_retry_on_429(func, *args, max_retries=3, delay=1, **kwargs):
    """Call ``func`` and retry with exponential backoff on HTTP 429.

    Args:
        func: Callable to invoke. It may be a coroutine function (its result
            is awaited) or a regular function (its return value is used
            as-is; note that a blocking function then runs on the event loop).
        *args: Positional arguments forwarded to ``func``.
        max_retries: Maximum number of attempts. Values below 1 are treated
            as a single attempt.
        delay: Seconds to wait before the second attempt; doubled after every
            rate-limited attempt.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns (or resolves to).

    Raises:
        Exception: Any error that is not a 429 is re-raised immediately. A
            429 on the final attempt is re-raised as well, so callers never
            receive a silent ``None`` when every attempt was rate limited.

    Only exceptions exposing ``.response.status_code`` or ``.status_code``
    are recognised as rate-limit errors; a bare ``Exception`` carrying the
    status only in its message text is not retried.
    """
    import inspect  # function-scope import keeps this block self-contained

    attempts = max(1, max_retries)
    func_name = getattr(func, "__name__", repr(func))
    for attempt in range(1, attempts + 1):
        try:
            result = func(*args, **kwargs)
            if inspect.isawaitable(result):
                result = await result
            return result
        except Exception as e:
            # Use getattr instead of truthiness: some response objects
            # (e.g. requests.Response) are falsy for 4xx/5xx statuses.
            response = getattr(e, "response", None)
            status = getattr(response, "status_code", None)
            if status is None:
                status = getattr(e, "status_code", None)
            if status != 429 or attempt == attempts:
                raise
            print(f"[Retry] Rate limit hit while calling {func_name}. Attempt {attempt}/{attempts}. Retrying in {delay} seconds...")
            await asyncio.sleep(delay)
            delay *= 2
async def build_repo_index(owner: str, repo: str, ref: str = "main", issue_description: str = "") -> VectorStoreIndex:
    """Fetch repository files and build a vector index over their contents.

    Args:
        owner: GitHub account that owns the repository.
        repo: Repository name.
        ref: Git ref passed to the file-listing and file-content fetchers.
        issue_description: When non-empty, the file list is first narrowed
            with ``select_relevant_files_semantic``.

    Returns:
        A ``VectorStoreIndex`` built from every fetched file whose extension
        is in ``INCLUDE_FILE_EXTENSIONS``.

    Raises:
        Exception: Propagated from listing the repository files or from
            building the index. Failures on individual files are logged and
            that file is skipped.
    """
    model_name = "codestral-embed"
    embed_model = MistralAIEmbedding(model_name=model_name, api_key=MISTRAL_API_KEY)
    print(f"[Indexing] Starting to index repository: {owner}/{repo} at ref {ref}...")

    file_paths = await async_retry_on_429(fetch_repo_files, owner, repo, ref)
    if issue_description:
        file_paths = select_relevant_files_semantic(issue_description, file_paths)  # stays sync unless heavy

    documents = []
    for path in file_paths:
        _, ext = os.path.splitext(path)
        if ext.lower() not in INCLUDE_FILE_EXTENSIONS:
            continue
        try:
            content = await async_retry_on_429(fetch_file_content, owner, repo, path, ref)
            documents.append(Document(text=content, metadata={"file_path": path}))
            print(f"[Indexing] Added file: {path}")
            # Short pause between requests, carried over from the sync version.
            await asyncio.sleep(0.1)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole index.
            print(f"[Warning] Skipping file {path} due to error: {e}")

    try:
        # from_documents is synchronous: awaiting its return value raises
        # TypeError, and calling it inline blocks the event loop. Running it
        # through asyncio.to_thread gives the retry helper a real awaitable.
        index = await async_retry_on_429(
            asyncio.to_thread,
            VectorStoreIndex.from_documents,
            documents,
            embed_model=embed_model,
        )
    except Exception as e:
        print(f"[Error] Failed to build index due to: {e}")
        raise
    print(f"[Indexing] Finished indexing {len(documents)} files.")
    return index
 # print(build_repo_index("aditi-dsi", "EvalAI-Starters", "master",
     # '''
     # 🛠️ Configuration Error: Placeholder values detected in host_config.json
     # '''))
+async def retrieve_context(owner: str, repo: str, ref: str, issue_description: str) -> List[str]:
     print("Issue Description:", issue_description)
+    index = await build_repo_index(owner, repo, ref, issue_description)
     Settings.llm = MistralAI(model="codestral-latest", api_key=MISTRAL_API_KEY)
     Settings.embed_model = MistralAIEmbedding(model_name="codestral-embed", api_key=MISTRAL_API_KEY)
     retriever = index.as_retriever(similarity_top_k=3)
     query_engine = RetrieverQueryEngine(
             SimilarityPostprocessor(similarity_top_k=3, similarity_cutoff=0.75)
         ],
     )
     query = (
+        f"Please give relevant information from the codebase that highly matches the keywords of this issue and is useful for solving or understanding this issue: {issue_description}\n"
+        "STRICT RULES:\n"
+        "- ONLY use information available in the retriever context.\n"
+        "- DO NOT generate or assume any information outside the given context.\n"
+        f"- ONLY include context that is highly relevant and clearly useful for understanding or solving this issue: {issue_description}\n"
+        "- DO NOT include generic, loosely related, or unrelated content.\n"
     )
+    print("Query:", query)
+    # If query_engine.query is sync, wrap it in a thread
+    response = await asyncio.to_thread(query_engine.query, query)
     print(response)
     return response

tools/github_tools.py CHANGED Viewed

@@ -44,4 +44,4 @@ def post_comment(owner, repo, issue_num, comment_body):
     else:
         raise Exception(f"Failed to post comment: {response.status_code} {response.text}")
-# print(post_comment("aditi-dsi", "testing-cryptope", "3", "This is a test comment from OpenSorus."))

     else:
         raise Exception(f"Failed to post comment: {response.status_code} {response.text}")
+# print(post_comment("aditi-dsi", "testing-cryptope", "3", "This is a test comment from OpenSorus."))

tools/utils.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import base64
 from datetime import datetime, timezone, timedelta
 import jwt
@@ -93,7 +94,7 @@ def get_installation_token(installation_id):
 # print(get_installation_token(69452220))
-def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
     """
     Lists all files in the repository by recursively fetching the Git tree from GitHub API.
     Returns a list of file paths.
@@ -105,7 +106,8 @@ def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
         "Authorization": f"Bearer {token}",
         "Accept": "application/vnd.github.v3+json"
     }
-    response = github_request("GET", url, headers=headers)
     if response.status_code != 200:
         raise Exception(f"Failed to list repository files: {response.status_code} {response.text}")
@@ -115,18 +117,20 @@ def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
 # print(fetch_repo_files("aditi-dsi", "EvalAI-Starters", "master"))
-def fetch_file_content(owner: str, repo: str, path: str, ref: str = "main") -> str:
     """
     Fetches the content of a file from the GitHub repository.
     """
     installation_id = get_installation_id(owner, repo)
-    token = get_installation_token(installation_id)
     url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref}"
     headers = {
         "Authorization": f"Bearer {token}",
         "Accept": "application/vnd.github.v3+json"
     }
-    response = github_request("GET", url, headers=headers)
     if response.status_code != 200:
         raise Exception(f"Failed to fetch file content {path}: {response.status_code} {response.text}")
@@ -134,5 +138,4 @@ def fetch_file_content(owner: str, repo: str, path: str, ref: str = "main") -> s
     content = base64.b64decode(content_json["content"]).decode("utf-8", errors="ignore")
     return content
-# print(fetch_file_content("aditi-dsi", "testing-cryptope", "frontend/src/lib/buildSwap.ts", "main"))

+import asyncio
 import base64
 from datetime import datetime, timezone, timedelta
 import jwt
 # print(get_installation_token(69452220))
+async def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
     """
     Lists all files in the repository by recursively fetching the Git tree from GitHub API.
     Returns a list of file paths.
         "Authorization": f"Bearer {token}",
         "Accept": "application/vnd.github.v3+json"
     }
+    response = await asyncio.to_thread(github_request, "GET", url, headers=headers)
     if response.status_code != 200:
         raise Exception(f"Failed to list repository files: {response.status_code} {response.text}")
 # print(fetch_repo_files("aditi-dsi", "EvalAI-Starters", "master"))
+async def fetch_file_content(owner: str, repo: str, path: str, ref: str = "main") -> str:
     """
     Fetches the content of a file from the GitHub repository.
     """
     installation_id = get_installation_id(owner, repo)
+    token = await asyncio.to_thread(get_installation_token, installation_id)
     url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref}"
     headers = {
         "Authorization": f"Bearer {token}",
         "Accept": "application/vnd.github.v3+json"
     }
+    response = await asyncio.to_thread(github_request, "GET", url, headers=headers)
     if response.status_code != 200:
         raise Exception(f"Failed to fetch file content {path}: {response.status_code} {response.text}")
     content = base64.b64decode(content_json["content"]).decode("utf-8", errors="ignore")
     return content
+# print(fetch_file_content("aditi-dsi", "testing-cryptope", "frontend/src/lib/buildSwap.ts", "main"))