Commit
·
f8147c5
0
Parent(s):
Initial commit
Browse files- .gitignore +111 -0
- agent/code_index.py +233 -0
- agent/function_calling.py +210 -0
- agent/mistral.py +198 -0
- config.py +8 -0
.gitignore
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
*.egg-info/
|
24 |
+
.installed.cfg
|
25 |
+
*.egg
|
26 |
+
|
27 |
+
# PyInstaller
|
28 |
+
*.manifest
|
29 |
+
*.spec
|
30 |
+
|
31 |
+
# Installer logs
|
32 |
+
pip-log.txt
|
33 |
+
pip-delete-this-directory.txt
|
34 |
+
|
35 |
+
# Unit test / coverage reports
|
36 |
+
htmlcov/
|
37 |
+
.tox/
|
38 |
+
.nox/
|
39 |
+
.coverage
|
40 |
+
.coverage.*
|
41 |
+
.cache
|
42 |
+
nosetests.xml
|
43 |
+
coverage.xml
|
44 |
+
*.cover
|
45 |
+
*.py,cover
|
46 |
+
.hypothesis/
|
47 |
+
.pytest_cache/
|
48 |
+
|
49 |
+
# Translations
|
50 |
+
*.mo
|
51 |
+
*.pot
|
52 |
+
|
53 |
+
# Django stuff:
|
54 |
+
*.log
|
55 |
+
local_settings.py
|
56 |
+
db.sqlite3
|
57 |
+
|
58 |
+
# Flask stuff:
|
59 |
+
instance/
|
60 |
+
.webassets-cache
|
61 |
+
|
62 |
+
# Scrapy stuff:
|
63 |
+
.scrapy
|
64 |
+
|
65 |
+
# Sphinx documentation
|
66 |
+
docs/_build/
|
67 |
+
|
68 |
+
# PyBuilder
|
69 |
+
target/
|
70 |
+
|
71 |
+
# Jupyter Notebook
|
72 |
+
.ipynb_checkpoints
|
73 |
+
|
74 |
+
# IPython
|
75 |
+
profile_default/
|
76 |
+
ipython_config.py
|
77 |
+
|
78 |
+
# pyenv
|
79 |
+
.python-version
|
80 |
+
|
81 |
+
# pipenv
|
82 |
+
pipenv.lock
|
83 |
+
|
84 |
+
# Poetry
|
85 |
+
poetry.lock
|
86 |
+
|
87 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
88 |
+
__pypackages__/
|
89 |
+
|
90 |
+
# dotenv environment variables file
|
91 |
+
.env
|
92 |
+
# e.g. .env.local, .env.production
.env.*
|
93 |
+
|
94 |
+
# virtual environments
|
95 |
+
venv/
|
96 |
+
ENV/
|
97 |
+
env/
|
98 |
+
env.bak/
|
99 |
+
venv.bak/
|
100 |
+
|
101 |
+
# VS Code settings
|
102 |
+
.vscode/
|
103 |
+
|
104 |
+
# PyCharm
|
105 |
+
.idea/
|
106 |
+
|
107 |
+
# macOS system files
|
108 |
+
.DS_Store
|
109 |
+
|
110 |
+
# Windows system files
|
111 |
+
Thumbs.db
|
agent/code_index.py
ADDED
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
import time
|
5 |
+
from typing import List, Dict
|
6 |
+
from llama_index.core import VectorStoreIndex, Document, Settings, get_response_synthesizer
|
7 |
+
from llama_index.core.query_engine import RetrieverQueryEngine
|
8 |
+
from llama_index.core.postprocessor import SimilarityPostprocessor
|
9 |
+
from llama_index.embeddings.mistralai import MistralAIEmbedding
|
10 |
+
from llama_index.llms.mistralai import MistralAI
|
11 |
+
from mistralai import Mistral
|
12 |
+
from agent.function_calling import github_request, get_installation_id, get_installation_token
|
13 |
+
from config import MISTRAL_API_KEY
|
14 |
+
|
15 |
+
repo_indices_cache: Dict[str, VectorStoreIndex] = {}
|
16 |
+
INCLUDE_FILE_EXTENSIONS = {".py", ".js", ".ts", ".json", ".md", ".txt"}
|
17 |
+
|
18 |
+
def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
    """
    Return the path of every blob entry in the repository's Git tree at ``ref``.

    The tree is requested from the GitHub API with ``recursive=1`` using an
    installation token obtained for ``owner/repo``.

    Raises:
        Exception: if the API responds with a status code other than 200.
    """
    token = get_installation_token(get_installation_id(owner, repo))
    response = github_request(
        "GET",
        f"https://api.github.com/repos/{owner}/{repo}/git/trees/{ref}?recursive=1",
        headers={
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github.v3+json"
        },
    )
    if response.status_code != 200:
        raise Exception(f"Failed to list repository files: {response.status_code} {response.text}")

    blob_paths = []
    for entry in response.json().get("tree", []):
        # Keep only entries whose type is "blob"; everything else is skipped.
        if entry["type"] == "blob":
            blob_paths.append(entry["path"])
    return blob_paths
|
37 |
+
|
38 |
+
# print(fetch_repo_files("aditi-dsi", "EvalAI-Starters", "master"))
|
39 |
+
|
40 |
+
def fetch_file_content(owner: str, repo: str, path: str, ref: str = "main") -> str:
    """
    Fetch one file from a GitHub repository and return its text.

    The file is requested through the repository "contents" endpoint with an
    installation token. The ``content`` field of the JSON reply is decoded
    from base64 and then as UTF-8; bytes that are not valid UTF-8 are dropped.

    Args:
        owner: Repository owner.
        repo: Repository name.
        path: File path relative to the repository root.
        ref: Branch, tag or commit to read the file from.

    Raises:
        Exception: if the API responds with a status code other than 200.
    """
    from urllib.parse import quote

    installation_id = get_installation_id(owner, repo)
    token = get_installation_token(installation_id)

    # Percent-encode the dynamic parts: a path containing a space, '#', '?'
    # or '%' would otherwise change the meaning of the URL. '/' stays literal
    # in the path so directory separators survive.
    encoded_path = quote(path)
    encoded_ref = quote(ref, safe="")
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{encoded_path}?ref={encoded_ref}"
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github.v3+json"
    }
    response = github_request("GET", url, headers=headers)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch file content {path}: {response.status_code} {response.text}")

    content_json = response.json()
    return base64.b64decode(content_json["content"]).decode("utf-8", errors="ignore")
|
58 |
+
|
59 |
+
# print(fetch_file_content("aditi-dsi", "testing-cryptope", "frontend/src/lib/buildSwap.ts", "main"))
|
60 |
+
|
61 |
+
def clean_line(line: str) -> str:
    """
    Normalise one line of an LLM reply into a bare file path.

    Removes a leading list marker ("1. ", "2) ", "- ", "* ", "• ") and then
    any surrounding spaces, backticks and quotes.

    The marker must be followed by whitespace. Without that requirement a
    path that merely starts with digits and a dot (e.g. "1.txt") would lose
    its prefix and no longer match the real file name.
    """
    line = re.sub(r'^\s*(?:\d+[\.\)]|[-*•])\s+', '', line)
    line = line.strip(' `"\'')

    return line.strip()
|
66 |
+
|
67 |
+
def select_relevant_files_mistral(issue_description: str, file_paths: List[str]) -> List[str]:
    """
    Ask a Mistral chat model which of ``file_paths`` matter most for an issue.

    The model is prompted to answer with one path per line. Each reply line
    is normalised with ``clean_line`` and kept only if it exactly matches one
    of ``file_paths``. Matches are returned in reply order, without duplicates.

    Returns:
        The validated paths, or ``file_paths`` unchanged when the reply
        contains no valid path.
    """
    model = "devstral-small-latest"
    client = Mistral(api_key=MISTRAL_API_KEY)

    system_prompt = '''
You are a code reasoning assistant. Given a GitHub issue description and a list of file paths from a codebase, return a list of top 5 files that are most relevant to solving or understanding the issue, based on naming, possible associations, or inferred logic.

DO NOT RETURN ANYTHING ELSE.
DO NOT RETURN ANY ADDITIONAL INFORMATION OR EXPLANATIONS.
ONLY RETURN THE FILE PATHS, ONE PER LINE, WITHOUT ANY ADDITIONAL TEXT OR FORMATTING.
DO NOT HALLUCINATE.
'''
    user_prompt = f"""Issue:
{issue_description}

Files:
{chr(10).join(file_paths)}

Return the list of most relevant files (only exact paths)."""

    response = client.chat.complete(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )

    message = response.choices[0].message
    reply = message.content if hasattr(message, "content") else str(message)
    if not isinstance(reply, str):
        # Guard against a missing or non-text content value so the
        # line-splitting below cannot fail.
        reply = "" if reply is None else str(reply)

    known_paths = set(file_paths)  # O(1) membership instead of scanning the list
    seen = set()
    relevant_files = []
    for line in reply.strip().splitlines():
        cleaned = clean_line(line.strip())
        # Accept only exact, not-yet-seen repository paths; anything else the
        # model wrote is ignored.
        if cleaned in known_paths and cleaned not in seen:
            seen.add(cleaned)
            relevant_files.append(cleaned)

    if not relevant_files:
        print("[Info] No valid file paths found in LLM response, defaulting to all files.")
        return file_paths
    return relevant_files
|
114 |
+
|
115 |
+
# print(select_relevant_files_mistral('''
|
116 |
+
# 🛠️ Configuration Error: Placeholder values detected in host_config.json
|
117 |
+
# This file still includes default placeholders like:
|
118 |
+
|
119 |
+
# <evalai_user_auth_token>
|
120 |
+
# <host_team_pk>
|
121 |
+
# <evalai_host_url>
|
122 |
+
# Please replace them with real values to proceed.
|
123 |
+
# ''',
|
124 |
+
# ['.github/FUNDING.yml', '.github/workflows/process_challenge.yml', '.gitignore', 'README.md', 'annotations/test_annotations_devsplit.json', 'annotations/test_annotations_testsplit.json', 'challenge_config.yaml', 'challenge_data/__init__.py', 'challenge_data/challenge_1/__init__.py', 'challenge_data/challenge_1/main.py', 'evaluation_script/__init__.py', 'evaluation_script/main.py', 'github/challenge_processing_script.py', 'github/config.py', 'github/host_config.json', 'github/requirements.txt', 'github/utils.py', 'logo.jpg', 'remote_challenge_evaluation/README.md', 'remote_challenge_evaluation/eval_ai_interface.py', 'remote_challenge_evaluation/evaluate.py', 'remote_challenge_evaluation/main.py', 'remote_challenge_evaluation/requirements.txt', 'run.sh', 'submission.json', 'templates/challenge_phase_1_description.html', 'templates/challenge_phase_2_description.html', 'templates/description.html', 'templates/evaluation_details.html', 'templates/submission_guidelines.html', 'templates/terms_and_conditions.html', 'worker/__init__.py', 'worker/run.py']))
|
125 |
+
|
126 |
+
def build_repo_index(owner: str, repo: str, ref: str = "main", issue_description: str = "") -> VectorStoreIndex:
    """
    Build a vector index over the text files of a GitHub repository.

    Steps:
      1. List every file in the repository at ``ref``.
      2. Keep only files whose extension is in ``INCLUDE_FILE_EXTENSIONS``.
      3. If ``issue_description`` is given, let the LLM narrow the remaining
         files to the most relevant ones.
      4. Download each selected file and embed it with ``codestral-embed``.

    The extension filter runs before the LLM selection so the model only
    chooses among files that can actually be indexed.

    Files that fail to download are reported and skipped.
    """
    model_name = "codestral-embed"
    embed_model = MistralAIEmbedding(model_name=model_name, api_key=MISTRAL_API_KEY)
    print(f"[Indexing] Starting to index repository: {owner}/{repo} at ref {ref}...")

    file_paths = [
        path
        for path in fetch_repo_files(owner, repo, ref)
        if os.path.splitext(path)[1].lower() in INCLUDE_FILE_EXTENSIONS
    ]

    if issue_description and file_paths:
        file_paths = select_relevant_files_mistral(issue_description, file_paths)

    documents = []
    for path in file_paths:
        try:
            content = fetch_file_content(owner, repo, path, ref)
            documents.append(Document(text=content, metadata={"file_path": path}))
            print(f"[Indexing] Added file: {path}")
            # Short pause between downloads (presumably to go easy on the API).
            time.sleep(0.1)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole index.
            print(f"[Warning] Skipping file {path} due to error: {e}")

    index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
    print(f"[Indexing] Finished indexing {len(documents)} files.")
    return index
|
152 |
+
|
153 |
+
# print(build_repo_index("aditi-dsi", "EvalAI-Starters", "master",
|
154 |
+
# '''
|
155 |
+
# 🛠️ Configuration Error: Placeholder values detected in host_config.json
|
156 |
+
# This file still includes default placeholders like:
|
157 |
+
|
158 |
+
# <evalai_user_auth_token>
|
159 |
+
# <host_team_pk>
|
160 |
+
# <evalai_host_url>
|
161 |
+
# Please replace them with real values to proceed.
|
162 |
+
# '''))
|
163 |
+
|
164 |
+
|
165 |
+
def get_repo_index(owner: str, repo: str, ref: str, issue_description: str) -> VectorStoreIndex:
    """
    Return the index for a repository/ref/issue combination, building it on first use.

    Indices are memoised in the module-level ``repo_indices_cache``. The key
    includes ``ref`` so that an index built from one branch is never served
    for a request against another branch of the same repository.
    """
    cache_key = f"{owner}/{repo}@{ref}:{hash(issue_description)}"
    if cache_key in repo_indices_cache:
        print(f"[Cache] Returning cached index for {cache_key}")
        return repo_indices_cache[cache_key]

    index = build_repo_index(owner, repo, ref, issue_description)
    repo_indices_cache[cache_key] = index
    return index
|
174 |
+
|
175 |
+
|
176 |
+
# print(get_repo_index("aditi-dsi", "EvalAI-Starters", "master",
|
177 |
+
# '''
|
178 |
+
# 🛠️ Configuration Error: Placeholder values detected in host_config.json
|
179 |
+
# This file still includes default placeholders like:
|
180 |
+
|
181 |
+
# <evalai_user_auth_token>
|
182 |
+
# <host_team_pk>
|
183 |
+
# <evalai_host_url>
|
184 |
+
# Please replace them with real values to proceed.
|
185 |
+
# '''))
|
186 |
+
|
187 |
+
|
188 |
+
def retrieve_context(owner: str, repo: str, ref: str, issue_description: str) -> List[str]:
    """
    Query the repository index for information relevant to an issue.

    Builds (or reuses) the index for the repository, runs a retrieval query
    engine over it with the issue text, prints the synthesized answer and
    returns it.

    Returns:
        A one-element list holding the query engine's answer as text, so the
        caller receives the retrieved context rather than ``None``.
    """
    index = get_repo_index(owner, repo, ref, issue_description)
    Settings.llm = MistralAI(model="codestral-latest", api_key=MISTRAL_API_KEY)
    Settings.embed_model = MistralAIEmbedding(model_name="codestral-embed", api_key=MISTRAL_API_KEY)
    retriever = index.as_retriever(similarity_top_k=5)
    query_engine = RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=get_response_synthesizer(),
        # NOTE(review): confirm SimilarityPostprocessor accepts
        # `similarity_top_k`; its documented option appears to be
        # `similarity_cutoff`.
        node_postprocessors=[SimilarityPostprocessor(similarity_top_k=5)],
    )
    query = f"Please give relevant information from the codebase about that can help to solve or understand this issue:{issue_description}"
    response = query_engine.query(query)
    print(response)
    return [str(response)]
|
202 |
+
|
203 |
+
# index_tools = [
|
204 |
+
# {
|
205 |
+
# "type": "function",
|
206 |
+
# "function": {
|
207 |
+
# "name": "retrieve_context",
|
208 |
+
# "description": "Fetch relevant context from codebase for a GitHub issue",
|
209 |
+
# "parameters": {
|
210 |
+
# "type": "object",
|
211 |
+
# "properties": {
|
212 |
+
# "owner": {
|
213 |
+
# "type": "string",
|
214 |
+
# "description": "The owner of the repository."
|
215 |
+
# },
|
216 |
+
# "repo": {
|
217 |
+
# "type": "string",
|
218 |
+
# "description": "The name of the repository."
|
219 |
+
# },
|
220 |
+
# "ref": {
|
221 |
+
# "type": "string",
|
222 |
+
# "description": "The branch or commit reference to index from."
|
223 |
+
# },
|
224 |
+
# "issue_description": {
|
225 |
+
# "type": "string",
|
226 |
+
# "description": "The description of the issue to retrieve context for."
|
227 |
+
# }
|
228 |
+
# },
|
229 |
+
# "required": ["owner", "repo", "ref", "issue_description"]
|
230 |
+
# },
|
231 |
+
# },
|
232 |
+
# },
|
233 |
+
# ]
|
agent/function_calling.py
ADDED
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from urllib.parse import urlparse
|
3 |
+
from config import APP_ID, APP_PRIVATE_KEY
|
4 |
+
import time
|
5 |
+
import jwt
|
6 |
+
from datetime import datetime, timezone, timedelta
|
7 |
+
import threading
|
8 |
+
|
9 |
+
installation_tokens = {}
|
10 |
+
token_lock = threading.Lock()
|
11 |
+
|
12 |
+
def generate_jwt():
    """Generate a short-lived JWT signed with the GitHub App private key (RS256).

    ``iat`` is back-dated by 60 seconds and ``exp`` is set 9 minutes ahead,
    which keeps the token inside GitHub's 10-minute maximum lifetime while
    leaving a margin for clock drift between this machine and GitHub.
    """
    now = int(time.time())
    payload = {
        "iat": now - 60,
        "exp": now + (9 * 60),
        "iss": APP_ID,
    }
    return jwt.encode(payload, APP_PRIVATE_KEY, algorithm="RS256")
|
22 |
+
|
23 |
+
|
24 |
+
def github_request(method, url, headers=None, **kwargs):
    """Send a request to the GitHub API, honouring its rate-limit headers.

    Args:
        method: HTTP method name, e.g. "GET" or "POST".
        url: Full request URL.
        headers: Request headers. When omitted, the request is authenticated
            as the GitHub App with a freshly generated JWT.
        **kwargs: Passed through to ``requests.request``. A 30-second
            ``timeout`` is applied unless the caller supplies one.

    Returns:
        The ``requests`` response object.

    Behaviour:
        * A 403 reply whose body mentions "rate limit" is retried after
          sleeping until the advertised reset time.
        * A reply that reports two or fewer remaining requests is returned
          after sleeping until the reset time. It is NOT re-sent, because the
          request already went through and repeating it would duplicate
          side effects such as posting a comment.
    """
    use_app_jwt = headers is None
    # Never wait forever on a stalled connection.
    kwargs.setdefault("timeout", 30)

    while True:
        if use_app_jwt:
            # Rebuilt on every attempt: a rate-limit sleep can outlast the JWT.
            headers = {
                "Authorization": f"Bearer {generate_jwt()}",
                "Accept": "application/vnd.github.v3+json",
            }
        response = requests.request(method, url, headers=headers, **kwargs)

        remaining = response.headers.get("X-RateLimit-Remaining")
        reset_time = response.headers.get("X-RateLimit-Reset")

        if remaining is None or reset_time is None:
            return response

        remaining = int(remaining)
        reset_time = int(reset_time)

        print(f"[GitHub] Remaining: {remaining}, Reset: {reset_time}")

        # Seconds until the limit resets, plus a small safety margin.
        wait = max(reset_time - int(time.time()) + 5, 0)

        if response.status_code == 403 and "rate limit" in response.text.lower():
            print(f"Hit rate limit. Sleeping for {wait} seconds.")
            time.sleep(wait)
            continue
        if remaining <= 2:
            print(f"Approaching rate limit ({remaining} left). Sleeping for {wait} seconds.")
            time.sleep(wait)

        return response
|
57 |
+
|
58 |
+
|
59 |
+
def get_installation_id(owner, repo):
    """Look up the ID of this app's installation on ``owner/repo``.

    Raises:
        Exception: if the API responds with a status code other than 200.
    """
    response = github_request("GET", f"https://api.github.com/repos/{owner}/{repo}/installation")
    if response.status_code != 200:
        raise Exception(f"Failed to get installation ID for {owner}/{repo}: {response.status_code} {response.text}")
    return response.json()["id"]
|
68 |
+
|
69 |
+
# print(get_installation_id("aditi-dsi", "testing-cryptope"))
|
70 |
+
|
71 |
+
|
72 |
+
def get_installation_token(installation_id):
    """Return an access token for the installation, reusing a cached one when possible.

    A cached token is reused only while it remains valid for more than 30
    seconds; otherwise a new one is requested from GitHub and cached.

    Raises:
        Exception: if the token request does not return status 201.
    """
    with token_lock:
        cached = installation_tokens.get(installation_id)
        earliest_acceptable_expiry = datetime.now(timezone.utc) + timedelta(seconds=30)
        if cached and cached["expires_at"] > earliest_acceptable_expiry:
            return cached["token"]

        response = github_request(
            "POST",
            f"https://api.github.com/app/installations/{installation_id}/access_tokens",
        )
        if response.status_code != 201:
            raise Exception(f"Failed to fetch installation token: {response.status_code} {response.text}")

        payload = response.json()
        # The expiry is parsed with a literal trailing "Z" and marked as UTC.
        parsed_expiry = datetime.strptime(payload["expires_at"], "%Y-%m-%dT%H:%M:%SZ")
        installation_tokens[installation_id] = {
            "token": payload["token"],
            "expires_at": parsed_expiry.replace(tzinfo=timezone.utc),
        }
        return payload["token"]
|
90 |
+
|
91 |
+
# print(get_installation_token(69452220))
|
92 |
+
|
93 |
+
def fetch_github_issue(issue_url):
    """Split a GitHub issue URL into ``(owner, repo, issue_num)``.

    The URL path must have the shape ``/<owner>/<repo>/issues/<number>``.
    Punctuation stuck to the end of the number is removed, so a URL copied
    from the end of a sentence ("…/issues/4.") still resolves to issue "4".

    Returns:
        Tuple of three strings: owner, repository name, issue number.

    Raises:
        ValueError: if the path does not have that shape or the issue number
            is not made of digits.
    """
    parsed = urlparse(issue_url.strip())
    path_parts = parsed.path.strip('/').split('/')
    if len(path_parts) >= 4 and path_parts[2] == 'issues':
        owner, repo = path_parts[0], path_parts[1]
        issue_num = path_parts[3].rstrip('.,;:!?)')
        # Reject things like "issues/new" up front instead of failing later
        # with an API 404.
        if owner and repo and issue_num.isascii() and issue_num.isdigit():
            return owner, repo, issue_num
    raise ValueError("Invalid GitHub Issue URL")
|
103 |
+
|
104 |
+
|
105 |
+
def get_issue_details(owner, repo, issue_num):
    """Fetch an issue from the GitHub API and return the decoded JSON reply.

    The request is authenticated with an installation token for ``owner/repo``.

    Raises:
        Exception: if the API responds with a status code other than 200.
    """
    token = get_installation_token(get_installation_id(owner, repo))
    response = github_request(
        "GET",
        f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_num}",
        headers={
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github.v3+json"
        },
    )
    if response.status_code != 200:
        raise Exception(f"Failed to fetch issue: {response.status_code} {response.text}")
    return response.json()
|
118 |
+
|
119 |
+
# print(get_issue_details("aditi-dsi", "testing-cryptope", "3"))
|
120 |
+
|
121 |
+
def post_comment(owner, repo, issue_num, comment_body):
    """Post ``comment_body`` as a comment on an issue and return the decoded JSON reply.

    The request is authenticated with an installation token for ``owner/repo``.

    Raises:
        Exception: if the API responds with a status code other than 201.
    """
    token = get_installation_token(get_installation_id(owner, repo))
    response = github_request(
        "POST",
        f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_num}/comments",
        headers={
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github.v3+json"
        },
        json={"body": comment_body},
    )
    if response.status_code != 201:
        raise Exception(f"Failed to post comment: {response.status_code} {response.text}")
    return response.json()
|
135 |
+
|
136 |
+
# print(post_comment("aditi-dsi", "testing-cryptope", "3", "This is a test comment from OpenSorus."))
|
137 |
+
|
138 |
+
# tools = [
|
139 |
+
# {
|
140 |
+
# "type": "function",
|
141 |
+
# "function": {
|
142 |
+
# "name": "fetch_github_issue",
|
143 |
+
# "description": "Fetch GitHub issue details",
|
144 |
+
# "parameters": {
|
145 |
+
# "type": "object",
|
146 |
+
# "properties": {
|
147 |
+
# "issue_url": {
|
148 |
+
# "type": "string",
|
149 |
+
# "description": "The full URL of the GitHub issue"
|
150 |
+
# }
|
151 |
+
# },
|
152 |
+
# "required": ["issue_url"]
|
153 |
+
# },
|
154 |
+
# },
|
155 |
+
# },
|
156 |
+
# {
|
157 |
+
# "type": "function",
|
158 |
+
# "function": {
|
159 |
+
# "name": "get_issue_details",
|
160 |
+
# "description": "Get details of a GitHub issue",
|
161 |
+
# "parameters": {
|
162 |
+
# "type": "object",
|
163 |
+
# "properties": {
|
164 |
+
# "owner": {
|
165 |
+
# "type": "string",
|
166 |
+
# "description": "The owner of the repository."
|
167 |
+
# },
|
168 |
+
# "repo": {
|
169 |
+
# "type": "string",
|
170 |
+
# "description": "The name of the repository."
|
171 |
+
# },
|
172 |
+
# "issue_num": {
|
173 |
+
# "type": "string",
|
174 |
+
# "description": "The issue number."
|
175 |
+
# }
|
176 |
+
# },
|
177 |
+
# "required": ["owner", "repo", "issue_num"],
|
178 |
+
# },
|
179 |
+
# },
|
180 |
+
# },
|
181 |
+
# {
|
182 |
+
# "type": "function",
|
183 |
+
# "function": {
|
184 |
+
# "name": "post_comment",
|
185 |
+
# "description": "Post a comment on a GitHub issue",
|
186 |
+
# "parameters": {
|
187 |
+
# "type": "object",
|
188 |
+
# "properties": {
|
189 |
+
# "owner": {
|
190 |
+
# "type": "string",
|
191 |
+
# "description": "The owner of the repository."
|
192 |
+
# },
|
193 |
+
# "repo": {
|
194 |
+
# "type": "string",
|
195 |
+
# "description": "The name of the repository."
|
196 |
+
# },
|
197 |
+
# "issue_num": {
|
198 |
+
# "type": "string",
|
199 |
+
# "description": "The issue number."
|
200 |
+
# },
|
201 |
+
# "comment_body": {
|
202 |
+
# "type": "string",
|
203 |
+
# "description": "The body of the comment."
|
204 |
+
# }
|
205 |
+
# },
|
206 |
+
# "required": ["owner", "repo", "issue_num", "comment_body"],
|
207 |
+
# },
|
208 |
+
# },
|
209 |
+
# },
|
210 |
+
# ]
|
agent/mistral.py
ADDED
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from mistralai import Mistral
|
3 |
+
from agent.function_calling import fetch_github_issue, get_issue_details, post_comment
|
4 |
+
from agent.code_index import retrieve_context
|
5 |
+
from config import MISTRAL_API_KEY
|
6 |
+
|
7 |
+
def _string_params(**descriptions):
    """Map each parameter name to a string-typed schema entry with its description."""
    return {
        param: {"type": "string", "description": text}
        for param, text in descriptions.items()
    }


def _function_tool(name, description, properties):
    """Build one function-tool spec in which every listed parameter is required."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": list(properties),
            },
        },
    }


# Descriptions shared by several tools.
_OWNER_DESC = "The owner of the repository."
_REPO_DESC = "The name of the repository."
_ISSUE_NUM_DESC = "The issue number."

# Tool specifications passed to the chat model, one per callable function.
tools = [
    _function_tool(
        "fetch_github_issue",
        "Fetch GitHub issue details",
        _string_params(issue_url="The full URL of the GitHub issue"),
    ),
    _function_tool(
        "get_issue_details",
        "Get details of a GitHub issue",
        _string_params(owner=_OWNER_DESC, repo=_REPO_DESC, issue_num=_ISSUE_NUM_DESC),
    ),
    _function_tool(
        "retrieve_context",
        "Fetch relevant context from codebase for a GitHub issue",
        _string_params(
            owner=_OWNER_DESC,
            repo=_REPO_DESC,
            ref="The branch reference from either master or main to index from.",
            issue_description="The description of the issue to retrieve context for.",
        ),
    ),
    _function_tool(
        "post_comment",
        "Post a comment on a GitHub issue",
        _string_params(
            owner=_OWNER_DESC,
            repo=_REPO_DESC,
            issue_num=_ISSUE_NUM_DESC,
            comment_body="The body of the comment.",
        ),
    ),
]
|
109 |
+
|
110 |
+
# Dispatch table: tool name as exposed to the model -> Python callable.
names_to_functions = dict(
    fetch_github_issue=fetch_github_issue,
    get_issue_details=get_issue_details,
    retrieve_context=retrieve_context,
    post_comment=post_comment,
)

# Names the agent loop is willing to execute.
allowed_tools = set(names_to_functions)
|
118 |
+
|
119 |
+
# System prompt for the agent. The fragments below are concatenated into a
# single string, so each sentence must end with its own separator.
system_message = {
    "role": "system",
    "content": (
        "You are a senior developer assistant bot for GitHub issues.\n\n"

        "Your job is to respond to GitHub issues **professionally** and **helpfully**, but never repeat the issue description verbatim.\n\n"
        "First, classify the issue as one of the following:\n"
        "- Bug report\n"
        "- Implementation question\n"
        "- Feature request\n"
        "- Incomplete or unclear\n\n"

        "Then, based on the classification, write a clear, concise, and friendly response.\n\n"
        "The comment should be well formatted and readable, using Markdown for code blocks and lists where appropriate.\n\n"
        "DO NOT paste or repeat the issue description. DO NOT quote it. Respond entirely in your own words.\n"
        "You can only use the following tools: fetch_github_issue, get_issue_details, retrieve_context, post_comment.\n"
        # The trailing newline keeps this sentence from running into the next one.
        "Do not attempt to use any other tools such as web_search.\n"
        "DO NOT HALLUCINATE OR MAKE UP TOOLS."
    )
}
|
139 |
+
|
140 |
+
user_message = {
    "role": "user",
    "content": "Please suggest a fix on this issue https://github.com/aditi-dsi/testing-cryptope/issues/4."
}

messages = [system_message, user_message]

api_key = MISTRAL_API_KEY
model = "devstral-small-latest"
client = Mistral(api_key=api_key)

# Hard cap on tool invocations. Every attempt counts, including unknown tools
# and calls that raise, so a misbehaving model cannot keep the loop running.
MAX_STEPS = 5
tool_calls = 0

# Agent loop: ask the model for its next step, run the tool(s) it requests,
# feed the results back, and stop once a comment has been posted, the model
# answers without a tool call, or the step budget is used up.
while True:
    response = client.chat.complete(
        model=model,
        messages=messages,
        tools=tools,
        tool_choice="any",
    )
    msg = response.choices[0].message
    messages.append(msg)

    if not (hasattr(msg, "tool_calls") and msg.tool_calls):
        print("OpenSorus (final):", msg.content)
        break

    for tool_call in msg.tool_calls:
        function_name = tool_call.function.name
        tool_calls += 1
        succeeded = False

        if function_name not in allowed_tools:
            print(f"LLM tried to call unknown tool: {function_name}")
            # List the tools from the dispatch table so the message can never
            # drift out of sync with what is actually available.
            tool_output = (
                f"Error: Tool '{function_name}' is not available. "
                f"You can only use the following tools: {', '.join(names_to_functions)}."
            )
        else:
            print(f"Agent is calling tool: {function_name}")
            try:
                raw_arguments = tool_call.function.arguments
                # Arguments normally arrive as a JSON string; an
                # already-decoded mapping is accepted as-is.
                if isinstance(raw_arguments, str):
                    function_params = json.loads(raw_arguments)
                else:
                    function_params = raw_arguments
                tool_output = str(names_to_functions[function_name](**function_params))
                succeeded = True
            except Exception as e:
                # Report the failure to the model instead of crashing, so it
                # can correct its arguments or pick another step.
                print(f"Tool {function_name} failed: {e}")
                tool_output = f"Error: Tool '{function_name}' failed: {e}"

        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": tool_output
        })

        if succeeded and function_name == "post_comment":
            print("OpenSorus (final): ✅ Comment posted successfully. No further action needed.")
            # SystemExit is what exit() raises, without relying on the
            # `site`-provided helper being present.
            raise SystemExit(0)

    if tool_calls >= MAX_STEPS:
        print(f"Agent stopped after {MAX_STEPS} tool calls to protect against rate limiting.")
        break
|
config.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
|
4 |
+
load_dotenv()
|
5 |
+
|
6 |
+
# Settings read from the process environment (after load_dotenv() has run).
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")
APP_ID = os.environ.get("APP_ID")

# Backslash escape sequences in the stored key (e.g. a literal "\n") are
# decoded into the characters they stand for, then surrounding whitespace
# is trimmed. A missing variable yields an empty string.
_raw_private_key = os.environ.get("APP_PRIVATE_KEY", "")
APP_PRIVATE_KEY = _raw_private_key.encode().decode("unicode_escape").strip()
|