Spaces:
Sleeping
Sleeping
Aiswarya Sankar
committed on
Commit
·
f3b7606
1
Parent(s):
aaea47d
Work with Cody
Browse files
app.py
CHANGED
@@ -18,7 +18,7 @@ import random
|
|
18 |
import time
|
19 |
import together
|
20 |
|
21 |
-
os.environ['OPENAI_API_KEY']='sk-
|
22 |
os.environ['ACTIVELOOP_TOKEN']='eyJhbGciOiJIUzUxMiIsImlhdCI6MTY4MTU5NTgyOCwiZXhwIjoxNzEzMjE4MTU5fQ.eyJpZCI6ImFpc3dhcnlhcyJ9.eoiMFZsS20zzMXXupFbowUlLdgIgf_MA1ck_DByzREeoQvNm8GPhKEfqea2y1Qak-ud2jo9dhSTBTfRe1ztezw'
|
23 |
|
24 |
|
@@ -27,7 +27,7 @@ from langchain.document_loaders import TextLoader
|
|
27 |
from langchain.text_splitter import CharacterTextSplitter
|
28 |
|
29 |
import subprocess
|
30 |
-
# repo_name = "https://github.com/
|
31 |
|
32 |
from langchain.callbacks.base import BaseCallbackHandler
|
33 |
from langchain.schema import LLMResult
|
@@ -86,7 +86,7 @@ global tickets
|
|
86 |
global ticket_choices
|
87 |
tickets = []
|
88 |
|
89 |
-
repoName = "https://github.com/
|
90 |
|
91 |
embeddings = OpenAIEmbeddings(disallowed_special=())
|
92 |
|
@@ -100,6 +100,7 @@ def git_clone(repo_url):
|
|
100 |
|
101 |
def index_repo(textbox: str, dropdown: str) -> Response:
|
102 |
|
|
|
103 |
mapping = {
|
104 |
"Langchain" : "https://github.com/langchain-ai/langchain.git",
|
105 |
"Weaviate": "https://github.com/weaviate/weaviate.git",
|
@@ -109,9 +110,6 @@ def index_repo(textbox: str, dropdown: str) -> Response:
|
|
109 |
"GenerativeAgents": "https://github.com/joonspk-research/generative_agents.git"
|
110 |
}
|
111 |
|
112 |
-
# print(textbox)
|
113 |
-
# print(dropdown[0])
|
114 |
-
|
115 |
if textbox != "":
|
116 |
repo = textbox
|
117 |
else:
|
@@ -124,7 +122,8 @@ def index_repo(textbox: str, dropdown: str) -> Response:
|
|
124 |
|
125 |
print("Repo name after setting the value: " + str(repoName))
|
126 |
activeloop_username = "aiswaryas"
|
127 |
-
dataset_path = f"hub://{activeloop_username}/" + pathName
|
|
|
128 |
|
129 |
try:
|
130 |
db = DeepLake(dataset_path=dataset_path,
|
@@ -143,7 +142,9 @@ def index_repo(textbox: str, dropdown: str) -> Response:
|
|
143 |
try:
|
144 |
docs = []
|
145 |
for dirpath, dirnames, filenames in os.walk(root_dir):
|
|
|
146 |
for file in filenames:
|
|
|
147 |
try:
|
148 |
loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
|
149 |
docs.extend(loader.load_and_split())
|
@@ -152,7 +153,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
|
|
152 |
pass
|
153 |
|
154 |
activeloop_username = "aiswaryas"
|
155 |
-
dataset_path = f"hub://{activeloop_username}/" + pathName
|
156 |
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
157 |
texts = text_splitter.split_documents(docs)
|
158 |
|
@@ -162,6 +163,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
|
|
162 |
read_only=False)
|
163 |
# Do this in chunks to avoid hitting the ratelimit immediately
|
164 |
for i in range(0, len(texts), 500):
|
|
|
165 |
db.add_documents(texts[i:i+500])
|
166 |
time.sleep(.1)
|
167 |
|
@@ -179,6 +181,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
|
|
179 |
# db = DeepLake(dataset_path=dataset_path,
|
180 |
# embedding_function=embeddings,
|
181 |
# token=os.environ['ACTIVELOOP_TOKEN'], read_only=False)
|
|
|
182 |
else:
|
183 |
print("Dataset already exists")
|
184 |
|
@@ -194,7 +197,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
|
|
194 |
print("REPO name in bug triage: " + str(repoName))
|
195 |
repo = "/".join(repoName[:-4].split("/")[-2:])
|
196 |
tickets = fetchGithubIssues(repo, 10)
|
197 |
-
print("tickets: " + str(tickets))
|
198 |
|
199 |
# Create the dropdown
|
200 |
ticket_choices = {ticket["title"]: ticket for ticket in tickets}
|
@@ -215,9 +218,9 @@ def answer_questions(question: str, github: str, **kwargs) -> Response:
|
|
215 |
github = repoName[:-4]
|
216 |
print(github)
|
217 |
try:
|
218 |
-
embeddings = OpenAIEmbeddings(openai_api_key="sk-
|
219 |
pathName = github.split('/')[-1]
|
220 |
-
dataset_path = "hub://aiswaryas/" + pathName
|
221 |
|
222 |
db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
|
223 |
|
@@ -238,7 +241,7 @@ def answer_questions(question: str, github: str, **kwargs) -> Response:
|
|
238 |
callback_manager=CallbackManager(
|
239 |
[StreamingGradioCallbackHandler(q)]
|
240 |
),
|
241 |
-
openai_api_key="sk-
|
242 |
)
|
243 |
qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
|
244 |
chat_history = []
|
@@ -291,7 +294,7 @@ def fetchGithubIssues(repo: str, num_issues:int, **kwargs) -> Response:
|
|
291 |
"comments_url": issue["comments_url"],
|
292 |
})
|
293 |
|
294 |
-
print(issues_data)
|
295 |
return issues_data
|
296 |
|
297 |
|
@@ -303,7 +306,7 @@ def generateFolderNamesForRepo(repo):
|
|
303 |
input data and generate the responses that are displayed in the UI.
|
304 |
"""
|
305 |
pathName = git_clone(repo)
|
306 |
-
root_dir = './' + pathName
|
307 |
|
308 |
files, dirs, docs = [], [], []
|
309 |
for dirpath, dirnames, filenames in os.walk(root_dir):
|
@@ -317,7 +320,7 @@ def generateFolderNamesForRepo(repo):
|
|
317 |
print("Exception: " + str(e) + "| File: " + os.path.join(dirpath, file))
|
318 |
pass
|
319 |
|
320 |
-
return dirs
|
321 |
|
322 |
|
323 |
def generateDocumentationPerFolder(dir, github):
|
@@ -339,10 +342,10 @@ def generateDocumentationPerFolder(dir, github):
|
|
339 |
|
340 |
print(prompt)
|
341 |
try:
|
342 |
-
embeddings = OpenAIEmbeddings(openai_api_key="sk-
|
343 |
pathName = github.split('/')[-1]
|
344 |
print("PATH NAME: " + str(pathName))
|
345 |
-
dataset_path = "hub://aiswaryas/" + pathName
|
346 |
|
347 |
db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
|
348 |
|
@@ -359,7 +362,7 @@ def generateDocumentationPerFolder(dir, github):
|
|
359 |
temperature=0.0,
|
360 |
verbose=True,
|
361 |
streaming=True, # Pass `streaming=True` to make sure the client receives the data.
|
362 |
-
openai_api_key="sk-
|
363 |
)
|
364 |
qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
|
365 |
chat_history = []
|
@@ -402,9 +405,9 @@ def solveGithubIssue(ticket, history) -> Response:
|
|
402 |
print(question)
|
403 |
|
404 |
try:
|
405 |
-
embeddings = OpenAIEmbeddings(openai_api_key="sk-
|
406 |
pathName = github.split('/')[-1]
|
407 |
-
dataset_path = "hub://aiswaryas/" + pathName
|
408 |
|
409 |
db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)
|
410 |
|
@@ -424,7 +427,7 @@ def solveGithubIssue(ticket, history) -> Response:
|
|
424 |
callback_manager=CallbackManager(
|
425 |
[StreamingGradioCallbackHandler(q)]
|
426 |
),
|
427 |
-
openai_api_key="sk-
|
428 |
)
|
429 |
qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever,max_tokens_limit=8000)
|
430 |
|
@@ -452,9 +455,9 @@ def bot(history, **kwargs):
|
|
452 |
print("Repo name in the bot: " + str(repoName))
|
453 |
github = repoName[:-4]
|
454 |
try:
|
455 |
-
embeddings = OpenAIEmbeddings(openai_api_key="sk-
|
456 |
pathName = github.split('/')[-1]
|
457 |
-
dataset_path = "hub://aiswaryas/" + pathName
|
458 |
|
459 |
db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
|
460 |
|
@@ -474,7 +477,7 @@ def bot(history, **kwargs):
|
|
474 |
callback_manager=CallbackManager(
|
475 |
[StreamingGradioCallbackHandler(q)]
|
476 |
),
|
477 |
-
openai_api_key="sk-
|
478 |
)
|
479 |
qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
|
480 |
chat_history = []
|
@@ -501,7 +504,7 @@ with gr.Blocks() as demo:
|
|
501 |
repoTextBox = gr.Textbox(label="Github Repository")
|
502 |
|
503 |
gr.Markdown("""Choose from any of the following repositories""")
|
504 |
-
ingestedRepos = gr.CheckboxGroup(choices=['Langchain', 'Weaviate', 'OpenAssistant', 'GenerativeAgents','Llama2', "MemeAI"], label="Github Repository", value="
|
505 |
|
506 |
success_response = gr.Textbox(label="")
|
507 |
ingest_btn = gr.Button("Index repo")
|
@@ -534,7 +537,7 @@ with gr.Blocks() as demo:
|
|
534 |
print("REPO name in bug triage: " + str(repoName))
|
535 |
repo = "/".join(repoName[:-4].split("/")[-2:])
|
536 |
tickets = fetchGithubIssues(repo, 10)
|
537 |
-
print("tickets: " + str(tickets))
|
538 |
|
539 |
# Create the dropdown
|
540 |
ticket_choices = {ticket["title"]: ticket for ticket in tickets}
|
@@ -549,7 +552,7 @@ with gr.Blocks() as demo:
|
|
549 |
|
550 |
# # Create the dropdown
|
551 |
# global ticket_choices
|
552 |
-
print("tickets in bug triage: " + str(tickets))
|
553 |
ticket_choices = {ticket["title"]: ticket for ticket in tickets}
|
554 |
ticket_titles = [ticket["title"] for ticket in tickets]
|
555 |
|
|
|
18 |
import time
|
19 |
import together
|
20 |
|
21 |
+
os.environ['OPENAI_API_KEY']='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'
|
22 |
os.environ['ACTIVELOOP_TOKEN']='eyJhbGciOiJIUzUxMiIsImlhdCI6MTY4MTU5NTgyOCwiZXhwIjoxNzEzMjE4MTU5fQ.eyJpZCI6ImFpc3dhcnlhcyJ9.eoiMFZsS20zzMXXupFbowUlLdgIgf_MA1ck_DByzREeoQvNm8GPhKEfqea2y1Qak-ud2jo9dhSTBTfRe1ztezw'
|
23 |
|
24 |
|
|
|
27 |
from langchain.text_splitter import CharacterTextSplitter
|
28 |
|
29 |
import subprocess
|
30 |
+
# repo_name = "https://github.com/sourcegraph/cody.git"
|
31 |
|
32 |
from langchain.callbacks.base import BaseCallbackHandler
|
33 |
from langchain.schema import LLMResult
|
|
|
86 |
global ticket_choices
|
87 |
tickets = []
|
88 |
|
89 |
+
repoName = "https://github.com/sourcegraph/cody.git"
|
90 |
|
91 |
embeddings = OpenAIEmbeddings(disallowed_special=())
|
92 |
|
|
|
100 |
|
101 |
def index_repo(textbox: str, dropdown: str) -> Response:
|
102 |
|
103 |
+
print("IN INDEX_REPO")
|
104 |
mapping = {
|
105 |
"Langchain" : "https://github.com/langchain-ai/langchain.git",
|
106 |
"Weaviate": "https://github.com/weaviate/weaviate.git",
|
|
|
110 |
"GenerativeAgents": "https://github.com/joonspk-research/generative_agents.git"
|
111 |
}
|
112 |
|
|
|
|
|
|
|
113 |
if textbox != "":
|
114 |
repo = textbox
|
115 |
else:
|
|
|
122 |
|
123 |
print("Repo name after setting the value: " + str(repoName))
|
124 |
activeloop_username = "aiswaryas"
|
125 |
+
dataset_path = f"hub://{activeloop_username}/" + pathName + "1000"
|
126 |
+
print(dataset_path)
|
127 |
|
128 |
try:
|
129 |
db = DeepLake(dataset_path=dataset_path,
|
|
|
142 |
try:
|
143 |
docs = []
|
144 |
for dirpath, dirnames, filenames in os.walk(root_dir):
|
145 |
+
print("rootdir: " + str(root_dir))
|
146 |
for file in filenames:
|
147 |
+
print(file)
|
148 |
try:
|
149 |
loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
|
150 |
docs.extend(loader.load_and_split())
|
|
|
153 |
pass
|
154 |
|
155 |
activeloop_username = "aiswaryas"
|
156 |
+
dataset_path = f"hub://{activeloop_username}/" + pathName + "1000"
|
157 |
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
158 |
texts = text_splitter.split_documents(docs)
|
159 |
|
|
|
163 |
read_only=False)
|
164 |
# Do this in chunks to avoid hitting the ratelimit immediately
|
165 |
for i in range(0, len(texts), 500):
|
166 |
+
print("Adding documents " + str(i))
|
167 |
db.add_documents(texts[i:i+500])
|
168 |
time.sleep(.1)
|
169 |
|
|
|
181 |
# db = DeepLake(dataset_path=dataset_path,
|
182 |
# embedding_function=embeddings,
|
183 |
# token=os.environ['ACTIVELOOP_TOKEN'], read_only=False)
|
184 |
+
|
185 |
else:
|
186 |
print("Dataset already exists")
|
187 |
|
|
|
197 |
print("REPO name in bug triage: " + str(repoName))
|
198 |
repo = "/".join(repoName[:-4].split("/")[-2:])
|
199 |
tickets = fetchGithubIssues(repo, 10)
|
200 |
+
# print("tickets: " + str(tickets))
|
201 |
|
202 |
# Create the dropdown
|
203 |
ticket_choices = {ticket["title"]: ticket for ticket in tickets}
|
|
|
218 |
github = repoName[:-4]
|
219 |
print(github)
|
220 |
try:
|
221 |
+
embeddings = OpenAIEmbeddings(openai_api_key="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")
|
222 |
pathName = github.split('/')[-1]
|
223 |
+
dataset_path = "hub://aiswaryas/" + pathName + "1000"
|
224 |
|
225 |
db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
|
226 |
|
|
|
241 |
callback_manager=CallbackManager(
|
242 |
[StreamingGradioCallbackHandler(q)]
|
243 |
),
|
244 |
+
openai_api_key="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",
|
245 |
)
|
246 |
qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
|
247 |
chat_history = []
|
|
|
294 |
"comments_url": issue["comments_url"],
|
295 |
})
|
296 |
|
297 |
+
# print(issues_data)
|
298 |
return issues_data
|
299 |
|
300 |
|
|
|
306 |
input data and generate the responses that are displayed in the UI.
|
307 |
"""
|
308 |
pathName = git_clone(repo)
|
309 |
+
root_dir = './' + pathName + "1000"
|
310 |
|
311 |
files, dirs, docs = [], [], []
|
312 |
for dirpath, dirnames, filenames in os.walk(root_dir):
|
|
|
320 |
print("Exception: " + str(e) + "| File: " + os.path.join(dirpath, file))
|
321 |
pass
|
322 |
|
323 |
+
return dirs
|
324 |
|
325 |
|
326 |
def generateDocumentationPerFolder(dir, github):
|
|
|
342 |
|
343 |
print(prompt)
|
344 |
try:
|
345 |
+
embeddings = OpenAIEmbeddings(openai_api_key="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")
|
346 |
pathName = github.split('/')[-1]
|
347 |
print("PATH NAME: " + str(pathName))
|
348 |
+
dataset_path = "hub://aiswaryas/" + pathName + "1000"
|
349 |
|
350 |
db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
|
351 |
|
|
|
362 |
temperature=0.0,
|
363 |
verbose=True,
|
364 |
streaming=True, # Pass `streaming=True` to make sure the client receives the data.
|
365 |
+
openai_api_key="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",
|
366 |
)
|
367 |
qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
|
368 |
chat_history = []
|
|
|
405 |
print(question)
|
406 |
|
407 |
try:
|
408 |
+
embeddings = OpenAIEmbeddings(openai_api_key="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")
|
409 |
pathName = github.split('/')[-1]
|
410 |
+
dataset_path = "hub://aiswaryas/" + pathName + "1000"
|
411 |
|
412 |
db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)
|
413 |
|
|
|
427 |
callback_manager=CallbackManager(
|
428 |
[StreamingGradioCallbackHandler(q)]
|
429 |
),
|
430 |
+
openai_api_key="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",
|
431 |
)
|
432 |
qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever,max_tokens_limit=8000)
|
433 |
|
|
|
455 |
print("Repo name in the bot: " + str(repoName))
|
456 |
github = repoName[:-4]
|
457 |
try:
|
458 |
+
embeddings = OpenAIEmbeddings(openai_api_key="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")
|
459 |
pathName = github.split('/')[-1]
|
460 |
+
dataset_path = "hub://aiswaryas/" + pathName + "1000"
|
461 |
|
462 |
db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
|
463 |
|
|
|
477 |
callback_manager=CallbackManager(
|
478 |
[StreamingGradioCallbackHandler(q)]
|
479 |
),
|
480 |
+
openai_api_key="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",
|
481 |
)
|
482 |
qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
|
483 |
chat_history = []
|
|
|
504 |
repoTextBox = gr.Textbox(label="Github Repository")
|
505 |
|
506 |
gr.Markdown("""Choose from any of the following repositories""")
|
507 |
+
ingestedRepos = gr.CheckboxGroup(choices=['Langchain', 'Weaviate', 'OpenAssistant', 'GenerativeAgents','Llama2', "MemeAI"], label="Github Repository", value="Langchain")
|
508 |
|
509 |
success_response = gr.Textbox(label="")
|
510 |
ingest_btn = gr.Button("Index repo")
|
|
|
537 |
print("REPO name in bug triage: " + str(repoName))
|
538 |
repo = "/".join(repoName[:-4].split("/")[-2:])
|
539 |
tickets = fetchGithubIssues(repo, 10)
|
540 |
+
# print("tickets: " + str(tickets))
|
541 |
|
542 |
# Create the dropdown
|
543 |
ticket_choices = {ticket["title"]: ticket for ticket in tickets}
|
|
|
552 |
|
553 |
# # Create the dropdown
|
554 |
# global ticket_choices
|
555 |
+
# print("tickets in bug triage: " + str(tickets))
|
556 |
ticket_choices = {ticket["title"]: ticket for ticket in tickets}
|
557 |
ticket_titles = [ticket["title"] for ticket in tickets]
|
558 |
|