Aiswarya Sankar commited on
Commit
f3b7606
·
1 Parent(s): aaea47d

Work with Cody

Browse files
Files changed (1) hide show
  1. app.py +30 -27
app.py CHANGED
@@ -18,7 +18,7 @@ import random
18
  import time
19
  import together
20
 
21
- os.environ['OPENAI_API_KEY']='sk-[REDACTED-LEAKED-KEY]'
22
  os.environ['ACTIVELOOP_TOKEN']='[REDACTED-LEAKED-TOKEN]'
23
 
24
 
@@ -27,7 +27,7 @@ from langchain.document_loaders import TextLoader
27
  from langchain.text_splitter import CharacterTextSplitter
28
 
29
  import subprocess
30
- # repo_name = "https://github.com/aiswaryasankar/memeAI.git"
31
 
32
  from langchain.callbacks.base import BaseCallbackHandler
33
  from langchain.schema import LLMResult
@@ -86,7 +86,7 @@ global tickets
86
  global ticket_choices
87
  tickets = []
88
 
89
- repoName = "https://github.com/aiswaryasankar/memeAI.git"
90
 
91
  embeddings = OpenAIEmbeddings(disallowed_special=())
92
 
@@ -100,6 +100,7 @@ def git_clone(repo_url):
100
 
101
  def index_repo(textbox: str, dropdown: str) -> Response:
102
 
 
103
  mapping = {
104
  "Langchain" : "https://github.com/langchain-ai/langchain.git",
105
  "Weaviate": "https://github.com/weaviate/weaviate.git",
@@ -109,9 +110,6 @@ def index_repo(textbox: str, dropdown: str) -> Response:
109
  "GenerativeAgents": "https://github.com/joonspk-research/generative_agents.git"
110
  }
111
 
112
- # print(textbox)
113
- # print(dropdown[0])
114
-
115
  if textbox != "":
116
  repo = textbox
117
  else:
@@ -124,7 +122,8 @@ def index_repo(textbox: str, dropdown: str) -> Response:
124
 
125
  print("Repo name after setting the value: " + str(repoName))
126
  activeloop_username = "aiswaryas"
127
- dataset_path = f"hub://{activeloop_username}/" + pathName
 
128
 
129
  try:
130
  db = DeepLake(dataset_path=dataset_path,
@@ -143,7 +142,9 @@ def index_repo(textbox: str, dropdown: str) -> Response:
143
  try:
144
  docs = []
145
  for dirpath, dirnames, filenames in os.walk(root_dir):
 
146
  for file in filenames:
 
147
  try:
148
  loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
149
  docs.extend(loader.load_and_split())
@@ -152,7 +153,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
152
  pass
153
 
154
  activeloop_username = "aiswaryas"
155
- dataset_path = f"hub://{activeloop_username}/" + pathName
156
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
157
  texts = text_splitter.split_documents(docs)
158
 
@@ -162,6 +163,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
162
  read_only=False)
163
  # Do this in chunks to avoid hitting the ratelimit immediately
164
  for i in range(0, len(texts), 500):
 
165
  db.add_documents(texts[i:i+500])
166
  time.sleep(.1)
167
 
@@ -179,6 +181,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
179
  # db = DeepLake(dataset_path=dataset_path,
180
  # embedding_function=embeddings,
181
  # token=os.environ['ACTIVELOOP_TOKEN'], read_only=False)
 
182
  else:
183
  print("Dataset already exists")
184
 
@@ -194,7 +197,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
194
  print("REPO name in bug triage: " + str(repoName))
195
  repo = "/".join(repoName[:-4].split("/")[-2:])
196
  tickets = fetchGithubIssues(repo, 10)
197
- print("tickets: " + str(tickets))
198
 
199
  # Create the dropdown
200
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
@@ -215,9 +218,9 @@ def answer_questions(question: str, github: str, **kwargs) -> Response:
215
  github = repoName[:-4]
216
  print(github)
217
  try:
218
- embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
219
  pathName = github.split('/')[-1]
220
- dataset_path = "hub://aiswaryas/" + pathName
221
 
222
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
223
 
@@ -238,7 +241,7 @@ def answer_questions(question: str, github: str, **kwargs) -> Response:
238
  callback_manager=CallbackManager(
239
  [StreamingGradioCallbackHandler(q)]
240
  ),
241
- openai_api_key="sk-[REDACTED-LEAKED-KEY]",
242
  )
243
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
244
  chat_history = []
@@ -291,7 +294,7 @@ def fetchGithubIssues(repo: str, num_issues:int, **kwargs) -> Response:
291
  "comments_url": issue["comments_url"],
292
  })
293
 
294
- print(issues_data)
295
  return issues_data
296
 
297
 
@@ -303,7 +306,7 @@ def generateFolderNamesForRepo(repo):
303
  input data and generate the responses that are displayed in the UI.
304
  """
305
  pathName = git_clone(repo)
306
- root_dir = './' + pathName
307
 
308
  files, dirs, docs = [], [], []
309
  for dirpath, dirnames, filenames in os.walk(root_dir):
@@ -317,7 +320,7 @@ def generateFolderNamesForRepo(repo):
317
  print("Exception: " + str(e) + "| File: " + os.path.join(dirpath, file))
318
  pass
319
 
320
- return dirs[0]
321
 
322
 
323
  def generateDocumentationPerFolder(dir, github):
@@ -339,10 +342,10 @@ def generateDocumentationPerFolder(dir, github):
339
 
340
  print(prompt)
341
  try:
342
- embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
343
  pathName = github.split('/')[-1]
344
  print("PATH NAME: " + str(pathName))
345
- dataset_path = "hub://aiswaryas/" + pathName
346
 
347
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
348
 
@@ -359,7 +362,7 @@ def generateDocumentationPerFolder(dir, github):
359
  temperature=0.0,
360
  verbose=True,
361
  streaming=True, # Pass `streaming=True` to make sure the client receives the data.
362
- openai_api_key="sk-[REDACTED-LEAKED-KEY]",
363
  )
364
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
365
  chat_history = []
@@ -402,9 +405,9 @@ def solveGithubIssue(ticket, history) -> Response:
402
  print(question)
403
 
404
  try:
405
- embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
406
  pathName = github.split('/')[-1]
407
- dataset_path = "hub://aiswaryas/" + pathName
408
 
409
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)
410
 
@@ -424,7 +427,7 @@ def solveGithubIssue(ticket, history) -> Response:
424
  callback_manager=CallbackManager(
425
  [StreamingGradioCallbackHandler(q)]
426
  ),
427
- openai_api_key="sk-[REDACTED-LEAKED-KEY]",
428
  )
429
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever,max_tokens_limit=8000)
430
 
@@ -452,9 +455,9 @@ def bot(history, **kwargs):
452
  print("Repo name in the bot: " + str(repoName))
453
  github = repoName[:-4]
454
  try:
455
- embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
456
  pathName = github.split('/')[-1]
457
- dataset_path = "hub://aiswaryas/" + pathName
458
 
459
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
460
 
@@ -474,7 +477,7 @@ def bot(history, **kwargs):
474
  callback_manager=CallbackManager(
475
  [StreamingGradioCallbackHandler(q)]
476
  ),
477
- openai_api_key="sk-[REDACTED-LEAKED-KEY]",
478
  )
479
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
480
  chat_history = []
@@ -501,7 +504,7 @@ with gr.Blocks() as demo:
501
  repoTextBox = gr.Textbox(label="Github Repository")
502
 
503
  gr.Markdown("""Choose from any of the following repositories""")
504
- ingestedRepos = gr.CheckboxGroup(choices=['Langchain', 'Weaviate', 'OpenAssistant', 'GenerativeAgents','Llama2', "MemeAI"], label="Github Repository", value="MemeAI")
505
 
506
  success_response = gr.Textbox(label="")
507
  ingest_btn = gr.Button("Index repo")
@@ -534,7 +537,7 @@ with gr.Blocks() as demo:
534
  print("REPO name in bug triage: " + str(repoName))
535
  repo = "/".join(repoName[:-4].split("/")[-2:])
536
  tickets = fetchGithubIssues(repo, 10)
537
- print("tickets: " + str(tickets))
538
 
539
  # Create the dropdown
540
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
@@ -549,7 +552,7 @@ with gr.Blocks() as demo:
549
 
550
  # # Create the dropdown
551
  # global ticket_choices
552
- print("tickets in bug triage: " + str(tickets))
553
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
554
  ticket_titles = [ticket["title"] for ticket in tickets]
555
 
 
18
  import time
19
  import together
20
 
21
+ os.environ['OPENAI_API_KEY']='sk-[REDACTED-LEAKED-KEY]'
22
  os.environ['ACTIVELOOP_TOKEN']='[REDACTED-LEAKED-TOKEN]'
23
 
24
 
 
27
  from langchain.text_splitter import CharacterTextSplitter
28
 
29
  import subprocess
30
+ # repo_name = "https://github.com/sourcegraph/cody.git"
31
 
32
  from langchain.callbacks.base import BaseCallbackHandler
33
  from langchain.schema import LLMResult
 
86
  global ticket_choices
87
  tickets = []
88
 
89
+ repoName = "https://github.com/sourcegraph/cody.git"
90
 
91
  embeddings = OpenAIEmbeddings(disallowed_special=())
92
 
 
100
 
101
  def index_repo(textbox: str, dropdown: str) -> Response:
102
 
103
+ print("IN INDEX_REPO")
104
  mapping = {
105
  "Langchain" : "https://github.com/langchain-ai/langchain.git",
106
  "Weaviate": "https://github.com/weaviate/weaviate.git",
 
110
  "GenerativeAgents": "https://github.com/joonspk-research/generative_agents.git"
111
  }
112
 
 
 
 
113
  if textbox != "":
114
  repo = textbox
115
  else:
 
122
 
123
  print("Repo name after setting the value: " + str(repoName))
124
  activeloop_username = "aiswaryas"
125
+ dataset_path = f"hub://{activeloop_username}/" + pathName + "1000"
126
+ print(dataset_path)
127
 
128
  try:
129
  db = DeepLake(dataset_path=dataset_path,
 
142
  try:
143
  docs = []
144
  for dirpath, dirnames, filenames in os.walk(root_dir):
145
+ print("rootdir: " + str(root_dir))
146
  for file in filenames:
147
+ print(file)
148
  try:
149
  loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
150
  docs.extend(loader.load_and_split())
 
153
  pass
154
 
155
  activeloop_username = "aiswaryas"
156
+ dataset_path = f"hub://{activeloop_username}/" + pathName + "1000"
157
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
158
  texts = text_splitter.split_documents(docs)
159
 
 
163
  read_only=False)
164
  # Do this in chunks to avoid hitting the ratelimit immediately
165
  for i in range(0, len(texts), 500):
166
+ print("Adding documents " + str(i))
167
  db.add_documents(texts[i:i+500])
168
  time.sleep(.1)
169
 
 
181
  # db = DeepLake(dataset_path=dataset_path,
182
  # embedding_function=embeddings,
183
  # token=os.environ['ACTIVELOOP_TOKEN'], read_only=False)
184
+
185
  else:
186
  print("Dataset already exists")
187
 
 
197
  print("REPO name in bug triage: " + str(repoName))
198
  repo = "/".join(repoName[:-4].split("/")[-2:])
199
  tickets = fetchGithubIssues(repo, 10)
200
+ # print("tickets: " + str(tickets))
201
 
202
  # Create the dropdown
203
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
 
218
  github = repoName[:-4]
219
  print(github)
220
  try:
221
+ embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
222
  pathName = github.split('/')[-1]
223
+ dataset_path = "hub://aiswaryas/" + pathName + "1000"
224
 
225
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
226
 
 
241
  callback_manager=CallbackManager(
242
  [StreamingGradioCallbackHandler(q)]
243
  ),
244
+ openai_api_key="sk-[REDACTED-LEAKED-KEY]",
245
  )
246
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
247
  chat_history = []
 
294
  "comments_url": issue["comments_url"],
295
  })
296
 
297
+ # print(issues_data)
298
  return issues_data
299
 
300
 
 
306
  input data and generate the responses that are displayed in the UI.
307
  """
308
  pathName = git_clone(repo)
309
+ root_dir = './' + pathName + "1000"
310
 
311
  files, dirs, docs = [], [], []
312
  for dirpath, dirnames, filenames in os.walk(root_dir):
 
320
  print("Exception: " + str(e) + "| File: " + os.path.join(dirpath, file))
321
  pass
322
 
323
+ return dirs
324
 
325
 
326
  def generateDocumentationPerFolder(dir, github):
 
342
 
343
  print(prompt)
344
  try:
345
+ embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
346
  pathName = github.split('/')[-1]
347
  print("PATH NAME: " + str(pathName))
348
+ dataset_path = "hub://aiswaryas/" + pathName + "1000"
349
 
350
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
351
 
 
362
  temperature=0.0,
363
  verbose=True,
364
  streaming=True, # Pass `streaming=True` to make sure the client receives the data.
365
+ openai_api_key="sk-[REDACTED-LEAKED-KEY]",
366
  )
367
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
368
  chat_history = []
 
405
  print(question)
406
 
407
  try:
408
+ embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
409
  pathName = github.split('/')[-1]
410
+ dataset_path = "hub://aiswaryas/" + pathName + "1000"
411
 
412
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)
413
 
 
427
  callback_manager=CallbackManager(
428
  [StreamingGradioCallbackHandler(q)]
429
  ),
430
+ openai_api_key="sk-[REDACTED-LEAKED-KEY]",
431
  )
432
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever,max_tokens_limit=8000)
433
 
 
455
  print("Repo name in the bot: " + str(repoName))
456
  github = repoName[:-4]
457
  try:
458
+ embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
459
  pathName = github.split('/')[-1]
460
+ dataset_path = "hub://aiswaryas/" + pathName + "1000"
461
 
462
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
463
 
 
477
  callback_manager=CallbackManager(
478
  [StreamingGradioCallbackHandler(q)]
479
  ),
480
+ openai_api_key="sk-[REDACTED-LEAKED-KEY]",
481
  )
482
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
483
  chat_history = []
 
504
  repoTextBox = gr.Textbox(label="Github Repository")
505
 
506
  gr.Markdown("""Choose from any of the following repositories""")
507
+ ingestedRepos = gr.CheckboxGroup(choices=['Langchain', 'Weaviate', 'OpenAssistant', 'GenerativeAgents','Llama2', "MemeAI"], label="Github Repository", value="Langchain")
508
 
509
  success_response = gr.Textbox(label="")
510
  ingest_btn = gr.Button("Index repo")
 
537
  print("REPO name in bug triage: " + str(repoName))
538
  repo = "/".join(repoName[:-4].split("/")[-2:])
539
  tickets = fetchGithubIssues(repo, 10)
540
+ # print("tickets: " + str(tickets))
541
 
542
  # Create the dropdown
543
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
 
552
 
553
  # # Create the dropdown
554
  # global ticket_choices
555
+ # print("tickets in bug triage: " + str(tickets))
556
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
557
  ticket_titles = [ticket["title"] for ticket in tickets]
558