Aiswarya Sankar committed on
Commit
5a8be90
·
1 Parent(s): 9937bad

Remove openai token

Browse files
Files changed (1) hide show
  1. app.py +53 -92
app.py CHANGED
@@ -18,16 +18,11 @@ import random
18
  import time
19
  import together
20
 
21
- os.environ['OPENAI_API_KEY']='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'
22
- os.environ['ACTIVELOOP_TOKEN']='eyJhbGciOiJIUzUxMiIsImlhdCI6MTY4MTU5NTgyOCwiZXhwIjoxNzEzMjE4MTU5fQ.eyJpZCI6ImFpc3dhcnlhcyJ9.eoiMFZsS20zzMXXupFbowUlLdgIgf_MA1ck_DByzREeoQvNm8GPhKEfqea2y1Qak-ud2jo9dhSTBTfRe1ztezw'
23
-
24
-
25
  import os
26
  from langchain.document_loaders import TextLoader
27
  from langchain.text_splitter import CharacterTextSplitter
28
 
29
  import subprocess
30
- # repo_name = "https://github.com/sourcegraph/cody.git"
31
 
32
  from langchain.callbacks.base import BaseCallbackHandler
33
  from langchain.schema import LLMResult
@@ -86,7 +81,7 @@ global tickets
86
  global ticket_choices
87
  tickets = []
88
 
89
- repoName = "https://github.com/sourcegraph/cody.git"
90
 
91
  embeddings = OpenAIEmbeddings(disallowed_special=())
92
 
@@ -100,7 +95,6 @@ def git_clone(repo_url):
100
 
101
  def index_repo(textbox: str, dropdown: str) -> Response:
102
 
103
- print("IN INDEX_REPO")
104
  mapping = {
105
  "Langchain" : "https://github.com/langchain-ai/langchain.git",
106
  "Weaviate": "https://github.com/weaviate/weaviate.git",
@@ -114,17 +108,12 @@ def index_repo(textbox: str, dropdown: str) -> Response:
114
  repo = textbox
115
  else:
116
  repo = mapping[dropdown[0]]
117
- # repoName = gr.State(repo)
118
 
119
- print("Repo name after setting the value: " + str(repoName))
120
  pathName = git_clone(repo)
121
  root_dir = './' + pathName
122
- print(root_dir)
123
 
124
- print("Repo name after setting the value: " + str(repoName))
125
  activeloop_username = "aiswaryas"
126
  dataset_path = f"hub://{activeloop_username}/" + pathName + "1000"
127
- print(dataset_path)
128
 
129
  try:
130
  db = DeepLake(dataset_path=dataset_path,
@@ -143,7 +132,6 @@ def index_repo(textbox: str, dropdown: str) -> Response:
143
  try:
144
  docs = []
145
  for dirpath, dirnames, filenames in os.walk(root_dir):
146
- print("rootdir: " + str(root_dir))
147
  for file in filenames:
148
  print(file)
149
  try:
@@ -183,9 +171,6 @@ def index_repo(textbox: str, dropdown: str) -> Response:
183
  # embedding_function=embeddings,
184
  # token=os.environ['ACTIVELOOP_TOKEN'], read_only=False)
185
 
186
- else:
187
- print("Dataset already exists")
188
-
189
  except Exception as e:
190
  return Response(
191
  result= "Failed to index github repo",
@@ -202,8 +187,6 @@ def index_repo(textbox: str, dropdown: str) -> Response:
202
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
203
  ticket_titles = [ticket["title"] for ticket in tickets]
204
 
205
- print("Repo name before return: " + str(repoName))
206
-
207
  return {
208
  success_response: "SUCCESS",
209
  launch_product: gr.update(visible=True)
@@ -213,14 +196,15 @@ def index_repo(textbox: str, dropdown: str) -> Response:
213
  def answer_questions(question: str, github: str, **kwargs) -> Response:
214
 
215
  global repoName
216
- print("Repo name")
217
  github = repoName[:-4]
218
- print(github)
 
219
  try:
220
- embeddings = OpenAIEmbeddings(openai_api_key="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")
221
  pathName = github.split('/')[-1]
222
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
223
 
 
224
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
225
 
226
  print("finished indexing repo")
@@ -240,7 +224,6 @@ def answer_questions(question: str, github: str, **kwargs) -> Response:
240
  callback_manager=CallbackManager(
241
  [StreamingGradioCallbackHandler(q)]
242
  ),
243
- openai_api_key="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",
244
  )
245
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
246
  chat_history = []
@@ -293,7 +276,6 @@ def fetchGithubIssues(repo: str, num_issues:int, **kwargs) -> Response:
293
  "comments_url": issue["comments_url"],
294
  })
295
 
296
- # print(issues_data)
297
  return issues_data
298
 
299
 
@@ -339,43 +321,34 @@ def generateDocumentationPerFolder(dir, github):
339
  an overview of that function.
340
  """.format(dir, github)
341
 
342
- print(prompt)
343
- try:
344
- embeddings = OpenAIEmbeddings(openai_api_key="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")
345
- pathName = github.split('/')[-1]
346
- print("PATH NAME: " + str(pathName))
347
- dataset_path = "hub://aiswaryas/" + pathName + "1000"
348
-
349
- db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
350
 
351
- # print("finished indexing repo")
352
- retriever = db.as_retriever()
353
- retriever.search_kwargs['distance_metric'] = 'cos'
354
- retriever.search_kwargs['fetch_k'] = 100
355
- retriever.search_kwargs['maximal_marginal_relevance'] = True
356
- retriever.search_kwargs['k'] = 20
357
 
358
- # streaming_handler = kwargs.get('streaming_handler')
359
- model = ChatOpenAI(
360
- model_name='gpt-3.5-turbo-16k',
361
- temperature=0.0,
362
- verbose=True,
363
- streaming=True, # Pass `streaming=True` to make sure the client receives the data.
364
- openai_api_key="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",
365
- )
366
- qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
367
- chat_history = []
368
- return qa({"question": prompt, "chat_history": chat_history})["answer"]
369
 
370
- except Exception as e:
371
- print (str(e))
372
- return "Failed to generate documentation"
 
 
 
 
 
 
 
373
 
374
- # history[-1][1] = ""
375
- # for char in qa({"question": prompt, "chat_history": chat_history}):
376
- # history[-1][1] += char
377
- # time.sleep(0.01)
378
- # yield history
379
 
380
 
381
  def solveGithubIssue(ticket, history) -> Response:
@@ -383,7 +356,6 @@ def solveGithubIssue(ticket, history) -> Response:
383
  This endpoint takes in a github issue and then queries the db for the question against the codebase.
384
  """
385
  global repoName
386
- print(history)
387
  global ticket_choices
388
  github = repoName[:-4]
389
 
@@ -398,19 +370,17 @@ def solveGithubIssue(ticket, history) -> Response:
398
  """.format(repoFolder, body)
399
 
400
  q_display = """
401
- How would I approach solving this ticket: {}. Here is a summary of the issue: {}
402
  """.format(title, body)
403
 
404
- print(question)
405
 
406
  try:
407
- embeddings = OpenAIEmbeddings(openai_api_key="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")
408
  pathName = github.split('/')[-1]
409
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
410
 
411
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)
412
 
413
- # print("finished indexing repo")
414
  retriever = db.as_retriever()
415
  retriever.search_kwargs['distance_metric'] = 'cos'
416
  retriever.search_kwargs['fetch_k'] = 100
@@ -426,7 +396,6 @@ def solveGithubIssue(ticket, history) -> Response:
426
  callback_manager=CallbackManager(
427
  [StreamingGradioCallbackHandler(q)]
428
  ),
429
- openai_api_key="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",
430
  )
431
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever,max_tokens_limit=8000)
432
 
@@ -451,16 +420,13 @@ def bot(history, **kwargs):
451
  user_message = history[-1][0]
452
 
453
  global repoName
454
- print("Repo name in the bot: " + str(repoName))
455
  github = repoName[:-4]
456
  try:
457
- embeddings = OpenAIEmbeddings(openai_api_key="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")
458
  pathName = github.split('/')[-1]
459
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
460
 
461
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
462
-
463
- print("finished indexing repo")
464
  retriever = db.as_retriever()
465
  retriever.search_kwargs['distance_metric'] = 'cos'
466
  retriever.search_kwargs['fetch_k'] = 100
@@ -476,7 +442,6 @@ def bot(history, **kwargs):
476
  callback_manager=CallbackManager(
477
  [StreamingGradioCallbackHandler(q)]
478
  ),
479
- openai_api_key="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",
480
  )
481
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
482
  chat_history = []
@@ -487,6 +452,7 @@ def bot(history, **kwargs):
487
 
488
  history[-1][1] = ""
489
  for char in qa({"question": user_message, "chat_history": chat_history})["answer"]:
 
490
  history[-1][1] += char
491
  yield history
492
 
@@ -507,6 +473,9 @@ with gr.Blocks() as demo:
507
 
508
  success_response = gr.Textbox(label="")
509
  ingest_btn = gr.Button("Index repo")
 
 
 
510
 
511
  with gr.Column(visible=False) as launch_product:
512
 
@@ -533,10 +502,8 @@ with gr.Blocks() as demo:
533
  ), gr.update(visible=True)
534
 
535
  # global ticket_choices, ticket_titles, tickets
536
- print("REPO name in bug triage: " + str(repoName))
537
  repo = "/".join(repoName[:-4].split("/")[-2:])
538
  tickets = fetchGithubIssues(repo, 10)
539
- # print("tickets: " + str(tickets))
540
 
541
  # Create the dropdown
542
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
@@ -544,14 +511,11 @@ with gr.Blocks() as demo:
544
 
545
  # Here you want to first call the getGithubIssues function
546
  # repo = gr.Interface.get_session_state("repo")
547
- # print("REPO name in bug triage: " + str(repoName))
548
  # repo = "/".join(repoName[:-4].split("/")[-2:])
549
  # tickets = fetchGithubIssues(repo, 10)
550
- # print("tickets: " + str(tickets))
551
 
552
  # # Create the dropdown
553
  # global ticket_choices
554
- # print("tickets in bug triage: " + str(tickets))
555
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
556
  ticket_titles = [ticket["title"] for ticket in tickets]
557
 
@@ -594,28 +558,25 @@ with gr.Blocks() as demo:
594
 
595
  gr.Markdown(allDocs)
596
 
597
- # def button_click_callback(markdown):
598
- # print("IN BUTTON CLICK CALLBACK")
599
- # docs = generateDocumentationPerFolder("overview", repoName[:-4])
600
- # markdown.update(docs)
601
 
602
- # markdown = gr.Markdown()
603
- # # Generate the left column buttons and their names and wrap each one in a function
604
- # with gr.Row():
605
- # with gr.Column(scale=.5, min_width=300):
606
- # dirNames = generateFolderNamesForRepo(repoName[:-4])
607
- # print(dirNames)
608
- # buttons = [gr.Button(folder_name) for folder_name in dirNames]
609
- # for btn, folder_name in zip(buttons, dirNames):
610
- # btn.click(button_click_callback, [markdown], [markdown] )
611
 
612
 
613
- # # Generate the overall documentation for the main bubble at the same time
614
- # print("REPO NAME IN DOCS: " + str(repoName[:-4]))
615
- # with gr.Column(scale=2, min_width=300):
616
- # docs = generateDocumentationPerFolder("overview", repoName[:-4])
617
- # markdown.update(docs)
618
- # markdown.render()
619
 
620
 
621
  with gr.Tab("Custom Model Finetuning"):
@@ -683,5 +644,5 @@ with gr.Blocks() as demo:
683
  ingest_btn.click(fn=index_repo, inputs=[repoTextBox, ingestedRepos], outputs=[success_response, launch_product], api_name="index_repo")
684
 
685
  demo.queue()
686
- demo.launch(debug=True)
687
 
 
18
  import time
19
  import together
20
 
 
 
 
 
21
  import os
22
  from langchain.document_loaders import TextLoader
23
  from langchain.text_splitter import CharacterTextSplitter
24
 
25
  import subprocess
 
26
 
27
  from langchain.callbacks.base import BaseCallbackHandler
28
  from langchain.schema import LLMResult
 
81
  global ticket_choices
82
  tickets = []
83
 
84
+ repoName = "https://github.com/sphinx-doc/sphinx.git"
85
 
86
  embeddings = OpenAIEmbeddings(disallowed_special=())
87
 
 
95
 
96
  def index_repo(textbox: str, dropdown: str) -> Response:
97
 
 
98
  mapping = {
99
  "Langchain" : "https://github.com/langchain-ai/langchain.git",
100
  "Weaviate": "https://github.com/weaviate/weaviate.git",
 
108
  repo = textbox
109
  else:
110
  repo = mapping[dropdown[0]]
 
111
 
 
112
  pathName = git_clone(repo)
113
  root_dir = './' + pathName
 
114
 
 
115
  activeloop_username = "aiswaryas"
116
  dataset_path = f"hub://{activeloop_username}/" + pathName + "1000"
 
117
 
118
  try:
119
  db = DeepLake(dataset_path=dataset_path,
 
132
  try:
133
  docs = []
134
  for dirpath, dirnames, filenames in os.walk(root_dir):
 
135
  for file in filenames:
136
  print(file)
137
  try:
 
171
  # embedding_function=embeddings,
172
  # token=os.environ['ACTIVELOOP_TOKEN'], read_only=False)
173
 
 
 
 
174
  except Exception as e:
175
  return Response(
176
  result= "Failed to index github repo",
 
187
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
188
  ticket_titles = [ticket["title"] for ticket in tickets]
189
 
 
 
190
  return {
191
  success_response: "SUCCESS",
192
  launch_product: gr.update(visible=True)
 
196
  def answer_questions(question: str, github: str, **kwargs) -> Response:
197
 
198
  global repoName
 
199
  github = repoName[:-4]
200
+ print("REPO NAME: " + github)
201
+
202
  try:
203
+ embeddings = OpenAIEmbeddings()
204
  pathName = github.split('/')[-1]
205
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
206
 
207
+ print("before reading repo")
208
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
209
 
210
  print("finished indexing repo")
 
224
  callback_manager=CallbackManager(
225
  [StreamingGradioCallbackHandler(q)]
226
  ),
 
227
  )
228
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
229
  chat_history = []
 
276
  "comments_url": issue["comments_url"],
277
  })
278
 
 
279
  return issues_data
280
 
281
 
 
321
  an overview of that function.
322
  """.format(dir, github)
323
 
324
+ return prompt
325
+ # try:
326
+ # embeddings = OpenAIEmbeddings()
327
+ # pathName = github.split('/')[-1]
328
+ # dataset_path = "hub://aiswaryas/" + pathName + "1000"
 
 
 
329
 
330
+ # db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
 
 
 
 
 
331
 
332
+ # retriever = db.as_retriever()
333
+ # retriever.search_kwargs['distance_metric'] = 'cos'
334
+ # retriever.search_kwargs['fetch_k'] = 100
335
+ # retriever.search_kwargs['maximal_marginal_relevance'] = True
336
+ # retriever.search_kwargs['k'] = 20
 
 
 
 
 
 
337
 
338
+ # # streaming_handler = kwargs.get('streaming_handler')
339
+ # model = ChatOpenAI(
340
+ # model_name='gpt-3.5-turbo-16k',
341
+ # temperature=0.0,
342
+ # verbose=True,
343
+ # streaming=True, # Pass `streaming=True` to make sure the client receives the data.
344
+ # )
345
+ # qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
346
+ # chat_history = []
347
+ # return qa({"question": prompt, "chat_history": chat_history})["answer"]
348
 
349
+ # except Exception as e:
350
+ # print (str(e))
351
+ # return "Failed to generate documentation"
 
 
352
 
353
 
354
  def solveGithubIssue(ticket, history) -> Response:
 
356
  This endpoint takes in a github issue and then queries the db for the question against the codebase.
357
  """
358
  global repoName
 
359
  global ticket_choices
360
  github = repoName[:-4]
361
 
 
370
  """.format(repoFolder, body)
371
 
372
  q_display = """
373
+ Can you explain how to approach solving this ticket: {}. Here is a summary of the issue: {}
374
  """.format(title, body)
375
 
 
376
 
377
  try:
378
+ embeddings = OpenAIEmbeddings()
379
  pathName = github.split('/')[-1]
380
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
381
 
382
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)
383
 
 
384
  retriever = db.as_retriever()
385
  retriever.search_kwargs['distance_metric'] = 'cos'
386
  retriever.search_kwargs['fetch_k'] = 100
 
396
  callback_manager=CallbackManager(
397
  [StreamingGradioCallbackHandler(q)]
398
  ),
 
399
  )
400
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever,max_tokens_limit=8000)
401
 
 
420
  user_message = history[-1][0]
421
 
422
  global repoName
 
423
  github = repoName[:-4]
424
  try:
425
+ embeddings = OpenAIEmbeddings()
426
  pathName = github.split('/')[-1]
427
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
428
 
429
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
 
 
430
  retriever = db.as_retriever()
431
  retriever.search_kwargs['distance_metric'] = 'cos'
432
  retriever.search_kwargs['fetch_k'] = 100
 
442
  callback_manager=CallbackManager(
443
  [StreamingGradioCallbackHandler(q)]
444
  ),
 
445
  )
446
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
447
  chat_history = []
 
452
 
453
  history[-1][1] = ""
454
  for char in qa({"question": user_message, "chat_history": chat_history})["answer"]:
455
+ print(char)
456
  history[-1][1] += char
457
  yield history
458
 
 
473
 
474
  success_response = gr.Textbox(label="")
475
  ingest_btn = gr.Button("Index repo")
476
+ ticketDropdown = gr.Dropdown()
477
+
478
+ repoTextBox.submit(fetchGithubIssues, [], ticketDropdown)
479
 
480
  with gr.Column(visible=False) as launch_product:
481
 
 
502
  ), gr.update(visible=True)
503
 
504
  # global ticket_choices, ticket_titles, tickets
 
505
  repo = "/".join(repoName[:-4].split("/")[-2:])
506
  tickets = fetchGithubIssues(repo, 10)
 
507
 
508
  # Create the dropdown
509
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
 
511
 
512
  # Here you want to first call the getGithubIssues function
513
  # repo = gr.Interface.get_session_state("repo")
 
514
  # repo = "/".join(repoName[:-4].split("/")[-2:])
515
  # tickets = fetchGithubIssues(repo, 10)
 
516
 
517
  # # Create the dropdown
518
  # global ticket_choices
 
519
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
520
  ticket_titles = [ticket["title"] for ticket in tickets]
521
 
 
558
 
559
  gr.Markdown(allDocs)
560
 
561
+ def button_click_callback(markdown):
562
+ docs = generateDocumentationPerFolder("overview", repoName[:-4])
563
+ markdown.update(docs)
 
564
 
565
+ markdown = gr.Markdown()
566
+ # Generate the left column buttons and their names and wrap each one in a function
567
+ with gr.Row():
568
+ with gr.Column(scale=.5, min_width=300):
569
+ dirNames = generateFolderNamesForRepo(repoName[:-4])
570
+ buttons = [gr.Button(folder_name) for folder_name in dirNames]
571
+ for btn, folder_name in zip(buttons, dirNames):
572
+ btn.click(button_click_callback, [markdown], [markdown] )
 
573
 
574
 
575
+ # Generate the overall documentation for the main bubble at the same time
576
+ with gr.Column(scale=2, min_width=300):
577
+ docs = generateDocumentationPerFolder("overview", repoName[:-4])
578
+ markdown.update(docs)
579
+ markdown.render()
 
580
 
581
 
582
  with gr.Tab("Custom Model Finetuning"):
 
644
  ingest_btn.click(fn=index_repo, inputs=[repoTextBox, ingestedRepos], outputs=[success_response, launch_product], api_name="index_repo")
645
 
646
  demo.queue()
647
+ demo.launch(debug=True, share=True)
648