Aiswarya Sankar committed on
Commit
5a8be90
·
1 Parent(s): 9937bad

Remove openai token

Browse files
Files changed (1) hide show
  1. app.py +53 -92
app.py CHANGED
@@ -18,16 +18,11 @@ import random
18
  import time
19
  import together
20
 
21
- os.environ['OPENAI_API_KEY']='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'
22
- os.environ['ACTIVELOOP_TOKEN']='eyJhbGciOiJIUzUxMiIsImlhdCI6MTY4MTU5NTgyOCwiZXhwIjoxNzEzMjE4MTU5fQ.eyJpZCI6ImFpc3dhcnlhcyJ9.eoiMFZsS20zzMXXupFbowUlLdgIgf_MA1ck_DByzREeoQvNm8GPhKEfqea2y1Qak-ud2jo9dhSTBTfRe1ztezw'
23
-
24
-
25
  import os
26
  from langchain.document_loaders import TextLoader
27
  from langchain.text_splitter import CharacterTextSplitter
28
 
29
  import subprocess
30
- # repo_name = "https://github.com/sourcegraph/cody.git"
31
 
32
  from langchain.callbacks.base import BaseCallbackHandler
33
  from langchain.schema import LLMResult
@@ -86,7 +81,7 @@ global tickets
86
  global ticket_choices
87
  tickets = []
88
 
89
- repoName = "https://github.com/sourcegraph/cody.git"
90
 
91
  embeddings = OpenAIEmbeddings(disallowed_special=())
92
 
@@ -100,7 +95,6 @@ def git_clone(repo_url):
100
 
101
  def index_repo(textbox: str, dropdown: str) -> Response:
102
 
103
- print("IN INDEX_REPO")
104
  mapping = {
105
  "Langchain" : "https://github.com/langchain-ai/langchain.git",
106
  "Weaviate": "https://github.com/weaviate/weaviate.git",
@@ -114,17 +108,12 @@ def index_repo(textbox: str, dropdown: str) -> Response:
114
  repo = textbox
115
  else:
116
  repo = mapping[dropdown[0]]
117
- # repoName = gr.State(repo)
118
 
119
- print("Repo name after setting the value: " + str(repoName))
120
  pathName = git_clone(repo)
121
  root_dir = './' + pathName
122
- print(root_dir)
123
 
124
- print("Repo name after setting the value: " + str(repoName))
125
  activeloop_username = "aiswaryas"
126
  dataset_path = f"hub://{activeloop_username}/" + pathName + "1000"
127
- print(dataset_path)
128
 
129
  try:
130
  db = DeepLake(dataset_path=dataset_path,
@@ -143,7 +132,6 @@ def index_repo(textbox: str, dropdown: str) -> Response:
143
  try:
144
  docs = []
145
  for dirpath, dirnames, filenames in os.walk(root_dir):
146
- print("rootdir: " + str(root_dir))
147
  for file in filenames:
148
  print(file)
149
  try:
@@ -183,9 +171,6 @@ def index_repo(textbox: str, dropdown: str) -> Response:
183
  # embedding_function=embeddings,
184
  # token=os.environ['ACTIVELOOP_TOKEN'], read_only=False)
185
 
186
- else:
187
- print("Dataset already exists")
188
-
189
  except Exception as e:
190
  return Response(
191
  result= "Failed to index github repo",
@@ -202,8 +187,6 @@ def index_repo(textbox: str, dropdown: str) -> Response:
202
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
203
  ticket_titles = [ticket["title"] for ticket in tickets]
204
 
205
- print("Repo name before return: " + str(repoName))
206
-
207
  return {
208
  success_response: "SUCCESS",
209
  launch_product: gr.update(visible=True)
@@ -213,14 +196,15 @@ def index_repo(textbox: str, dropdown: str) -> Response:
213
  def answer_questions(question: str, github: str, **kwargs) -> Response:
214
 
215
  global repoName
216
- print("Repo name")
217
  github = repoName[:-4]
218
- print(github)
 
219
  try:
220
- embeddings = OpenAIEmbeddings(openai_api_key="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")
221
  pathName = github.split('/')[-1]
222
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
223
 
 
224
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
225
 
226
  print("finished indexing repo")
@@ -240,7 +224,6 @@ def answer_questions(question: str, github: str, **kwargs) -> Response:
240
  callback_manager=CallbackManager(
241
  [StreamingGradioCallbackHandler(q)]
242
  ),
243
- openai_api_key="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",
244
  )
245
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
246
  chat_history = []
@@ -293,7 +276,6 @@ def fetchGithubIssues(repo: str, num_issues:int, **kwargs) -> Response:
293
  "comments_url": issue["comments_url"],
294
  })
295
 
296
- # print(issues_data)
297
  return issues_data
298
 
299
 
@@ -339,43 +321,34 @@ def generateDocumentationPerFolder(dir, github):
339
  an overview of that function.
340
  """.format(dir, github)
341
 
342
- print(prompt)
343
- try:
344
- embeddings = OpenAIEmbeddings(openai_api_key="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")
345
- pathName = github.split('/')[-1]
346
- print("PATH NAME: " + str(pathName))
347
- dataset_path = "hub://aiswaryas/" + pathName + "1000"
348
-
349
- db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
350
 
351
- # print("finished indexing repo")
352
- retriever = db.as_retriever()
353
- retriever.search_kwargs['distance_metric'] = 'cos'
354
- retriever.search_kwargs['fetch_k'] = 100
355
- retriever.search_kwargs['maximal_marginal_relevance'] = True
356
- retriever.search_kwargs['k'] = 20
357
 
358
- # streaming_handler = kwargs.get('streaming_handler')
359
- model = ChatOpenAI(
360
- model_name='gpt-3.5-turbo-16k',
361
- temperature=0.0,
362
- verbose=True,
363
- streaming=True, # Pass `streaming=True` to make sure the client receives the data.
364
- openai_api_key="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",
365
- )
366
- qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
367
- chat_history = []
368
- return qa({"question": prompt, "chat_history": chat_history})["answer"]
369
 
370
- except Exception as e:
371
- print (str(e))
372
- return "Failed to generate documentation"
 
 
 
 
 
 
 
373
 
374
- # history[-1][1] = ""
375
- # for char in qa({"question": prompt, "chat_history": chat_history}):
376
- # history[-1][1] += char
377
- # time.sleep(0.01)
378
- # yield history
379
 
380
 
381
  def solveGithubIssue(ticket, history) -> Response:
@@ -383,7 +356,6 @@ def solveGithubIssue(ticket, history) -> Response:
383
  This endpoint takes in a github issue and then queries the db for the question against the codebase.
384
  """
385
  global repoName
386
- print(history)
387
  global ticket_choices
388
  github = repoName[:-4]
389
 
@@ -398,19 +370,17 @@ def solveGithubIssue(ticket, history) -> Response:
398
  """.format(repoFolder, body)
399
 
400
  q_display = """
401
- How would I approach solving this ticket: {}. Here is a summary of the issue: {}
402
  """.format(title, body)
403
 
404
- print(question)
405
 
406
  try:
407
- embeddings = OpenAIEmbeddings(openai_api_key="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")
408
  pathName = github.split('/')[-1]
409
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
410
 
411
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)
412
 
413
- # print("finished indexing repo")
414
  retriever = db.as_retriever()
415
  retriever.search_kwargs['distance_metric'] = 'cos'
416
  retriever.search_kwargs['fetch_k'] = 100
@@ -426,7 +396,6 @@ def solveGithubIssue(ticket, history) -> Response:
426
  callback_manager=CallbackManager(
427
  [StreamingGradioCallbackHandler(q)]
428
  ),
429
- openai_api_key="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",
430
  )
431
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever,max_tokens_limit=8000)
432
 
@@ -451,16 +420,13 @@ def bot(history, **kwargs):
451
  user_message = history[-1][0]
452
 
453
  global repoName
454
- print("Repo name in the bot: " + str(repoName))
455
  github = repoName[:-4]
456
  try:
457
- embeddings = OpenAIEmbeddings(openai_api_key="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")
458
  pathName = github.split('/')[-1]
459
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
460
 
461
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
462
-
463
- print("finished indexing repo")
464
  retriever = db.as_retriever()
465
  retriever.search_kwargs['distance_metric'] = 'cos'
466
  retriever.search_kwargs['fetch_k'] = 100
@@ -476,7 +442,6 @@ def bot(history, **kwargs):
476
  callback_manager=CallbackManager(
477
  [StreamingGradioCallbackHandler(q)]
478
  ),
479
- openai_api_key="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",
480
  )
481
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
482
  chat_history = []
@@ -487,6 +452,7 @@ def bot(history, **kwargs):
487
 
488
  history[-1][1] = ""
489
  for char in qa({"question": user_message, "chat_history": chat_history})["answer"]:
 
490
  history[-1][1] += char
491
  yield history
492
 
@@ -507,6 +473,9 @@ with gr.Blocks() as demo:
507
 
508
  success_response = gr.Textbox(label="")
509
  ingest_btn = gr.Button("Index repo")
 
 
 
510
 
511
  with gr.Column(visible=False) as launch_product:
512
 
@@ -533,10 +502,8 @@ with gr.Blocks() as demo:
533
  ), gr.update(visible=True)
534
 
535
  # global ticket_choices, ticket_titles, tickets
536
- print("REPO name in bug triage: " + str(repoName))
537
  repo = "/".join(repoName[:-4].split("/")[-2:])
538
  tickets = fetchGithubIssues(repo, 10)
539
- # print("tickets: " + str(tickets))
540
 
541
  # Create the dropdown
542
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
@@ -544,14 +511,11 @@ with gr.Blocks() as demo:
544
 
545
  # Here you want to first call the getGithubIssues function
546
  # repo = gr.Interface.get_session_state("repo")
547
- # print("REPO name in bug triage: " + str(repoName))
548
  # repo = "/".join(repoName[:-4].split("/")[-2:])
549
  # tickets = fetchGithubIssues(repo, 10)
550
- # print("tickets: " + str(tickets))
551
 
552
  # # Create the dropdown
553
  # global ticket_choices
554
- # print("tickets in bug triage: " + str(tickets))
555
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
556
  ticket_titles = [ticket["title"] for ticket in tickets]
557
 
@@ -594,28 +558,25 @@ with gr.Blocks() as demo:
594
 
595
  gr.Markdown(allDocs)
596
 
597
- # def button_click_callback(markdown):
598
- # print("IN BUTTON CLICK CALLBACK")
599
- # docs = generateDocumentationPerFolder("overview", repoName[:-4])
600
- # markdown.update(docs)
601
 
602
- # markdown = gr.Markdown()
603
- # # Generate the left column buttons and their names and wrap each one in a function
604
- # with gr.Row():
605
- # with gr.Column(scale=.5, min_width=300):
606
- # dirNames = generateFolderNamesForRepo(repoName[:-4])
607
- # print(dirNames)
608
- # buttons = [gr.Button(folder_name) for folder_name in dirNames]
609
- # for btn, folder_name in zip(buttons, dirNames):
610
- # btn.click(button_click_callback, [markdown], [markdown] )
611
 
612
 
613
- # # Generate the overall documentation for the main bubble at the same time
614
- # print("REPO NAME IN DOCS: " + str(repoName[:-4]))
615
- # with gr.Column(scale=2, min_width=300):
616
- # docs = generateDocumentationPerFolder("overview", repoName[:-4])
617
- # markdown.update(docs)
618
- # markdown.render()
619
 
620
 
621
  with gr.Tab("Custom Model Finetuning"):
@@ -683,5 +644,5 @@ with gr.Blocks() as demo:
683
  ingest_btn.click(fn=index_repo, inputs=[repoTextBox, ingestedRepos], outputs=[success_response, launch_product], api_name="index_repo")
684
 
685
  demo.queue()
686
- demo.launch(debug=True)
687
 
 
18
  import time
19
  import together
20
 
 
 
 
 
21
  import os
22
  from langchain.document_loaders import TextLoader
23
  from langchain.text_splitter import CharacterTextSplitter
24
 
25
  import subprocess
 
26
 
27
  from langchain.callbacks.base import BaseCallbackHandler
28
  from langchain.schema import LLMResult
 
81
  global ticket_choices
82
  tickets = []
83
 
84
+ repoName = "https://github.com/sphinx-doc/sphinx.git"
85
 
86
  embeddings = OpenAIEmbeddings(disallowed_special=())
87
 
 
95
 
96
  def index_repo(textbox: str, dropdown: str) -> Response:
97
 
 
98
  mapping = {
99
  "Langchain" : "https://github.com/langchain-ai/langchain.git",
100
  "Weaviate": "https://github.com/weaviate/weaviate.git",
 
108
  repo = textbox
109
  else:
110
  repo = mapping[dropdown[0]]
 
111
 
 
112
  pathName = git_clone(repo)
113
  root_dir = './' + pathName
 
114
 
 
115
  activeloop_username = "aiswaryas"
116
  dataset_path = f"hub://{activeloop_username}/" + pathName + "1000"
 
117
 
118
  try:
119
  db = DeepLake(dataset_path=dataset_path,
 
132
  try:
133
  docs = []
134
  for dirpath, dirnames, filenames in os.walk(root_dir):
 
135
  for file in filenames:
136
  print(file)
137
  try:
 
171
  # embedding_function=embeddings,
172
  # token=os.environ['ACTIVELOOP_TOKEN'], read_only=False)
173
 
 
 
 
174
  except Exception as e:
175
  return Response(
176
  result= "Failed to index github repo",
 
187
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
188
  ticket_titles = [ticket["title"] for ticket in tickets]
189
 
 
 
190
  return {
191
  success_response: "SUCCESS",
192
  launch_product: gr.update(visible=True)
 
196
  def answer_questions(question: str, github: str, **kwargs) -> Response:
197
 
198
  global repoName
 
199
  github = repoName[:-4]
200
+ print("REPO NAME: " + github)
201
+
202
  try:
203
+ embeddings = OpenAIEmbeddings()
204
  pathName = github.split('/')[-1]
205
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
206
 
207
+ print("before reading repo")
208
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
209
 
210
  print("finished indexing repo")
 
224
  callback_manager=CallbackManager(
225
  [StreamingGradioCallbackHandler(q)]
226
  ),
 
227
  )
228
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
229
  chat_history = []
 
276
  "comments_url": issue["comments_url"],
277
  })
278
 
 
279
  return issues_data
280
 
281
 
 
321
  an overview of that function.
322
  """.format(dir, github)
323
 
324
+ return prompt
325
+ # try:
326
+ # embeddings = OpenAIEmbeddings()
327
+ # pathName = github.split('/')[-1]
328
+ # dataset_path = "hub://aiswaryas/" + pathName + "1000"
 
 
 
329
 
330
+ # db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
 
 
 
 
 
331
 
332
+ # retriever = db.as_retriever()
333
+ # retriever.search_kwargs['distance_metric'] = 'cos'
334
+ # retriever.search_kwargs['fetch_k'] = 100
335
+ # retriever.search_kwargs['maximal_marginal_relevance'] = True
336
+ # retriever.search_kwargs['k'] = 20
 
 
 
 
 
 
337
 
338
+ # # streaming_handler = kwargs.get('streaming_handler')
339
+ # model = ChatOpenAI(
340
+ # model_name='gpt-3.5-turbo-16k',
341
+ # temperature=0.0,
342
+ # verbose=True,
343
+ # streaming=True, # Pass `streaming=True` to make sure the client receives the data.
344
+ # )
345
+ # qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
346
+ # chat_history = []
347
+ # return qa({"question": prompt, "chat_history": chat_history})["answer"]
348
 
349
+ # except Exception as e:
350
+ # print (str(e))
351
+ # return "Failed to generate documentation"
 
 
352
 
353
 
354
  def solveGithubIssue(ticket, history) -> Response:
 
356
  This endpoint takes in a github issue and then queries the db for the question against the codebase.
357
  """
358
  global repoName
 
359
  global ticket_choices
360
  github = repoName[:-4]
361
 
 
370
  """.format(repoFolder, body)
371
 
372
  q_display = """
373
+ Can you explain how to approach solving this ticket: {}. Here is a summary of the issue: {}
374
  """.format(title, body)
375
 
 
376
 
377
  try:
378
+ embeddings = OpenAIEmbeddings()
379
  pathName = github.split('/')[-1]
380
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
381
 
382
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)
383
 
 
384
  retriever = db.as_retriever()
385
  retriever.search_kwargs['distance_metric'] = 'cos'
386
  retriever.search_kwargs['fetch_k'] = 100
 
396
  callback_manager=CallbackManager(
397
  [StreamingGradioCallbackHandler(q)]
398
  ),
 
399
  )
400
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever,max_tokens_limit=8000)
401
 
 
420
  user_message = history[-1][0]
421
 
422
  global repoName
 
423
  github = repoName[:-4]
424
  try:
425
+ embeddings = OpenAIEmbeddings()
426
  pathName = github.split('/')[-1]
427
  dataset_path = "hub://aiswaryas/" + pathName + "1000"
428
 
429
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
 
 
430
  retriever = db.as_retriever()
431
  retriever.search_kwargs['distance_metric'] = 'cos'
432
  retriever.search_kwargs['fetch_k'] = 100
 
442
  callback_manager=CallbackManager(
443
  [StreamingGradioCallbackHandler(q)]
444
  ),
 
445
  )
446
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
447
  chat_history = []
 
452
 
453
  history[-1][1] = ""
454
  for char in qa({"question": user_message, "chat_history": chat_history})["answer"]:
455
+ print(char)
456
  history[-1][1] += char
457
  yield history
458
 
 
473
 
474
  success_response = gr.Textbox(label="")
475
  ingest_btn = gr.Button("Index repo")
476
+ ticketDropdown = gr.Dropdown()
477
+
478
+ repoTextBox.submit(fetchGithubIssues, [], ticketDropdown)
479
 
480
  with gr.Column(visible=False) as launch_product:
481
 
 
502
  ), gr.update(visible=True)
503
 
504
  # global ticket_choices, ticket_titles, tickets
 
505
  repo = "/".join(repoName[:-4].split("/")[-2:])
506
  tickets = fetchGithubIssues(repo, 10)
 
507
 
508
  # Create the dropdown
509
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
 
511
 
512
  # Here you want to first call the getGithubIssues function
513
  # repo = gr.Interface.get_session_state("repo")
 
514
  # repo = "/".join(repoName[:-4].split("/")[-2:])
515
  # tickets = fetchGithubIssues(repo, 10)
 
516
 
517
  # # Create the dropdown
518
  # global ticket_choices
 
519
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
520
  ticket_titles = [ticket["title"] for ticket in tickets]
521
 
 
558
 
559
  gr.Markdown(allDocs)
560
 
561
+ def button_click_callback(markdown):
562
+ docs = generateDocumentationPerFolder("overview", repoName[:-4])
563
+ markdown.update(docs)
 
564
 
565
+ markdown = gr.Markdown()
566
+ # Generate the left column buttons and their names and wrap each one in a function
567
+ with gr.Row():
568
+ with gr.Column(scale=.5, min_width=300):
569
+ dirNames = generateFolderNamesForRepo(repoName[:-4])
570
+ buttons = [gr.Button(folder_name) for folder_name in dirNames]
571
+ for btn, folder_name in zip(buttons, dirNames):
572
+ btn.click(button_click_callback, [markdown], [markdown] )
 
573
 
574
 
575
+ # Generate the overall documentation for the main bubble at the same time
576
+ with gr.Column(scale=2, min_width=300):
577
+ docs = generateDocumentationPerFolder("overview", repoName[:-4])
578
+ markdown.update(docs)
579
+ markdown.render()
 
580
 
581
 
582
  with gr.Tab("Custom Model Finetuning"):
 
644
  ingest_btn.click(fn=index_repo, inputs=[repoTextBox, ingestedRepos], outputs=[success_response, launch_product], api_name="index_repo")
645
 
646
  demo.queue()
647
+ demo.launch(debug=True, share=True)
648