Aiswarya Sankar commited on
Commit
f3b7606
·
1 Parent(s): aaea47d

Work with Cody

Browse files
Files changed (1) hide show
  1. app.py +30 -27
app.py CHANGED
@@ -18,7 +18,7 @@ import random
18
  import time
19
  import together
20
 
21
- os.environ['OPENAI_API_KEY']='sk-[REDACTED-LEAKED-KEY]'
22
  os.environ['ACTIVELOOP_TOKEN']='[REDACTED-LEAKED-TOKEN]'
23
 
24
 
@@ -27,7 +27,7 @@ from langchain.document_loaders import TextLoader
27
  from langchain.text_splitter import CharacterTextSplitter
28
 
29
  import subprocess
30
- # repo_name = "https://github.com/aiswaryasankar/memeAI.git"
31
 
32
  from langchain.callbacks.base import BaseCallbackHandler
33
  from langchain.schema import LLMResult
@@ -86,7 +86,7 @@ global tickets
86
  global ticket_choices
87
  tickets = []
88
 
89
- repoName = "https://github.com/aiswaryasankar/memeAI.git"
90
 
91
  embeddings = OpenAIEmbeddings(disallowed_special=())
92
 
@@ -100,6 +100,7 @@ def git_clone(repo_url):
100
 
101
  def index_repo(textbox: str, dropdown: str) -> Response:
102
 
 
103
  mapping = {
104
  "Langchain" : "https://github.com/langchain-ai/langchain.git",
105
  "Weaviate": "https://github.com/weaviate/weaviate.git",
@@ -109,9 +110,6 @@ def index_repo(textbox: str, dropdown: str) -> Response:
109
  "GenerativeAgents": "https://github.com/joonspk-research/generative_agents.git"
110
  }
111
 
112
- # print(textbox)
113
- # print(dropdown[0])
114
-
115
  if textbox != "":
116
  repo = textbox
117
  else:
@@ -124,7 +122,8 @@ def index_repo(textbox: str, dropdown: str) -> Response:
124
 
125
  print("Repo name after setting the value: " + str(repoName))
126
  activeloop_username = "aiswaryas"
127
- dataset_path = f"hub://{activeloop_username}/" + pathName
 
128
 
129
  try:
130
  db = DeepLake(dataset_path=dataset_path,
@@ -143,7 +142,9 @@ def index_repo(textbox: str, dropdown: str) -> Response:
143
  try:
144
  docs = []
145
  for dirpath, dirnames, filenames in os.walk(root_dir):
 
146
  for file in filenames:
 
147
  try:
148
  loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
149
  docs.extend(loader.load_and_split())
@@ -152,7 +153,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
152
  pass
153
 
154
  activeloop_username = "aiswaryas"
155
- dataset_path = f"hub://{activeloop_username}/" + pathName
156
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
157
  texts = text_splitter.split_documents(docs)
158
 
@@ -162,6 +163,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
162
  read_only=False)
163
  # Do this in chunks to avoid hitting the ratelimit immediately
164
  for i in range(0, len(texts), 500):
 
165
  db.add_documents(texts[i:i+500])
166
  time.sleep(.1)
167
 
@@ -179,6 +181,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
179
  # db = DeepLake(dataset_path=dataset_path,
180
  # embedding_function=embeddings,
181
  # token=os.environ['ACTIVELOOP_TOKEN'], read_only=False)
 
182
  else:
183
  print("Dataset already exists")
184
 
@@ -194,7 +197,7 @@ def index_repo(textbox: str, dropdown: str) -> Response:
194
  print("REPO name in bug triage: " + str(repoName))
195
  repo = "/".join(repoName[:-4].split("/")[-2:])
196
  tickets = fetchGithubIssues(repo, 10)
197
- print("tickets: " + str(tickets))
198
 
199
  # Create the dropdown
200
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
@@ -215,9 +218,9 @@ def answer_questions(question: str, github: str, **kwargs) -> Response:
215
  github = repoName[:-4]
216
  print(github)
217
  try:
218
- embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
219
  pathName = github.split('/')[-1]
220
- dataset_path = "hub://aiswaryas/" + pathName
221
 
222
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
223
 
@@ -238,7 +241,7 @@ def answer_questions(question: str, github: str, **kwargs) -> Response:
238
  callback_manager=CallbackManager(
239
  [StreamingGradioCallbackHandler(q)]
240
  ),
241
- openai_api_key="sk-[REDACTED-LEAKED-KEY]",
242
  )
243
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
244
  chat_history = []
@@ -291,7 +294,7 @@ def fetchGithubIssues(repo: str, num_issues:int, **kwargs) -> Response:
291
  "comments_url": issue["comments_url"],
292
  })
293
 
294
- print(issues_data)
295
  return issues_data
296
 
297
 
@@ -303,7 +306,7 @@ def generateFolderNamesForRepo(repo):
303
  input data and generate the responses that are displayed in the UI.
304
  """
305
  pathName = git_clone(repo)
306
- root_dir = './' + pathName
307
 
308
  files, dirs, docs = [], [], []
309
  for dirpath, dirnames, filenames in os.walk(root_dir):
@@ -317,7 +320,7 @@ def generateFolderNamesForRepo(repo):
317
  print("Exception: " + str(e) + "| File: " + os.path.join(dirpath, file))
318
  pass
319
 
320
- return dirs[0]
321
 
322
 
323
  def generateDocumentationPerFolder(dir, github):
@@ -339,10 +342,10 @@ def generateDocumentationPerFolder(dir, github):
339
 
340
  print(prompt)
341
  try:
342
- embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
343
  pathName = github.split('/')[-1]
344
  print("PATH NAME: " + str(pathName))
345
- dataset_path = "hub://aiswaryas/" + pathName
346
 
347
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
348
 
@@ -359,7 +362,7 @@ def generateDocumentationPerFolder(dir, github):
359
  temperature=0.0,
360
  verbose=True,
361
  streaming=True, # Pass `streaming=True` to make sure the client receives the data.
362
- openai_api_key="sk-[REDACTED-LEAKED-KEY]",
363
  )
364
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
365
  chat_history = []
@@ -402,9 +405,9 @@ def solveGithubIssue(ticket, history) -> Response:
402
  print(question)
403
 
404
  try:
405
- embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
406
  pathName = github.split('/')[-1]
407
- dataset_path = "hub://aiswaryas/" + pathName
408
 
409
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)
410
 
@@ -424,7 +427,7 @@ def solveGithubIssue(ticket, history) -> Response:
424
  callback_manager=CallbackManager(
425
  [StreamingGradioCallbackHandler(q)]
426
  ),
427
- openai_api_key="sk-[REDACTED-LEAKED-KEY]",
428
  )
429
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever,max_tokens_limit=8000)
430
 
@@ -452,9 +455,9 @@ def bot(history, **kwargs):
452
  print("Repo name in the bot: " + str(repoName))
453
  github = repoName[:-4]
454
  try:
455
- embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
456
  pathName = github.split('/')[-1]
457
- dataset_path = "hub://aiswaryas/" + pathName
458
 
459
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
460
 
@@ -474,7 +477,7 @@ def bot(history, **kwargs):
474
  callback_manager=CallbackManager(
475
  [StreamingGradioCallbackHandler(q)]
476
  ),
477
- openai_api_key="sk-[REDACTED-LEAKED-KEY]",
478
  )
479
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
480
  chat_history = []
@@ -501,7 +504,7 @@ with gr.Blocks() as demo:
501
  repoTextBox = gr.Textbox(label="Github Repository")
502
 
503
  gr.Markdown("""Choose from any of the following repositories""")
504
- ingestedRepos = gr.CheckboxGroup(choices=['Langchain', 'Weaviate', 'OpenAssistant', 'GenerativeAgents','Llama2', "MemeAI"], label="Github Repository", value="MemeAI")
505
 
506
  success_response = gr.Textbox(label="")
507
  ingest_btn = gr.Button("Index repo")
@@ -534,7 +537,7 @@ with gr.Blocks() as demo:
534
  print("REPO name in bug triage: " + str(repoName))
535
  repo = "/".join(repoName[:-4].split("/")[-2:])
536
  tickets = fetchGithubIssues(repo, 10)
537
- print("tickets: " + str(tickets))
538
 
539
  # Create the dropdown
540
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
@@ -549,7 +552,7 @@ with gr.Blocks() as demo:
549
 
550
  # # Create the dropdown
551
  # global ticket_choices
552
- print("tickets in bug triage: " + str(tickets))
553
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
554
  ticket_titles = [ticket["title"] for ticket in tickets]
555
 
 
18
  import time
19
  import together
20
 
21
+ os.environ['OPENAI_API_KEY']='sk-[REDACTED-LEAKED-KEY]'
22
  os.environ['ACTIVELOOP_TOKEN']='[REDACTED-LEAKED-TOKEN]'
23
 
24
 
 
27
  from langchain.text_splitter import CharacterTextSplitter
28
 
29
  import subprocess
30
+ # repo_name = "https://github.com/sourcegraph/cody.git"
31
 
32
  from langchain.callbacks.base import BaseCallbackHandler
33
  from langchain.schema import LLMResult
 
86
  global ticket_choices
87
  tickets = []
88
 
89
+ repoName = "https://github.com/sourcegraph/cody.git"
90
 
91
  embeddings = OpenAIEmbeddings(disallowed_special=())
92
 
 
100
 
101
  def index_repo(textbox: str, dropdown: str) -> Response:
102
 
103
+ print("IN INDEX_REPO")
104
  mapping = {
105
  "Langchain" : "https://github.com/langchain-ai/langchain.git",
106
  "Weaviate": "https://github.com/weaviate/weaviate.git",
 
110
  "GenerativeAgents": "https://github.com/joonspk-research/generative_agents.git"
111
  }
112
 
 
 
 
113
  if textbox != "":
114
  repo = textbox
115
  else:
 
122
 
123
  print("Repo name after setting the value: " + str(repoName))
124
  activeloop_username = "aiswaryas"
125
+ dataset_path = f"hub://{activeloop_username}/" + pathName + "1000"
126
+ print(dataset_path)
127
 
128
  try:
129
  db = DeepLake(dataset_path=dataset_path,
 
142
  try:
143
  docs = []
144
  for dirpath, dirnames, filenames in os.walk(root_dir):
145
+ print("rootdir: " + str(root_dir))
146
  for file in filenames:
147
+ print(file)
148
  try:
149
  loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
150
  docs.extend(loader.load_and_split())
 
153
  pass
154
 
155
  activeloop_username = "aiswaryas"
156
+ dataset_path = f"hub://{activeloop_username}/" + pathName + "1000"
157
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
158
  texts = text_splitter.split_documents(docs)
159
 
 
163
  read_only=False)
164
  # Do this in chunks to avoid hitting the ratelimit immediately
165
  for i in range(0, len(texts), 500):
166
+ print("Adding documents " + str(i))
167
  db.add_documents(texts[i:i+500])
168
  time.sleep(.1)
169
 
 
181
  # db = DeepLake(dataset_path=dataset_path,
182
  # embedding_function=embeddings,
183
  # token=os.environ['ACTIVELOOP_TOKEN'], read_only=False)
184
+
185
  else:
186
  print("Dataset already exists")
187
 
 
197
  print("REPO name in bug triage: " + str(repoName))
198
  repo = "/".join(repoName[:-4].split("/")[-2:])
199
  tickets = fetchGithubIssues(repo, 10)
200
+ # print("tickets: " + str(tickets))
201
 
202
  # Create the dropdown
203
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
 
218
  github = repoName[:-4]
219
  print(github)
220
  try:
221
+ embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
222
  pathName = github.split('/')[-1]
223
+ dataset_path = "hub://aiswaryas/" + pathName + "1000"
224
 
225
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
226
 
 
241
  callback_manager=CallbackManager(
242
  [StreamingGradioCallbackHandler(q)]
243
  ),
244
+ openai_api_key="sk-[REDACTED-LEAKED-KEY]",
245
  )
246
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
247
  chat_history = []
 
294
  "comments_url": issue["comments_url"],
295
  })
296
 
297
+ # print(issues_data)
298
  return issues_data
299
 
300
 
 
306
  input data and generate the responses that are displayed in the UI.
307
  """
308
  pathName = git_clone(repo)
309
+ root_dir = './' + pathName + "1000"
310
 
311
  files, dirs, docs = [], [], []
312
  for dirpath, dirnames, filenames in os.walk(root_dir):
 
320
  print("Exception: " + str(e) + "| File: " + os.path.join(dirpath, file))
321
  pass
322
 
323
+ return dirs
324
 
325
 
326
  def generateDocumentationPerFolder(dir, github):
 
342
 
343
  print(prompt)
344
  try:
345
+ embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
346
  pathName = github.split('/')[-1]
347
  print("PATH NAME: " + str(pathName))
348
+ dataset_path = "hub://aiswaryas/" + pathName + "1000"
349
 
350
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
351
 
 
362
  temperature=0.0,
363
  verbose=True,
364
  streaming=True, # Pass `streaming=True` to make sure the client receives the data.
365
+ openai_api_key="sk-[REDACTED-LEAKED-KEY]",
366
  )
367
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
368
  chat_history = []
 
405
  print(question)
406
 
407
  try:
408
+ embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
409
  pathName = github.split('/')[-1]
410
+ dataset_path = "hub://aiswaryas/" + pathName + "1000"
411
 
412
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)
413
 
 
427
  callback_manager=CallbackManager(
428
  [StreamingGradioCallbackHandler(q)]
429
  ),
430
+ openai_api_key="sk-[REDACTED-LEAKED-KEY]",
431
  )
432
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever,max_tokens_limit=8000)
433
 
 
455
  print("Repo name in the bot: " + str(repoName))
456
  github = repoName[:-4]
457
  try:
458
+ embeddings = OpenAIEmbeddings(openai_api_key="sk-[REDACTED-LEAKED-KEY]")
459
  pathName = github.split('/')[-1]
460
+ dataset_path = "hub://aiswaryas/" + pathName + "1000"
461
 
462
  db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
463
 
 
477
  callback_manager=CallbackManager(
478
  [StreamingGradioCallbackHandler(q)]
479
  ),
480
+ openai_api_key="sk-[REDACTED-LEAKED-KEY]",
481
  )
482
  qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
483
  chat_history = []
 
504
  repoTextBox = gr.Textbox(label="Github Repository")
505
 
506
  gr.Markdown("""Choose from any of the following repositories""")
507
+ ingestedRepos = gr.CheckboxGroup(choices=['Langchain', 'Weaviate', 'OpenAssistant', 'GenerativeAgents','Llama2', "MemeAI"], label="Github Repository", value="Langchain")
508
 
509
  success_response = gr.Textbox(label="")
510
  ingest_btn = gr.Button("Index repo")
 
537
  print("REPO name in bug triage: " + str(repoName))
538
  repo = "/".join(repoName[:-4].split("/")[-2:])
539
  tickets = fetchGithubIssues(repo, 10)
540
+ # print("tickets: " + str(tickets))
541
 
542
  # Create the dropdown
543
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
 
552
 
553
  # # Create the dropdown
554
  # global ticket_choices
555
+ # print("tickets in bug triage: " + str(tickets))
556
  ticket_choices = {ticket["title"]: ticket for ticket in tickets}
557
  ticket_titles = [ticket["title"] for ticket in tickets]
558