Spaces · Aiswarya Sankar committed · Commit 2613437 · Parent(s): d2e86b9

Update the way the model is chosen
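For readers skimming the diff: the core of this change is that index_repo now derives the repository from UI input instead of a hard-coded URL. A minimal, free-standing sketch of that selection logic, simplified from the diff below (assumptions: the mapping is abbreviated, the Gradio wiring and Response plumbing are elided, and resolve_repo is a hypothetical name used only for illustration):

# Sketch of the new selection rule: a non-empty textbox overrides the
# dropdown; dropdown labels resolve through a fixed name-to-URL mapping.
REPO_MAPPING = {
    "Langchain": "https://github.com/langchain-ai/langchain.git",
    "MemeAI": "https://github.com/aiswaryasankar/memeAI.git",
}

def resolve_repo(textbox: str, dropdown: list) -> str:
    # The app passes a CheckboxGroup value, i.e. a list of selected
    # labels, so the first selection is used when the textbox is empty.
    if textbox != "":
        return textbox
    return REPO_MAPPING[dropdown[0]]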
app.py CHANGED
@@ -15,8 +15,10 @@ import os
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.vectorstores import DeepLake
 import random
+import time
+import together

-os.environ['OPENAI_API_KEY']='sk-
+os.environ['OPENAI_API_KEY']='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'
 os.environ['ACTIVELOOP_TOKEN']='eyJhbGciOiJIUzUxMiIsImlhdCI6MTY4MTU5NTgyOCwiZXhwIjoxNzEzMjE4MTU5fQ.eyJpZCI6ImFpc3dhcnlhcyJ9.eoiMFZsS20zzMXXupFbowUlLdgIgf_MA1ck_DByzREeoQvNm8GPhKEfqea2y1Qak-ud2jo9dhSTBTfRe1ztezw'


@@ -25,15 +27,13 @@ from langchain.document_loaders import TextLoader
 from langchain.text_splitter import CharacterTextSplitter

 import subprocess
-repo_name = "https://github.com/aiswaryasankar/memeAI.git"
+# repo_name = "https://github.com/aiswaryasankar/memeAI.git"

 from langchain.callbacks.base import BaseCallbackHandler
 from langchain.schema import LLMResult
 from typing import Any, Union

-
-
-job_done = object() # signals the processing is done
+job_done = object()

 class StreamingGradioCallbackHandler(BaseCallbackHandler):
     def __init__(self, q: SimpleQueue):

@@ -80,8 +80,15 @@ class GithubResponse(BaseModel):
     repo: str


-
+# global repoName
+global ticket_titles
+global tickets
+global ticket_choices
+tickets = []
+
+repoName = "https://github.com/aiswaryasankar/memeAI.git"

+embeddings = OpenAIEmbeddings(disallowed_special=())

 def git_clone(repo_url):
     subprocess.run(["git", "clone", repo_url])

@@ -91,46 +98,89 @@ def git_clone(repo_url):
     return dirpath


-def index_repo(repo: str) -> Response:
+def index_repo(textbox: str, dropdown: str) -> Response:
+
+    mapping = {
+        "Langchain" : "https://github.com/langchain-ai/langchain.git",
+        "Weaviate": "https://github.com/weaviate/weaviate.git",
+        "Llama2": "https://github.com/facebookresearch/llama.git",
+        "OpenAssistant": "https://github.com/LAION-AI/Open-Assistant.git",
+        "MemeAI": "https://github.com/aiswaryasankar/memeAI.git",
+        "GenerativeAgents": "https://github.com/joonspk-research/generative_agents.git"
+    }
+
+    # print(textbox)
+    # print(dropdown[0])
+
+    if textbox != "":
+        repo = textbox
+    else:
+        repo = mapping[dropdown[0]]
+    # repoName = gr.State(repo)
+
+    print("Repo name after setting the value: " + str(repoName))
     pathName = git_clone(repo)
     root_dir = './' + pathName

-
-    for dirpath, dirnames, filenames in os.walk(root_dir):
-        for file in filenames:
-            try:
-                loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
-                docs.extend(loader.load_and_split())
-            except Exception as e:
-                print("Exception: " + str(e) + "| File: " + os.path.join(dirpath, file))
-                pass
-
+    print("Repo name after setting the value: " + str(repoName))
     activeloop_username = "aiswaryas"
     dataset_path = f"hub://{activeloop_username}/" + pathName
-    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-    texts = text_splitter.split_documents(docs)
-
-    print(texts)
-    for text in texts:
-        print(text)

     try:
         db = DeepLake(dataset_path=dataset_path,
-
-
+                      embedding_function=embeddings,
+                      token=os.environ['ACTIVELOOP_TOKEN'],
+                      read_only=True,
+                      num_workers=10)
         # NOTE: read_only=False because we want to ingest documents
         # NOTE: This will raise a `deeplake.util.exceptions.LockedException` if dataset is already locked
         # NOTE: change it to read_only=True when querying the dataset

-        #
-
-
-
-
-
-
-
+        # If it is empty, then hydrate otherwise leave it alone
+        print(db)
+        if db is None:
+            print("Dataset doesn't exist, fetching data")
+            try:
+                docs = []
+                for dirpath, dirnames, filenames in os.walk(root_dir):
+                    for file in filenames:
+                        try:
+                            loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
+                            docs.extend(loader.load_and_split())
+                        except Exception as e:
+                            print("Exception: " + str(e) + "| File: " + os.path.join(dirpath, file))
+                            pass
+
+                activeloop_username = "aiswaryas"
+                dataset_path = f"hub://{activeloop_username}/" + pathName
+                text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+                texts = text_splitter.split_documents(docs)
+
+                db = DeepLake(dataset_path=dataset_path,
+                              embedding_function=embeddings,
+                              token=os.environ['ACTIVELOOP_TOKEN'],
+                              read_only=False)
+                # Do this in chunks to avoid hitting the ratelimit immediately
+                for i in range(0, len(texts), 500):
+                    db.add_documents(texts[i:i+500])
+                    time.sleep(.1)
+
+            except Exception as e:
+                return Response(
+                    result= "Failed to index github repo",
+                    repo="",
+                    error=str(e),
+                    stdout="",
+                )
+            # print("Dataset not empty. Deleting existing dataset...")
+            # db.ds.delete()
+            # print("Done.")
+            # # Reinitialize
+            # db = DeepLake(dataset_path=dataset_path,
+            #     embedding_function=embeddings,
+            #     token=os.environ['ACTIVELOOP_TOKEN'], read_only=False)
+        else:
+            print("Dataset already exists")

     except Exception as e:
         return Response(

@@ -140,29 +190,35 @@ def index_repo(repo: str) -> Response:
             stdout="",
         )

-
-
-
-
-
-            repo="",
-            error=str(e),
-            stdout="",
-        )
-
-    db.ds._unlock()
-
-    return
+    global ticket_choices, ticket_titles, tickets
+    print("REPO name in bug triage: " + str(repoName))
+    repo = "/".join(repoName[:-4].split("/")[-2:])
+    tickets = fetchGithubIssues(repo, 10)
+    print("tickets: " + str(tickets))
+
+    # Create the dropdown
+    ticket_choices = {ticket["title"]: ticket for ticket in tickets}
+    ticket_titles = [ticket["title"] for ticket in tickets]
+
+    print("Repo name before return: " + str(repoName))
+
+    return {
+        success_response: "SUCCESS",
+        # repoName: repoName,
+        # repoTextBox: repoName,
+        # ingestedRepos: ingestedRepos,
+        launch_product: gr.update(visible=True)
+    }


 def answer_questions(question: str, github: str, **kwargs) -> Response:

-    global
-
+    global repoName
+    print("Repo name")
+    github = repoName[:-4]
+    print(github)
     try:
-        embeddings = OpenAIEmbeddings(openai_api_key="sk-
+        embeddings = OpenAIEmbeddings(openai_api_key="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")
         pathName = github.split('/')[-1]
         dataset_path = "hub://aiswaryas/" + pathName

@@ -185,7 +241,7 @@ def answer_questions(question: str, github: str, **kwargs) -> Response:
             callback_manager=CallbackManager(
                 [StreamingGradioCallbackHandler(q)]
             ),
-            openai_api_key="sk-
+            openai_api_key="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",
         )
         qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
         chat_history = []

@@ -207,6 +263,7 @@ def answer_questions(question: str, github: str, **kwargs) -> Response:
             stdout="",
         )

+
 def fetchGithubIssues(repo: str, num_issues:int, **kwargs) -> Response:
     """
     This endpoint should get a list of all the github issues that are open for this repository

@@ -230,12 +287,12 @@ def fetchGithubIssues(repo: str, num_issues:int, **kwargs) -> Response:

         batch.extend(issues.json())
         for issue in issues.json():
-
-
-
-
-
-
+            issues_data.append({
+                "issue_url": issue["url"],
+                "title": issue["title"],
+                "body": issue["body"],
+                "comments_url": issue["comments_url"],
+            })

         print(issues_data)
         return issues_data

@@ -265,97 +322,96 @@ def generateFolderNamesForRepo(repo):

     return dirs[0]

+
 def generateDocumentationPerFolder(dir, github):

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
-
-    # print("finished indexing repo")
-    retriever = db.as_retriever()
-    retriever.search_kwargs['distance_metric'] = 'cos'
-    retriever.search_kwargs['fetch_k'] = 100
-    retriever.search_kwargs['maximal_marginal_relevance'] = True
-    retriever.search_kwargs['k'] = 20
-
-    # streaming_handler = kwargs.get('streaming_handler')
-    model = ChatOpenAI(
-        model_name='gpt-4',
-        temperature=0.0,
-        verbose=True,
-        streaming=True, # Pass `streaming=True` to make sure the client receives the data.
-        openai_api_key="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",
-    )
-    qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
-    chat_history = []
-
-
-    return str(e)
-
-    #
-
-
-
-
+    if dir == "overview":
+        prompt= """
+          Summarize the structure of the {} repository. Make a list of all endpoints and their behavior. Explain
+          how this module is used in the scope of the larger project. Format the response as code documentation with an
+          Overview, Architecture and Implementation Details. Within implementation details, list out each function and provide
+          an overview of that function.
+        """.format(github)
+    else:
+        prompt= """
+          Summarize how {} is implemented in the {} repository. Make a list of all functions and their behavior. Explain
+          how this module is used in the scope of the larger project. Format the response as code documentation with an
+          Overview, Architecture and Implementation Details. Within implementation details, list out each function and provide
+          an overview of that function.
+        """.format(dir, github)
+
+    print(prompt)
+    try:
+        embeddings = OpenAIEmbeddings(openai_api_key="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")
+        pathName = github.split('/')[-1]
+        print("PATH NAME: " + str(pathName))
+        dataset_path = "hub://aiswaryas/" + pathName
+
+        db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)
+
+        # print("finished indexing repo")
+        retriever = db.as_retriever()
+        retriever.search_kwargs['distance_metric'] = 'cos'
+        retriever.search_kwargs['fetch_k'] = 100
+        retriever.search_kwargs['maximal_marginal_relevance'] = True
+        retriever.search_kwargs['k'] = 20
+
+        # streaming_handler = kwargs.get('streaming_handler')
+        model = ChatOpenAI(
+            model_name='gpt-4',
+            temperature=0.0,
+            verbose=True,
+            streaming=True, # Pass `streaming=True` to make sure the client receives the data.
+            openai_api_key="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",
+        )
+        qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
+        chat_history = []
+        return qa({"question": prompt, "chat_history": chat_history})["answer"]
+
+    except Exception as e:
+        print (str(e))
+        return "Failed to generate documentation"
+
+    # history[-1][1] = ""
+    # for char in qa({"question": prompt, "chat_history": chat_history}):
+    #     history[-1][1] += char
+    #     time.sleep(0.01)
+    #     yield history


-def generateArchitectureDiagram(folder) -> Response:
-    """
-    This endpoint should generate a Mermaid diagram for the given input files. It will return the
-    """


 def solveGithubIssue(ticket, history) -> Response:
     """
     This endpoint takes in a github issue and then queries the db for the question against the codebase.
     """
+    global repoName
     print(history)
-    global
-    github =
+    global ticket_choices
+    github = repoName[:-4]
+
     repoFolder = github.split("/")[-1]
     body = ticket_choices[ticket]["body"]
     title = ticket_choices[ticket]["title"]
     question = """
-
-
-
-
-
+        Given the code in the {} repo, propose a solution for this ticket {} that includes a
+        high level implementation, narrowing down the root cause of the issue and psuedocode if
+        applicable on how to resolve the issue. If multiple changes are required to address the
+        problem, list out each of the steps and a brief explanation for each one.
+    """.format(repoFolder, body)

     q_display = """
-
-
+        How would I approach solving this ticket: {}. Here is a summary of the issue: {}
+    """.format(title, body)

     print(question)

     try:
-        embeddings = OpenAIEmbeddings(openai_api_key="sk-
+        embeddings = OpenAIEmbeddings(openai_api_key="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")
         pathName = github.split('/')[-1]
         dataset_path = "hub://aiswaryas/" + pathName

-        db = DeepLake(dataset_path=dataset_path, read_only=True,
+        db = DeepLake(dataset_path=dataset_path, read_only=True, embedding=embeddings)

         # print("finished indexing repo")
         retriever = db.as_retriever()

@@ -373,34 +429,35 @@ def solveGithubIssue(ticket, history) -> Response:
             callback_manager=CallbackManager(
                 [StreamingGradioCallbackHandler(q)]
             ),
-            openai_api_key="sk-
+            openai_api_key="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",
         )
-        qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
+        qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever,max_tokens_limit=8000)

     except Exception as e:
         return [[str(e), None]]

     history = [[q_display, ""]]
     history[-1][1] = ""
-
+    chat_history = []
+    for char in qa({"question": question, "chat_history": chat_history})["answer"]:
         history[-1][1] += char
         time.sleep(0.01)
         yield history

-    # return [[qa({"question": question, "chat_history": chat_history})["answer"], None]]
-

 def user(message, history):
     return "", history + [[message, None]]


 def bot(history, **kwargs):
-
+
     user_message = history[-1][0]
-
-
+
+    global repoName
+    print("Repo name in the bot: " + str(repoName))
+    github = repoName[:-4]
     try:
-        embeddings = OpenAIEmbeddings(openai_api_key="sk-
+        embeddings = OpenAIEmbeddings(openai_api_key="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")
         pathName = github.split('/')[-1]
         dataset_path = "hub://aiswaryas/" + pathName

@@ -422,7 +479,7 @@ def bot(history, **kwargs):
             callback_manager=CallbackManager(
                 [StreamingGradioCallbackHandler(q)]
             ),
-            openai_api_key="sk-
+            openai_api_key="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",
         )
         qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)
         chat_history = []

@@ -433,185 +490,203 @@ def bot(history, **kwargs):

     history[-1][1] = ""
     for char in qa({"question": user_message, "chat_history": chat_history})["answer"]:
-
-
+        history[-1][1] += char
+        yield history


 with gr.Blocks() as demo:

+    # repoName = gr.State(value="https://github.com/sourcegraph/cody.git")

-    gr.Markdown("""
-    # Entelligence AI
-
-    """)
+    gr.Markdown("""
+    <h1 align="center"> Entelligence AI </h1>
+    <p style="text-align: center; font-size:36">Enabling your product team to ship product 10x faster.</p>
+    """)

     repoTextBox = gr.Textbox(label="Github Repository")
-    repo_name = "https://github.com/aiswaryasankar/memeAI.git"
-    # def update_state(value):
-    #     repo_name.value = value
-    #     return value

-
-
+    gr.Markdown("""Choose from any of the following repositories""")
+    ingestedRepos = gr.CheckboxGroup(choices=['Langchain', 'Weaviate', 'OpenAssistant', 'GenerativeAgents','Llama2', "MemeAI"], label="Github Repository", value="MemeAI")
+
     success_response = gr.Textbox(label="")
     ingest_btn = gr.Button("Index repo")
-    ingest_btn.click(fn=index_repo, inputs=repoTextBox, outputs=success_response, api_name="index_repo")

-
-
-    ticket_titles = [ticket["title"] for ticket in tickets]
-
-
-            bot, chatbot, chatbot
-        )
-        index += 1
-        clear.click(lambda: None, None, chatbot, queue=False)
-
-
-    """)
-
-    # docs = generateDocumentationPerFolder("overview", repo_name)
-    # markdown = gr.Markdown(value=docs)
-
-
-    print(dirNames)
-    buttons = [gr.Button(folder_name, onclick=button_click_callback) for folder_name in dirNames]
-
+    with gr.Column(visible=False) as launch_product:
+
+        # Toggle visibility of the chat, bugs, docs, model windows
+        with gr.Tab("Code Chat"):
+            chatbot = gr.Chatbot()
+            msg = gr.Textbox()
+            clear = gr.Button("Clear")
+
+            msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+                bot, chatbot, chatbot
+            )
+            clear.click(lambda: None, None, chatbot, queue=False)
+
+        index = 0
+        with gr.Tab("Bug Triage"):
+
+            # Display the titles in the dropdown
+            def create_ticket_dropdown(tickets):
+                return gr.Dropdown.update(
+                    choices=ticket_titles, value=ticket_titles[0]
+                ), gr.update(visible=True)
+
+            # global ticket_choices, ticket_titles, tickets
+            print("REPO name in bug triage: " + str(repoName))
+            repo = "/".join(repoName[:-4].split("/")[-2:])
+            tickets = fetchGithubIssues(repo, 10)
+            print("tickets: " + str(tickets))
+
+            # Create the dropdown
+            ticket_choices = {ticket["title"]: ticket for ticket in tickets}
+            ticket_titles = [ticket["title"] for ticket in tickets]
+
+            # Here you want to first call the getGithubIssues function
+            # repo = gr.Interface.get_session_state("repo")
+            # print("REPO name in bug triage: " + str(repoName))
+            # repo = "/".join(repoName[:-4].split("/")[-2:])
+            # tickets = fetchGithubIssues(repo, 10)
+            # print("tickets: " + str(tickets))
+
+            # # Create the dropdown
+            # global ticket_choices
+            print("tickets in bug triage: " + str(tickets))
+            ticket_choices = {ticket["title"]: ticket for ticket in tickets}
+            ticket_titles = [ticket["title"] for ticket in tickets]
+
+            ticketDropdown = gr.Dropdown(choices=ticket_titles, title="Github Issues")
+
+            # Extract the ticket title, body for the selected ticket
+            chatbot = gr.Chatbot()
+            msg = gr.Textbox()
+            clear = gr.Button("Clear")
+
+            if index == 0:
+                msg.submit(solveGithubIssue, [ticketDropdown, chatbot], [msg, chatbot], queue=False).then(
+                    bot, chatbot, chatbot
+                )
+                ticketDropdown.change(solveGithubIssue, inputs=[ticketDropdown, chatbot], outputs=[chatbot])
+            else:
+                msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+                    bot, chatbot, chatbot
+                )
+            clear.click(lambda: None, None, chatbot, queue=False)
+
+
+        with gr.Tab("AI Code Documentation"):
+
+            # global repoName
+            # First parse through the folder structure and store that as a list of clickable buttons
+            gr.Markdown("""
+            ## AI Generated Code Documentation
+
+            Code documentation comes in 3 flavors - internal engineering, external API documentation and product documentation. Each offers different layers of abstraction over the code base.
+            """)
+
+            # docs = generateDocumentationPerFolder("overview", repo_name)
+            markdown = gr.Markdown()
+
+            def button_click_callback(markdown):
+                print("IN BUTTON CLICK CALLBACK")
+                docs = generateDocumentationPerFolder("overview", repoName[:-4])
+                markdown.update(docs)
+
+            # Generate the left column buttons and their names and wrap each one in a function
+            with gr.Row():
+                with gr.Column(scale=.5, min_width=300):
+                    dirNames = generateFolderNamesForRepo(repoName[:-4])
+                    print(dirNames)
+                    buttons = [gr.Button(folder_name, onclick=button_click_callback) for folder_name in dirNames]
+                    for btn, folder_name in zip(buttons, dirNames):
+                        btn.click(button_click_callback, [markdown], [markdown] )
+
+                # Generate the overall documentation for the main bubble at the same time
+                print("REPO NAME IN DOCS: " + str(repoName[:-4]))
+                with gr.Column(scale=2, min_width=300):
+                    docs = generateDocumentationPerFolder("overview", repoName[:-4])
+                    markdown.update(docs)
+
+            # For each folder, generate a diagram and 2-3 prompts that dive deeper into explaining content
+
+            # Render all the content in the UI
+
+            #

-
-
-
-
-    Finetuning code generation models directly on your enterprise code base has shown up to 10% increase in model suggestion acceptance rate.
-    """)
-
-    # Choose existing code base or input a new code base for finetuning -
-    with gr.Row():
-        gr.Markdown("""
-        If you'd like to use the current code base, click this toggle otherwise input the entire code base below.
-        """)
-        existing_repo = gr.Checkbox(value=True, label="Use existing repository")
-        gr.Textbox(label="Input repository", visible=False)
-
-    # Allow option to remove generated files etc
-    gr.Markdown("""
-    Finetuned model performance is highly dependent on training data quality. We have currently found that excluding the following file types improves performance. If you'd like to include them, please toggle them.
-    """)
-    file_types = gr.CheckboxGroup(choices=['.bin', '.gen', '.git', '.gz','.jpg', '.lz', '.midi', '.mpq','.png', '.tz'], label="Removed file types")
-
-    # Based on data above, we should show a field for estimated fine tuning cost
-    # Then we should show the chart for loss
-    def wandb_report(url):
-        iframe = f'<iframe src={url} style="border:none;height:1024px;width:100%">'
-        return gr.HTML(iframe)
-
-    submit_btn = gr.Button("Start Training")
-    with gr.Column(visible=False) as start_training:
-        # Include the epoch loss table
-        epoch_loss = gr.Dataframe(
-            headers=["Step", "Training Loss", "Validation Loss"],
-            datatype=["number", "number", "number"],
-            row_count=5,
-            col_count=(3, "fixed"),
-            value=[[500, 1.868200, 1.548535], [1000, 1.450100, 1.518277], [1500, 1.659000, 1.486497],
-                   [2000, 1.364900, 1.452842], [2500, 1.406300, 1.405151], [3000, 1.276000, 1.346159]]
-        )
-
-    # After you start training you should see the Wandb report
-    report_url = 'https://wandb.ai/aiswaryasankar/aiswarya-santacoder-finetuning/reports/Aiswarya-Santacoder-Finetuning--Vmlldzo0ODM3MDA4'
-    report = wandb_report(report_url)
-
-    # Include a playground to compare different models on given tasks
-    # Link to the generated huggingface spaces model if you opt into it
-    # Toggle to select model for the remaining functionality
-
-    def startTraining(): # existing_repo, file_types
-        start_training= gr.update(visible=True)
-        # return {
-        #     report: report,
-        #     epoch_loss: epoch_loss,
-        #     start_training: gr.update(visible=True),
-        # }
-
-    submit_btn.click(
-        startTraining,
-        # inputs=[existing_repo, file_types],
-        # outputs=[start_training], # report, epoch_loss,
-    )
-
-demo.launch(debug=True)
+        with gr.Tab("Custom Model Finetuning"):
+            # First provide a summary of offering
+            gr.Markdown("""
+            # Enterprise Custom Model Finetuning
+
+            Finetuning code generation models directly on your enterprise code base has shown up to 10% increase in model suggestion acceptance rate.
+            """)
+
+            # Choose base model - radio with model size
+            gr.Radio(choices=["Santacoder (1.1B parameter model)", "Incoder (6B parameter model)", "Codegen (16B parameter model)", "Starcoder (15.5B parameter model)"] , value="Starcoder (15.5B parameter model)")
+
+            # Choose existing code base or input a new code base for finetuning -
+            with gr.Row():
+                gr.Markdown("""
+                If you'd like to use the current code base, click this toggle otherwise input the entire code base below.
+                """)
+                existing_repo = gr.Checkbox(value=True, label="Use existing repository")
+                gr.Textbox(label="Input repository", visible=False)
+
+            # Allow option to remove generated files etc etc
+            gr.Markdown("""
+            Finetuned model performance is highly dependent on training data quality. We have currently found that excluding the following file types improves performance. If you'd like to include them, please toggle them.
+            """)
+            file_types = gr.CheckboxGroup(choices=['.bin', '.gen', '.git', '.gz','.jpg', '.lz', '.midi', '.mpq','.png', '.tz'], label="Removed file types")
+
+            # Based on data above, we should show a field for estimated fine tuning cost
+            # Then we should show the chart for loss
+            def wandb_report(url):
+                iframe = f'<iframe src={url} style="border:none;height:1024px;width:100%">'
+                return gr.HTML(iframe)
+
+            submit_btn = gr.Button("Start Training")
+            with gr.Column(visible=False) as start_training:
+                # Include the epoch loss table
+                epoch_loss = gr.Dataframe(
+                    headers=["Step", "Training Loss", "Validation Loss"],
+                    datatype=["number", "number", "number"],
+                    row_count=5,
+                    col_count=(3, "fixed"),
+                    value=[[500, 1.868200, 1.548535], [1000, 1.450100, 1.518277], [1500, 1.659000, 1.486497],
+                           [2000, 1.364900, 1.452842], [2500, 1.406300, 1.405151], [3000, 1.276000, 1.346159]]
+                )
+
+            # After you start training you should see the Wandb report
+            report_url = 'https://wandb.ai/aiswaryasankar/aiswarya-santacoder-finetuning/reports/Aiswarya-Santacoder-Finetuning--Vmlldzo0ODM3MDA4'
+            report = wandb_report(report_url)
+
+            # Include a playground to compare different models on given tasks
+            # Link to the generated huggingface spaces model if you opt into it
+            # Toggle to select model for the remaining functionality
+
+            def startTraining(existing_repo, file_types):
+                return {
+                    start_training: gr.update(visible=True),
+                }
+
+            submit_btn.click(
+                startTraining,
+                inputs=[existing_repo, file_types],
+                outputs=[start_training], # report, epoch_loss,
+            )
+
+    ingest_btn.click(fn=index_repo, inputs=[repoTextBox, ingestedRepos], outputs=[success_response, launch_product], api_name="index_repo")
+
+
+demo.queue()
+demo.launch(debug=True, share=True)
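
A second pattern this commit introduces, shown here in isolation: during indexing, documents are pushed to the DeepLake dataset in fixed-size batches with a short sleep between batches so the embedding calls do not hit the OpenAI rate limit all at once. A standalone sketch under the assumption that db is any LangChain vector store exposing add_documents (ingest_in_chunks is a hypothetical helper name; the batch size of 500 and 0.1s pause mirror the diff rather than being tuned values):

import time

def ingest_in_chunks(db, texts, batch_size=500, pause_s=0.1):
    # Upload fixed-size slices and pause between them to avoid
    # hitting the embedding API's rate limit immediately.
    for i in range(0, len(texts), batch_size):
        db.add_documents(texts[i:i + batch_size])
        time.sleep(pause_s)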
|