first
- .gitignore +1 -0
- Dockerfile +11 -0
- LICENSE +21 -0
- agent.py +127 -0
- app.py +58 -0
- compose.yml +29 -0
- main.py +110 -0
- query.py +65 -0
- requirements.txt +11 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+.env
Dockerfile
ADDED
@@ -0,0 +1,11 @@
+FROM python:3.8
+
+WORKDIR /app
+RUN pip install --upgrade pip
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+CMD ["streamlit", "run", "app.py"]
+# CMD ["python", "agent.py"]
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 s0ham075
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
agent.py
ADDED
@@ -0,0 +1,127 @@
+from langchain.chains.conversation.memory import ConversationBufferWindowMemory
+from langchain_together import Together
+from main import get_repo_name
+import os
+import requests
+import json
+llm = Together(
+    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    temperature=0,
+    max_tokens=1024,
+    together_api_key="d8ec7106bd0c268bf4672dba83272b86054fbe849eba82f3f75ceb17e6d57eb0"
+)
+
+
+# tools
+from langchain.agents import Tool
+from langchain.tools import BaseTool
+from langchain_community.utilities import SerpAPIWrapper
+search = SerpAPIWrapper()
+search_tool = Tool(
+    name="search",
+    func=search.run,
+    description="useful for when you need to answer questions about current events. You should ask targeted questions"
+)
+
+from langchain_community.utilities import StackExchangeAPIWrapper
+stackexchange = StackExchangeAPIWrapper()
+stackexchange_tool = Tool(
+    name="error-search",
+    func=stackexchange.run,
+    description="useful for when you need information regarding a programming error. You should pass the error directly"
+)
+
+from langchain_community.vectorstores import Qdrant
+import qdrant_client
+from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
+embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
+
+client = qdrant_client.QdrantClient(
+    os.getenv("QDRANT_HOST"),
+    api_key=os.getenv("QDRANT_API_KEY")
+)
+
+
+# Import things that are needed generically
+from langchain.pydantic_v1 import BaseModel, Field
+from langchain.tools import tool
+from query import answer_query
+
+@tool
+def get_repo_issues(repo_url: str) -> str:
+    """Use this tool to get the issues for the repo. It is very important that you pass the repo URL directly in the input and nothing else."""
+    # Extract owner and repo names from the URL (strip a trailing ".git" suffix; rstrip('.git') would strip characters, not the suffix)
+    parts = (repo_url[:-4] if repo_url.endswith('.git') else repo_url).split('/')
+    owner, repo = parts[-2], parts[-1]
+
+    # GitHub API endpoint for issues
+    api_url = f'https://api.github.com/repos/{owner}/{repo}/issues'
+
+    try:
+        # Make a GET request to the GitHub API
+        response = requests.get(api_url)
+
+        # Check if the request was successful (status code 200)
+        if response.status_code == 200:
+            # Parse the JSON response and return the issues
+            result = response.json()
+            return json.dumps(result)
+        else:
+            return f"Error: Unable to fetch issues. Status code: {response.status_code}"
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+@tool
+def retrieve_repo(question: str) -> str:
+    """Use this to get code from the repository or the project. You should look for file or folder names or code snippets related to the query. The input you give to this tool should be detailed.
+    If the question is a general question about the project, e.g. "what is the repo about", then try to find the README file."""
+
+    result = answer_query(question, os.getenv("collection_name"))
+    # docs = vectorstore.similarity_search(question)
+    # for doc in docs:
+    #     result += " " + doc.page_content
+    return result
+
+
+from langchain import hub
+from langchain.agents import initialize_agent, create_react_agent, AgentExecutor
+tools = [retrieve_repo, stackexchange_tool, search_tool]
+
+
+# Get the prompt to use - you can modify this!
+prompt = hub.pull("hwchase17/react")
+prompt.template = """You are a coding assistant who answers questions based on the GitHub repo or project.
+Answer the following questions as best you can. However, don't make anything up on your own; always try to look for relevant documents in the repo.
+You have access to the following tools:
+{tools}
+Use the following format:
+Question: the input question you must answer
+Thought: you should always think about what to do
+Action: the action to take, should be one of [{tool_names}] (if you are going to use the retrieve repo tool,
+IT IS VERY IMPORTANT TO ASK A DETAILED AND LENGTHY QUESTION TO GET A QUALITY RESPONSE)
+Action Input: the input to the action
+Observation: the result of the action (here it is mandatory to check whether the observation is related to the question or not; if not, repeat the process until you are satisfied)
+... (this Thought/Action/Action Input/Observation can repeat N times)
+Thought: I now know the final answer
+Final Answer: (in the case of the retrieve repo tool, the observation from the tool should be your final answer directly) the final answer to the original input question. You should first explain the concept in a clear and concise manner, and you should try to provide code snippets for better understanding
+Begin!
+Question: {input}
+Thought:{agent_scratchpad}"""
+memory = ConversationBufferWindowMemory(
+    memory_key='chat_history',
+    k=3,
+    return_messages=True
+)
+# agent = create_react_agent(llm, tools, prompt)
+# Create an agent executor by passing in the agent and tools
+
+conversational_agent = create_react_agent(
+    tools=tools,
+    llm=llm,
+    prompt=prompt
+)
+agent_executor = AgentExecutor(agent=conversational_agent, tools=tools, handle_parsing_errors=True, verbose=True)
+def agent_query(query):
+    result = agent_executor.invoke({"input": query})
+    print(result)
+    return result["output"]
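
Note: a minimal smoke test for the agent, run outside Streamlit. This is a sketch, not part of the commit; it assumes QDRANT_HOST, QDRANT_API_KEY, TOGETHER_API_KEY and SERPAPI_API_KEY are exported (as compose.yml does) and mirrors how app.py wires the pieces together:

import os
from main import repository_loader           # embeds the repo into Qdrant
from agent import agent_query                # ReAct agent defined above

url = "https://github.com/s0ham075/Google-Docs-Frontend.git"   # example repo from main.py
os.environ["collection_name"] = url          # retrieve_repo reads the repo URL from this env var
repository_loader(url)                       # clone -> chunk -> embed -> clean up
print(agent_query("What is this repository about? Look for the README file."))
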
app.py
ADDED
@@ -0,0 +1,58 @@
+import streamlit as st
+import pandas as pd
+import random
+import time
+import os
+from query import answer_query
+from main import repository_loader, get_repo_name
+from agent import agent_query
+
+st.title("Git Bot - v0.02")
+
+if 'flag' not in st.session_state:
+    st.session_state['flag'] = True
+
+url = st.sidebar.text_input("GitHub URL")
+if url and st.session_state.flag:
+    with st.spinner('Embedding your repository...'):
+        os.environ["collection_name"] = url
+        repository_loader(url)
+        st.session_state.flag = False
+    st.success('Done!')
+
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+
+# Display chat messages from history on app rerun
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+# React to user input
+if prompt := st.chat_input("What is up?"):
+    # Display user message in chat message container
+    with st.chat_message("user"):
+        st.markdown(prompt)
+    # Add user message to chat history
+    st.session_state.messages.append({"role": "user", "content": prompt})
+
+    if not url:
+        st.warning('Please enter your GitHub link!', icon='⚠')
+    if url and os.getenv("collection_name"):
+        with st.chat_message("assistant"):
+            message_placeholder = st.empty()
+            full_response = ""
+            # assistant_response = answer_query(prompt, url)
+            assistant_response = agent_query(prompt)
+            # Simulate a streamed response with millisecond delays
+            # for chunk in assistant_response.split():
+            #     full_response += chunk + " "
+            #     time.sleep(0.05)
+            #     # Add a blinking cursor to simulate typing
+            #     message_placeholder.markdown(full_response + "▌")
+            message_placeholder.markdown(assistant_response)
+        # Add assistant response to chat history
+        st.session_state.messages.append({"role": "assistant", "content": assistant_response})
+
+
compose.yml
ADDED
@@ -0,0 +1,29 @@
+version: "3.8"
+services:
+  server:
+    build:
+      context: .
+      dockerfile: Dockerfile
+
+    ports:
+      - 6333:6333
+      - 8501:8501
+
+    volumes:
+      - .:/app
+
+    environment:
+      - QDRANT_API_KEY=SstwpxN4A-cH-pwdocTCighLpo4dX0ldLat39yRe48lVn1wppcH8Ig
+      - QDRANT_HOST=https://6d58fa02-778a-48b9-9c2c-c25875284ec6.us-east4-0.gcp.cloud.qdrant.io
+      - TOGETHER_API_KEY=d8ec7106bd0c268bf4672dba83272b86054fbe849eba82f3f75ceb17e6d57eb0
+      - SERPAPI_API_KEY=dfa5f0e6dfffb9e6749ab4c5dd7e3490d922be171b1adebd6c2a493661999269
+
+    develop:
+      watch:
+        - path: .
+          action: rebuild
+
+    deploy:
+      resources:
+        limits:
+          memory: 4096M # Adjust the memory limit as needed
main.py
ADDED
@@ -0,0 +1,110 @@
+import os
+import shutil
+import git
+from urllib.parse import urlparse
+
+local_dir = os.getcwd()
+branch = None
+
+# Function to extract the repository name from a URL
+def get_repo_name(url):
+    parsed_url = urlparse(url)
+    # Extract the base name from the path (which is usually the repository name)
+    repo_name = os.path.basename(parsed_url.path)
+    # Remove the ".git" extension if it exists
+    repo_name = repo_name[:-4] if repo_name.endswith(".git") else repo_name
+    return repo_name
+
+# Function to clone a Git repository
+def clone_repo(url):
+    try:
+        path = os.path.join(local_dir, "staging", get_repo_name(url))
+        # Check if the repository already exists in the specified path
+        if os.path.exists(path):
+            print(f"{get_repo_name(url)} already added in db")
+            return False
+
+        repo = git.Repo.clone_from(url, path)
+        global branch
+        branch = repo.head.reference
+        print(f"{get_repo_name(url)} cloned successfully")
+        return True
+    except Exception as e:
+        print(f"Error cloning the git repository: {e}")
+        return False
+
+def delete_cloned_repo(url):
+    local_path = os.path.join(local_dir, "staging", get_repo_name(url))
+    try:
+        # Check if the local path exists
+        if os.path.exists(local_path):
+            # Use shutil.rmtree to remove the entire directory
+            shutil.rmtree(local_path, ignore_errors=True)
+            print(f"Repository at {local_path} successfully deleted.")
+        else:
+            print(f"Repository at {local_path} does not exist.")
+    except Exception as e:
+        print(f"Error deleting repository: {e}")
+
+from langchain_community.document_loaders import GitLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import Qdrant
+import qdrant_client
+
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size=1000,
+    chunk_overlap=20,
+)
+
+# from langchain_together.embeddings import TogetherEmbeddings
+# embeddings2 = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-8k-retrieval", together_api_key="d8ec7106bd0c268bf4672dba83272b86054fbe849eba82f3f75ceb17e6d57eb0")
+
+client = qdrant_client.QdrantClient(
+    os.getenv("QDRANT_HOST"),
+    api_key=os.getenv("QDRANT_API_KEY")
+)
+
+from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
+embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
+vectorstore = None
+
+def load_repo(url):
+    collection_config = qdrant_client.http.models.VectorParams(
+        size=384,  # 768 for instructor-xl, 1536 for OpenAI
+        distance=qdrant_client.http.models.Distance.COSINE
+    )
+
+    client.recreate_collection(
+        collection_name=get_repo_name(url),
+        vectors_config=collection_config
+    )
+    vectorstore = Qdrant(
+        client=client,
+        collection_name=get_repo_name(url),
+        embeddings=embeddings
+    )
+    print("collection created")
+    try:
+        loader = GitLoader(repo_path=os.path.join(local_dir, "staging", get_repo_name(url)), branch=branch, file_filter=lambda file_path: not file_path.endswith("package-lock.json"))
+        data = loader.load()
+        chunks = text_splitter.split_documents(data)
+        print("chunks created")
+        vectorstore.add_documents(chunks)
+        return True
+    except Exception as e:
+        print(f"Error loading and indexing repository: {e}")
+        return False
+
+def repository_loader(url):
+    result = False
+    if clone_repo(url):
+        result = load_repo(url)
+    if result:
+        delete_cloned_repo(url)
+
+
+
+print('HELLO FROM CONTAINER')
+# answer_query("How is the routing done in this project and what are the routes used", 'https://github.com/s0ham075/Google-Docs-Frontend.git')
+
+# delete_cloned_repo()
query.py
ADDED
@@ -0,0 +1,65 @@
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+from langchain_together import Together
+from langchain_community.vectorstores import Qdrant
+from main import get_repo_name
+import qdrant_client
+import os
+
+from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
+embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
+
+client = qdrant_client.QdrantClient(
+    os.getenv("QDRANT_HOST"),
+    api_key=os.getenv("QDRANT_API_KEY")
+)
+
+
+B_INST, E_INST = "[INST]", "[/INST]"
+B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
+
+def get_prompt(instruction, new_system_prompt):
+    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS
+    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
+    return prompt_template
+
+sys_prompt = """You are a helpful, smart and intelligent coding assistant. Always answer as helpfully as possible using the context code provided. Your answers should only answer the question once; you can provide code snippets, but make sure you explain them thoroughly.
+
+If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrect. If you don't know the answer to a question, please don't share false information."""
+
+instruction = """CONTEXT CODE:\n\n {context}\n
+
+Question: {question}"""
+
+
+prompt_template = get_prompt(instruction, sys_prompt)
+
+llama_prompt = PromptTemplate(
+    template=prompt_template, input_variables=["context", "question"]
+)
+
+llama2_llm = Together(
+    model="togethercomputer/llama-2-70b-chat",
+    temperature=0.7,
+    max_tokens=1024,
+    together_api_key="d8ec7106bd0c268bf4672dba83272b86054fbe849eba82f3f75ceb17e6d57eb0"
+)
+
+
+def process_llm_response(llm_response):
+    response = " "
+    response += llm_response['result'] + "\n\nSources\n"
+    for source in llm_response['source_documents']:
+        response += "Source - " + source.metadata['source'] + "\n"
+
+    return response
+
+def answer_query(query, url):
+    vectorstore = Qdrant(
+        client=client,
+        collection_name=get_repo_name(url),
+        embeddings=embeddings
+    )
+    qa_chain = RetrievalQA.from_chain_type(llm=llama2_llm, chain_type_kwargs={"prompt": llama_prompt}, chain_type="stuff", retriever=vectorstore.as_retriever(), return_source_documents=True)
+    return process_llm_response(qa_chain(query))
+
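
Note: a sketch (not part of the commit) of what get_prompt assembles and how answer_query can be called directly, assuming the repo's collection already exists in Qdrant:

from query import get_prompt, answer_query

# get_prompt wraps the instruction in Llama-2 chat markers:
# [INST]<<SYS>>\n{system prompt}\n<</SYS>>\n\n{instruction}[/INST]
print(get_prompt("Question: {question}", "You are a helpful assistant."))

# answer_query maps the URL to a collection via get_repo_name, retrieves
# context chunks, and returns the answer plus its source file paths.
print(answer_query(
    "How is routing done in this project?",
    "https://github.com/s0ham075/Google-Docs-Frontend.git",
))
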
requirements.txt
ADDED
@@ -0,0 +1,11 @@
+langchain
+qdrant_client
+langchain-community
+langchain-together
+langchainhub
+fastembed
+streamlit
+GitPython
+stackapi
+google-search-results
+requests