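"""Gradio app for the KadiAPY AI coding assistant.

Builds a RAG pipeline over the kadi-apy GitLab repository (code and docs),
answers user questions with a Groq-hosted LLM, and serves a chat UI via Gradio.
"""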
import os
import json
import gradio as gr
from huggingface_hub import HfApi, login
from dotenv import load_dotenv
from download_repo import download_gitlab_repo_to_hfspace
from process_repo import extract_repo_files
from chunking import chunk_pythoncode_and_add_metadata, chunk_text_and_add_metadata
from vectorstore import setup_vectorstore
from llm import get_groq_llm
from kadi_apy_bot import KadiAPYBot
from repo_versions import store_message_from_json
from test import manage_hf_files
# Load environment variables from .env file
load_dotenv()
# Load configuration from JSON file
with open("config.json", "r") as file:
config = json.load(file)
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
HF_TOKEN = os.environ["HF_Token"]
VECTORSTORE_DIRECTORY = config["vectorstore_directory"]
CHUNK_SIZE = config["chunking"]["chunk_size"]
CHUNK_OVERLAP = config["chunking"]["chunk_overlap"]
EMBEDDING_MODEL_NAME = config["embedding_model"]["name"]
EMBEDDING_MODEL_VERSION = config["embedding_model"]["version"]
LLM_MODEL_NAME = config["llm_model"]["name"]
LLM_MODEL_TEMPERATURE = config["llm_model"]["temperature"]
GITLAB_API_URL = config["gitlab"]["api_url"]
GITLAB_PROJECT_ID = config["gitlab"]["project id"]
GITLAB_PROJECT_VERSION = config["gitlab"]["project version"]
DATA_DIR = config["data_dir"]
HF_SPACE_NAME = config["hf_space_name"]
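
# Sketch of the expected config.json layout, inferred from the keys read above
# (the values here are illustrative placeholders, not the project's settings):
# {
#     "vectorstore_directory": "...",
#     "chunking": {"chunk_size": 512, "chunk_overlap": 64},
#     "embedding_model": {"name": "...", "version": "..."},
#     "llm_model": {"name": "...", "temperature": 0.0},
#     "gitlab": {"api_url": "...", "project id": "...", "project version": "..."},
#     "data_dir": "...",
#     "hf_space_name": "..."
# }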
login(HF_TOKEN)
hf_api = HfApi()

def initialize():
    """Build the RAG pipeline: fetch the repo, chunk it, embed it, wire up the bot."""
    global kadiAPY_bot

    download_gitlab_repo_to_hfspace(GITLAB_API_URL, GITLAB_PROJECT_ID, GITLAB_PROJECT_VERSION, DATA_DIR, hf_api, HF_SPACE_NAME)

    code_texts, code_references = extract_repo_files(DATA_DIR, ['kadi_apy'], [])
    doc_texts, doc_references = extract_repo_files(DATA_DIR, ['docs'], [])
    print("Length of code_texts: ", len(code_texts))
    print("Length of doc_texts: ", len(doc_texts))

    code_chunks = chunk_pythoncode_and_add_metadata(code_texts, code_references)
    doc_chunks = chunk_text_and_add_metadata(doc_texts, doc_references, CHUNK_SIZE, CHUNK_OVERLAP)
    print(f"Total number of code_chunks: {len(code_chunks)}")
    print(f"Total number of doc_chunks: {len(doc_chunks)}")

    vectorstore = setup_vectorstore(doc_chunks + code_chunks, EMBEDDING_MODEL_NAME, VECTORSTORE_DIRECTORY)
    llm = get_groq_llm(LLM_MODEL_NAME, LLM_MODEL_TEMPERATURE, GROQ_API_KEY)
    kadiAPY_bot = KadiAPYBot(llm, vectorstore)

#result = store_message_from_json(HF_TOKEN)
#print(result)
#manage_hf_files(HF_TOKEN)

# bot_kadi() reads the global kadiAPY_bot, so the pipeline has to be built
# before the UI starts handling queries.
initialize()

# Debug helper: walk the current working directory and list every file,
# e.g. to verify that the downloaded repository landed where expected.
working_directory = os.getcwd()
print("Listing all files starting from the current working directory:")
for root, dirs, files in os.walk(working_directory):
    for file in files:
        file_path = os.path.join(root, file)
        print(file_path)

def bot_kadi(history):
    # history is a list of (user, bot) tuples; the last entry holds the pending
    # question with a None placeholder for the answer (see add_text below).
    user_query = history[-1][0]
    response = kadiAPY_bot.process_query(user_query)
    history[-1] = (user_query, response)
    yield history

# Gradio utils
def check_input_text(text):
    if not text:
        gr.Warning("Please input a question.")
        # Raising aborts the event chain, so the chained .success() handlers never run.
        raise TypeError
    return True

def add_text(history, text):
    # Append the question with a None placeholder for the bot's answer and clear the textbox.
    history = history + [(text, None)]
    yield history, ""

def main():
    with gr.Blocks() as demo:
        gr.Markdown("## KadiAPY - AI Coding-Assistant")
        gr.Markdown("AI assistant for KadiAPY, based on a RAG architecture and powered by an LLM")

        with gr.Tab("KadiAPY - AI Assistant"):
            with gr.Row():
                with gr.Column(scale=10):
                    chatbot = gr.Chatbot([], elem_id="chatbot", label="Kadi Bot", bubble_full_width=False, show_copy_button=True, height=600)
                    user_txt = gr.Textbox(label="Question", placeholder="Type in your question and press Enter or click Submit")

                    with gr.Row():
                        with gr.Column(scale=1):
                            submit_btn = gr.Button("Submit", variant="primary")
                        with gr.Column(scale=1):
                            clear_btn = gr.Button("Clear", variant="stop")

                    gr.Examples(
                        examples=[
                            "Who is working on Kadi4Mat?",
                            "How do I install the Kadi-Apy library?",
                            "How do I install the Kadi-Apy library for development?",
                            "I need a method to upload a file to a record",
                        ],
                        inputs=user_txt,
                        outputs=chatbot,
                        fn=add_text,
                        label="Try asking...",
                        cache_examples=False,
                        examples_per_page=3,
                    )

        # Validate the input, append the question to the history, then let the bot answer.
        user_txt.submit(check_input_text, user_txt, None).success(add_text, [chatbot, user_txt], [chatbot, user_txt]).then(bot_kadi, [chatbot], [chatbot])
        submit_btn.click(check_input_text, user_txt, None).success(add_text, [chatbot, user_txt], [chatbot, user_txt]).then(bot_kadi, [chatbot], [chatbot])
        clear_btn.click(lambda: None, None, chatbot, queue=False)

    demo.launch()

if __name__ == "__main__":
    main()