File size: 4,722 Bytes
6df5c93
 
 
75495ad
6df5c93
 
21b7541
36637af
d092474
330be71
 
fcfb36c
9a079fe
6df5c93
 
 
 
 
 
ae5beeb
fb23588
6df5c93
 
 
 
 
0de2459
 
 
9125ce3
0de2459
 
9125ce3
0de2459
 
9125ce3
0de2459
 
 
9125ce3
0de2459
 
9125ce3
6df5c93
c7fa549
6df5c93
f79e678
0ae54ee
899338b
0ae54ee
 
aba8f79
0ae54ee
 
f71140c
0ae54ee
7dafa4e
 
0ae54ee
 
 
 
 
 
 
 
 
 
aa10033
0ae54ee
 
2172305
0ae54ee
aa10033
899338b
0ae54ee
0c35020
0ae54ee
2c0ea57
93e3091
ebb0364
6df5c93
 
 
 
 
 
 
 
 
 
 
506afb0
 
 
6df5c93
1afdee3
831abbd
 
1afdee3
831abbd
1afdee3
417adb9
40be4b1
1afdee3
 
 
 
 
 
 
 
 
 
 
8fde75c
 
 
1afdee3
 
 
 
 
 
 
 
 
499e447
 
1afdee3
 
 
8c715b2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import os
import json
import gradio as gr

from huggingface_hub import HfApi, login
from dotenv import load_dotenv

from download_repo import download_gitlab_repo_to_hfspace
from process_repo import extract_repo_files
from chunking import chunk_pythoncode_and_add_metadata, chunk_text_and_add_metadata
from vectorstore import setup_vectorstore
from llm import get_groq_llm
from kadi_apy_bot import KadiAPYBot

# Load environment variables from .env file
load_dotenv()

# Load configuration from JSON file. The encoding is passed explicitly so the
# file is decoded the same way regardless of the platform's default locale.
with open("config.json", "r", encoding="utf-8") as file:
    config = json.load(file)

# Required secrets: subscripting os.environ raises KeyError at import time
# when a variable is missing.
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
HF_TOKEN = os.environ["HF_Token"]

# Vector store location and chunking parameters for documentation text.
VECTORSTORE_DIRECTORY = config["vectorstore_directory"]
CHUNK_SIZE = config["chunking"]["chunk_size"]
CHUNK_OVERLAP = config["chunking"]["chunk_overlap"]

# Embedding model settings.
EMBEDDING_MODEL_NAME = config["embedding_model"]["name"]
EMBEDDING_MODEL_VERSION = config["embedding_model"]["version"]

# LLM settings handed to get_groq_llm().
LLM_MODEL_NAME = config["llm_model"]["name"]
LLM_MODEL_TEMPERATURE = config["llm_model"]["temperature"]

# GitLab source repository. NOTE: these two config keys contain a space
# ("project id", "project version"), unlike the snake_case keys above.
GITLAB_API_URL = config["gitlab"]["api_url"]
GITLAB_PROJECT_ID = config["gitlab"]["project id"]
GITLAB_PROJECT_VERSION = config["gitlab"]["project version"]

# Local data directory and the Hugging Face Space name, both passed to the
# repository download step.
DATA_DIR = config["data_dir"]
HF_SPACE_NAME = config["hf_space_name"]

# Authenticate with the Hugging Face Hub and create the API client that
# initialize() passes to download_gitlab_repo_to_hfspace().
login(HF_TOKEN)
hf_api = HfApi()


def initialize():
    """Prepare the data pipeline and create the module-level bot.

    Steps, in order:
      1. Call download_gitlab_repo_to_hfspace() with the GitLab and
         Hugging Face settings read from the configuration.
      2. Call extract_repo_files() on DATA_DIR twice, once with
         ['kadi_apy'] and once with ['docs'], to obtain texts and their
         references.
      3. Chunk the code texts and the documentation texts with their
         respective chunkers.
      4. Build the vector store from all chunks (documentation first) and
         create the Groq LLM.
      5. Store a KadiAPYBot built from both in the global ``kadiAPY_bot``.

    Progress counts are printed to stdout along the way.
    """
    global kadiAPY_bot

    download_gitlab_repo_to_hfspace(
        GITLAB_API_URL,
        GITLAB_PROJECT_ID,
        GITLAB_PROJECT_VERSION,
        DATA_DIR,
        hf_api,
        HF_SPACE_NAME,
    )

    # Library sources and documentation are extracted separately because
    # they go through different chunkers below.
    source_texts, source_refs = extract_repo_files(DATA_DIR, ['kadi_apy'], [])
    docs_texts, docs_refs = extract_repo_files(DATA_DIR, ['docs'], [])

    print("Length of code_texts: ", len(source_texts))
    print("Length of doc_files: ", len(docs_texts))

    source_chunks = chunk_pythoncode_and_add_metadata(source_texts, source_refs)
    docs_chunks = chunk_text_and_add_metadata(
        docs_texts, docs_refs, CHUNK_SIZE, CHUNK_OVERLAP
    )

    print(f"Total number of code_chunks: {len(source_chunks)}")
    print(f"Total number of doc_chunks: {len(docs_chunks)}")

    all_chunks = docs_chunks + source_chunks
    store = setup_vectorstore(all_chunks, EMBEDDING_MODEL_NAME, VECTORSTORE_DIRECTORY)
    groq_llm = get_groq_llm(LLM_MODEL_NAME, LLM_MODEL_TEMPERATURE, GROQ_API_KEY)

    kadiAPY_bot = KadiAPYBot(groq_llm, store)

# Runs at import time (not only under __main__): this call assigns the
# module-level `kadiAPY_bot` that bot_kadi() reads when answering a query.
initialize()
    
def bot_kadi(history):
    """Answer the most recent question in the chat history.

    Reads the first element of the last history entry as the user's
    question, passes it to ``kadiAPY_bot.process_query`` and replaces that
    last entry with a ``(question, answer)`` tuple. The history list is
    modified in place and then yielded once.
    """
    latest_turn = history[-1]
    question = latest_turn[0]
    answer = kadiAPY_bot.process_query(question)
    history[-1] = (question, answer)

    yield history



# Gradio utils
def check_input_text(text):
    """Validate that the question box is not empty.

    Returns True for any truthy ``text``. For a falsy value it shows a
    Gradio warning and raises TypeError, so the ``.success(...)`` step
    chained after this check in main() does not run.
    """
    if text:
        return True

    gr.Warning("Please input a question.")
    raise TypeError

def add_text(history, text):
    """Append the user's question to the chat history as an open turn.

    Yields a single ``(new_history, "")`` pair: ``new_history`` is a new
    list made of ``history`` plus ``(text, None)`` (the answer slot is left
    empty), and the empty string is the second output, which main() wires
    to the question textbox.
    """
    pending_turn = (text, None)
    extended = history + [pending_turn]
    yield extended, ""


import gradio as gr

def main():
    """Assemble the Gradio chat interface and launch it.

    The page contains one tab with the chatbot panel, a question textbox,
    Submit and Clear buttons and a list of example questions. Pressing
    Enter in the textbox and clicking Submit trigger the same chain:
    validate the input, append the question to the chat history, then ask
    the bot for an answer. Clear resets the chatbot panel.
    """
    sample_questions = [
        "Who is working on Kadi4Mat?",
        "How do i install the Kadi-Apy library?",
        "How do i install the Kadi-Apy library for development?",
        "I need a method to upload a file to a record",
    ]

    with gr.Blocks() as demo:
        gr.Markdown("## KadiAPY - AI Coding-Assistant")
        gr.Markdown("AI assistant for KadiAPY based on RAG architecture powered by LLM")

        with gr.Tab("KadiAPY - AI Assistant"):
            with gr.Row():
                with gr.Column(scale=10):
                    chatbot = gr.Chatbot(
                        [],
                        elem_id="chatbot",
                        label="Kadi Bot",
                        bubble_full_width=False,
                        show_copy_button=True,
                        height=600,
                    )
                    user_txt = gr.Textbox(
                        label="Question",
                        placeholder="Type in your question and press Enter or click Submit",
                    )

                    with gr.Row():
                        with gr.Column(scale=1):
                            submit_btn = gr.Button("Submit", variant="primary")
                        with gr.Column(scale=1):
                            clear_btn = gr.Button("Clear", variant="stop")

                    gr.Examples(
                        examples=sample_questions,
                        inputs=user_txt,
                        outputs=chatbot,
                        fn=add_text,
                        label="Try asking...",
                        cache_examples=False,
                        examples_per_page=3,
                    )

            # Enter in the textbox and the Submit button share one pipeline:
            # validate -> append question (and clear the box) -> get answer.
            for trigger in (user_txt.submit, submit_btn.click):
                validated = trigger(check_input_text, user_txt, None)
                appended = validated.success(
                    add_text, [chatbot, user_txt], [chatbot, user_txt]
                )
                appended.then(bot_kadi, [chatbot], [chatbot])

            # Returning None to the chatbot output empties the conversation.
            clear_btn.click(lambda: None, None, chatbot, queue=False)

    demo.launch()

    
# Build and launch the Gradio UI only when this file is executed as a script.
if __name__ == "__main__":
    main()