File size: 4,745 Bytes
6df5c93
 
 
df2b26b
6df5c93
 
21b7541
36637af
d092474
330be71
 
fcfb36c
9a079fe
4b7d38f
6df5c93
 
 
 
 
 
ae5beeb
fb23588
6df5c93
 
 
 
 
0de2459
 
 
9125ce3
0de2459
 
9125ce3
0de2459
 
9125ce3
0de2459
 
 
9125ce3
0de2459
 
9125ce3
6df5c93
c7fa549
6df5c93
f79e678
0ae54ee
899338b
0ae54ee
95aa8e9
0ae54ee
aba8f79
0ae54ee
 
f71140c
0ae54ee
7dafa4e
 
0ae54ee
 
 
 
 
 
 
 
 
 
aa10033
0ae54ee
a74f77b
da0c2cc
 
 
31d2d4e
 
 
 
9b514b9
31d2d4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
506afb0
6df5c93
31d2d4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8c715b2
31d2d4e
8c715b2
31d2d4e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import os
import json
import gradio as gr
import streamlit as st
from huggingface_hub import HfApi, login
from dotenv import load_dotenv

from download_repo import download_gitlab_repo_to_hfspace
from process_repo import extract_repo_files
from chunking import chunk_pythoncode_and_add_metadata, chunk_text_and_add_metadata
from vectorstore import setup_vectorstore
from llm import get_groq_llm
from kadi_apy_bot import KadiAPYBot
from repo_versions import store_message_from_json

# Load environment variables from .env file
load_dotenv()

# Load configuration from JSON file.
# The encoding is passed explicitly so parsing does not depend on the
# platform's default locale encoding.
with open("config.json", "r", encoding="utf-8") as file:
    config = json.load(file)

# Required secrets: a missing variable raises KeyError at import time.
# The Hugging Face token is read from "HF_Token" exactly as spelled here.
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
HF_TOKEN = os.environ["HF_Token"]


# Vector store location and the chunking parameters passed to the text chunker.
VECTORSTORE_DIRECTORY = config["vectorstore_directory"]
CHUNK_SIZE = config["chunking"]["chunk_size"]
CHUNK_OVERLAP = config["chunking"]["chunk_overlap"]

EMBEDDING_MODEL_NAME = config["embedding_model"]["name"]
EMBEDDING_MODEL_VERSION = config["embedding_model"]["version"]

LLM_MODEL_NAME = config["llm_model"]["name"]
LLM_MODEL_TEMPERATURE = config["llm_model"]["temperature"]

# NOTE: the two project keys below contain a space, not an underscore.
GITLAB_API_URL = config["gitlab"]["api_url"]
GITLAB_PROJECT_ID = config["gitlab"]["project id"]
GITLAB_PROJECT_VERSION = config["gitlab"]["project version"]

DATA_DIR = config["data_dir"]
HF_SPACE_NAME = config["hf_space_name"]

# Log in to the Hugging Face Hub with the token and create the API client
# that initialize() hands to the repository download helper.
login(HF_TOKEN)
hf_api = HfApi()


def initialize():
    """Prepare the retrieval pipeline and bind the module-global ``kadiAPY_bot``.

    Steps, in order:
      1. Call ``download_gitlab_repo_to_hfspace`` with the GitLab and
         Hugging Face settings read from the configuration.
      2. Call ``extract_repo_files`` on ``DATA_DIR`` once for ``kadi_apy`` and
         once for ``docs`` (the third argument is an empty list in both calls).
      3. Chunk the code texts and the documentation texts with their
         respective chunking helpers.
      4. Build the vector store from the doc chunks followed by the code
         chunks, create the Groq LLM, and wrap both in a ``KadiAPYBot``.

    Progress counts are printed to stdout. Nothing is returned; the result is
    published through the global ``kadiAPY_bot``.
    """
    global kadiAPY_bot

    download_gitlab_repo_to_hfspace(
        GITLAB_API_URL,
        GITLAB_PROJECT_ID,
        GITLAB_PROJECT_VERSION,
        DATA_DIR,
        hf_api,
        HF_SPACE_NAME,
    )

    source_texts, source_refs = extract_repo_files(DATA_DIR, ["kadi_apy"], [])
    manual_texts, manual_refs = extract_repo_files(DATA_DIR, ["docs"], [])

    print("Length of code_texts: ", len(source_texts))
    print("Length of doc_files: ", len(manual_texts))

    source_chunks = chunk_pythoncode_and_add_metadata(source_texts, source_refs)
    manual_chunks = chunk_text_and_add_metadata(
        manual_texts,
        manual_refs,
        CHUNK_SIZE,
        CHUNK_OVERLAP,
    )

    print(f"Total number of code_chunks: {len(source_chunks)}")
    print(f"Total number of doc_chunks: {len(manual_chunks)}")

    # Documentation chunks come first, then code chunks.
    combined_chunks = manual_chunks + source_chunks
    store = setup_vectorstore(combined_chunks, EMBEDDING_MODEL_NAME, VECTORSTORE_DIRECTORY)
    groq_llm = get_groq_llm(LLM_MODEL_NAME, LLM_MODEL_TEMPERATURE, GROQ_API_KEY)

    kadiAPY_bot = KadiAPYBot(groq_llm, store)

# Runs at import time (not only under __main__): builds the module-global
# kadiAPY_bot that bot_kadi() reads when handling a chat message.
initialize()



def bot_kadi(history):
    """Answer the most recent user message in ``history``.

    Generator handler: takes the first element of the last history entry as
    the question, passes it to ``kadiAPY_bot.process_query``, overwrites that
    last entry in place with ``(question, answer)`` and yields the updated
    history exactly once.

    Args:
        history: Sequence of chat turns; the last turn's first element is the
            pending user question.

    Yields:
        The same ``history`` object with its last entry replaced.
    """
    latest_turn = history[-1]
    question = latest_turn[0]
    answer = kadiAPY_bot.process_query(question)
    history[-1] = (question, answer)

    yield history



# Gradio utils
def check_input_text(text):
    """Validate that the user typed something.

    Args:
        text: Content of the question box.

    Returns:
        True when ``text`` is truthy.

    Raises:
        TypeError: When ``text`` is falsy (for example ``""`` or ``None``);
            a Gradio warning is issued first.
    """
    if text:
        return True

    gr.Warning("Please input a question.")
    raise TypeError

def add_text(history, text):
    """Append a pending user turn to the chat history.

    Generator handler that yields once.

    Args:
        history: Current list of chat turns; it is not modified.
        text: The user's question.

    Yields:
        A 2-tuple of a new list equal to ``history`` plus ``(text, None)``,
        and an empty string.
    """
    pending_turn = (text, None)
    extended_history = history + [pending_turn]
    yield extended_history, ""


import gradio as gr

def main():
    """Build the Gradio Blocks UI for the KadiAPY assistant and launch it.

    The layout is a single tab holding a chatbot, a question box, Submit and
    Clear buttons and two example questions. Pressing Enter in the question
    box or clicking Submit runs the same chain: ``check_input_text``, then on
    success ``add_text``, then ``bot_kadi``. Clear resets the chatbot to None.
    """
    example_questions = [
        "Write me a python script with which can convert plain JSON to a Kadi4Mat-compatible extra metadata structure",
        "I need a method to upload a file to a record. The id of the record is 3",
    ]

    with gr.Blocks() as demo:
        gr.Markdown("## KadiAPY - AI Coding-Assistant")
        gr.Markdown("AI assistant for KadiAPY based on RAG architecture powered by LLM")

        with gr.Tab("KadiAPY - AI Assistant"):
            with gr.Row(), gr.Column(scale=10):
                chatbot = gr.Chatbot(
                    [],
                    elem_id="chatbot",
                    label="Kadi Bot",
                    bubble_full_width=False,
                    show_copy_button=True,
                    height=600,
                )
                user_txt = gr.Textbox(
                    label="Question",
                    placeholder="Type in your question and press Enter or click Submit",
                )

                with gr.Row():
                    with gr.Column(scale=1):
                        submit_btn = gr.Button("Submit", variant="primary")
                    with gr.Column(scale=1):
                        clear_btn = gr.Button("Clear", variant="stop")

                # NOTE(review): add_text takes (history, text) but only
                # user_txt is wired as input here; presumably harmless while
                # examples are neither cached nor run on click -- confirm.
                gr.Examples(
                    examples=example_questions,
                    inputs=user_txt,
                    outputs=chatbot,
                    fn=add_text,
                    label="Try asking...",
                    cache_examples=False,
                    examples_per_page=3,
                )

            # Enter in the textbox and the Submit button share one pipeline;
            # registration order (submit first, then click) is kept.
            for register_trigger in (user_txt.submit, submit_btn.click):
                validated = register_trigger(check_input_text, user_txt, None)
                queued = validated.success(add_text, [chatbot, user_txt], [chatbot, user_txt])
                queued.then(bot_kadi, [chatbot], [chatbot])

            clear_btn.click(lambda: None, None, chatbot, queue=False)

    demo.launch()

    
# Build and launch the Gradio UI only when this file is executed as a script.
if __name__ == "__main__":
    main()