# Source of a Hugging Face Space application file.
# (File-viewer page chrome -- Space status "Sleeping", "File size: 4,745 Bytes",
# the per-line commit hashes and the line-number gutter -- was not part of the
# program and has been reduced to this note.)
import os
import json
import gradio as gr
import streamlit as st
from huggingface_hub import HfApi, login
from dotenv import load_dotenv
from download_repo import download_gitlab_repo_to_hfspace
from process_repo import extract_repo_files
from chunking import chunk_pythoncode_and_add_metadata, chunk_text_and_add_metadata
from vectorstore import setup_vectorstore
from llm import get_groq_llm
from kadi_apy_bot import KadiAPYBot
from repo_versions import store_message_from_json
# Load environment variables from .env file
load_dotenv()

# Load configuration from JSON file. The encoding is pinned to UTF-8 so that
# parsing does not depend on the host's locale default encoding.
with open("config.json", "r", encoding="utf-8") as file:
    config = json.load(file)

# Secrets are read from the environment; a missing variable raises KeyError
# right here at import time.
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
HF_TOKEN = os.environ["HF_Token"]

# Settings taken from config.json.
VECTORSTORE_DIRECTORY = config["vectorstore_directory"]
CHUNK_SIZE = config["chunking"]["chunk_size"]
CHUNK_OVERLAP = config["chunking"]["chunk_overlap"]
EMBEDDING_MODEL_NAME = config["embedding_model"]["name"]
EMBEDDING_MODEL_VERSION = config["embedding_model"]["version"]
LLM_MODEL_NAME = config["llm_model"]["name"]
LLM_MODEL_TEMPERATURE = config["llm_model"]["temperature"]
GITLAB_API_URL = config["gitlab"]["api_url"]
# NOTE: the next two keys really do contain a space in config.json.
GITLAB_PROJECT_ID = config["gitlab"]["project id"]
GITLAB_PROJECT_VERSION = config["gitlab"]["project version"]
DATA_DIR = config["data_dir"]
HF_SPACE_NAME = config["hf_space_name"]

# Log in to the Hugging Face Hub with the token and create the API client
# that initialize() passes on to the repository download step.
login(HF_TOKEN)
hf_api = HfApi()
def initialize():
    """Prepare the data pipeline and publish the bot as ``kadiAPY_bot``.

    Steps, in order: download the configured GitLab project, extract the
    files under ``kadi_apy`` and ``docs`` from ``DATA_DIR``, chunk both sets,
    build the vector store from all chunks, create the Groq LLM and finally
    bind a ``KadiAPYBot`` to the module-level name ``kadiAPY_bot``.
    Progress counts are printed to stdout along the way.
    """
    global kadiAPY_bot

    download_gitlab_repo_to_hfspace(
        GITLAB_API_URL,
        GITLAB_PROJECT_ID,
        GITLAB_PROJECT_VERSION,
        DATA_DIR,
        hf_api,
        HF_SPACE_NAME,
    )

    # Source code and documentation are extracted separately because they
    # go through different chunking functions below.
    source_texts, source_refs = extract_repo_files(DATA_DIR, ["kadi_apy"], [])
    docs_texts, docs_refs = extract_repo_files(DATA_DIR, ["docs"], [])
    print("Length of code_texts: ", len(source_texts))
    print("Length of doc_files: ", len(docs_texts))

    source_chunks = chunk_pythoncode_and_add_metadata(source_texts, source_refs)
    docs_chunks = chunk_text_and_add_metadata(
        docs_texts, docs_refs, CHUNK_SIZE, CHUNK_OVERLAP
    )
    print(f"Total number of code_chunks: {len(source_chunks)}")
    print(f"Total number of doc_chunks: {len(docs_chunks)}")

    # Documentation chunks come first, then code chunks, in one store.
    all_chunks = docs_chunks + source_chunks
    store = setup_vectorstore(all_chunks, EMBEDDING_MODEL_NAME, VECTORSTORE_DIRECTORY)
    groq_llm = get_groq_llm(LLM_MODEL_NAME, LLM_MODEL_TEMPERATURE, GROQ_API_KEY)

    kadiAPY_bot = KadiAPYBot(groq_llm, store)
# Runs at import time: sets the module-level kadiAPY_bot that bot_kadi() uses.
initialize()
def bot_kadi(history):
    """Answer the newest question in ``history`` and yield the updated history.

    The last entry's first element is taken as the user's question, passed to
    ``kadiAPY_bot.process_query`` and the entry is replaced in place by a
    ``(question, answer)`` tuple. Written as a generator: it yields the same
    ``history`` list exactly once.
    """
    latest_turn = history[-1]
    question = latest_turn[0]
    answer = kadiAPY_bot.process_query(question)
    history[-1] = (question, answer)
    yield history
# Gradio utils
def check_input_text(text):
    """Validate the question box content before the chat chain runs.

    Args:
        text: Current value of the question textbox.

    Returns:
        ``True`` when ``text`` contains something other than whitespace.

    Raises:
        TypeError: When ``text`` is empty, ``None`` or whitespace-only. A
            ``gr.Warning`` is shown first; the exception is what keeps the
            follow-up ``.success(...)`` step from running.
    """
    # Whitespace-only input is as useless to the bot as an empty string, so
    # both are rejected the same way.
    is_blank = not text or (isinstance(text, str) and not text.strip())
    if is_blank:
        gr.Warning("Please input a question.")
        raise TypeError
    return True
def add_text(history, text):
    """Append a pending user turn to the chat and clear the question box.

    Yields once: a new list made of ``history`` plus ``(text, None)`` (the
    answer slot is left empty), together with ``""`` as the new textbox
    value. The list passed in is not modified.
    """
    pending_turn = (text, None)
    yield history + [pending_turn], ""
import gradio as gr
def main():
    """Build the Gradio Blocks UI for the KadiAPY assistant and launch it.

    The page consists of one tab with the chat window, a question box,
    Submit/Clear buttons and two example questions. Pressing Enter in the
    question box and clicking Submit both run the same chain:
    ``check_input_text`` -> ``add_text`` -> ``bot_kadi``.
    """
    # NOTE(review): the nesting of the layout blocks below was reconstructed
    # from a source whose indentation had been lost -- confirm against the
    # deployed UI.
    with gr.Blocks() as demo:
        gr.Markdown("## KadiAPY - AI Coding-Assistant")
        gr.Markdown("AI assistant for KadiAPY based on RAG architecture powered by LLM")

        with gr.Tab("KadiAPY - AI Assistant"):
            with gr.Row():
                with gr.Column(scale=10):
                    chatbot = gr.Chatbot(
                        [],
                        elem_id="chatbot",
                        label="Kadi Bot",
                        bubble_full_width=False,
                        show_copy_button=True,
                        height=600,
                    )
                    user_txt = gr.Textbox(
                        label="Question",
                        placeholder="Type in your question and press Enter or click Submit",
                    )

                    with gr.Row():
                        with gr.Column(scale=1):
                            submit_btn = gr.Button("Submit", variant="primary")
                        with gr.Column(scale=1):
                            clear_btn = gr.Button("Clear", variant="stop")

                    # NOTE(review): with cache_examples=False and run_on_click
                    # not set, `fn`/`outputs` appear to be unused. add_text
                    # takes (history, text) and yields two values, which does
                    # not match inputs=user_txt / outputs=chatbot -- fix the
                    # wiring before enabling caching or run_on_click.
                    gr.Examples(
                        examples=[
                            "Write me a python script with which can convert plain JSON to a Kadi4Mat-compatible extra metadata structure",
                            "I need a method to upload a file to a record. The id of the record is 3",
                        ],
                        inputs=user_txt,
                        outputs=chatbot,
                        fn=add_text,
                        label="Try asking...",
                        cache_examples=False,
                        examples_per_page=3,
                    )

        # Enter in the textbox and the Submit button share one handler chain;
        # wiring both in a loop keeps the two triggers from drifting apart.
        for trigger in (user_txt.submit, submit_btn.click):
            (
                trigger(check_input_text, user_txt, None)
                # .success only fires when the validation step did not raise.
                .success(add_text, [chatbot, user_txt], [chatbot, user_txt])
                .then(bot_kadi, [chatbot], [chatbot])
            )

        # Clearing sets the chatbot value to None.
        clear_btn.click(lambda: None, None, chatbot, queue=False)

    demo.launch()
# Start the UI only when this file is executed as a script.
if __name__ == "__main__":
    main()