import os
import json
import gradio as gr
from huggingface_hub import HfApi, login
from dotenv import load_dotenv
from download_repo import download_gitlab_repo_to_hfspace
from process_repo import extract_repo_files
from chunking import chunk_pythoncode_and_add_metadata, chunk_text_and_add_metadata
from vectorstore import setup_vectorstore
from llm import get_groq_llm
from ragchain import RAGChain
# Load environment variables from .env file
load_dotenv()
# Load configuration from JSON file
with open("config.json", "r") as file:
    config = json.load(file)
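# For reference, config.json is expected to provide at least the keys read
# below; a minimal sketch (values are illustrative placeholders, not the
# project's actual settings):
#
# {
#   "vectorstore_directory": "vectorstore",
#   "chunking": {"chunk_size": 512, "chunk_overlap": 32},
#   "embedding_model": {"name": "<embedding-model>", "version": "<version>"},
#   "llm_model": {"name": "<groq-model>", "temperature": 0.0},
#   "gitlab": {"api_url": "<gitlab-api-url>", "project id": "<id>", "project version": "<ref>"},
#   "data_dir": "data",
#   "hf_space_name": "<user/space>"
# }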
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
HF_TOKEN = os.environ["HF_Token"]
VECTORSTORE_DIRECTORY = config["vectorstore_directory"]
CHUNK_SIZE = config["chunking"]["chunk_size"]
CHUNK_OVERLAP = config["chunking"]["chunk_overlap"]
EMBEDDING_MODEL_NAME = config["embedding_model"]["name"]
EMBEDDING_MODEL_VERSION = config["embedding_model"]["version"]
LLM_MODEL_NAME = config["llm_model"]["name"]
LLM_MODEL_TEMPERATURE = config["llm_model"]["temperature"]
GITLAB_API_URL = config["gitlab"]["api_url"]
GITLAB_PROJECT_ID = config["gitlab"]["project id"]
GITLAB_PROJECT_VERSION = config["gitlab"]["project version"]
DATA_DIR = config["data_dir"]
HF_SPACE_NAME = config["hf_space_name"]
login(HF_TOKEN)
api = HfApi()
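# Authenticate with the Hugging Face Hub using the token from the environment.
# The HfApi client is kept around for any later Hub operations the Space needs.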
def initialize():
    global vectorstore, llm

    download_gitlab_repo_to_hfspace(GITLAB_API_URL, GITLAB_PROJECT_ID, GITLAB_PROJECT_VERSION, DATA_DIR)

    # Extract source files (kadi_apy package) and documentation files separately.
    code_texts, code_references = extract_repo_files(DATA_DIR, ['kadi_apy'], [])
    doc_texts, doc_references = extract_repo_files(DATA_DIR, [], [])

    print(f"Number of code files: {len(code_texts)}")
    print(f"Number of doc files: {len(doc_texts)}")

    code_chunks = chunk_pythoncode_and_add_metadata(code_texts, code_references)
    doc_chunks = chunk_text_and_add_metadata(doc_texts, doc_references, CHUNK_SIZE, CHUNK_OVERLAP)
    print(f"Total number of code_chunks: {len(code_chunks)}")
    print(f"Total number of doc_chunks: {len(doc_chunks)}")

    vectorstore = setup_vectorstore(doc_chunks + code_chunks, EMBEDDING_MODEL_NAME, VECTORSTORE_DIRECTORY)
    llm = get_groq_llm(LLM_MODEL_NAME, LLM_MODEL_TEMPERATURE, GROQ_API_KEY)
initialize()
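# initialize() runs at import time, so the repository download, chunking, and
# vectorstore setup happen once when the Space starts, before the UI is served.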
def rag_workflow(query):
    """
    Perform the complete RAG workflow for a user query using the RAGChain class.
    """
    # Assume 'llm' and 'vectorstore' are already initialized instances
    rag_chain = RAGChain(llm, vectorstore)

    # --- Pre-retrieval stage ---
    # Predict which Python library to search in: the (standard) kadiAPY library
    # or the kadiAPY CLI library.
    code_library_usage_prediction = rag_chain.predict_library_usage(query)
    print(f"Predicted library usage: {code_library_usage_prediction}")

    rewritten_query = rag_chain.rewrite_query(query)
    print(f"Rewritten query: {rewritten_query}")

    # --- Retrieval stage ---
    kadiAPY_doc_documents = rag_chain.retrieve_contexts(query, k=5, filter={"usage": "doc"})
    kadiAPY_code_documents = rag_chain.retrieve_contexts(str(rewritten_query.content), k=3, filter={"usage": code_library_usage_prediction})

    print("Retrieved Document Contexts:", kadiAPY_doc_documents)
    print("Retrieved Code Contexts:", kadiAPY_code_documents)

    # --- Pre-generation stage ---
    # Add each document's metadata to the retrieved content (docs & code snippets).
    formatted_doc_snippets = rag_chain.format_documents(kadiAPY_doc_documents)
    formatted_code_snippets = rag_chain.format_documents(kadiAPY_code_documents)

    # --- Generation stage ---
    response = rag_chain.generate_response(query, formatted_doc_snippets, formatted_code_snippets)
    print("Generated Response:", response)

    return response
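# Example invocation (a sketch; assumes initialize() has populated `llm` and
# `vectorstore`):
#   answer = rag_workflow("I need a method to upload a file to a record")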
def bot_kadi(history):
    # Run the RAG workflow on the latest user message and write the
    # generated answer back into the chat history.
    user_query = history[-1][0]
    response = rag_workflow(user_query)
    history[-1] = (user_query, response)
    yield history
# Gradio utils
def check_input_text(text):
    if not text:
        gr.Warning("Please input a question.")
        # Raising aborts the event chain, so the .success() handlers
        # wired up below never fire for empty input.
        raise TypeError
    return True
def add_text(history, text):
    # Append the user message to the chat history and clear the textbox.
    history = history + [(text, None)]
    yield history, ""
def main():
    with gr.Blocks() as demo:
        gr.Markdown("## KadiAPY - AI Coding-Assistant")
        gr.Markdown("An AI assistant for KadiAPY, based on a RAG architecture and powered by an LLM")

        with gr.Tab("KadiAPY - AI Assistant"):
            with gr.Row():
                with gr.Column(scale=10):
                    chatbot = gr.Chatbot([], elem_id="chatbot", label="Kadi Bot", bubble_full_width=False, show_copy_button=True, height=600)
                    user_txt = gr.Textbox(label="Question", placeholder="Type in your question and press Enter or click Submit")

                    with gr.Row():
                        with gr.Column(scale=1):
                            submit_btn = gr.Button("Submit", variant="primary")
                        with gr.Column(scale=1):
                            clear_btn = gr.Button("Clear", variant="stop")

                    gr.Examples(
                        examples=[
                            "Who is working on Kadi4Mat?",
                            "How do I install the Kadi-Apy library?",
                            "How do I install the Kadi-Apy library for development?",
                            "I need a method to upload a file to a record",
                        ],
                        inputs=user_txt,
                        outputs=chatbot,
                        fn=add_text,
                        label="Try asking...",
                        cache_examples=False,
                        examples_per_page=3,
                    )

        # Validate the input first; only on success append the message
        # to the history and then run the bot.
        user_txt.submit(check_input_text, user_txt, None).success(add_text, [chatbot, user_txt], [chatbot, user_txt]).then(bot_kadi, [chatbot], [chatbot])
        submit_btn.click(check_input_text, user_txt, None).success(add_text, [chatbot, user_txt], [chatbot, user_txt]).then(bot_kadi, [chatbot], [chatbot])
        clear_btn.click(lambda: None, None, chatbot, queue=False)

    demo.launch()
if __name__ == "__main__":
    main()