Spaces:
Sleeping
Sleeping
import gradio as gr | |
import logging | |
import os | |
import json | |
from langchain.vectorstores import FAISS | |
from app.document_handling import get_embeddings | |
from app.config import BASE_DB_PATH | |
from app.utils.database_handling import list_databases | |
def create_chunks_viewer_tab():
    """Build the "Visualizza Chunks" tab used to inspect the chunks of a database.

    The tab contains a database selector, a chunk selector that is filled when
    the selected database changes, a button that shows the selected chunk's
    text, and a status box for error messages.

    Presumably meant to be called while a ``gr.Blocks`` context is active,
    since it creates components and registers event listeners.

    Returns:
        dict: ``{"db_selector": <gr.Dropdown>}`` so the caller can reference
        the database selector.
    """

    def _index_dir(db_name):
        """Return the directory holding the FAISS index of *db_name*."""
        return os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")

    def _load_vectorstore(db_name):
        """Load the FAISS vector store of *db_name* from disk."""
        embeddings = get_embeddings()
        # SECURITY: allow_dangerous_deserialization=True makes FAISS.load_local
        # unpickle the stored docstore. Only safe for index directories this
        # application created itself; never point it at untrusted data.
        return FAISS.load_local(
            _index_dir(db_name),
            embeddings,
            allow_dangerous_deserialization=True,
        )

    def load_chunks(db_name):
        """Build the chunk dropdown entries for the selected database.

        Args:
            db_name: Name of the database chosen in the selector (may be empty).

        Returns:
            tuple: ``(gr.Dropdown update, status message)``. On success the
            dropdown lists one ``"Chunk <n> - <title> (<filename>)"`` entry per
            chunk and the status is empty; on failure the dropdown is emptied
            and disabled and the status carries the error text.
        """
        if not db_name:
            return gr.Dropdown(choices=[], interactive=False), "Seleziona un database"

        try:
            # Read the metadata first: it is cheap and fails fast before the
            # (more expensive) embeddings / index load.
            metadata_path = os.path.join(_index_dir(db_name), "metadata.json")
            with open(metadata_path, 'r') as f:
                metadata = json.load(f)

            vectorstore = _load_vectorstore(db_name)

            # Materialize the docstore ids once instead of once per chunk.
            # NOTE: relies on the private ``_dict`` of the in-memory docstore.
            doc_ids = list(vectorstore.docstore._dict)

            chunk_list = []
            current_index = 0
            for doc in metadata:
                for _ in range(doc['chunks']):
                    # Consistency check: raises IndexError (reported below)
                    # when metadata.json declares more chunks than the index
                    # actually holds.
                    _ = doc_ids[current_index]
                    chunk_list.append(
                        f"Chunk {current_index} - {doc['title']} ({doc['filename']})"
                    )
                    current_index += 1

            return gr.Dropdown(choices=chunk_list, interactive=True), ""
        except Exception as e:
            # UI boundary: report the problem in the status box instead of
            # crashing the event handler; keep the traceback in the log.
            logging.exception(f"Errore nel caricamento chunks: {e}")
            return gr.Dropdown(choices=[], interactive=False), f"Errore: {e}"

    def inspect_chunk(db_name, chunk_id):
        """Return the text of the chunk selected in the dropdown.

        Args:
            db_name: Name of the selected database.
            chunk_id: Dropdown label in the form produced by ``load_chunks``
                (``"Chunk <n> - ..."``); ``<n>`` is the position of the chunk
                in the docstore.

        Returns:
            str: The chunk's ``page_content``, or a user-facing message when
            nothing is selected or the lookup fails.
        """
        if not db_name or not chunk_id:
            return "Seleziona un database e un chunk"

        try:
            vectorstore = _load_vectorstore(db_name)

            # Recover the positional index from the "Chunk <n> - ..." label.
            chunk_num = int(chunk_id.split(" - ")[0].replace("Chunk ", ""))

            # Same ordering as used when the labels were generated.
            documents = list(vectorstore.docstore._dict.values())
            return documents[chunk_num].page_content
        except Exception as e:
            logging.exception(f"Errore nell'ispezione del chunk: {e}")
            return f"Errore nel recupero del contenuto: {e}"

    # Query the available databases once so choices and default value agree.
    databases = list_databases()

    # NOTE(review): the nesting below was reconstructed from de-indented
    # source; confirm that error_box belongs in the right-hand column.
    with gr.Tab("Visualizza Chunks"):
        gr.Markdown("## Ispeziona Chunks dei Database")

        with gr.Row():
            with gr.Column():
                # Selectors
                db_selector = gr.Dropdown(
                    choices=databases,
                    label="Seleziona Database",
                    value=databases[0] if databases else None
                )
                chunk_selector = gr.Dropdown(
                    choices=[],
                    label="Seleziona Chunk",
                    interactive=False
                )
                inspect_button = gr.Button("Visualizza Contenuto")

            with gr.Column():
                # Content display area
                chunk_content = gr.TextArea(
                    label="Contenuto del Chunk",
                    interactive=False,
                    lines=20
                )
                error_box = gr.Textbox(
                    label="Status",
                    visible=True,
                    interactive=False
                )

        # Events
        # NOTE(review): chunks are only loaded by the change event, so the
        # preselected database presumably shows no chunks until the selection
        # changes — confirm whether the caller triggers an initial load.
        db_selector.change(
            fn=load_chunks,
            inputs=[db_selector],
            outputs=[chunk_selector, error_box]
        )
        inspect_button.click(
            fn=inspect_chunk,
            inputs=[db_selector, chunk_selector],
            outputs=[chunk_content]
        )

    return {"db_selector": db_selector}