import gradio as gr
import logging
import os
import json
from langchain.vectorstores import FAISS
from app.document_handling import get_embeddings
from app.config import BASE_DB_PATH
from app.utils.database_handling import list_databases
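# Note: the FAISS import above assumes an older LangChain package layout;
# recent releases expose the same class as
# `from langchain_community.vectorstores import FAISS`. Adjust the import
# to match the LangChain version pinned by this Space.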

def create_chunks_viewer_tab():
    """Create the tab for inspecting the chunks stored in each database."""

    def load_chunks(db_name):
        """Load the list of chunks from the selected database."""
        if not db_name:
            return gr.Dropdown(choices=[], interactive=False), "Seleziona un database"
        try:
            metadata_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}", "metadata.json")
            vectorstore_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
            # Load metadata and vectorstore
            with open(metadata_path, 'r') as f:
                metadata = json.load(f)
            embeddings = get_embeddings()
            vectorstore = FAISS.load_local(vectorstore_path, embeddings, allow_dangerous_deserialization=True)
            # Build the chunk list in the format "Chunk X - Title (File)".
            # Chunk positions are assumed to follow the insertion order of the
            # FAISS docstore, i.e. the order of the entries in metadata.json.
            doc_ids = list(vectorstore.docstore._dict.keys())
            chunk_list = []
            current_index = 0
            for doc in metadata:
                for _ in range(doc['chunks']):
                    # Touch the corresponding docstore entry as a sanity check:
                    # raises IndexError if the index and metadata are out of sync,
                    # which surfaces as an error message below.
                    doc_id = doc_ids[current_index]
                    chunk_metadata = vectorstore.docstore._dict[doc_id].metadata
                    chunk_list.append(f"Chunk {current_index} - {doc['title']} ({doc['filename']})")
                    current_index += 1
            return gr.Dropdown(choices=chunk_list, interactive=True), ""
        except Exception as e:
            logging.error(f"Errore nel caricamento chunks: {e}")
            return gr.Dropdown(choices=[], interactive=False), f"Errore: {e}"

    def inspect_chunk(db_name, chunk_id):
        """Return the content of the selected chunk."""
        if not db_name or not chunk_id:
            return "Seleziona un database e un chunk"
        try:
            db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
            embeddings = get_embeddings()
            vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
            # Extract the chunk number from the label ("Chunk X - Title (File)")
            chunk_num = int(chunk_id.split(" - ")[0].replace("Chunk ", ""))
            # Look up the chunk by its position in the docstore
            doc_ids = list(vectorstore.docstore._dict.keys())
            chunk_content = vectorstore.docstore._dict[doc_ids[chunk_num]].page_content
            return chunk_content
        except Exception as e:
            logging.error(f"Errore nell'ispezione del chunk: {e}")
            return f"Errore nel recupero del contenuto: {e}"
with gr.Tab("Visualizza Chunks"):
gr.Markdown("## Ispeziona Chunks dei Database")
with gr.Row():
with gr.Column():
# Selettori
db_selector = gr.Dropdown(
choices=list_databases(),
label="Seleziona Database",
value=list_databases()[0] if list_databases() else None
)
chunk_selector = gr.Dropdown(
choices=[],
label="Seleziona Chunk",
interactive=False
)
inspect_button = gr.Button("Visualizza Contenuto")
with gr.Column():
# Area visualizzazione contenuto
chunk_content = gr.TextArea(
label="Contenuto del Chunk",
interactive=False,
lines=20
)
error_box = gr.Textbox(
label="Status",
visible=True,
interactive=False
)
# Eventi
db_selector.change(
fn=load_chunks,
inputs=[db_selector],
outputs=[chunk_selector, error_box]
)
inspect_button.click(
fn=inspect_chunk,
inputs=[db_selector, chunk_selector],
outputs=[chunk_content]
)
return {"db_selector": db_selector} |