Nugh75 committed on
Commit
ac52d7a
·
1 Parent(s): a45dfb0

update vari ma non conclusivi

Browse files
app.py CHANGED
@@ -2,12 +2,12 @@
2
 
3
  import gradio as gr
4
  import logging
 
5
  from app.logging_config import configure_logging
6
- from app.functions.database_handling import list_databases
7
  from ui.chatbot_tab import create_chatbot_tab
8
  from ui.db_management_tab import create_db_management_tab
9
  from ui.document_management_tab import create_document_management_tab
10
- from ui.document_view_tab import create_document_view_tab
11
  from ui.new_features_tab import create_new_features_tab
12
 
13
  # Configura il logging
@@ -16,13 +16,9 @@ configure_logging()
16
  def update_all_dropdowns():
17
  """
18
  Aggiorna tutti i dropdown in tutte le tab.
19
- Nel tuo scenario, hai 6 dropdown totali (2 nella tab DB, 2 nella tab Documenti,
20
- eventualmente 1 nella tab Chatbot, 1 in altre tab, ecc.).
21
- Se ne hai di più o di meno, modifica il numero nel range.
22
  """
23
  databases = list_databases()
24
- # Imposta la prima voce selezionata (value) solo se la lista non è vuota
25
- # e aggiorna le "choices" di tutti i dropdown.
26
  return [gr.update(choices=databases, value=databases[0] if databases else None) for _ in range(6)]
27
 
28
  def main():
@@ -30,22 +26,28 @@ def main():
30
  logging.info("Avvio applicazione")
31
  try:
32
  with gr.Blocks() as rag_chatbot:
 
 
 
33
  gr.Markdown("# Chatbot basato su RAG")
34
  logging.info("Interfaccia Gradio inizializzata")
35
 
36
  # Crea i vari tab dell'interfaccia
37
  create_chatbot_tab()
38
- create_db_management_tab(update_all_dropdowns) # Passiamo la callback
39
- create_document_management_tab(update_all_dropdowns) # Passiamo la callback
40
- create_document_view_tab()
41
  create_new_features_tab()
42
  logging.info("Tab dell'interfaccia creati con successo")
43
-
44
- # Avvia l'app
45
- logging.info("Avvio server Gradio")
46
- rag_chatbot.launch()
47
  except Exception as e:
48
- logging.error(f"Errore durante l'avvio: {str(e)}", exc_info=True)
 
 
 
 
 
49
 
50
  if __name__ == "__main__":
51
  main()
 
2
 
3
  import gradio as gr
4
  import logging
5
+ from watchdog.observers import Observer
6
  from app.logging_config import configure_logging
7
+ from app.functions.database_handling import list_databases, setup_db_observer
8
  from ui.chatbot_tab import create_chatbot_tab
9
  from ui.db_management_tab import create_db_management_tab
10
  from ui.document_management_tab import create_document_management_tab
 
11
  from ui.new_features_tab import create_new_features_tab
12
 
13
  # Configura il logging
 
16
def update_all_dropdowns(count=6):
    """Build refresh payloads for the database dropdowns.

    Args:
        count: Number of ``gr.update`` objects to return. Defaults to 6,
            the value that used to be hard-coded; pass the number of
            components wired as ``outputs`` when it differs.

    Returns:
        A list of ``count`` ``gr.update`` objects, each carrying the current
        database list as ``choices`` and its first entry (or ``None`` when
        the list is empty) as ``value``.
    """
    databases = list_databases()
    # Lazy %-formatting: the message is only rendered when INFO is enabled.
    logging.info("Aggiornamento dropdown con databases: %s", databases)
    # Select the first database only when at least one exists.
    selected = databases[0] if databases else None
    return [gr.update(choices=databases, value=selected) for _ in range(count)]
23
 
24
  def main():
 
26
  logging.info("Avvio applicazione")
27
  try:
28
  with gr.Blocks() as rag_chatbot:
29
+ # Configura l'observer per la cartella db
30
+ observer = setup_db_observer(update_all_dropdowns)
31
+
32
  gr.Markdown("# Chatbot basato su RAG")
33
  logging.info("Interfaccia Gradio inizializzata")
34
 
35
  # Crea i vari tab dell'interfaccia
36
  create_chatbot_tab()
37
+ create_db_management_tab(update_all_dropdowns)
38
+ create_document_management_tab(update_all_dropdowns)
 
39
  create_new_features_tab()
40
  logging.info("Tab dell'interfaccia creati con successo")
41
+
42
+ # Avvia l'app
43
+ rag_chatbot.launch()
 
44
  except Exception as e:
45
+ logging.error(f"Errore durante l'avvio: {str(e)}")
46
+ finally:
47
+ # Assicurati che l'observer venga fermato
48
+ if 'observer' in locals():
49
+ observer.stop()
50
+ observer.join()
51
 
52
  if __name__ == "__main__":
53
  main()
app/document_handling.py CHANGED
@@ -1,5 +1,5 @@
1
  import logging
2
- import gradio as gr # Aggiunto import mancante
3
  from langchain_community.vectorstores import FAISS
4
  from langchain_huggingface import HuggingFaceEmbeddings
5
  import os
@@ -10,7 +10,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
  from dataclasses import dataclass
11
  import json
12
  from datetime import datetime
13
- from app.functions.database_handling import BASE_DB_PATH # Aggiungi questo import
14
 
15
  # Initialize the text splitter
16
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
@@ -18,6 +18,16 @@ text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=10
18
  # -------------- UTILITY FUNCTIONS --------------
19
  @dataclass
20
  class DocumentMetadata:
 
 
 
 
 
 
 
 
 
 
21
  filename: str
22
  title: str
23
  author: str
@@ -25,6 +35,7 @@ class DocumentMetadata:
25
  chunks: int
26
 
27
  def to_dict(self):
 
28
  return {
29
  "filename": self.filename,
30
  "title": self.title,
@@ -34,8 +45,16 @@ class DocumentMetadata:
34
  }
35
 
36
  def save_metadata(metadata_list, db_name):
37
- """Salva i metadati nel percorso corretto."""
38
- # Usa il percorso base corretto
 
 
 
 
 
 
 
 
39
  db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
40
  metadata_file = os.path.join(db_path, "metadata.json")
41
 
@@ -43,17 +62,29 @@ def save_metadata(metadata_list, db_name):
43
  if not os.path.exists(db_path):
44
  os.makedirs(db_path)
45
 
 
46
  existing_metadata = []
47
  if os.path.exists(metadata_file):
48
  with open(metadata_file, 'r') as f:
49
  existing_metadata = json.load(f)
50
 
 
51
  existing_metadata.extend([m.to_dict() for m in metadata_list])
52
 
 
53
  with open(metadata_file, 'w') as f:
54
  json.dump(existing_metadata, f, indent=2)
55
 
56
  def extract_text_from_pdf(file_path):
 
 
 
 
 
 
 
 
 
57
  with open(file_path, 'rb') as f:
58
  reader = PyPDF2.PdfReader(f)
59
  text = ""
@@ -62,12 +93,25 @@ def extract_text_from_pdf(file_path):
62
  return text
63
 
64
  def extract_text_from_docx(file_path):
 
 
 
 
 
 
 
 
 
65
  doc = Document(file_path)
66
  text = ""
67
  for para in doc.paragraphs:
68
  text += para.text + "\n"
69
  return text
70
 
 
 
 
 
71
  # -------------- CHATBOT TAB FUNCTIONS --------------
72
  def answer_question(question, db_name="default_db"):
73
  db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
@@ -88,6 +132,9 @@ def answer_question(question, db_name="default_db"):
88
  results = [doc.page_content for doc in docs]
89
  return "\n\n".join(results)
90
 
 
 
 
91
  # -------------- DOCUMENT MANAGEMENT TAB FUNCTIONS --------------
92
  def upload_and_index(files, title, author, db_name="default_db"):
93
  if not files:
@@ -218,43 +265,6 @@ def delete_file_from_database(file_name, db_name="default_db"):
218
  except Exception as e:
219
  return f"Errore durante la rimozione del file: {e}"
220
 
221
- # -------------- DOCUMENT VISUALIZATION TAB FUNCTIONS --------------
222
- def list_indexed_documents(db_name="default_db"):
223
- db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
224
- metadata_file = os.path.join(db_path, "metadata.json")
225
-
226
- if not os.path.exists(db_path):
227
- return f"Il database '{db_name}' non esiste."
228
-
229
- if not os.path.exists(metadata_file):
230
- return f"Nessun documento nel database '{db_name}'."
231
-
232
- try:
233
- with open(metadata_file, 'r') as f:
234
- metadata = json.load(f)
235
-
236
- if not metadata:
237
- return "Nessun documento trovato nel database."
238
-
239
- output_lines = ["📚 Documenti nel database:"]
240
- for doc in metadata:
241
- output_lines.extend([
242
- f"\n📄 Documento: {doc['title']}",
243
- f" 📝 Autore: {doc['author']}",
244
- f" 📁 File: {doc['filename']}",
245
- f" 🕒 Caricato il: {doc['upload_date']}",
246
- f" 📑 Chunks: {doc['chunks']}"
247
- ])
248
-
249
- result = "\n".join(output_lines)
250
- logging.info(f"Documenti trovati nel database {db_name}: {result}")
251
- return result
252
-
253
- except Exception as e:
254
- error_msg = f"Errore nella lettura dei metadati: {e}"
255
- logging.error(error_msg)
256
- return error_msg
257
-
258
  # -------------- NEW FEATURES TAB FUNCTIONS --------------
259
  def search_documents(query, db_name="default_db"):
260
  db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
 
1
  import logging
2
+ import gradio as gr
3
  from langchain_community.vectorstores import FAISS
4
  from langchain_huggingface import HuggingFaceEmbeddings
5
  import os
 
10
  from dataclasses import dataclass
11
  import json
12
  from datetime import datetime
13
+ from app.functions.database_handling import BASE_DB_PATH
14
 
15
  # Initialize the text splitter
16
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
 
18
  # -------------- UTILITY FUNCTIONS --------------
19
  @dataclass
20
  class DocumentMetadata:
21
+ """
22
+ Classe per gestire i metadati dei documenti.
23
+
24
+ Attributi:
25
+ filename (str): Nome del file originale
26
+ title (str): Titolo assegnato al documento
27
+ author (str): Autore del documento
28
+ upload_date (str): Data di caricamento
29
+ chunks (int): Numero di chunks in cui è stato diviso il documento
30
+ """
31
  filename: str
32
  title: str
33
  author: str
 
35
  chunks: int
36
 
37
  def to_dict(self):
38
+ """Converte i metadati in un dizionario per il salvataggio JSON."""
39
  return {
40
  "filename": self.filename,
41
  "title": self.title,
 
45
  }
46
 
47
  def save_metadata(metadata_list, db_name):
48
+ """
49
+ Salva i metadati dei documenti nel database specificato.
50
+
51
+ Args:
52
+ metadata_list: Lista di oggetti DocumentMetadata da salvare
53
+ db_name: Nome del database in cui salvare i metadati
54
+
55
+ Note:
56
+ I metadati vengono salvati in un file JSON nella directory del database
57
+ """
58
  db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
59
  metadata_file = os.path.join(db_path, "metadata.json")
60
 
 
62
  if not os.path.exists(db_path):
63
  os.makedirs(db_path)
64
 
65
+ # Carica metadati esistenti se presenti
66
  existing_metadata = []
67
  if os.path.exists(metadata_file):
68
  with open(metadata_file, 'r') as f:
69
  existing_metadata = json.load(f)
70
 
71
+ # Aggiungi nuovi metadati
72
  existing_metadata.extend([m.to_dict() for m in metadata_list])
73
 
74
+ # Salva il file aggiornato
75
  with open(metadata_file, 'w') as f:
76
  json.dump(existing_metadata, f, indent=2)
77
 
78
  def extract_text_from_pdf(file_path):
79
+ """
80
+ Estrae il testo da un file PDF.
81
+
82
+ Args:
83
+ file_path: Percorso del file PDF
84
+
85
+ Returns:
86
+ str: Testo estratto dal PDF
87
+ """
88
  with open(file_path, 'rb') as f:
89
  reader = PyPDF2.PdfReader(f)
90
  text = ""
 
93
  return text
94
 
95
def extract_text_from_docx(file_path):
    """Extract the text of a DOCX file.

    Args:
        file_path: Path of the DOCX file.

    Returns:
        str: The text of every paragraph, each followed by a newline.
    """
    doc = Document(file_path)
    # Build the result with one join instead of growing a string with
    # ``+=`` inside the loop.
    return "".join(para.text + "\n" for para in doc.paragraphs)
110
 
111
+
112
+
113
+
114
+
115
  # -------------- CHATBOT TAB FUNCTIONS --------------
116
  def answer_question(question, db_name="default_db"):
117
  db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
 
132
  results = [doc.page_content for doc in docs]
133
  return "\n\n".join(results)
134
 
135
+
136
+
137
+
138
  # -------------- DOCUMENT MANAGEMENT TAB FUNCTIONS --------------
139
  def upload_and_index(files, title, author, db_name="default_db"):
140
  if not files:
 
265
  except Exception as e:
266
  return f"Errore durante la rimozione del file: {e}"
267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  # -------------- NEW FEATURES TAB FUNCTIONS --------------
269
  def search_documents(query, db_name="default_db"):
270
  db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
app/functions/database_handling.py CHANGED
@@ -1,6 +1,8 @@
1
  import logging
2
  import os
3
  import shutil
 
 
4
 
5
  # Definisci il percorso base per i database
6
  BASE_DB_PATH = "db"
@@ -64,3 +66,20 @@ def list_databases():
64
  except Exception as e:
65
  logging.error(f"Error listing databases: {e}")
66
  return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import logging
2
  import os
3
  import shutil
4
+ from watchdog.observers import Observer
5
+ from watchdog.events import FileSystemEventHandler
6
 
7
  # Definisci il percorso base per i database
8
  BASE_DB_PATH = "db"
 
66
  except Exception as e:
67
  logging.error(f"Error listing databases: {e}")
68
  return []
69
+
70
class DatabaseChangeHandler(FileSystemEventHandler):
    """Watchdog handler that runs a callback on directory events.

    Args:
        update_callback: Zero-argument callable invoked for every event
            whose ``is_directory`` flag is set.
    """

    def __init__(self, update_callback):
        super().__init__()
        self.update_callback = update_callback

    def on_any_event(self, event):
        """Run the callback for directory events; ignore file events."""
        if not event.is_directory:
            return
        try:
            # NOTE(review): the callback's return value is discarded here,
            # so a callback that only *returns* UI updates has no visible
            # effect when triggered from this handler -- confirm intent.
            self.update_callback()
        except Exception:
            # Keep a failing callback from propagating into the thread
            # that dispatches file-system events.
            logging.exception("Error while running database change callback")
78
+
79
def setup_db_observer(update_callback):
    """Start a watchdog observer on the database base directory.

    Args:
        update_callback: Zero-argument callable forwarded to
            ``DatabaseChangeHandler``.

    Returns:
        The started ``Observer``; the caller is responsible for calling
        ``stop()`` and ``join()`` on it.
    """
    # A watch cannot be set up on a directory that does not exist yet,
    # so make sure the base directory is there first.
    os.makedirs(BASE_DB_PATH, exist_ok=True)

    event_handler = DatabaseChangeHandler(update_callback)
    observer = Observer()
    # Non-recursive: only the top level of BASE_DB_PATH is watched.
    observer.schedule(event_handler, BASE_DB_PATH, recursive=False)
    observer.start()
    return observer
ui/db_management_tab.py CHANGED
@@ -1,5 +1,4 @@
1
  import gradio as gr
2
- #from app.document_handling import create_database, modify_database, delete_database, list_databases
3
  from app.functions.database_handling import create_database, modify_database, delete_database, list_databases
4
 
5
  def create_db_management_tab(update_all_dropdowns=None):
@@ -70,3 +69,4 @@ def create_db_management_tab(update_all_dropdowns=None):
70
 
71
  # Ritorna i componenti che vogliamo poter aggiornare/agganciare
72
  return [modify_db_old_name, delete_db_dropdown, create_db_button, modify_db_button, delete_db_button]
 
 
1
  import gradio as gr
 
2
  from app.functions.database_handling import create_database, modify_database, delete_database, list_databases
3
 
4
  def create_db_management_tab(update_all_dropdowns=None):
 
69
 
70
  # Ritorna i componenti che vogliamo poter aggiornare/agganciare
71
  return [modify_db_old_name, delete_db_dropdown, create_db_button, modify_db_button, delete_db_button]
72
+
ui/document_management_tab.py CHANGED
@@ -3,9 +3,19 @@ import logging
3
  from app.document_handling import upload_and_index, list_indexed_files, delete_file_from_database
4
  from app.functions.database_handling import list_databases
5
 
6
- def create_document_management_tab(update_all_dropdowns=None):
7
  """Crea il tab 'Gestione Documenti' dell'interfaccia Gradio."""
8
 
 
 
 
 
 
 
 
 
 
 
9
  def upload_and_index_callback(files, title, author, db_name):
10
  """Carica e indicizza i documenti, quindi aggiorna la lista dei file."""
11
  try:
@@ -66,14 +76,13 @@ def create_document_management_tab(update_all_dropdowns=None):
66
  delete_file_button = gr.Button("Elimina File")
67
  delete_file_output = gr.Textbox(label="Stato Eliminazione")
68
 
69
- # Eventi
70
  upload_button.click(
71
  fn=upload_and_index_callback,
72
  inputs=[file_input, title_input, author_input, db_name_upload],
73
  outputs=upload_output
74
  ).then(
75
- fn=update_all_dropdowns, # <--- callback globale
76
- inputs=[],
77
  outputs=[db_name_upload, db_name_list]
78
  ).then(
79
  fn=list_files_callback,
@@ -92,14 +101,12 @@ def create_document_management_tab(update_all_dropdowns=None):
92
  inputs=[delete_file_input, db_name_list],
93
  outputs=delete_file_output
94
  ).then(
95
- fn=update_all_dropdowns, # <--- callback globale
96
- inputs=[],
97
  outputs=[db_name_upload, db_name_list]
98
  ).then(
99
  fn=list_files_callback,
100
  inputs=[db_name_list],
101
  outputs=list_output
102
  )
103
-
104
- # Ritorna i dropdown (e altri componenti, se servono) per poterli aggiornare
105
- return [db_name_upload, db_name_list, upload_button, list_button, delete_file_button]
 
3
  from app.document_handling import upload_and_index, list_indexed_files, delete_file_from_database
4
  from app.functions.database_handling import list_databases
5
 
6
+ def create_document_management_tab(update_all_dropdowns):
7
  """Crea il tab 'Gestione Documenti' dell'interfaccia Gradio."""
8
 
9
+ def refresh_dropdowns():
10
+ """Aggiorna localmente i dropdown con la lista aggiornata dei database."""
11
+ databases = list_databases()
12
+ logging.info(f"Aggiornamento dropdown con databases: {databases}")
13
+ updates = [
14
+ gr.update(choices=databases, value=databases[0] if databases else None),
15
+ gr.update(choices=databases, value=databases[0] if databases else None)
16
+ ]
17
+ return updates
18
+
19
  def upload_and_index_callback(files, title, author, db_name):
20
  """Carica e indicizza i documenti, quindi aggiorna la lista dei file."""
21
  try:
 
76
  delete_file_button = gr.Button("Elimina File")
77
  delete_file_output = gr.Textbox(label="Stato Eliminazione")
78
 
79
+ # Eventi modificati
80
  upload_button.click(
81
  fn=upload_and_index_callback,
82
  inputs=[file_input, title_input, author_input, db_name_upload],
83
  outputs=upload_output
84
  ).then(
85
+ fn=update_all_dropdowns, # Usa la funzione globale
 
86
  outputs=[db_name_upload, db_name_list]
87
  ).then(
88
  fn=list_files_callback,
 
101
  inputs=[delete_file_input, db_name_list],
102
  outputs=delete_file_output
103
  ).then(
104
+ fn=update_all_dropdowns, # Usa la funzione globale
 
105
  outputs=[db_name_upload, db_name_list]
106
  ).then(
107
  fn=list_files_callback,
108
  inputs=[db_name_list],
109
  outputs=list_output
110
  )
111
+
112
+ return [db_name_upload, db_name_list]
 
ui/document_view_tab.py DELETED
@@ -1,41 +0,0 @@
1
- # ui/document_view_tab.py
2
-
3
- import gradio as gr
4
- from app.document_handling import list_indexed_documents
5
- from app.functions.database_handling import list_databases
6
- def create_document_view_tab():
7
- """Crea il tab 'Visualizza Documenti Indicizzati' dell'interfaccia Gradio."""
8
-
9
- def list_docs_callback(db_name):
10
- """Elenca i documenti indicizzati nel database specificato."""
11
- documents = list_indexed_documents(db_name)
12
- return "\n".join(documents)
13
-
14
- # Ottieni la lista dei database
15
- databases = list_databases()
16
-
17
- with gr.Tab("Visualizza Documenti Indicizzati"):
18
- with gr.Column():
19
- gr.Markdown("### Documenti nel Database")
20
- db_name_list = gr.Dropdown(
21
- choices=databases,
22
- label="Seleziona Database",
23
- value="default_db",
24
- interactive=True
25
- )
26
- list_button = gr.Button("Visualizza Documenti")
27
- list_output = gr.Textbox(
28
- label="Elenco Documenti",
29
- lines=10,
30
- interactive=False,
31
- value="Clicca 'Visualizza Documenti' per vedere l'elenco"
32
- )
33
-
34
- list_button.click(
35
- fn=list_docs_callback,
36
- inputs=[db_name_list],
37
- outputs=[list_output],
38
- api_name="list_docs"
39
- )
40
-
41
- return