Nugh75 committed on
Commit
a45dfb0
·
1 Parent(s): b2638ec

mossi i database in db

Browse files
app/document_handling.py CHANGED
@@ -10,9 +10,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
  from dataclasses import dataclass
11
  import json
12
  from datetime import datetime
 
13
 
14
  # Initialize the text splitter
15
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
16
 
17
  # -------------- UTILITY FUNCTIONS --------------
18
  @dataclass
@@ -33,9 +34,15 @@ class DocumentMetadata:
33
  }
34
 
35
  def save_metadata(metadata_list, db_name):
36
- db_path = f"faiss_index_{db_name}"
 
 
37
  metadata_file = os.path.join(db_path, "metadata.json")
38
 
 
 
 
 
39
  existing_metadata = []
40
  if os.path.exists(metadata_file):
41
  with open(metadata_file, 'r') as f:
@@ -63,7 +70,7 @@ def extract_text_from_docx(file_path):
63
 
64
  # -------------- CHATBOT TAB FUNCTIONS --------------
65
  def answer_question(question, db_name="default_db"):
66
- db_path = f"faiss_index_{db_name}"
67
  if not os.path.exists(db_path):
68
  logging.warning(f"L'indice FAISS per il database {db_name} non esiste.")
69
  return "Database non trovato."
@@ -132,7 +139,7 @@ def upload_and_index(files, title, author, db_name="default_db"):
132
 
133
  if documents:
134
  try:
135
- db_path = f"faiss_index_{db_name}"
136
  os.makedirs(db_path, exist_ok=True)
137
 
138
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
@@ -154,7 +161,7 @@ def upload_and_index(files, title, author, db_name="default_db"):
154
  return "Nessun documento processato."
155
 
156
  def list_indexed_files(db_name="default_db"):
157
- db_path = f"faiss_index_{db_name}"
158
  metadata_file = os.path.join(db_path, "metadata.json")
159
 
160
  if not os.path.exists(metadata_file):
@@ -188,7 +195,7 @@ def delete_file_from_database(file_name, db_name="default_db"):
188
  da FAISS. Attualmente, la funzione gestisce un 'file_list.txt',
189
  ma devi adattarla alle tue esigenze di rimozione dei chunk.
190
  """
191
- db_path = f"faiss_index_{db_name}"
192
  file_list_path = os.path.join(db_path, "file_list.txt")
193
 
194
  if not os.path.exists(file_list_path):
@@ -213,7 +220,7 @@ def delete_file_from_database(file_name, db_name="default_db"):
213
 
214
  # -------------- DOCUMENT VISUALIZATION TAB FUNCTIONS --------------
215
  def list_indexed_documents(db_name="default_db"):
216
- db_path = f"faiss_index_{db_name}"
217
  metadata_file = os.path.join(db_path, "metadata.json")
218
 
219
  if not os.path.exists(db_path):
@@ -250,7 +257,7 @@ def list_indexed_documents(db_name="default_db"):
250
 
251
  # -------------- NEW FEATURES TAB FUNCTIONS --------------
252
  def search_documents(query, db_name="default_db"):
253
- db_path = f"faiss_index_{db_name}"
254
  if not os.path.exists(db_path):
255
  logging.warning(f"L'indice FAISS per il database '{db_name}' non esiste.")
256
  return "Database non trovato."
 
10
  from dataclasses import dataclass
11
  import json
12
  from datetime import datetime
13
+ from app.functions.database_handling import BASE_DB_PATH # Aggiungi questo import
14
 
15
  # Initialize the text splitter
16
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
17
 
18
  # -------------- UTILITY FUNCTIONS --------------
19
  @dataclass
 
34
  }
35
 
36
  def save_metadata(metadata_list, db_name):
37
+ """Salva i metadati nel percorso corretto."""
38
+ # Usa il percorso base corretto
39
+ db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
40
  metadata_file = os.path.join(db_path, "metadata.json")
41
 
42
+ # Crea la directory se non esiste
43
+ if not os.path.exists(db_path):
44
+ os.makedirs(db_path)
45
+
46
  existing_metadata = []
47
  if os.path.exists(metadata_file):
48
  with open(metadata_file, 'r') as f:
 
70
 
71
  # -------------- CHATBOT TAB FUNCTIONS --------------
72
  def answer_question(question, db_name="default_db"):
73
+ db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
74
  if not os.path.exists(db_path):
75
  logging.warning(f"L'indice FAISS per il database {db_name} non esiste.")
76
  return "Database non trovato."
 
139
 
140
  if documents:
141
  try:
142
+ db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
143
  os.makedirs(db_path, exist_ok=True)
144
 
145
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
161
  return "Nessun documento processato."
162
 
163
  def list_indexed_files(db_name="default_db"):
164
+ db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
165
  metadata_file = os.path.join(db_path, "metadata.json")
166
 
167
  if not os.path.exists(metadata_file):
 
195
  da FAISS. Attualmente, la funzione gestisce un 'file_list.txt',
196
  ma devi adattarla alle tue esigenze di rimozione dei chunk.
197
  """
198
+ db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
199
  file_list_path = os.path.join(db_path, "file_list.txt")
200
 
201
  if not os.path.exists(file_list_path):
 
220
 
221
  # -------------- DOCUMENT VISUALIZATION TAB FUNCTIONS --------------
222
  def list_indexed_documents(db_name="default_db"):
223
+ db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
224
  metadata_file = os.path.join(db_path, "metadata.json")
225
 
226
  if not os.path.exists(db_path):
 
257
 
258
  # -------------- NEW FEATURES TAB FUNCTIONS --------------
259
  def search_documents(query, db_name="default_db"):
260
+ db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
261
  if not os.path.exists(db_path):
262
  logging.warning(f"L'indice FAISS per il database '{db_name}' non esiste.")
263
  return "Database non trovato."
app/functions/database_handling.py CHANGED
@@ -2,10 +2,17 @@ import logging
2
  import os
3
  import shutil
4
 
 
 
 
 
 
 
 
5
  # -------------- DATABASE MANAGEMENT TAB FUNCTIONS --------------
6
  def create_database(db_name):
7
  logging.info(f"Creating database: {db_name}")
8
- db_path = f"faiss_index_{db_name}"
9
 
10
  if os.path.exists(db_path):
11
  return f"Il database '{db_name}' esiste già."
@@ -19,7 +26,7 @@ def create_database(db_name):
19
  return f"Errore nella creazione del database: {e}"
20
 
21
  def delete_database(db_name):
22
- db_path = f"faiss_index_{db_name}"
23
  if not os.path.exists(db_path):
24
  return f"Il database '{db_name}' non esiste."
25
  try:
@@ -31,8 +38,8 @@ def delete_database(db_name):
31
  return f"Impossibile eliminare il database '{db_name}': {e}"
32
 
33
  def modify_database(old_db_name, new_db_name):
34
- old_db_path = f"faiss_index_{old_db_name}"
35
- new_db_path = f"faiss_index_{new_db_name}"
36
  if not os.path.exists(old_db_path):
37
  return f"Il database '{old_db_name}' non esiste."
38
  if os.path.exists(new_db_path):
@@ -46,8 +53,8 @@ def modify_database(old_db_name, new_db_name):
46
  def list_databases():
47
  try:
48
  databases = []
49
- for item in os.listdir():
50
- if os.path.isdir(item) and item.startswith("faiss_index_"):
51
  db_name = item.replace("faiss_index_", "")
52
  databases.append(db_name)
53
  # Ensure "default_db" is in the list
 
2
  import os
3
  import shutil
4
 
5
+ # Definisci il percorso base per i database
6
+ BASE_DB_PATH = "db"
7
+
8
+ # Crea la cartella db se non esiste
9
+ if not os.path.exists(BASE_DB_PATH):
10
+ os.makedirs(BASE_DB_PATH)
11
+
12
  # -------------- DATABASE MANAGEMENT TAB FUNCTIONS --------------
13
  def create_database(db_name):
14
  logging.info(f"Creating database: {db_name}")
15
+ db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
16
 
17
  if os.path.exists(db_path):
18
  return f"Il database '{db_name}' esiste già."
 
26
  return f"Errore nella creazione del database: {e}"
27
 
28
  def delete_database(db_name):
29
+ db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
30
  if not os.path.exists(db_path):
31
  return f"Il database '{db_name}' non esiste."
32
  try:
 
38
  return f"Impossibile eliminare il database '{db_name}': {e}"
39
 
40
  def modify_database(old_db_name, new_db_name):
41
+ old_db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{old_db_name}")
42
+ new_db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{new_db_name}")
43
  if not os.path.exists(old_db_path):
44
  return f"Il database '{old_db_name}' non esiste."
45
  if os.path.exists(new_db_path):
 
53
  def list_databases():
54
  try:
55
  databases = []
56
+ for item in os.listdir(BASE_DB_PATH):
57
+ if os.path.isdir(os.path.join(BASE_DB_PATH, item)) and item.startswith("faiss_index_"):
58
  db_name = item.replace("faiss_index_", "")
59
  databases.append(db_name)
60
  # Ensure "default_db" is in the list
app/llm_handling.py CHANGED
@@ -8,9 +8,7 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
8
  import gradio as gr
9
 
10
  from app.config import OPENAI_API_KEY
11
- # Se hai funzioni per gestire i database (list_databases, ensure_default_db, ecc.),
12
- # importale dal modulo corretto:
13
- # from app.document_handling import list_databases, ensure_default_db
14
 
15
  logging.basicConfig(level=logging.INFO)
16
 
@@ -33,13 +31,14 @@ def answer_question(question, db_name, chat_history=None):
33
 
34
  try:
35
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
36
- db_path = f"faiss_index_{db_name}"
37
 
 
38
  if not os.path.exists(db_path):
39
- logging.warning(f"Database {db_name} non trovato.")
40
  return [
41
  {"role": "user", "content": question},
42
- {"role": "assistant", "content": "Database non trovato"}
43
  ]
44
 
45
  # Carica l'indice FAISS
@@ -92,7 +91,7 @@ def delete_database(db_name):
92
  Cancella il database FAISS corrispondente a 'db_name'.
93
  Restituisce un messaggio di stato e l'aggiornamento del dropdown in Gradio.
94
  """
95
- db_path = f"faiss_index_{db_name}"
96
  if not os.path.exists(db_path):
97
  return f"Il database {db_name} non esiste.", gr.Dropdown.update(choices=[])
98
  try:
 
8
  import gradio as gr
9
 
10
  from app.config import OPENAI_API_KEY
11
+ from app.functions.database_handling import BASE_DB_PATH # Aggiungi questo import
 
 
12
 
13
  logging.basicConfig(level=logging.INFO)
14
 
 
31
 
32
  try:
33
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
34
+ db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Percorso corretto
35
 
36
+ logging.info(f"Verifico esistenza database in: {db_path}")
37
  if not os.path.exists(db_path):
38
+ logging.warning(f"Database {db_name} non trovato in {db_path}")
39
  return [
40
  {"role": "user", "content": question},
41
+ {"role": "assistant", "content": f"Database non trovato in {db_path}"}
42
  ]
43
 
44
  # Carica l'indice FAISS
 
91
  Cancella il database FAISS corrispondente a 'db_name'.
92
  Restituisce un messaggio di stato e l'aggiornamento del dropdown in Gradio.
93
  """
94
+ db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Percorso corretto
95
  if not os.path.exists(db_path):
96
  return f"Il database {db_name} non esiste.", gr.Dropdown.update(choices=[])
97
  try:
{faiss_index → db/faiss_index}/index.faiss RENAMED
File without changes
{faiss_index → db/faiss_index}/index.pkl RENAMED
File without changes
db/faiss_index_Daniele2/index.faiss ADDED
Binary file (3.12 kB). View file
 
{faiss_index_E-learning → db/faiss_index_Daniele2}/index.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0ec4d3c22f17861b941c079acdf82d250fdafd351e9b05ab3877110a3bbdade
3
- size 25352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e79bcca55b5153ea71218a3d2204c01ec1eccf59162fd4547d19956a4750d04e
3
+ size 2958
db/faiss_index_Daniele2/metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "filename": "istruzioni obiettivi di apprendimento.pdf",
4
+ "title": "Obiettivi di apprendimento",
5
+ "author": "Daniele",
6
+ "upload_date": "2025-01-02 15:14:19",
7
+ "chunks": 2
8
+ }
9
+ ]
{faiss_index_default_db → db/faiss_index_default_db}/index.faiss RENAMED
File without changes
{faiss_index_default_db → db/faiss_index_default_db}/index.pkl RENAMED
File without changes
faiss_index_E-learning/index.faiss DELETED
Binary file (66.1 kB)
 
faiss_index_E-learning/metadata.json DELETED
@@ -1,16 +0,0 @@
1
- [
2
- {
3
- "filename": "istruzioni obiettivi di apprendimento.pdf",
4
- "title": "Obiettivi",
5
- "author": "Daniele",
6
- "upload_date": "2024-12-31 19:21:10",
7
- "chunks": 6
8
- },
9
- {
10
- "filename": "mastery_Bloom.pdf",
11
- "title": "Mastery Learingi",
12
- "author": "Bloom",
13
- "upload_date": "2024-12-31 20:25:00",
14
- "chunks": 43
15
- }
16
- ]