Spaces:
Sleeping
Sleeping
mossi i database in db
Browse files- app/document_handling.py +15 -8
- app/functions/database_handling.py +13 -6
- app/llm_handling.py +6 -7
- {faiss_index → db/faiss_index}/index.faiss +0 -0
- {faiss_index → db/faiss_index}/index.pkl +0 -0
- db/faiss_index_Daniele2/index.faiss +0 -0
- {faiss_index_E-learning → db/faiss_index_Daniele2}/index.pkl +2 -2
- db/faiss_index_Daniele2/metadata.json +9 -0
- {faiss_index_default_db → db/faiss_index_default_db}/index.faiss +0 -0
- {faiss_index_default_db → db/faiss_index_default_db}/index.pkl +0 -0
- faiss_index_E-learning/index.faiss +0 -0
- faiss_index_E-learning/metadata.json +0 -16
app/document_handling.py
CHANGED
@@ -10,9 +10,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
10 |
from dataclasses import dataclass
|
11 |
import json
|
12 |
from datetime import datetime
|
|
|
13 |
|
14 |
# Initialize the text splitter
|
15 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=
|
16 |
|
17 |
# -------------- UTILITY FUNCTIONS --------------
|
18 |
@dataclass
|
@@ -33,9 +34,15 @@ class DocumentMetadata:
|
|
33 |
}
|
34 |
|
35 |
def save_metadata(metadata_list, db_name):
|
36 |
-
|
|
|
|
|
37 |
metadata_file = os.path.join(db_path, "metadata.json")
|
38 |
|
|
|
|
|
|
|
|
|
39 |
existing_metadata = []
|
40 |
if os.path.exists(metadata_file):
|
41 |
with open(metadata_file, 'r') as f:
|
@@ -63,7 +70,7 @@ def extract_text_from_docx(file_path):
|
|
63 |
|
64 |
# -------------- CHATBOT TAB FUNCTIONS --------------
|
65 |
def answer_question(question, db_name="default_db"):
|
66 |
-
db_path = f"faiss_index_{db_name}"
|
67 |
if not os.path.exists(db_path):
|
68 |
logging.warning(f"L'indice FAISS per il database {db_name} non esiste.")
|
69 |
return "Database non trovato."
|
@@ -132,7 +139,7 @@ def upload_and_index(files, title, author, db_name="default_db"):
|
|
132 |
|
133 |
if documents:
|
134 |
try:
|
135 |
-
db_path = f"faiss_index_{db_name}"
|
136 |
os.makedirs(db_path, exist_ok=True)
|
137 |
|
138 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
@@ -154,7 +161,7 @@ def upload_and_index(files, title, author, db_name="default_db"):
|
|
154 |
return "Nessun documento processato."
|
155 |
|
156 |
def list_indexed_files(db_name="default_db"):
|
157 |
-
db_path = f"faiss_index_{db_name}"
|
158 |
metadata_file = os.path.join(db_path, "metadata.json")
|
159 |
|
160 |
if not os.path.exists(metadata_file):
|
@@ -188,7 +195,7 @@ def delete_file_from_database(file_name, db_name="default_db"):
|
|
188 |
da FAISS. Attualmente, la funzione gestisce un 'file_list.txt',
|
189 |
ma devi adattarla alle tue esigenze di rimozione dei chunk.
|
190 |
"""
|
191 |
-
db_path = f"faiss_index_{db_name}"
|
192 |
file_list_path = os.path.join(db_path, "file_list.txt")
|
193 |
|
194 |
if not os.path.exists(file_list_path):
|
@@ -213,7 +220,7 @@ def delete_file_from_database(file_name, db_name="default_db"):
|
|
213 |
|
214 |
# -------------- DOCUMENT VISUALIZATION TAB FUNCTIONS --------------
|
215 |
def list_indexed_documents(db_name="default_db"):
|
216 |
-
db_path = f"faiss_index_{db_name}"
|
217 |
metadata_file = os.path.join(db_path, "metadata.json")
|
218 |
|
219 |
if not os.path.exists(db_path):
|
@@ -250,7 +257,7 @@ def list_indexed_documents(db_name="default_db"):
|
|
250 |
|
251 |
# -------------- NEW FEATURES TAB FUNCTIONS --------------
|
252 |
def search_documents(query, db_name="default_db"):
|
253 |
-
db_path = f"faiss_index_{db_name}"
|
254 |
if not os.path.exists(db_path):
|
255 |
logging.warning(f"L'indice FAISS per il database '{db_name}' non esiste.")
|
256 |
return "Database non trovato."
|
|
|
10 |
from dataclasses import dataclass
|
11 |
import json
|
12 |
from datetime import datetime
|
13 |
+
from app.functions.database_handling import BASE_DB_PATH # Aggiungi questo import
|
14 |
|
15 |
# Initialize the text splitter
|
16 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
|
17 |
|
18 |
# -------------- UTILITY FUNCTIONS --------------
|
19 |
@dataclass
|
|
|
34 |
}
|
35 |
|
36 |
def save_metadata(metadata_list, db_name):
|
37 |
+
"""Salva i metadati nel percorso corretto."""
|
38 |
+
# Usa il percorso base corretto
|
39 |
+
db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
|
40 |
metadata_file = os.path.join(db_path, "metadata.json")
|
41 |
|
42 |
+
# Crea la directory se non esiste
|
43 |
+
if not os.path.exists(db_path):
|
44 |
+
os.makedirs(db_path)
|
45 |
+
|
46 |
existing_metadata = []
|
47 |
if os.path.exists(metadata_file):
|
48 |
with open(metadata_file, 'r') as f:
|
|
|
70 |
|
71 |
# -------------- CHATBOT TAB FUNCTIONS --------------
|
72 |
def answer_question(question, db_name="default_db"):
|
73 |
+
db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
|
74 |
if not os.path.exists(db_path):
|
75 |
logging.warning(f"L'indice FAISS per il database {db_name} non esiste.")
|
76 |
return "Database non trovato."
|
|
|
139 |
|
140 |
if documents:
|
141 |
try:
|
142 |
+
db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
|
143 |
os.makedirs(db_path, exist_ok=True)
|
144 |
|
145 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
|
|
161 |
return "Nessun documento processato."
|
162 |
|
163 |
def list_indexed_files(db_name="default_db"):
|
164 |
+
db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
|
165 |
metadata_file = os.path.join(db_path, "metadata.json")
|
166 |
|
167 |
if not os.path.exists(metadata_file):
|
|
|
195 |
da FAISS. Attualmente, la funzione gestisce un 'file_list.txt',
|
196 |
ma devi adattarla alle tue esigenze di rimozione dei chunk.
|
197 |
"""
|
198 |
+
db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
|
199 |
file_list_path = os.path.join(db_path, "file_list.txt")
|
200 |
|
201 |
if not os.path.exists(file_list_path):
|
|
|
220 |
|
221 |
# -------------- DOCUMENT VISUALIZATION TAB FUNCTIONS --------------
|
222 |
def list_indexed_documents(db_name="default_db"):
|
223 |
+
db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
|
224 |
metadata_file = os.path.join(db_path, "metadata.json")
|
225 |
|
226 |
if not os.path.exists(db_path):
|
|
|
257 |
|
258 |
# -------------- NEW FEATURES TAB FUNCTIONS --------------
|
259 |
def search_documents(query, db_name="default_db"):
|
260 |
+
db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Modifica qui
|
261 |
if not os.path.exists(db_path):
|
262 |
logging.warning(f"L'indice FAISS per il database '{db_name}' non esiste.")
|
263 |
return "Database non trovato."
|
app/functions/database_handling.py
CHANGED
@@ -2,10 +2,17 @@ import logging
|
|
2 |
import os
|
3 |
import shutil
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
# -------------- DATABASE MANAGEMENT TAB FUNCTIONS --------------
|
6 |
def create_database(db_name):
|
7 |
logging.info(f"Creating database: {db_name}")
|
8 |
-
db_path = f"faiss_index_{db_name}"
|
9 |
|
10 |
if os.path.exists(db_path):
|
11 |
return f"Il database '{db_name}' esiste già."
|
@@ -19,7 +26,7 @@ def create_database(db_name):
|
|
19 |
return f"Errore nella creazione del database: {e}"
|
20 |
|
21 |
def delete_database(db_name):
|
22 |
-
db_path = f"faiss_index_{db_name}"
|
23 |
if not os.path.exists(db_path):
|
24 |
return f"Il database '{db_name}' non esiste."
|
25 |
try:
|
@@ -31,8 +38,8 @@ def delete_database(db_name):
|
|
31 |
return f"Impossibile eliminare il database '{db_name}': {e}"
|
32 |
|
33 |
def modify_database(old_db_name, new_db_name):
|
34 |
-
old_db_path = f"faiss_index_{old_db_name}"
|
35 |
-
new_db_path = f"faiss_index_{new_db_name}"
|
36 |
if not os.path.exists(old_db_path):
|
37 |
return f"Il database '{old_db_name}' non esiste."
|
38 |
if os.path.exists(new_db_path):
|
@@ -46,8 +53,8 @@ def modify_database(old_db_name, new_db_name):
|
|
46 |
def list_databases():
|
47 |
try:
|
48 |
databases = []
|
49 |
-
for item in os.listdir():
|
50 |
-
if os.path.isdir(item) and item.startswith("faiss_index_"):
|
51 |
db_name = item.replace("faiss_index_", "")
|
52 |
databases.append(db_name)
|
53 |
# Ensure "default_db" is in the list
|
|
|
2 |
import os
|
3 |
import shutil
|
4 |
|
5 |
+
# Definisci il percorso base per i database
|
6 |
+
BASE_DB_PATH = "db"
|
7 |
+
|
8 |
+
# Crea la cartella db se non esiste
|
9 |
+
if not os.path.exists(BASE_DB_PATH):
|
10 |
+
os.makedirs(BASE_DB_PATH)
|
11 |
+
|
12 |
# -------------- DATABASE MANAGEMENT TAB FUNCTIONS --------------
|
13 |
def create_database(db_name):
|
14 |
logging.info(f"Creating database: {db_name}")
|
15 |
+
db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
|
16 |
|
17 |
if os.path.exists(db_path):
|
18 |
return f"Il database '{db_name}' esiste già."
|
|
|
26 |
return f"Errore nella creazione del database: {e}"
|
27 |
|
28 |
def delete_database(db_name):
|
29 |
+
db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
|
30 |
if not os.path.exists(db_path):
|
31 |
return f"Il database '{db_name}' non esiste."
|
32 |
try:
|
|
|
38 |
return f"Impossibile eliminare il database '{db_name}': {e}"
|
39 |
|
40 |
def modify_database(old_db_name, new_db_name):
|
41 |
+
old_db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{old_db_name}")
|
42 |
+
new_db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{new_db_name}")
|
43 |
if not os.path.exists(old_db_path):
|
44 |
return f"Il database '{old_db_name}' non esiste."
|
45 |
if os.path.exists(new_db_path):
|
|
|
53 |
def list_databases():
|
54 |
try:
|
55 |
databases = []
|
56 |
+
for item in os.listdir(BASE_DB_PATH):
|
57 |
+
if os.path.isdir(os.path.join(BASE_DB_PATH, item)) and item.startswith("faiss_index_"):
|
58 |
db_name = item.replace("faiss_index_", "")
|
59 |
databases.append(db_name)
|
60 |
# Ensure "default_db" is in the list
|
app/llm_handling.py
CHANGED
@@ -8,9 +8,7 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
8 |
import gradio as gr
|
9 |
|
10 |
from app.config import OPENAI_API_KEY
|
11 |
-
|
12 |
-
# importale dal modulo corretto:
|
13 |
-
# from app.document_handling import list_databases, ensure_default_db
|
14 |
|
15 |
logging.basicConfig(level=logging.INFO)
|
16 |
|
@@ -33,13 +31,14 @@ def answer_question(question, db_name, chat_history=None):
|
|
33 |
|
34 |
try:
|
35 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
36 |
-
db_path = f"faiss_index_{db_name}"
|
37 |
|
|
|
38 |
if not os.path.exists(db_path):
|
39 |
-
logging.warning(f"Database {db_name} non trovato
|
40 |
return [
|
41 |
{"role": "user", "content": question},
|
42 |
-
{"role": "assistant", "content": "Database non trovato"}
|
43 |
]
|
44 |
|
45 |
# Carica l'indice FAISS
|
@@ -92,7 +91,7 @@ def delete_database(db_name):
|
|
92 |
Cancella il database FAISS corrispondente a 'db_name'.
|
93 |
Restituisce un messaggio di stato e l'aggiornamento del dropdown in Gradio.
|
94 |
"""
|
95 |
-
db_path = f"faiss_index_{db_name}"
|
96 |
if not os.path.exists(db_path):
|
97 |
return f"Il database {db_name} non esiste.", gr.Dropdown.update(choices=[])
|
98 |
try:
|
|
|
8 |
import gradio as gr
|
9 |
|
10 |
from app.config import OPENAI_API_KEY
|
11 |
+
from app.functions.database_handling import BASE_DB_PATH # Aggiungi questo import
|
|
|
|
|
12 |
|
13 |
logging.basicConfig(level=logging.INFO)
|
14 |
|
|
|
31 |
|
32 |
try:
|
33 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
34 |
+
db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Percorso corretto
|
35 |
|
36 |
+
logging.info(f"Verifico esistenza database in: {db_path}")
|
37 |
if not os.path.exists(db_path):
|
38 |
+
logging.warning(f"Database {db_name} non trovato in {db_path}")
|
39 |
return [
|
40 |
{"role": "user", "content": question},
|
41 |
+
{"role": "assistant", "content": f"Database non trovato in {db_path}"}
|
42 |
]
|
43 |
|
44 |
# Carica l'indice FAISS
|
|
|
91 |
Cancella il database FAISS corrispondente a 'db_name'.
|
92 |
Restituisce un messaggio di stato e l'aggiornamento del dropdown in Gradio.
|
93 |
"""
|
94 |
+
db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}") # Percorso corretto
|
95 |
if not os.path.exists(db_path):
|
96 |
return f"Il database {db_name} non esiste.", gr.Dropdown.update(choices=[])
|
97 |
try:
|
{faiss_index → db/faiss_index}/index.faiss
RENAMED
File without changes
|
{faiss_index → db/faiss_index}/index.pkl
RENAMED
File without changes
|
db/faiss_index_Daniele2/index.faiss
ADDED
Binary file (3.12 kB). View file
|
|
{faiss_index_E-learning → db/faiss_index_Daniele2}/index.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e79bcca55b5153ea71218a3d2204c01ec1eccf59162fd4547d19956a4750d04e
|
3 |
+
size 2958
|
db/faiss_index_Daniele2/metadata.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"filename": "istruzioni obiettivi di apprendimento.pdf",
|
4 |
+
"title": "Obiettivi di apprendimento",
|
5 |
+
"author": "Daniele",
|
6 |
+
"upload_date": "2025-01-02 15:14:19",
|
7 |
+
"chunks": 2
|
8 |
+
}
|
9 |
+
]
|
{faiss_index_default_db → db/faiss_index_default_db}/index.faiss
RENAMED
File without changes
|
{faiss_index_default_db → db/faiss_index_default_db}/index.pkl
RENAMED
File without changes
|
faiss_index_E-learning/index.faiss
DELETED
Binary file (66.1 kB)
|
|
faiss_index_E-learning/metadata.json
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"filename": "istruzioni obiettivi di apprendimento.pdf",
|
4 |
-
"title": "Obiettivi",
|
5 |
-
"author": "Daniele",
|
6 |
-
"upload_date": "2024-12-31 19:21:10",
|
7 |
-
"chunks": 6
|
8 |
-
},
|
9 |
-
{
|
10 |
-
"filename": "mastery_Bloom.pdf",
|
11 |
-
"title": "Mastery Learingi",
|
12 |
-
"author": "Bloom",
|
13 |
-
"upload_date": "2024-12-31 20:25:00",
|
14 |
-
"chunks": 43
|
15 |
-
}
|
16 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|