update llm handling
app/llm_handling.py  CHANGED  +17 -19
@@ -12,12 +12,13 @@ import edge_tts
 from pathlib import Path
 import requests
 from tenacity import retry, stop_after_attempt, wait_exponential
-import json
 
 from app.config import OPENAI_API_KEY
 from app.functions.database_handling import BASE_DB_PATH  # Add this import
 from app.configs.prompts import SYSTEM_PROMPTS
 
+import json  # Import json first if it is not already imported
+
 logging.basicConfig(level=logging.INFO)
 local_ip="192.168.82.5:1234"
 
@@ -87,7 +88,7 @@ def clean_markdown(text):
     text = re.sub(r'```[\s\S]*?```', '', text)  # code blocks
     text = re.sub(r'`.*?`', '', text)  # inline code
     text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # links
-    text = re.sub(r'\*\*(.*?)
+    text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)  # bold
     text = re.sub(r'\*(.*?)\*', r'\1', text)  # italic
     return text.strip()
 
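The one-line change above repairs the bold-stripping pattern, whose previous version was cut off before the closing \*\* and the replacement argument. A minimal standalone sketch of the function as it reads after this commit; the import and the sample call are added here for illustration and are not part of the diff:

    import re

    def clean_markdown(text):
        text = re.sub(r'```[\s\S]*?```', '', text)             # code blocks
        text = re.sub(r'`.*?`', '', text)                      # inline code
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # links
        text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)           # bold (the fixed line)
        text = re.sub(r'\*(.*?)\*', r'\1', text)               # italic
        return text.strip()

    print(clean_markdown("**Bold**, *italic*, `code`, [link](http://x.io)"))
    # -> Bold, italic, , link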
@@ -142,6 +143,13 @@ def test_local_connection():
     except:
         return False
 
+def read_metadata(db_path):
+    metadata_file = os.path.join(db_path, "metadata.json")
+    if os.path.exists(metadata_file):
+        with open(metadata_file, 'r') as f:
+            return json.load(f)
+    return []
+
 @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
 def answer_question(question, db_name, prompt_type="tutor", chat_history=None, llm_type=LLMType.OPENAI_GPT_4O_MINI):
     """
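A hedged usage sketch of the new read_metadata helper. The shape of metadata.json (a list of objects with filename, title, author, and upload_date fields) is inferred from the lookups elsewhere in this diff, not confirmed beyond it; the folder name and values below are hypothetical, created only for the demo:

    import json
    import os

    def read_metadata(db_path):
        # Return the parsed metadata list, or [] when the file is absent
        metadata_file = os.path.join(db_path, "metadata.json")
        if os.path.exists(metadata_file):
            with open(metadata_file, 'r') as f:
                return json.load(f)
        return []

    # Hypothetical database folder, written only for this illustration
    os.makedirs("demo_db", exist_ok=True)
    with open(os.path.join("demo_db", "metadata.json"), "w") as f:
        json.dump([{"filename": "doc1.pdf", "title": "Sample", "author": "Unknown",
                    "upload_date": "2024-01-01"}], f)

    print(read_metadata("demo_db"))      # -> the list written above
    print(read_metadata("no_such_db"))   # -> []

As a side note on the unchanged decorator that follows the helper: stop_after_attempt(3) with wait_exponential(multiplier=1, min=4, max=10) makes answer_question retry up to three attempts, with exponential backoff clamped between 4 and 10 seconds.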
@@ -178,31 +186,21 @@ def answer_question(question, db_name, prompt_type="tutor", chat_history=None, l
     # Search for the most similar documents (chunks)
     relevant_docs = vectorstore.similarity_search(question, k=5)
 
-
-
-    metadata_dict = {}
-    if os.path.exists(metadata_file):
-        with open(metadata_file, 'r') as f:
-            metadata_list = json.load(f)
-        # Create a dictionary for fast lookup, keyed by filename
-        metadata_dict = {m["filename"]: m for m in metadata_list}
+    metadata_list = read_metadata(db_path)
+    metadata_dict = {m["filename"]: m for m in metadata_list}
 
-    # Log the retrieved chunks with
+    # Log the retrieved chunks with metadata
     for idx, doc in enumerate(relevant_docs):
         logging.info(f"--- Chunk {idx+1} ---")
-        # Retrieve the metadata from the document
         source_file = doc.metadata.get("source", "Unknown")
-        chunk_info = f"File: {source_file}"
 
-        #
+        # Retrieve the metadata from the json file
         if source_file in metadata_dict:
             file_metadata = metadata_dict[source_file]
-
-
-            chunk_info += f"\nData caricamento: {file_metadata['upload_date']}"
+            logging.info(f"📚 Titolo: {file_metadata['title']}")
+            logging.info(f"✍️ Autore: {file_metadata['author']}")
 
-        logging.info(
-        logging.info("Contenuto:")
+        logging.info(f"📄 Contenuto:")
         logging.info(doc.page_content)
         logging.info("---------------------")
 
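The deleted block parsed metadata.json inline; the replacement routes that through the new read_metadata helper and logs title and author for each chunk whose source filename appears in the metadata. A minimal sketch of that lookup, using stand-ins (the Doc class, filenames, and metadata values are hypothetical; in the app, relevant_docs comes from vectorstore.similarity_search and metadata_list from read_metadata(db_path)):

    import logging

    logging.basicConfig(level=logging.INFO)

    # Stand-ins for read_metadata(db_path) and the similarity_search results
    metadata_list = [{"filename": "doc1.pdf", "title": "Sample", "author": "Unknown"}]
    metadata_dict = {m["filename"]: m for m in metadata_list}

    class Doc:
        # Minimal stand-in for a retrieved document with .metadata and .page_content
        def __init__(self, source, content):
            self.metadata = {"source": source}
            self.page_content = content

    relevant_docs = [Doc("doc1.pdf", "first chunk"), Doc("doc2.pdf", "second chunk")]

    for idx, doc in enumerate(relevant_docs):
        logging.info(f"--- Chunk {idx+1} ---")
        source_file = doc.metadata.get("source", "Unknown")
        if source_file in metadata_dict:  # doc2.pdf has no metadata and is skipped silently
            file_metadata = metadata_dict[source_file]
            logging.info(f"📚 Titolo: {file_metadata['title']}")
            logging.info(f"✍️ Autore: {file_metadata['author']}")
        logging.info(doc.page_content)
        logging.info("---------------------")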