Nugh75 committed on
Commit 2a36d42 · 1 Parent(s): 754b268

update llm handling

Files changed (1)
  1. app/llm_handling.py +17 -19
app/llm_handling.py CHANGED
@@ -12,12 +12,13 @@ import edge_tts
 from pathlib import Path
 import requests
 from tenacity import retry, stop_after_attempt, wait_exponential
-import json
 
 from app.config import OPENAI_API_KEY
 from app.functions.database_handling import BASE_DB_PATH  # Add this import
 from app.configs.prompts import SYSTEM_PROMPTS
 
+import json  # Import json first, if it is not already imported
+
 logging.basicConfig(level=logging.INFO)
 local_ip="192.168.82.5:1234"
 
@@ -87,7 +88,7 @@ def clean_markdown(text):
     text = re.sub(r'```[\s\S]*?```', '', text)  # code blocks
     text = re.sub(r'`.*?`', '', text)  # inline code
     text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # links
-    text = re.sub(r'\*\*(.*?)\*\*\*', r'\1', text)  # bold
+    text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)  # bold
     text = re.sub(r'\*(.*?)\*', r'\1', text)  # italic
     return text.strip()
 
@@ -142,6 +143,13 @@ def test_local_connection():
     except:
         return False
 
+def read_metadata(db_path):
+    metadata_file = os.path.join(db_path, "metadata.json")
+    if os.path.exists(metadata_file):
+        with open(metadata_file, 'r') as f:
+            return json.load(f)
+    return []
+
 @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
 def answer_question(question, db_name, prompt_type="tutor", chat_history=None, llm_type=LLMType.OPENAI_GPT_4O_MINI):
     """
@@ -178,31 +186,21 @@ def answer_question(question, db_name, prompt_type="tutor", chat_history=None, l
     # Search for the most similar documents (chunks)
     relevant_docs = vectorstore.similarity_search(question, k=5)
 
-    # Read the metadata.json file
-    metadata_file = os.path.join(db_path, "metadata.json")
-    metadata_dict = {}
-    if os.path.exists(metadata_file):
-        with open(metadata_file, 'r') as f:
-            metadata_list = json.load(f)
-        # Build a dictionary for fast lookups, keyed by filename
-        metadata_dict = {m["filename"]: m for m in metadata_list}
+    metadata_list = read_metadata(db_path)
+    metadata_dict = {m["filename"]: m for m in metadata_list}
 
-    # Log the retrieved chunks with their metadata
+    # Log the retrieved chunks with metadata
     for idx, doc in enumerate(relevant_docs):
         logging.info(f"--- Chunk {idx+1} ---")
-        # Retrieve the metadata from the document
         source_file = doc.metadata.get("source", "Unknown")
-        chunk_info = f"File: {source_file}"
 
-        # Add information from metadata.json when available
+        # Retrieve the metadata from the json file
         if source_file in metadata_dict:
             file_metadata = metadata_dict[source_file]
-            chunk_info += f"\nTitolo: {file_metadata['title']}"
-            chunk_info += f"\nAutore: {file_metadata['author']}"
-            chunk_info += f"\nData caricamento: {file_metadata['upload_date']}"
+            logging.info(f"📚 Titolo: {file_metadata['title']}")
+            logging.info(f"✍️ Autore: {file_metadata['author']}")
 
-        logging.info(chunk_info)
-        logging.info("Contenuto:")
+        logging.info(f"📄 Contenuto:")
         logging.info(doc.page_content)
         logging.info("---------------------")
 
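Note on the clean_markdown hunk above: the old bold pattern r'\*\*(.*?)\*\*\*' required a stray third asterisk after the closing delimiter, so ordinary **bold** spans slipped through; the corrected r'\*\*(.*?)\*\*' strips them. A minimal check, using a made-up sample string rather than anything from the repo:

import re

sample = "Read the **bold** and *italic* parts"
no_bold = re.sub(r'\*\*(.*?)\*\*', r'\1', sample)   # -> "Read the bold and *italic* parts"
no_markup = re.sub(r'\*(.*?)\*', r'\1', no_bold)    # -> "Read the bold and italic parts"
print(no_markup)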
 
 
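Sketch of how the new read_metadata helper and the filename-keyed lookup fit together, assuming metadata.json holds a list of objects with filename, title, author and upload_date keys (as the removed inline code suggests); the db_path and sample record below are illustrative only:

import json
import os

def read_metadata(db_path):
    # Same helper as in the diff: return the parsed list, or [] when the file is absent.
    metadata_file = os.path.join(db_path, "metadata.json")
    if os.path.exists(metadata_file):
        with open(metadata_file, 'r') as f:
            return json.load(f)
    return []

db_path = "/tmp/example_db"  # illustrative; the app builds its real db_path elsewhere, presumably from BASE_DB_PATH and db_name
os.makedirs(db_path, exist_ok=True)
with open(os.path.join(db_path, "metadata.json"), "w") as f:
    json.dump([{"filename": "paper.pdf", "title": "Example", "author": "A. Author",
                "upload_date": "2024-01-01"}], f)

metadata_dict = {m["filename"]: m for m in read_metadata(db_path)}
print(metadata_dict["paper.pdf"]["title"])  # -> Example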
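For completeness, the @retry decorator on answer_question is plain tenacity; a standalone sketch of the same policy applied to a dummy function (flaky_call is invented for illustration):

from tenacity import retry, stop_after_attempt, wait_exponential

attempts = {"n": 0}

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def flaky_call():
    # Fails twice, then succeeds; tenacity retries with exponential backoff clamped
    # to 4-10 seconds and gives up after three attempts.
    attempts["n"] += 1
    if attempts["n"] < 3:
        raise ConnectionError("temporary failure")
    return "ok"

print(flaky_call())  # retried transparently, prints "ok" on the third attempt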