update llm handling
app/llm_handling.py  CHANGED  +17 -19
@@ -12,12 +12,13 @@ import edge_tts
 from pathlib import Path
 import requests
 from tenacity import retry, stop_after_attempt, wait_exponential
-import json
 
 from app.config import OPENAI_API_KEY
 from app.functions.database_handling import BASE_DB_PATH  # Add this import
 from app.configs.prompts import SYSTEM_PROMPTS
 
+import json  # Import json first if it is not already imported
+
 logging.basicConfig(level=logging.INFO)
 local_ip="192.168.82.5:1234"
 
@@ -87,7 +88,7 @@ def clean_markdown(text):
     text = re.sub(r'```[\s\S]*?```', '', text)  # code blocks
     text = re.sub(r'`.*?`', '', text)  # inline code
     text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # links
-    text = re.sub(r'\*\*(.*?)
+    text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)  # bold
     text = re.sub(r'\*(.*?)\*', r'\1', text)  # italic
     return text.strip()
 
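The one-line change above repairs the bold-stripping pattern, whose previous version was cut off before the closing \*\* and the replacement argument. A minimal standalone sketch of the function as it reads after this commit; the import and the sample call are added here for illustration and are not part of the diff:

    import re

    def clean_markdown(text):
        text = re.sub(r'```[\s\S]*?```', '', text)             # code blocks
        text = re.sub(r'`.*?`', '', text)                      # inline code
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # links
        text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)           # bold (the fixed line)
        text = re.sub(r'\*(.*?)\*', r'\1', text)               # italic
        return text.strip()

    print(clean_markdown("**Bold**, *italic*, `code`, [link](http://x.io)"))
    # -> Bold, italic, , link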
@@ -142,6 +143,13 @@ def test_local_connection():
     except:
         return False
 
+def read_metadata(db_path):
+    metadata_file = os.path.join(db_path, "metadata.json")
+    if os.path.exists(metadata_file):
+        with open(metadata_file, 'r') as f:
+            return json.load(f)
+    return []
+
 @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
 def answer_question(question, db_name, prompt_type="tutor", chat_history=None, llm_type=LLMType.OPENAI_GPT_4O_MINI):
     """
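A hedged usage sketch of the new read_metadata helper. The shape of metadata.json (a list of objects with filename, title, author, and upload_date fields) is inferred from the lookups elsewhere in this diff, not confirmed beyond it; the folder name and values below are hypothetical, created only for the demo:

    import json
    import os

    def read_metadata(db_path):
        # Return the parsed metadata list, or [] when the file is absent
        metadata_file = os.path.join(db_path, "metadata.json")
        if os.path.exists(metadata_file):
            with open(metadata_file, 'r') as f:
                return json.load(f)
        return []

    # Hypothetical database folder, written only for this illustration
    os.makedirs("demo_db", exist_ok=True)
    with open(os.path.join("demo_db", "metadata.json"), "w") as f:
        json.dump([{"filename": "doc1.pdf", "title": "Sample", "author": "Unknown",
                    "upload_date": "2024-01-01"}], f)

    print(read_metadata("demo_db"))      # -> the list written above
    print(read_metadata("no_such_db"))   # -> []

As a side note on the unchanged decorator that follows the helper: stop_after_attempt(3) with wait_exponential(multiplier=1, min=4, max=10) makes answer_question retry up to three attempts, with exponential backoff clamped between 4 and 10 seconds.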
@@ -178,31 +186,21 @@ def answer_question(question, db_name, prompt_type="tutor", chat_history=None, l
     # Search for the most similar documents (chunks)
     relevant_docs = vectorstore.similarity_search(question, k=5)
 
-
-
-    metadata_dict = {}
-    if os.path.exists(metadata_file):
-        with open(metadata_file, 'r') as f:
-            metadata_list = json.load(f)
-        # Create a dictionary for fast lookup, keyed by filename
-        metadata_dict = {m["filename"]: m for m in metadata_list}
+    metadata_list = read_metadata(db_path)
+    metadata_dict = {m["filename"]: m for m in metadata_list}
 
-    # Log the retrieved chunks with
+    # Log the retrieved chunks with metadata
     for idx, doc in enumerate(relevant_docs):
         logging.info(f"--- Chunk {idx+1} ---")
-        # Retrieve the metadata from the document
         source_file = doc.metadata.get("source", "Unknown")
-        chunk_info = f"File: {source_file}"
 
-        #
+        # Retrieve the metadata from the json file
         if source_file in metadata_dict:
             file_metadata = metadata_dict[source_file]
-
-
-            chunk_info += f"\nData caricamento: {file_metadata['upload_date']}"
+            logging.info(f"📚 Titolo: {file_metadata['title']}")
+            logging.info(f"✍️ Autore: {file_metadata['author']}")
 
-        logging.info(
-        logging.info("Contenuto:")
+        logging.info(f"📄 Contenuto:")
         logging.info(doc.page_content)
         logging.info("---------------------")
 
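The deleted block parsed metadata.json inline; the replacement routes that through the new read_metadata helper and logs title and author for each chunk whose source filename appears in the metadata. A minimal sketch of that lookup, using stand-ins (the Doc class, filenames, and metadata values are hypothetical; in the app, relevant_docs comes from vectorstore.similarity_search and metadata_list from read_metadata(db_path)):

    import logging

    logging.basicConfig(level=logging.INFO)

    # Stand-ins for read_metadata(db_path) and the similarity_search results
    metadata_list = [{"filename": "doc1.pdf", "title": "Sample", "author": "Unknown"}]
    metadata_dict = {m["filename"]: m for m in metadata_list}

    class Doc:
        # Minimal stand-in for a retrieved document with .metadata and .page_content
        def __init__(self, source, content):
            self.metadata = {"source": source}
            self.page_content = content

    relevant_docs = [Doc("doc1.pdf", "first chunk"), Doc("doc2.pdf", "second chunk")]

    for idx, doc in enumerate(relevant_docs):
        logging.info(f"--- Chunk {idx+1} ---")
        source_file = doc.metadata.get("source", "Unknown")
        if source_file in metadata_dict:  # doc2.pdf has no metadata and is skipped silently
            file_metadata = metadata_dict[source_file]
            logging.info(f"📚 Titolo: {file_metadata['title']}")
            logging.info(f"✍️ Autore: {file_metadata['author']}")
        logging.info(doc.page_content)
        logging.info("---------------------")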