la04 committed on
Commit
05103a4
·
verified ·
1 Parent(s): cb92135

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -10,7 +10,7 @@ import logging
10
  logging.basicConfig(level=logging.INFO)
11
 
12
  # Modelle laden
13
- model = SentenceTransformer('all-mpnet-base-v2')
14
  qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
15
 
16
  # FAISS-Index erstellen
@@ -34,15 +34,19 @@ def extract_text_from_pdf(pdf_path):
34
  for page_num in range(len(doc)):
35
  page = doc.load_page(page_num)
36
  text = page.get_text("text")
 
 
37
  chunks = split_text_into_chunks(text)
38
  text_chunks.extend(chunks)
39
  return text_chunks
40
 
41
  # Kontexte nach Relevanz bewerten
42
  def rank_contexts_by_relevance(query, contexts):
43
- scores = model.encode([query]) @ model.encode(contexts).T
44
- ranked_contexts = sorted(zip(scores[0], contexts), key=lambda x: x[0], reverse=True)
45
- return [context for _, context in ranked_contexts[:5]] # Nur die Top 5 Kontexte
 
 
46
 
47
  # Suche und Bewertung
48
  def search_and_rank_answers(query, index, documents, k=10):
@@ -65,7 +69,8 @@ def search_and_rank_answers(query, index, documents, k=10):
65
 
66
  # Antworten kombinieren
67
  def combine_answers(answers):
68
- return " ".join(answers[:3]) # Kombiniere die Top 3 Antworten
 
69
 
70
  # Gesamtprozess
71
  def chatbot_response(pdf_path, question):
 
10
  logging.basicConfig(level=logging.INFO)
11
 
12
  # Modelle laden
13
+ model = SentenceTransformer('all-MiniLM-L6-v2')
14
  qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
15
 
16
  # FAISS-Index erstellen
 
34
  for page_num in range(len(doc)):
35
  page = doc.load_page(page_num)
36
  text = page.get_text("text")
37
+ if not text.strip(): # Überprüfen, ob der Text leer ist
38
+ logging.warning(f"Leerer Text auf Seite {page_num}")
39
  chunks = split_text_into_chunks(text)
40
  text_chunks.extend(chunks)
41
  return text_chunks
42
 
43
  # Kontexte nach Relevanz bewerten
44
# Rank contexts by relevance to the query
def rank_contexts_by_relevance(query, contexts):
    """Return up to the 5 contexts most relevant to *query*.

    Relevance is the raw dot product between the query embedding and each
    context embedding.  NOTE(review): the embeddings are not normalized, so
    this is not cosine similarity — confirm that unnormalized dot product
    is intended for ranking.

    Args:
        query: Question string to embed.
        contexts: List of candidate context strings.

    Returns:
        A list of at most 5 context strings, most relevant first.
        Empty list when *contexts* is empty.
    """
    # Guard: model.encode([]) yields an empty matrix and np.dot would
    # fail on the shape mismatch, so bail out early on empty input.
    if not contexts:
        return []
    query_embedding = model.encode([query])[0].astype('float32')
    context_embeddings = model.encode(contexts)
    # One matrix-vector product scores every context at once.
    scores = np.dot(query_embedding, context_embeddings.T)
    # Sort by score only (key=x[0]); ties keep their original order
    # because Python's sort is stable.
    ranked_contexts = sorted(zip(scores, contexts), key=lambda x: x[0], reverse=True)
    # Keep only the top 5 contexts.
    return [context for _, context in ranked_contexts[:5]]
50
 
51
  # Suche und Bewertung
52
  def search_and_rank_answers(query, index, documents, k=10):
 
69
 
70
  # Antworten kombinieren
71
# Combine answers
def combine_answers(answers):
    """Merge the top three answers into one space-separated string."""
    top_three = answers[:3]
    return " ".join(top_three)
74
 
75
  # Gesamtprozess
76
  def chatbot_response(pdf_path, question):