Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ import logging
|
|
10 |
logging.basicConfig(level=logging.INFO)
|
11 |
|
12 |
# Modelle laden
|
13 |
-
model = SentenceTransformer('all-
|
14 |
qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
|
15 |
|
16 |
# FAISS-Index erstellen
|
@@ -34,15 +34,19 @@ def extract_text_from_pdf(pdf_path):
|
|
34 |
for page_num in range(len(doc)):
|
35 |
page = doc.load_page(page_num)
|
36 |
text = page.get_text("text")
|
|
|
|
|
37 |
chunks = split_text_into_chunks(text)
|
38 |
text_chunks.extend(chunks)
|
39 |
return text_chunks
|
40 |
|
41 |
# Kontexte nach Relevanz bewerten
|
42 |
def rank_contexts_by_relevance(query, contexts):
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
46 |
|
47 |
# Suche und Bewertung
|
48 |
def search_and_rank_answers(query, index, documents, k=10):
|
@@ -65,7 +69,8 @@ def search_and_rank_answers(query, index, documents, k=10):
|
|
65 |
|
66 |
# Antworten kombinieren
|
67 |
def combine_answers(answers):
|
68 |
-
|
|
|
69 |
|
70 |
# Gesamtprozess
|
71 |
def chatbot_response(pdf_path, question):
|
|
|
10 |
logging.basicConfig(level=logging.INFO)
|
11 |
|
12 |
# Modelle laden
|
13 |
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
14 |
qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
|
15 |
|
16 |
# FAISS-Index erstellen
|
|
|
34 |
for page_num in range(len(doc)):
|
35 |
page = doc.load_page(page_num)
|
36 |
text = page.get_text("text")
|
37 |
+
if not text.strip(): # Überprüfen, ob der Text leer ist
|
38 |
+
logging.warning(f"Leerer Text auf Seite {page_num}")
|
39 |
chunks = split_text_into_chunks(text)
|
40 |
text_chunks.extend(chunks)
|
41 |
return text_chunks
|
42 |
|
43 |
# Kontexte nach Relevanz bewerten
|
44 |
def rank_contexts_by_relevance(query, contexts):
    """Rank candidate contexts by relevance to *query* and return the top 5.

    Relevance is the dot product between the query embedding and each
    context embedding produced by the module-level sentence-transformer
    ``model``.  Higher score means more relevant.
    """
    # Embed the query once; float32 matches the context embedding dtype.
    query_vec = model.encode([query])[0].astype('float32')
    context_vecs = model.encode(contexts)
    # One matrix-vector product scores every context at once.
    relevance = np.dot(query_vec, context_vecs.T)
    # Pair each score with its context and sort best-first.
    ordered = sorted(zip(relevance, contexts), key=lambda pair: pair[0], reverse=True)
    # Keep only the five most relevant contexts.
    return [ctx for _, ctx in ordered[:5]]
|
50 |
|
51 |
# Suche und Bewertung
|
52 |
def search_and_rank_answers(query, index, documents, k=10):
|
|
|
69 |
|
70 |
# Antworten kombinieren
|
71 |
def combine_answers(answers):
    """Merge the three highest-ranked answers into one space-separated string.

    ``answers`` is expected to be ordered best-first; anything past the
    third entry is dropped.  An empty list yields an empty string.
    """
    top_answers = answers[:3]
    return " ".join(top_answers)
|
74 |
|
75 |
# Gesamtprozess
|
76 |
def chatbot_response(pdf_path, question):
|