la04 committed (verified)
Commit 62d5470 · Parent: a344264

Update app.py

Files changed (1)
  1. app.py +16 -2
app.py CHANGED

@@ -11,6 +11,8 @@ from transformers import pipeline
 EMBEDDINGS_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 LLM_MODEL_NAME = "google/flan-t5-small"
 
+MAX_INPUT_LENGTH = 512  # maximum input length for the model
+
 # **Load and split documents**
 def load_and_split_docs(list_file_path):
     if not list_file_path:
@@ -63,7 +65,7 @@ def initialize_llm_chain(temperature, max_tokens, vector_db):
     )
     print(f"Modell {LLM_MODEL_NAME} erfolgreich geladen.")
     llm = HuggingFacePipeline(pipeline=local_pipeline)
-    memory = ConversationBufferMemory(memory_key="chat_history")
+    memory = ConversationBufferMemory(memory_key="chat_history", output_key="answer")  # store only the answer
     retriever = vector_db.as_retriever()
     return ConversationalRetrievalChain.from_llm(
         llm,
@@ -73,6 +75,18 @@ def initialize_llm_chain(temperature, max_tokens, vector_db):
     )
 
 # **Run the conversation with the QA chain**
+def truncate_history(history, max_length=MAX_INPUT_LENGTH):
+    total_length = 0
+    truncated_history = []
+
+    for message in reversed(history):
+        total_length += len(message[0]) + len(message[1])
+        if total_length > max_length:
+            break
+        truncated_history.insert(0, message)
+
+    return truncated_history
+
 def conversation(qa_chain, message, history):
     if qa_chain is None:
         return None, [{"role": "system", "content": "Der QA-Chain wurde nicht initialisiert!"}], history
@@ -80,7 +94,7 @@ def conversation(qa_chain, message, history):
         return qa_chain, [{"role": "system", "content": "Bitte eine Frage eingeben!"}], history
     try:
         print(f"Frage: {message}")
-        history = history[-5:]  # limit the history to the last 5 messages
+        history = truncate_history(history)  # limit the history to under 512 tokens
         response = qa_chain.invoke({"question": message, "chat_history": history})
         response_text = response["answer"]
         sources = [doc.metadata["source"] for doc in response["source_documents"]]
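
Note on the new helper: truncate_history walks the history from the newest turn backwards and keeps the most recent turns whose combined length fits the budget. It measures len() of the raw strings, so the 512 limit is a character-count proxy rather than a true token count, despite the "512 tokens" comment. A minimal standalone sketch of its behavior, assuming history entries are (user, assistant) string pairs as in Gradio's tuple-style chat history:

# Standalone sketch of the helper added in this commit.
# Assumption: each history entry is a (user_message, assistant_message) pair.
MAX_INPUT_LENGTH = 512  # same constant the commit introduces

def truncate_history(history, max_length=MAX_INPUT_LENGTH):
    total_length = 0
    truncated_history = []

    for message in reversed(history):  # newest turn first
        total_length += len(message[0]) + len(message[1])
        if total_length > max_length:  # budget exceeded: drop this and all older turns
            break
        truncated_history.insert(0, message)  # re-insert in chronological order

    return truncated_history

history = [
    ("What is RAG?", "Retrieval-augmented generation pairs a retriever with an LLM."),
    ("Which embeddings are used?", "sentence-transformers/all-MiniLM-L6-v2"),
    ("And the LLM?", "google/flan-t5-small"),
]
print(truncate_history(history, max_length=120))
# -> keeps only the most recent turn(s) that fit the 120-character budget

Edge case: if the newest exchange alone already exceeds max_length, the helper returns an empty list, so that call reaches the chain with no prior history.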
 
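Why output_key="answer" was added is not stated in the commit; a plausible reason (an assumption here, based on classic LangChain behavior) is that this chain returns more than one key — the conversation handler reads both response["answer"] and response["source_documents"] — and ConversationBufferMemory refuses to guess which value to store when the saved outputs contain multiple keys. A hedged sketch of that failure mode (exact error text varies by LangChain version):

from langchain.memory import ConversationBufferMemory

outputs = {"answer": "42", "source_documents": []}  # shape of a multi-key chain result

# Without output_key, saving a multi-key result raises a ValueError,
# because the memory cannot tell which value is "the" output:
memory = ConversationBufferMemory(memory_key="chat_history")
try:
    memory.save_context({"question": "What is the answer?"}, outputs)
except ValueError as err:
    print(err)  # e.g. "One output key expected, got dict_keys(...)"

# With output_key="answer", only the answer string is stored:
memory = ConversationBufferMemory(memory_key="chat_history", output_key="answer")
memory.save_context({"question": "What is the answer?"}, outputs)
print(memory.load_memory_variables({})["chat_history"])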