Spaces:

la04
/

RAG_test_1

Sleeping

App Files Community

la04 committed on Jan 10

Commit

bf5014c

verified ·

1 Parent(s): 0dda7f4

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -9

app.py CHANGED Viewed

@@ -1,29 +1,34 @@
 import gradio as gr
 import os
-from langchain.vectorstores.faiss import FAISS  # Direktimport
-from langchain.document_loaders import PyPDFLoader
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationBufferMemory
-from langchain.llms import HuggingFaceHub
 list_llm = ["google/flan-t5-small", "distilbert-base-uncased"]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 def load_doc(list_file_path):
     loaders = [PyPDFLoader(x) for x in list_file_path]
     pages = []
     for loader in loaders:
-        pages.extend(loader.load())
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
     doc_splits = text_splitter.split_documents(pages)
     return doc_splits
 def create_db(splits):
-    embeddings = HuggingFaceEmbeddings()
     vectordb = FAISS.from_documents(splits, embeddings)
     return vectordb
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
     llm = HuggingFaceHub(
         repo_id=llm_model,
@@ -45,17 +50,20 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
     )
     return qa_chain
 def initialize_database(list_file_obj):
     list_file_path = [x.name for x in list_file_obj if x is not None]
     doc_splits = load_doc(list_file_path)
     vector_db = create_db(doc_splits)
     return vector_db, "Datenbank erfolgreich erstellt!"
 def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db):
     llm_name = list_llm[llm_option]
     qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db)
     return qa_chain, "LLM erfolgreich initialisiert! Chatbot ist bereit."
 def format_chat_history(message, chat_history):
     formatted_chat_history = []
     for user_message, bot_message in chat_history:
@@ -63,6 +71,7 @@ def format_chat_history(message, chat_history):
         formatted_chat_history.append(f"Assistant: {bot_message}")
     return formatted_chat_history
 def conversation(qa_chain, message, history):
     formatted_chat_history = format_chat_history(message, history)
     response = qa_chain({"question": message, "chat_history": formatted_chat_history})
@@ -70,6 +79,7 @@ def conversation(qa_chain, message, history):
     new_history = history + [(message, response_answer)]
     return qa_chain, gr.update(value=""), new_history
 def demo():
     with gr.Blocks() as demo:
         vector_db = gr.State()

 import gradio as gr
 import os
+from langchain.vectorstores import FAISS  # Import für Vektordatenbank FAISS
+from langchain.document_loaders import PyPDFLoader  # PDF-Loader zum Laden der Dokumente
+from langchain.embeddings import HuggingFaceEmbeddings  # Embeddings-Erstellung mit Hugging Face-Modellen
+from langchain.chains import ConversationalRetrievalChain  # Chain für QA-Funktionalität
+from langchain.memory import ConversationBufferMemory  # Speichern des Chat-Verlaufs im Speicher
+from langchain.llms import HuggingFaceHub  # Für das Laden der Modelle von Hugging Face Hub
+from langchain.text_splitter import RecursiveCharacterTextSplitter  # Aufteilen von Dokumenten in Chunks
+# Liste der LLM-Modelle (leichte CPU-freundliche Modelle)
 list_llm = ["google/flan-t5-small", "distilbert-base-uncased"]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
+# PDF-Dokument laden und in Chunks aufteilen
 def load_doc(list_file_path):
     loaders = [PyPDFLoader(x) for x in list_file_path]
     pages = []
     for loader in loaders:
+        pages.extend(loader.load())  # Laden der Seiten aus PDF
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)  # Chunks für CPU
     doc_splits = text_splitter.split_documents(pages)
     return doc_splits
+# Vektordatenbank erstellen
 def create_db(splits):
+    embeddings = HuggingFaceEmbeddings()  # Erstellen der Embeddings mit Hugging Face
     vectordb = FAISS.from_documents(splits, embeddings)
     return vectordb
+# Initialisierung des ConversationalRetrievalChain
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
     llm = HuggingFaceHub(
         repo_id=llm_model,
     )
     return qa_chain
+# Initialisierung der Datenbank
 def initialize_database(list_file_obj):
     list_file_path = [x.name for x in list_file_obj if x is not None]
     doc_splits = load_doc(list_file_path)
     vector_db = create_db(doc_splits)
     return vector_db, "Datenbank erfolgreich erstellt!"
+# Initialisierung des LLM
 def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db):
     llm_name = list_llm[llm_option]
     qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db)
     return qa_chain, "LLM erfolgreich initialisiert! Chatbot ist bereit."
+# Chat-Historie formatieren
 def format_chat_history(message, chat_history):
     formatted_chat_history = []
     for user_message, bot_message in chat_history:
         formatted_chat_history.append(f"Assistant: {bot_message}")
     return formatted_chat_history
+# Konversationsfunktion
 def conversation(qa_chain, message, history):
     formatted_chat_history = format_chat_history(message, history)
     response = qa_chain({"question": message, "chat_history": formatted_chat_history})
     new_history = history + [(message, response_answer)]
     return qa_chain, gr.update(value=""), new_history
+# Gradio-Frontend
 def demo():
     with gr.Blocks() as demo:
         vector_db = gr.State()