la04 committed
Commit bf5014c · verified · 1 parent: 0dda7f4

Update app.py

Files changed (1):
  1. app.py +19 -9
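The key fix in this commit is the previously missing `RecursiveCharacterTextSplitter` import: the old module used the class inside `load_doc()` without ever importing it, so any attempt to build the database would fail at runtime. A minimal sketch of that failure mode (the PDF file name is hypothetical):

    # Old imports only; note there is no text_splitter import in the module.
    from langchain.document_loaders import PyPDFLoader

    def load_doc(list_file_path):
        loaders = [PyPDFLoader(x) for x in list_file_path]
        pages = []
        for loader in loaders:
            pages.extend(loader.load())
        # Fails here: NameError: name 'RecursiveCharacterTextSplitter' is not defined
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
        return text_splitter.split_documents(pages)

    load_doc(["sample.pdf"])  # hypothetical file; raises NameError before any splitting happens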
app.py CHANGED
@@ -1,29 +1,34 @@
 import gradio as gr
 import os
-from langchain.vectorstores.faiss import FAISS  # direct import
-from langchain.document_loaders import PyPDFLoader
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationBufferMemory
-from langchain.llms import HuggingFaceHub
+from langchain.vectorstores import FAISS  # import for the FAISS vector database
+from langchain.document_loaders import PyPDFLoader  # PDF loader for reading the documents
+from langchain.embeddings import HuggingFaceEmbeddings  # embedding creation with Hugging Face models
+from langchain.chains import ConversationalRetrievalChain  # chain providing the QA functionality
+from langchain.memory import ConversationBufferMemory  # keeps the chat history in memory
+from langchain.llms import HuggingFaceHub  # loads models from the Hugging Face Hub
+from langchain.text_splitter import RecursiveCharacterTextSplitter  # splits documents into chunks
 
+# List of LLM models (lightweight, CPU-friendly models)
 list_llm = ["google/flan-t5-small", "distilbert-base-uncased"]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
+# Load PDF documents and split them into chunks
 def load_doc(list_file_path):
     loaders = [PyPDFLoader(x) for x in list_file_path]
     pages = []
     for loader in loaders:
-        pages.extend(loader.load())
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
+        pages.extend(loader.load())  # load the pages from the PDF
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)  # small chunks for CPU use
     doc_splits = text_splitter.split_documents(pages)
     return doc_splits
 
+# Create the vector database
 def create_db(splits):
-    embeddings = HuggingFaceEmbeddings()
+    embeddings = HuggingFaceEmbeddings()  # create the embeddings with Hugging Face
     vectordb = FAISS.from_documents(splits, embeddings)
     return vectordb
 
+# Initialize the ConversationalRetrievalChain
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
     llm = HuggingFaceHub(
         repo_id=llm_model,
@@ -45,17 +50,20 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
     )
     return qa_chain
 
+# Initialize the database
 def initialize_database(list_file_obj):
     list_file_path = [x.name for x in list_file_obj if x is not None]
     doc_splits = load_doc(list_file_path)
     vector_db = create_db(doc_splits)
     return vector_db, "Datenbank erfolgreich erstellt!"
 
+# Initialize the LLM
 def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db):
     llm_name = list_llm[llm_option]
     qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db)
     return qa_chain, "LLM erfolgreich initialisiert! Chatbot ist bereit."
 
+# Format the chat history
 def format_chat_history(message, chat_history):
     formatted_chat_history = []
     for user_message, bot_message in chat_history:
@@ -63,6 +71,7 @@ def format_chat_history(message, chat_history):
         formatted_chat_history.append(f"Assistant: {bot_message}")
     return formatted_chat_history
 
+# Conversation function
 def conversation(qa_chain, message, history):
     formatted_chat_history = format_chat_history(message, history)
     response = qa_chain({"question": message, "chat_history": formatted_chat_history})
@@ -70,6 +79,7 @@ def conversation(qa_chain, message, history):
     new_history = history + [(message, response_answer)]
     return qa_chain, gr.update(value=""), new_history
 
+# Gradio front end
 def demo():
     with gr.Blocks() as demo:
         vector_db = gr.State()
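The diff ends inside demo(); the rest of the Gradio wiring is unchanged. For orientation, a minimal sketch of how the updated functions compose outside the UI, assuming a local sample.pdf and a Hugging Face API token (both placeholders), and assuming the chain's result carries its text under the "answer" key, as conversation() above implies:

    import os
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = "hf_..."  # placeholder; HuggingFaceHub requires a token

    # Build the index from one PDF and wire up the chain with the first model.
    doc_splits = load_doc(["sample.pdf"])  # hypothetical file: load pages, split into 512-char chunks
    vector_db = create_db(doc_splits)      # embed the chunks and index them with FAISS
    qa_chain = initialize_llmchain(list_llm[0],  # google/flan-t5-small
                                   temperature=0.7, max_tokens=256, top_k=3,  # example values
                                   vector_db=vector_db)

    # One turn with an empty chat history.
    response = qa_chain({"question": "What does the document say?", "chat_history": []})
    print(response["answer"])  # assumed result key, per conversation() above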