Commit 0dda7f4 (verified) · la04 committed · 1 parent: 7b3bf1d

Update app.py

Files changed (1): app.py (+4 −20)
app.py CHANGED
@@ -1,36 +1,29 @@
 import gradio as gr
 import os
-from langchain.vectorstores import FAISS
+from langchain.vectorstores.faiss import FAISS  # direct import
 from langchain.document_loaders import PyPDFLoader
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from langchain.llms import HuggingFaceHub
 
-# List of models
-list_llm = ["google/flan-t5-small", "distilbert-base-uncased"]  # lightweight models for CPU
+list_llm = ["google/flan-t5-small", "distilbert-base-uncased"]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
-# Load and split the PDF document
 def load_doc(list_file_path):
     loaders = [PyPDFLoader(x) for x in list_file_path]
     pages = []
     for loader in loaders:
         pages.extend(loader.load())
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=512,  # smaller chunks for faster processing on CPU
-        chunk_overlap=32
-    )
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
     doc_splits = text_splitter.split_documents(pages)
     return doc_splits
 
-# Create the vector database
 def create_db(splits):
     embeddings = HuggingFaceEmbeddings()
     vectordb = FAISS.from_documents(splits, embeddings)
     return vectordb
 
-# Initialize the LLM chain
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
     llm = HuggingFaceHub(
         repo_id=llm_model,
@@ -40,12 +33,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
             "top_k": top_k,
         }
     )
-
-    memory = ConversationBufferMemory(
-        memory_key="chat_history",
-        return_messages=True
-    )
-
+    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
     retriever = vector_db.as_retriever()
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm,
@@ -57,14 +45,12 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
     )
     return qa_chain
 
-# Initialize the database
 def initialize_database(list_file_obj):
     list_file_path = [x.name for x in list_file_obj if x is not None]
     doc_splits = load_doc(list_file_path)
     vector_db = create_db(doc_splits)
     return vector_db, "Datenbank erfolgreich erstellt!"
 
-# Initialize the LLM
 def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db):
     llm_name = list_llm[llm_option]
     qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db)
@@ -77,7 +63,6 @@ def format_chat_history(message, chat_history):
     formatted_chat_history.append(f"Assistant: {bot_message}")
     return formatted_chat_history
 
-# Chat function
 def conversation(qa_chain, message, history):
     formatted_chat_history = format_chat_history(message, history)
     response = qa_chain({"question": message, "chat_history": formatted_chat_history})
@@ -85,7 +70,6 @@ def conversation(qa_chain, message, history):
     new_history = history + [(message, response_answer)]
     return qa_chain, gr.update(value=""), new_history
 
-# Create the Gradio app
 def demo():
     with gr.Blocks() as demo:
         vector_db = gr.State()
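Note that one bug survives this commit: load_doc() calls RecursiveCharacterTextSplitter, but neither the old nor the new import block brings it in, so the function fails with a NameError at runtime. A minimal sketch of the fix, assuming the same legacy langchain package layout as the other imports:

# Assumed fix, not part of this commit: import the splitter that load_doc() uses.
# In the legacy langchain layout seen above, it lives in langchain.text_splitter.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)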
 
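The imports and the HuggingFaceHub call also carry runtime requirements the diff does not show: the FAISS store needs the faiss-cpu package, HuggingFaceEmbeddings pulls in sentence-transformers, PyPDFLoader needs pypdf, and HuggingFaceHub reads an API token from the environment. A sketch of an early guard, assuming the standard HUGGINGFACEHUB_API_TOKEN variable (the guard itself is hypothetical, not in app.py):

import os

# Hypothetical startup guard: HuggingFaceHub expects the Hub token in the
# HUGGINGFACEHUB_API_TOKEN environment variable and fails later without it.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    raise RuntimeError("Set HUGGINGFACEHUB_API_TOKEN before starting the app.")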
 
 
 
 
 
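Taken together, the functions left after this commit wire up in the order load_doc → create_db → initialize_llmchain → conversation. A minimal usage sketch, assuming the missing splitter import above is applied; the PDF path and the sampling parameters are illustrative, not values from the repo:

# Hypothetical driver, reconstructed from the functions visible in the diff.
splits = load_doc(["example.pdf"])          # illustrative local PDF path
vector_db = create_db(splits)               # FAISS index over the chunks
qa_chain = initialize_llmchain(
    "google/flan-t5-small",                 # first entry of list_llm
    temperature=0.7, max_tokens=256, top_k=3, vector_db=vector_db,
)
qa_chain, _, history = conversation(qa_chain, "What is the document about?", [])
print(history[-1][1])                       # the chain's latest answer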