HemaMeena committed on
Commit c793415 · verified · 1 Parent(s): 87ea58f

Update app.py

Files changed (1): app.py (+57, −83)
app.py CHANGED
@@ -8,136 +8,116 @@ import time

import langchain

- ### loaders
+ # Loaders
from langchain.document_loaders import PyPDFLoader, DirectoryLoader

- ### splits
+ # Splits
from langchain.text_splitter import RecursiveCharacterTextSplitter

- ### prompts
+ # Prompts
from langchain import PromptTemplate, LLMChain

- ### vector stores
+ # Vector stores
from langchain.vectorstores import FAISS

- ### models
+ # Models
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings

- ### retrievers
+ # Retrievers
from langchain.chains import RetrievalQA

import torch
import transformers
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
-     BitsAndBytesConfig,
    pipeline
)
import gradio as gr
import locale
- import time
- from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-
- from transformers import logging
import shutil

# Clear transformers cache
- logging.set_verbosity_error()
+ transformers.logging.set_verbosity_error()
shutil.rmtree('./.cache', ignore_errors=True)

class CFG:
-     # LLMs
-     model_name = 'llama2-13b-chat' # wizardlm, llama2-7b-chat, llama2-13b-chat, mistral-7B
+     # LLMs configuration
+     model_name = 'llama2-13b-chat' # Options: wizardlm, llama2-7b-chat, llama2-13b-chat, mistral-7B
    temperature = 0
    top_p = 0.95
    repetition_penalty = 1.15

-     # splitting
+     # Text splitting configuration
    split_chunk_size = 800
    split_overlap = 0

-     # embeddings
+     # Embeddings configuration
    embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'

-     # similar passages
+     # Similar passages configuration
    k = 6

-     # paths
+     # File paths configuration
    PDFs_path = './'
-     Embeddings_path = './faiss-hp-sentence-transformers'
+     Embeddings_path = './faiss-hp-sentence-transformers'
    Output_folder = './rag-vectordb'

def get_model(model=CFG.model_name):
    print('\nDownloading model: ', model, '\n\n')
-     model_repo = None
-
-     if model == 'llama2-13b-chat':
-         model_repo = 'daryl149/llama-2-13b-chat-hf'
-
-     if model_repo:
-         tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)
-         model = AutoModelForCausalLM.from_pretrained(
-             model_repo,
-             device_map="auto",
-             offload_folder="./offload",  # Specify offload folder
-             trust_remote_code=True
-         )
-         max_len = 2048
-     else:
+
+     model_repo = 'daryl149/llama-2-13b-chat-hf' if model == 'llama2-13b-chat' else None
+
+     if not model_repo:
        raise ValueError("Model not implemented: " + model)

+     tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)
+     model = AutoModelForCausalLM.from_pretrained(
+         model_repo,
+         device_map="auto",
+         offload_folder="./offload",
+         trust_remote_code=True
+     )
+
+     max_len = 2048
+
    return tokenizer, model, max_len

-
def wrap_text_preserve_newlines(text, width=700):
-     # Split the input text into lines based on newline characters
    lines = text.split('\n')
-
-     # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
-
-     # Join the wrapped lines back together using newline characters
-     wrapped_text = '\n'.join(wrapped_lines)
-
-     return wrapped_text
-
+
+     return '\n'.join(wrapped_lines)

def process_llm_response(llm_response):
    ans = wrap_text_preserve_newlines(llm_response['result'])

    sources_used = ' \n'.join(
        [
-             source.metadata['source'].split('/')[-1][:-4]
-             + ' - page: '
-             + str(source.metadata['page'])
+             f"{source.metadata['source'].split('/')[-1][:-4]} - page: {source.metadata['page']}"
            for source in llm_response['source_documents']
        ]
    )

-     ans = ans + '\n\nSources: \n' + sources_used
-     return ans
+     return ans + '\n\nSources: \n' + sources_used

def llm_ans(query):
    start = time.time()

    llm_response = qa_chain.invoke(query)
    ans = process_llm_response(llm_response)

    end = time.time()

-     time_elapsed = int(round(end - start, 0))
-     time_elapsed_str = f'\n\nTime elapsed: {time_elapsed} s'
+     time_elapsed_str = f'\n\nTime elapsed: {int(round(end - start))} s'
+
    return ans + time_elapsed_str

def predict(message, history):
    output = str(llm_ans(message)).replace("\n", "<br/>")
    return output

-
-
-
- tokenizer, model, max_len = get_model(model = CFG.model_name)
+ tokenizer, model, max_len = get_model(model=CFG.model_name)

pipe = pipeline(
    task="text-generation",
@@ -150,46 +130,42 @@ pipe = pipeline(
    repetition_penalty=CFG.repetition_penalty
)

- ### langchain pipeline
- llm = HuggingFacePipeline(pipeline = pipe)
+ # LangChain pipeline setup
+ llm = HuggingFacePipeline(pipeline=pipe)

loader = DirectoryLoader(
    CFG.PDFs_path,
    glob="./*.pdf",
    loader_cls=PyPDFLoader,
    show_progress=True,
-     use_multithreading=True
)

documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
-     chunk_size = CFG.split_chunk_size,
-     chunk_overlap = CFG.split_overlap
+     chunk_size=CFG.split_chunk_size,
+     chunk_overlap=CFG.split_overlap
)

texts = text_splitter.split_documents(documents)

vectordb = FAISS.from_documents(
    texts,
-     HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
+     HuggingFaceInstructEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
)

- ### persist vector database
+ # Persist vector database
vectordb.save_local(f"{CFG.Output_folder}/faiss_index_rag")

- retriever = vectordb.as_retriever(search_kwargs = {"k": CFG.k, "search_type" : "similarity"})
+ retriever = vectordb.as_retriever(search_kwargs={"k": CFG.k})

qa_chain = RetrievalQA.from_chain_type(
-     llm = llm,
-     chain_type = "stuff", # map_reduce, map_rerank, stuff, refine
-     retriever = retriever,
-     chain_type_kwargs = {"prompt": PROMPT},
-     return_source_documents = True,
-     verbose = False
+     llm=llm,
+     chain_type="stuff", # Options: map_reduce, map_rerank, stuff, refine
+     retriever=retriever,
)

prompt_template = """
- Don't try to make up an answer, if you don't know just say that you don't know.
+ Don't try to make up an answer; if you don't know just say that you don't know.
Answer in the same language the question was asked.
Use only the following pieces of context to answer the question at the end.

@@ -198,19 +174,17 @@ Use only the following pieces of context to answer the question at the end.
Question: {question}
Answer:"""

-
PROMPT = PromptTemplate(
-     template = prompt_template,
-     input_variables = ["context", "question"]
+     template=prompt_template,
+     input_variables=["context", "question"]
)

-
locale.getpreferredencoding = lambda: "UTF-8"

demo = gr.ChatInterface(
-     predict,
-     title = f' Open-Source LLM ({CFG.model_name}) Question Answering'
- )
+     fn=predict,
+     title=f'Open-Source LLM ({CFG.model_name}) Question Answering'
+ )

demo.queue()
- demo.launch()
+ demo.launch()
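
Not part of the commit, but as a quick way to exercise the index this revision persists: a minimal sketch, assuming the same classic LangChain imports used in app.py above, that reloads the FAISS index saved under ./rag-vectordb/faiss_index_rag and runs a retrieval check without starting the Gradio UI. The query string and k values are illustrative only, and depending on the installed LangChain version FAISS.load_local may additionally require allow_dangerous_deserialization=True.

# Standalone sketch: reload the index persisted by app.py and sanity-check retrieval.
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS

# Must match the embedding model used when the index was built in app.py.
embeddings = HuggingFaceInstructEmbeddings(
    model_name='sentence-transformers/all-mpnet-base-v2'
)

# Load the index saved by vectordb.save_local(f"{CFG.Output_folder}/faiss_index_rag").
vectordb = FAISS.load_local('./rag-vectordb/faiss_index_rag', embeddings)

# Same retriever settings as app.py (k nearest chunks).
retriever = vectordb.as_retriever(search_kwargs={"k": 6})

# Quick retrieval check: print the source file and page of the top matches.
for doc in vectordb.similarity_search("What is this document about?", k=3):
    print(doc.metadata.get('source'), '- page:', doc.metadata.get('page'))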