Delete util
- util/conversation_rag.py +0 -85
- util/index.py +0 -41
util/conversation_rag.py
DELETED
@@ -1,85 +0,0 @@
from torch import cuda, bfloat16
import transformers
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFacePipeline
from huggingface_hub import login
from langchain.prompts import PromptTemplate


class Conversation_RAG:
    def __init__(self, hf_token="", embedding_model_repo_id="sentence-transformers/all-roberta-large-v1",
                 llm_repo_id='meta-llama/Llama-2-7b-chat-hf'):
        self.hf_token = hf_token
        self.embedding_model_repo_id = embedding_model_repo_id
        self.llm_repo_id = llm_repo_id

    def load_model_and_tokenizer(self):
        # Load the embedding model and the FAISS index built by util/index.py.
        embedding_model = HuggingFaceEmbeddings(model_name=self.embedding_model_repo_id)
        vectordb = FAISS.load_local("./db/faiss_index", embedding_model)

        login(token=self.hf_token)

        device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

        # 4-bit NF4 quantization so the chat model fits on a single GPU.
        bnb_config = transformers.BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type='nf4',
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=bfloat16
        )

        model = transformers.AutoModelForCausalLM.from_pretrained(
            self.llm_repo_id,
            trust_remote_code=True,
            quantization_config=bnb_config,
            device_map='auto'
        )
        model.eval()

        tokenizer = transformers.AutoTokenizer.from_pretrained(self.llm_repo_id)
        return model, tokenizer, vectordb

    def create_conversation(self, model, tokenizer, vectordb, max_new_tokens=512, temperature=0.1,
                            repetition_penalty=1.1, top_k=10, top_p=0.95, k_context=5, num_return_sequences=1,
                            instruction="Use the following pieces of context to answer the question at the end. Generate the answer based on the given context only. If you do not find any information related to the question in the given context, just say that you don't know, don't try to make up an answer. Keep your answer expressive."):

        generate_text = transformers.pipeline(
            model=model,
            tokenizer=tokenizer,
            return_full_text=True,  # langchain expects the full text
            task='text-generation',
            temperature=temperature,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
            max_new_tokens=max_new_tokens,  # max number of tokens to generate in the output
            repetition_penalty=repetition_penalty,  # without this, output begins repeating
            top_k=top_k,
            top_p=top_p,
            num_return_sequences=num_return_sequences,
        )

        llm = HuggingFacePipeline(pipeline=generate_text)

        # Prompt that stuffs the retrieved context and the user question into one turn.
        system_instruction = f"User: {instruction}\n"
        template = system_instruction + """
        context:\n
        {context}\n
        Question: {question}\n
        Assistant:
        """

        QCA_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

        qa = ConversationalRetrievalChain.from_llm(
            llm=llm,
            chain_type='stuff',
            retriever=vectordb.as_retriever(search_kwargs={"k": k_context}),
            combine_docs_chain_kwargs={"prompt": QCA_PROMPT},
            get_chat_history=lambda h: h,
            verbose=True
        )
        return qa
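For context, a minimal sketch of how this class would be driven from application code. The token, question, and empty chat history below are placeholders, not part of the deleted file:

from util.conversation_rag import Conversation_RAG

rag = Conversation_RAG(hf_token="<hf_token>")  # placeholder token
model, tokenizer, vectordb = rag.load_model_and_tokenizer()
qa = rag.create_conversation(model, tokenizer, vectordb)

# ConversationalRetrievalChain takes the question plus the running chat history
# and returns a dict whose "answer" key holds the generated reply.
result = qa({"question": "What is this document about?", "chat_history": []})
print(result["answer"])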
util/index.py
DELETED
@@ -1,41 +0,0 @@
import os
import numpy as np
import pickle

from langchain.vectorstores import FAISS, Chroma, DocArrayInMemorySearch
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


def create_vector_store_index(file_path, embedding_model_repo_id="sentence-transformers/all-roberta-large-v1"):

    # Infer the loader from the file extension.
    file_path_split = file_path.split(".")
    file_type = file_path_split[-1].rstrip('/')

    if file_type == 'csv':
        print(file_path)
        loader = CSVLoader(file_path=file_path)
        documents = loader.load()

    elif file_type == 'pdf':
        loader = PyPDFLoader(file_path)
        pages = loader.load()

        # Split PDF pages into overlapping chunks for retrieval.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1024,
            chunk_overlap=128,
        )
        documents = text_splitter.split_documents(pages)

    embedding_model = HuggingFaceEmbeddings(
        model_name=embedding_model_repo_id
    )

    # Embed the chunks and persist the FAISS index where Conversation_RAG expects it.
    vectordb = FAISS.from_documents(documents, embedding_model)
    file_output = "./db/faiss_index"
    vectordb.save_local(file_output)

    return "Vector store index is created."
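A minimal usage sketch of the deleted helper, assuming a local PDF path (the path below is a placeholder):

from util.index import create_vector_store_index

# Builds ./db/faiss_index from a PDF or CSV so Conversation_RAG.load_model_and_tokenizer()
# can load it afterwards.
status = create_vector_store_index("./docs/example.pdf")  # placeholder path
print(status)  # "Vector store index is created."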