TRaw committed on
Commit b301e28 · Parent: 9aae063

Delete util

Files changed (2)
  1. util/conversation_rag.py +0 -85
  2. util/index.py +0 -41
util/conversation_rag.py DELETED
@@ -1,85 +0,0 @@
- from torch import cuda, bfloat16
- import transformers
- from langchain.embeddings.huggingface import HuggingFaceEmbeddings
- from langchain.vectorstores import FAISS
- from langchain.chains import ConversationalRetrievalChain
- from langchain.memory import ConversationBufferMemory
- from langchain.llms import HuggingFacePipeline
- from huggingface_hub import login
- from langchain.prompts import PromptTemplate
-
-
- class Conversation_RAG:
-     def __init__(self, hf_token="", embedding_model_repo_id="sentence-transformers/all-roberta-large-v1",
-                  llm_repo_id='meta-llama/Llama-2-7b-chat-hf'):
-
-         self.hf_token = hf_token
-         self.embedding_model_repo_id = embedding_model_repo_id
-         self.llm_repo_id = llm_repo_id
-
-     def load_model_and_tokenizer(self):
-
-         embedding_model = HuggingFaceEmbeddings(model_name=self.embedding_model_repo_id)
-         vectordb = FAISS.load_local("./db/faiss_index", embedding_model)
-
-         login(token=self.hf_token)
-
-         device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
-
-         bnb_config = transformers.BitsAndBytesConfig(
-             load_in_4bit=True,
-             bnb_4bit_quant_type='nf4',
-             bnb_4bit_use_double_quant=True,
-             bnb_4bit_compute_dtype=bfloat16
-         )
-
-         model = transformers.AutoModelForCausalLM.from_pretrained(
-             self.llm_repo_id,
-             trust_remote_code=True,
-             quantization_config=bnb_config,
-             device_map='auto'
-         )
-         model.eval()
-
-         tokenizer = transformers.AutoTokenizer.from_pretrained(self.llm_repo_id)
-         return model, tokenizer, vectordb
-
-     def create_conversation(self, model, tokenizer, vectordb, max_new_tokens=512, temperature=0.1, repetition_penalty=1.1, top_k=10, top_p=0.95, k_context=5,
-                             num_return_sequences=1, instruction="Use the following pieces of context to answer the question at the end. Generate the answer based on the given context only. If you do not find any information related to the question in the given context, just say that you don't know; don't try to make up an answer. Keep your answer expressive."):
-
-         generate_text = transformers.pipeline(
-             model=model,
-             tokenizer=tokenizer,
-             return_full_text=True,  # langchain expects the full text
-             task='text-generation',
-             temperature=temperature,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
-             max_new_tokens=max_new_tokens,  # max number of tokens to generate in the output
-             repetition_penalty=repetition_penalty,  # without this, output begins repeating
-             top_k=top_k,
-             top_p=top_p,
-             num_return_sequences=num_return_sequences,
-         )
-
-         llm = HuggingFacePipeline(pipeline=generate_text)
-
-         system_instruction = f"User: {instruction}\n"
-         template = system_instruction + """
-         context:\n
-         {context}\n
-         Question: {question}\n
-         Assistant:
-         """
-
-         QCA_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)
-
-         qa = ConversationalRetrievalChain.from_llm(
-             llm=llm,
-             chain_type='stuff',
-             retriever=vectordb.as_retriever(search_kwargs={"k": k_context}),
-             combine_docs_chain_kwargs={"prompt": QCA_PROMPT},
-             get_chat_history=lambda h: h,
-             verbose=True
-         )
-         return qa
-
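For context on what this removal takes out of the app, here is a minimal usage sketch of the deleted class, assuming it was still importable as util.conversation_rag and that a FAISS index already exists under ./db/faiss_index. The driver script, token, and question strings are placeholders, not from this repo:

# Hypothetical driver for the deleted Conversation_RAG class.
from util.conversation_rag import Conversation_RAG

rag = Conversation_RAG(
    hf_token="hf_...",  # placeholder Hugging Face token
    embedding_model_repo_id="sentence-transformers/all-roberta-large-v1",
    llm_repo_id="meta-llama/Llama-2-7b-chat-hf",
)

# Loads the 4-bit quantized LLM, its tokenizer, and the saved FAISS index.
model, tokenizer, vectordb = rag.load_model_and_tokenizer()

# Builds the ConversationalRetrievalChain over the top-k retrieved chunks.
qa = rag.create_conversation(model, tokenizer, vectordb, k_context=5)

# The chain takes a question plus the running chat history and returns a
# dict whose "answer" key holds the generated reply.
result = qa({"question": "What does the indexed document conclude?", "chat_history": []})
print(result["answer"])

Note that the chain was created with get_chat_history=lambda h: h, so the caller was responsible for threading prior turns back in as chat_history.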
util/index.py DELETED
@@ -1,41 +0,0 @@
- import os
- import numpy as np
- import pickle
-
- from langchain.vectorstores import FAISS, Chroma, DocArrayInMemorySearch
- from langchain.embeddings.huggingface import HuggingFaceEmbeddings
- from langchain.document_loaders.csv_loader import CSVLoader
- from langchain.text_splitter import CharacterTextSplitter
- from langchain.document_loaders import PyPDFLoader
- from langchain.text_splitter import RecursiveCharacterTextSplitter
-
- def create_vector_store_index(file_path, embedding_model_repo_id="sentence-transformers/all-roberta-large-v1"):
-
-     file_path_split = file_path.split(".")
-     file_type = file_path_split[-1].rstrip('/')
-
-     if file_type == 'csv':
-         print(file_path)
-         loader = CSVLoader(file_path=file_path)
-         documents = loader.load()
-
-     elif file_type == 'pdf':
-         loader = PyPDFLoader(file_path)
-         pages = loader.load()
-
-         text_splitter = RecursiveCharacterTextSplitter(
-             chunk_size=1024,
-             chunk_overlap=128,
-         )
-
-         documents = text_splitter.split_documents(pages)
-
-     embedding_model = HuggingFaceEmbeddings(
-         model_name=embedding_model_repo_id
-     )
-
-     vectordb = FAISS.from_documents(documents, embedding_model)
-     file_output = "./db/faiss_index"
-     vectordb.save_local(file_output)
-
-     return "Vector store index is created."
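The indexer deleted here was the producer side of the FAISS index that Conversation_RAG.load_model_and_tokenizer consumed. A minimal sketch of how it was presumably invoked; the sample file path is an assumption:

# Hypothetical driver for the deleted indexer.
from util.index import create_vector_store_index

# Splits the PDF into chunks (1024 characters, 128 overlap), embeds them,
# and writes the index to ./db/faiss_index. CSVs are loaded row by row
# without splitting; any other extension leaves `documents` unassigned
# and raises a NameError.
status = create_vector_store_index("./data/example.pdf")
print(status)  # -> "Vector store index is created."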