DANISHFAYAZNAJAR committed on
Commit
726afa6
·
1 Parent(s): 6ca1a65

adding files

Files changed (4)
  1. app.py +80 -0
  2. faiss_index/index.faiss +0 -0
  3. faiss_index/index.pkl +0 -0
  4. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,80 @@
+ from torch import cuda, bfloat16
+ import os
+ import transformers
+ from langchain.chains import RetrievalQA
+ from langchain.llms import HuggingFacePipeline
+ from langchain.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ import gradio as gr
+
+ # Expose the Space secret HF_KEY under the names the Hub libraries expect.
+ os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ["HF_KEY"]
+ os.environ['HF_TOKEN'] = os.environ["HF_KEY"]
+
+ model_id = 'meta-llama/Llama-2-7b-chat-hf'
+ device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
+
+ # 4-bit NF4 quantization so the 7B model fits in modest GPU memory.
+ bnb_config = transformers.BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type='nf4',
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_compute_dtype=bfloat16,
+ )
+
+ model_config = transformers.AutoConfig.from_pretrained(model_id)
+
+ model = transformers.AutoModelForCausalLM.from_pretrained(
+     model_id,
+     trust_remote_code=True,
+     config=model_config,
+     quantization_config=bnb_config,
+     device_map='auto',
+ )
+
+ tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
+
+ model.eval()
+ print(f"Model loaded on {device}")
+
+ generate_text = transformers.pipeline(
+     task='text-generation',
+     model=model,
+     tokenizer=tokenizer,
+     return_full_text=True,
+     do_sample=True,  # sampling must be on for temperature to take effect
+     temperature=0.1,
+     max_new_tokens=512,
+     repetition_penalty=1.1,
+ )
+ llm = HuggingFacePipeline(pipeline=generate_text)
+
+ # One-time index build kept for reference; the Space ships a prebuilt faiss_index/ (see sketch below).
+ # loader = PyPDFLoader("/content/CELEX%3A32023R1115%3AEN%3ATXT.pdf")
+ # pdf_documents = loader.load()
+ # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
+ # pdf_document_chunks = text_splitter.split_documents(pdf_documents)
+
+ model_name = "sentence-transformers/all-mpnet-base-v2"
+ # model_kwargs = {'device': 'cpu'}
+ embeddings = HuggingFaceEmbeddings(model_name=model_name)  # , model_kwargs=model_kwargs)
+ vectorstore = FAISS.load_local("faiss_index", embeddings)
+
+ # 'stuff' chain: retrieved chunks are pasted into a single prompt for the LLM.
+ retrievalQA = RetrievalQA.from_chain_type(
+     llm,
+     chain_type='stuff',
+     retriever=vectorstore.as_retriever(),
+     return_source_documents=True,
+ )
+
+ print("Setup complete, let's start answering questions")
+
+ def question_answer(query):
+     response = retrievalQA.invoke(query)
+     return response['result'], response['source_documents'][0].page_content
+
+ iface = gr.Interface(fn=question_answer, inputs='text', outputs=['text', 'text'])
+ iface.launch()
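
The four commented-out lines in app.py preserve how the shipped index was presumably built. A minimal sketch of that one-off build step, assuming the same source PDF and the same chunking parameters; the script name and local path are illustrative, and only the resulting index.faiss and index.pkl (added below) are needed at runtime:

# build_index.py — hypothetical one-off script, not part of this commit
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

loader = PyPDFLoader("CELEX_32023R1115_EN_TXT.pdf")  # illustrative local path
pdf_documents = loader.load()

# Same chunking parameters as the commented-out code in app.py.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
chunks = text_splitter.split_documents(pdf_documents)

# Same embedding model app.py uses at query time — the two must match.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vectorstore = FAISS.from_documents(chunks, embeddings)

# Writes faiss_index/index.faiss and faiss_index/index.pkl.
vectorstore.save_local("faiss_index")
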
faiss_index/index.faiss ADDED
Binary file (627 kB)

faiss_index/index.pkl ADDED
Binary file (205 kB)

requirements.txt ADDED
@@ -0,0 +1,10 @@
+ accelerate==0.21.0
+ transformers==4.31.0
+ tokenizers==0.13.3
+ bitsandbytes==0.40.0
+ einops==0.6.1
+ xformers==0.0.22.post7
+ langchain==0.1.4
+ faiss-gpu==1.7.1.post3
+ sentence_transformers
+ gradio
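
Once the Space is up, the Interface can also be queried programmatically. A small usage sketch with gradio_client, which is an assumption here (it is not in requirements.txt, and the Space id below is a placeholder):

# query_space.py — hypothetical client, assumes `pip install gradio_client`
from gradio_client import Client

client = Client("DANISHFAYAZNAJAR/<space-name>")  # placeholder Space id
answer, source_chunk = client.predict(
    "What does the regulation require from importers?",  # example question
    api_name="/predict",  # default endpoint name for a single gr.Interface
)
print(answer)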