mca183 committed on
Commit
f207bfc
·
1 Parent(s): c0e05c3

adding RAG feature

Browse files
Files changed (1) hide show
  1. app.py +18 -4
app.py CHANGED
@@ -2,6 +2,8 @@ from langchain.document_loaders import HuggingFaceDatasetLoader
2
  from langchain.text_splitter import RecursiveCharacterTextSplitter
3
  from langchain.embeddings import HuggingFaceEmbeddings
4
  from langchain.vectorstores import FAISS
 
 
5
  import gradio as gr
6
 
7
 
@@ -24,12 +26,24 @@ embeddings = HuggingFaceEmbeddings(
24
  db = FAISS.from_documents(docs, embeddings)
25
 
26
  # Set up retrievers
27
- retriever = db.as_retriever()
 
 
 
28
 
 
 
 
 
 
 
 
29
 
30
- def generate(input):
31
- docs = retriever.get_relevant_documents(input)
32
- return docs[0].page_content
 
 
33
 
34
 
35
  def respond(message, chat_history):
 
2
  from langchain.text_splitter import RecursiveCharacterTextSplitter
3
  from langchain.embeddings import HuggingFaceEmbeddings
4
  from langchain.vectorstores import FAISS
5
+
6
+ from transformers import AutoTokenizer, pipeline
7
  import gradio as gr
8
 
9
 
 
26
  db = FAISS.from_documents(docs, embeddings)
27
 
28
  # Set up retrievers
29
+ retriever = db.as_retriever(search_kwargs={"k": 4})
30
+
31
+ # Load the tokenizer associated with the specified model
32
+ tokenizer = AutoTokenizer.from_pretrained("Intel/dynamic_tinybert", padding=True, truncation=True, max_length=512)
33
 
34
+ # Define a question-answering pipeline using the model and tokenizer
35
+ question_answerer = pipeline(
36
+ "question-answering",
37
+ model="Intel/dynamic_tinybert",
38
+ tokenizer=tokenizer,
39
+ return_tensors='pt'
40
+ )
41
 
42
+ def generate(question):
43
+ docs = retriever.get_relevant_documents(question)
44
+ context = docs[0].page_content
45
+ squad_ex = question_answerer(question=question, context=context)
46
+ return squad_ex['answer']
47
 
48
 
49
  def respond(message, chat_history):