anasmkh committed on
Commit
7f1e53f
·
verified ·
1 Parent(s): b5d2631

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +32 -30
main.py CHANGED
@@ -1,50 +1,52 @@
1
- from langchain.chains import RetrievalQA, ConversationalRetrievalChain
2
- from langchain.vectorstores import Chroma
3
- from langchain.text_splitter import CharacterTextSplitter
4
- from langchain.document_loaders import DirectoryLoader, TextLoader,PyPDFLoader
5
- from transformers import pipeline, AutoModelForCausalLM
6
  from langchain.llms import HuggingFacePipeline
7
- from langchain.embeddings import HuggingFaceInstructEmbeddings
8
- import gradio as gr
9
- from InstructorEmbedding import INSTRUCTOR
10
- import torch
11
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
14
 
 
15
 
16
- tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
17
- model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
18
 
19
  pipe = pipeline(
20
  "text2text-generation",
21
  model=model,
22
  tokenizer=tokenizer,
23
- max_length=200,
24
- temperature=0.8,
25
- top_p=0.95,
26
  repetition_penalty=1.15,
27
  do_sample=True
28
- )
29
 
 
30
  local_llm = HuggingFacePipeline(pipeline=pipe)
31
- loader = PyPDFLoader('bipolar.pdf')
32
- # loader = TextLoader('info.txt')
33
- document = loader.load()
34
- text_spliter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
35
- texts = text_spliter.split_documents(document)
36
- embedding = HuggingFaceInstructEmbeddings()
37
- docsearch = Chroma.from_documents(texts, embedding, persist_directory='db')
38
-
39
- retriever = docsearch.as_retriever(search_kwargs={"k": 3})
40
- qa_chain = RetrievalQA.from_chain_type(llm=local_llm,
41
- chain_type="stuff",
42
- retriever=retriever,
43
- return_source_documents=True)
44
 
45
  def gradinterface(query,history):
46
  result = qa_chain({'query': query})
47
- return result['result']
48
 
49
 
50
  demo = gr.ChatInterface(fn=gradinterface, title='OUR_OWN_BOT')
 
1
+ from langchain_community.document_loaders import TextLoader
2
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
3
+ from langchain_community.vectorstores import FAISS
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
 
5
  from langchain.llms import HuggingFacePipeline
6
+ from transformers import pipeline
 
 
 
7
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
8
+ from langchain.chains import RetrievalQA
9
+ import torch
10
+ loader = TextLoader("info.txt")
11
+ docs = loader.load()
12
+ text_splitter = RecursiveCharacterTextSplitter()
13
+ # text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
14
+ documents = text_splitter.split_documents(docs)
15
+
16
+ huggingface_embeddings = HuggingFaceBgeEmbeddings(
17
+ model_name="BAAI/bge-small-en-v1.5",
18
+ model_kwargs={'device':'cpu'},
19
+ encode_kwargs={'normalize_embeddings': True}
20
+ )
21
+
22
+ vector = FAISS.from_documents(documents, huggingface_embeddings)
23
+ retriever = vector.as_retriever()
24
 
25
+ model_name = "facebook/bart-base"
26
 
27
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
28
 
29
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
30
 
31
  pipe = pipeline(
32
  "text2text-generation",
33
  model=model,
34
  tokenizer=tokenizer,
35
+ max_length=300,
36
+ temperature=0.9,
37
+ top_p=0.9,
38
  repetition_penalty=1.15,
39
  do_sample=True
 
40
 
41
+ )
42
  local_llm = HuggingFacePipeline(pipeline=pipe)
43
+ qa_chain = RetrievalQA.from_llm(llm=local_llm, retriever=retriever)
44
+
45
+
 
 
 
 
 
 
 
 
 
 
46
 
47
  def gradinterface(query,history):
48
  result = qa_chain({'query': query})
49
+ return result
50
 
51
 
52
  demo = gr.ChatInterface(fn=gradinterface, title='OUR_OWN_BOT')