zac committed on
Commit f8686c3 · 1 Parent(s): 029f102

Update app.py

Files changed (1): app.py (+28, -0)
app.py CHANGED
@@ -5,6 +5,14 @@ import ctypes  # to run on the C API directly
 import llama_cpp
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download  # load from the Hugging Face Hub
+from dotenv import load_dotenv
+from PyPDF2 import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.chat_models import ChatOpenAI
+from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
 
 
 llm = Llama(model_path=hf_hub_download(repo_id="TheBloke/Dolphin-Llama2-7B-GGML", filename="dolphin-llama2-7b.ggmlv3.q4_1.bin"), n_ctx=2048)  # download the model from the Hub; n_ctx=2048 for a large context window
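For orientation between the two hunks: the Llama object loaded above is callable, and llama-cpp-python returns OpenAI-style completion dicts. A minimal sketch of a direct call follows; the prompt string and sampling parameters are illustrative assumptions, not values taken from app.py.

# Minimal sketch: invoking the llama-cpp-python model loaded above.
# The prompt text and sampling parameters here are assumptions for
# illustration, not values from app.py.
output = llm(
    "USER: What does FAISS do?\nASSISTANT:",
    max_tokens=128,   # cap the completion length
    stop=["USER:"],   # stop when the model starts a new user turn
)
print(output["choices"][0]["text"])  # completion comes back OpenAI-style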
 
@@ -13,6 +21,26 @@ history = []
 
 pre_prompt = " The user and the AI are having a conversation : <|endoftext|> \n "
 
+def get_pdf_text(pdfs):
+    text = ""
+    for pdf in pdfs:
+        pdf_reader = PdfReader(pdf)
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+    return text
+
+def get_text_chunks(text):
+    text_splitter = CharacterTextSplitter(separator="\n",
+        chunk_size=1000, chunk_overlap=200, length_function=len)
+    chunks = text_splitter.split_text(text)
+    return chunks
+
+def get_vectorstore(text_chunks):
+    embeddings = OpenAIEmbeddings()
+    # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
+    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+    return vectorstore
+
 def generate_text(input_text, history):
     print("history ", history)
     print("input ", input_text)