PabloVD committed
Commit bc84f5e · 2 Parent(s): a08531a 127f3c4

Merge branch 'newbranch' into main

Files changed (3):
  1. app.py +2 -1
  2. requirements.txt +6 -14
  3. worker.py +20 -13
app.py CHANGED
@@ -10,7 +10,8 @@ url = 'https://camels.readthedocs.io/_/downloads/en/latest/pdf/'
 r = requests.get(url, stream=True)
 document_path = Path('metadata.pdf')
 document_path.write_bytes(r.content)
-worker.process_document(str(document_path))
+# document_path="2022GS.pdf"
+worker.process_document(document_path)
 
 def handle_prompt(message, history):
     bot_response = worker.process_prompt(message, history)
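Note on the change above: pathlib.Path implements os.PathLike, so process_document can take the Path object directly once it normalizes its argument. A minimal sketch of that pattern, assuming a normalization step that is not shown in this commit:

    import os
    from pathlib import Path

    def process_document(document_path):
        # os.fspath() accepts both str and pathlib.Path and returns a plain
        # string, so callers no longer need to wrap the path in str().
        path_str = os.fspath(document_path)
        print(f"Processing {path_str}")

    process_document(Path("metadata.pdf"))  # Path object, as app.py now passes
    process_document("2022GS.pdf")          # a plain string still works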
requirements.txt CHANGED
@@ -1,17 +1,9 @@
-Flask
-Flask_Cors
 pdf2image
 pypdf
 tiktoken
-pandas==1.5
-langchain==0.0.254
-atlassian-python-api==3.36.0
-chromadb==0.3.25
-huggingface-hub==0.16.4
-torch==2.0.1
-sentence-transformers==2.2.2
-InstructorEmbedding==1.0.0
-p4python==2023.1.2454917
-lxml==4.9.2
-bs4==0.0.1
-ibm-watson-machine-learning
+langchain
+langchain-community
+langchain-huggingface
+chromadb
+InstructorEmbedding
+huggingface_hub==0.25.2
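The replacement entries follow the post-0.1 langchain split, in which community integrations (langchain-community) and the HuggingFace bindings (langchain-huggingface) ship as separate distributions. A quick smoke test, assuming the module names match the package names as usual:

    # Each import should resolve once the updated requirements are installed.
    import langchain
    import langchain_community
    import langchain_huggingface
    import chromadb
    import InstructorEmbedding
    import huggingface_hub

    # huggingface_hub is the only remaining pin; expect this to print 0.25.2.
    print(huggingface_hub.__version__)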
worker.py CHANGED
@@ -1,14 +1,24 @@
 import torch
 from langchain.chains import RetrievalQA
-from langchain.embeddings import HuggingFaceInstructEmbeddings
-from langchain.document_loaders import PyPDFLoader
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
+from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.vectorstores import Chroma
-from langchain.llms import HuggingFaceHub
-import os
+from langchain_community.vectorstores import Chroma
+from langchain_huggingface import HuggingFaceEndpoint
+# import pip
+
+# def install(package):
+#     if hasattr(pip, 'main'):
+#         pip.main(['install', package])
+#     else:
+#         pip._internal.main(['install', package])
+
+# # Temporary fix for incompatibility between langchain_huggingface and sentence-transformers<2.6
+# install("sentence-transformers==2.2.2")
 
 # Check for GPU availability and set the appropriate device for computation.
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
+# DEVICE = "cpu"
 
 # Global variables
 conversation_retrieval_chain = None
@@ -20,10 +30,10 @@ embeddings = None
 def init_llm():
     global llm_hub, embeddings
     # Set up the environment variable for HuggingFace and initialize the desired model.
-    tokenfile = open("api_token.txt")
-    api_token = tokenfile.readline().replace("\n","")
-    tokenfile.close()
-    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
+    # tokenfile = open("api_token.txt")
+    # api_token = tokenfile.readline().replace("\n","")
+    # tokenfile.close()
+    # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
 
     # repo name for the model
     # model_id = "tiiuae/falcon-7b-instruct"
@@ -32,16 +42,13 @@ def init_llm():
     # model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
     # load the model into the HuggingFaceHub
-    #llm_hub = HuggingFaceHub(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
-    llm_hub = HuggingFaceHub(repo_id=model_id, model_kwargs={"temperature": 0.1, "max_new_tokens": 600, "max_length": 600})
+    llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
     llm_hub.client.api_url = 'https://api-inference.huggingface.co/models/'+model_id
     # llm_hub.invoke('foo bar')
 
     #Initialize embeddings using a pre-trained model to represent the text data.
     embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
     # embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
-
-    # emb_model = SentenceTransformer(embedddings_model)
 
     embeddings = HuggingFaceInstructEmbeddings(
         model_name=embedddings_model,
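For orientation, a minimal standalone sketch of the APIs this diff migrates to: HuggingFaceEndpoint from langchain_huggingface replaces the old langchain.llms.HuggingFaceHub, and the embeddings class moves to langchain_community. The environment-variable token handling and the choice of Mixtral are assumptions for illustration, since the active model_id assignment falls outside the hunks shown:

    from langchain_huggingface import HuggingFaceEndpoint
    from langchain_community.embeddings import HuggingFaceInstructEmbeddings

    # Assumed: the HuggingFace token is supplied via the environment
    # (export HUGGINGFACEHUB_API_TOKEN=...) instead of the api_token.txt
    # file that the diff comments out.
    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # one of the candidates listed above

    llm_hub = HuggingFaceEndpoint(
        repo_id=model_id,
        temperature=0.1,     # sampling options are top-level kwargs here,
        max_new_tokens=600,  # not nested in model_kwargs as with HuggingFaceHub
    )

    embeddings = HuggingFaceInstructEmbeddings(
        model_name="sentence-transformers/multi-qa-distilbert-cos-v1",
    )

HuggingFaceInstructEmbeddings still depends on InstructorEmbedding and sentence-transformers, which is what the commented-out install("sentence-transformers==2.2.2") workaround in the imports block addresses.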