Deepakraj2006 committed
Commit 5c01bc1 · verified · 1 Parent(s): 15507f3

Update worker_huggingFace.py

Files changed (1)
  1. worker_huggingFace.py +12 -28
worker_huggingFace.py CHANGED
@@ -8,7 +8,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
 from langchain.llms import HuggingFaceHub
 
-# Check for GPU availability and set the appropriate device for computation.
+# Check for GPU availability
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 # Global variables
@@ -17,67 +17,51 @@ chat_history = []
 llm_hub = None
 embeddings = None
 
-# Function to initialize the language model and its embeddings
 def init_llm():
     global llm_hub, embeddings
-    # Set up the environment variable for HuggingFace and initialize the desired model.
-    os.environ["HUGGINGFACEHUB_API_TOKEN"] = "YOUR API KEY"
 
-    # repo name for the model
+    # Ensure API key is set in Hugging Face Spaces
+    hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    if not hf_token:
+        raise ValueError("HUGGINGFACEHUB_API_TOKEN is not set in environment variables.")
+
     model_id = "tiiuae/falcon-7b-instruct"
-    # load the model into the HuggingFaceHub
     llm_hub = HuggingFaceHub(repo_id=model_id, model_kwargs={"temperature": 0.1, "max_new_tokens": 600, "max_length": 600})
 
-    #Initialize embeddings using a pre-trained model to represent the text data.
     embeddings = HuggingFaceInstructEmbeddings(
         model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": DEVICE}
     )
 
-
-# Function to process a PDF document
 def process_document(document_path):
     global conversation_retrieval_chain
 
-    # Load the document
     loader = PyPDFLoader(document_path)
     documents = loader.load()
 
-    # Split the document into chunks
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
     texts = text_splitter.split_documents(documents)
 
-    # Create an embeddings database using Chroma from the split text chunks.
     db = Chroma.from_documents(texts, embedding=embeddings)
 
-
-    # --> Build the QA chain, which utilizes the LLM and retriever for answering questions.
-    # By default, the vectorstore retriever uses similarity search.
-    # If the underlying vectorstore support maximum marginal relevance search, you can specify that as the search type (search_type="mmr").
-    # You can also specify search kwargs like k to use when doing retrieval. k represent how many search results send to llm
     conversation_retrieval_chain = RetrievalQA.from_chain_type(
         llm=llm_hub,
         chain_type="stuff",
         retriever=db.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25}),
         return_source_documents=False,
-        input_key = "question"
-        # chain_type_kwargs={"prompt": prompt} # if you are using prompt template, you need to uncomment this part
+        input_key="question"
     )
 
-
-# Function to process a user prompt
 def process_prompt(prompt):
-    global conversation_retrieval_chain
-    global chat_history
-
-    # Query the model
+    global conversation_retrieval_chain, chat_history
+
+    if not conversation_retrieval_chain:
+        return "No document has been processed yet. Please upload a PDF first."
+
     output = conversation_retrieval_chain({"question": prompt, "chat_history": chat_history})
     answer = output["result"]
 
-    # Update the chat history
     chat_history.append((prompt, answer))
 
-    # Return the model's response
     return answer
 
-# Initialize the language model
init_llm()
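
For reference, a minimal driver sketch showing how the updated worker could be used, assuming the file is importable as worker_huggingFace and that the token is supplied through the environment (for example as a Space secret, rather than the hard-coded key this commit removes). The PDF path and the questions below are hypothetical placeholders.

import os

# init_llm() runs at import time and now raises ValueError when the token is
# missing, so check HUGGINGFACEHUB_API_TOKEN before importing the module.
if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
    raise SystemExit("Set HUGGINGFACEHUB_API_TOKEN before starting the worker.")

import worker_huggingFace as worker

# Index a PDF, then ask questions against it; chat_history accumulates
# (prompt, answer) pairs across calls.
worker.process_document("example.pdf")  # hypothetical file path
print(worker.process_prompt("What is this document about?"))
print(worker.process_prompt("Summarize the key points."))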