Moha782 committed on
Commit
edc2346
·
verified ·
1 Parent(s): 42961d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -5
app.py CHANGED
@@ -1,18 +1,34 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  """
6
  For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
  """
8
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
9
 
10
- # Load the question-answering pipeline
11
- qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
12
 
13
  def respond(
14
  message,
15
- history: list[tuple[str, str]],
16
  system_message,
17
  max_tokens,
18
  temperature,
@@ -28,6 +44,11 @@ def respond(
28
 
29
  messages.append({"role": "user", "content": message})
30
 
 
 
 
 
 
31
  response = ""
32
 
33
  for message in client.chat_completion(
@@ -36,12 +57,12 @@ def respond(
36
  stream=True,
37
  temperature=temperature,
38
  top_p=top_p,
 
39
  ):
40
  token = message.choices[0].delta.content
41
 
42
- response = qa_pipeline(question=message, context=system_message)["answer"]
43
  response += token
44
- return response
45
 
46
  """
47
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  from transformers import pipeline
4
+ from typing import List, Dict, Tuple
5
+ import re
6
+ import os
7
+
8
+ # Set up the retriever pipeline
9
+ retriever = pipeline('retrieval', model='facebook/rag-token-nq')
10
+
11
+ # Load your PDF document
12
+ pdf_path = "apexcustoms.pdf"
13
+ with open(pdf_path, 'rb') as f:
14
+ pdf_text = f.read().decode('utf-8', errors='ignore')
15
+
16
+ # Split the PDF text into chunks
17
+ split_pattern = r'\n\n'
18
+ doc_chunks = re.split(split_pattern, pdf_text)
19
+
20
+ # Create the retriever input
21
+ corpus = [{"text": chunk} for chunk in doc_chunks]
22
 
23
  """
24
  For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
25
  """
26
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
27
 
 
 
28
 
29
  def respond(
30
  message,
31
+ history: List[Tuple[str, str]],
32
  system_message,
33
  max_tokens,
34
  temperature,
 
44
 
45
  messages.append({"role": "user", "content": message})
46
 
47
+ # Retrieve relevant context from the PDF
48
+ retrieval_output = retriever(message, corpus, top_k=3)
49
+ retrieved_contexts = [passage['text'] for passage in retrieval_output['retrieved_passages']]
50
+ context = ' '.join(retrieved_contexts)
51
+
52
  response = ""
53
 
54
  for message in client.chat_completion(
 
57
  stream=True,
58
  temperature=temperature,
59
  top_p=top_p,
60
+ context=context, # Include the retrieved context
61
  ):
62
  token = message.choices[0].delta.content
63
 
 
64
  response += token
65
+ yield response
66
 
67
  """
68
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface