GaborToth2 committed
Commit 1a7ff48 · Parent: 317ef9d

Implementing RAG

Files changed (2)
  1. app.py +29 -9
  2. original.py +1 -9
app.py CHANGED
@@ -2,10 +2,27 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import os
 import cohere
+import faiss
+from transformers import pipeline
+from sentence_transformers import SentenceTransformer
+
+documents = [
+    "The capital of France is Paris.",
+    "Python is a popular programming language.",
+    "The Eiffel Tower is located in Paris.",
+    "Llama is a type of animal found in South America.",
+    "Paris is known for its art, fashion, and culture.",
+    "Gabor Toth is the author of this document."
+]
+
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+document_embeddings = embedding_model.encode(documents, convert_to_tensor=True)
+document_embeddings_np = document_embeddings.cpu().numpy()
+
+index = faiss.IndexFlatL2(document_embeddings_np.shape[1])
+index.add(document_embeddings_np)
+
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
 client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct")
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 client_cohere = cohere.Client(COHERE_API_KEY)
@@ -20,7 +37,14 @@ def respond(
     top_p,
     use_cohere_api,
 ):
-    messages = [{"role": "system", "content": system_message}]
+
+    query_embedding = embedding_model.encode([message], convert_to_tensor=True)
+    query_embedding_np = query_embedding.cpu().numpy()
+    distances, indices = index.search(query_embedding_np, k=1)
+    relevant_document = documents[indices[0][0]]
+    messages = [{"role": "system", "content": system_message}, {"role": "system", "content": f"context: {relevant_document}"}]
+
+
 
     for val in history:
         if val[0]:
@@ -32,6 +56,7 @@ def respond(
 
     response = ""
 
+
     if use_cohere_api:
         cohere_response = client_cohere.chat(
             message=message,
@@ -53,10 +78,6 @@ def respond(
         response += token
         yield response
 
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -74,6 +95,5 @@ demo = gr.ChatInterface(
     ],
 )
 
-
 if __name__ == "__main__":
     demo.launch()
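The added code follows the standard RAG retrieval pattern: embed a fixed corpus once at startup, index the embeddings with FAISS, and at query time prepend the nearest document as extra system context. Below is a minimal standalone sketch of just that retrieval step, assuming `faiss-cpu` and `sentence-transformers` are installed; the query string is illustrative and not from the commit.

# Minimal sketch of the retrieval step this commit adds (not the app itself).
import faiss
from sentence_transformers import SentenceTransformer

documents = [
    "The capital of France is Paris.",
    "The Eiffel Tower is located in Paris.",
    "Llama is a type of animal found in South America.",
]

model = SentenceTransformer("all-MiniLM-L6-v2")

# encode() returns a float32 numpy array of shape (n_docs, 384) by default,
# which is what faiss expects; the committed code goes via a torch tensor
# and .cpu().numpy(), which yields the same thing.
embeddings = model.encode(documents)

index = faiss.IndexFlatL2(embeddings.shape[1])  # exact (brute-force) L2 search
index.add(embeddings)

# Retrieve the single nearest document for a query, as respond() does.
query = model.encode(["Where is the Eiffel Tower?"])
distances, indices = index.search(query, k=1)
print(documents[indices[0][0]])  # "The Eiffel Tower is located in Paris."

A flat L2 index is a sensible choice at this scale (six documents); approximate indexes only pay off on much larger corpora.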
original.py CHANGED
@@ -1,11 +1,8 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 def respond(
     message,
@@ -39,10 +36,6 @@ def respond(
         response += token
         yield response
 
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -59,6 +52,5 @@ demo = gr.ChatInterface(
     ],
 )
 
-
 if __name__ == "__main__":
     demo.launch()
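For reference, original.py matches the stock Gradio ChatInterface template for Hugging Face Spaces: the `response += token` / `yield response` lines elided above sit inside a streaming loop over `InferenceClient.chat_completion(..., stream=True)`. A hedged sketch of that loop, reconstructed from the template rather than copied from the repo:

from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Rebuild the chat transcript in the format the chat-completion API expects.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # delta.content can be None on some stream chunks
            response += token
            yield response  # Gradio re-renders the chat with the growing reply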