SansarK committed
Commit 6abc97a · verified · 1 Parent(s): 034b84a

Update rag_with_mircosoftphi2_and_hf_embeddings.py

rag_with_mircosoftphi2_and_hf_embeddings.py CHANGED
@@ -1,28 +1,7 @@
-# -*- coding: utf-8 -*-
-"""RAG_with_MircosoftPhi2_and_HF_Embeddings.ipynb
-
-Automatically generated by Colab.
-
-Original file is located at
-https://colab.research.google.com/github/sumant1122/RAG-Phi2-LlamaIndex/blob/main/RAG_with_MircosoftPhi2_and_HF_Embeddings.ipynb
-"""
-
-!pip install -q pypdf
-!pip install -q python-dotenv
-!pip install -q llama-index
-!pip install -q llama-index-llms-huggingface
-!pip install -q llama-index-embeddings-huggingface
-!pip install -q gradio
-!pip install einops
-!pip install accelerate
-!pip install -q llama-cpp-python
-
-!pip install llama-index-llms-llama-cpp llama-index-embeddings-huggingface
-
 from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext
 import torch
 
-documents = SimpleDirectoryReader("/content/rag").load_data()
+documents = SimpleDirectoryReader("data").load_data()
 
 """New sectiond"""
 
@@ -38,20 +17,18 @@ query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")
 model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf"
 
 llm = LlamaCPP(
-    # You can pass in the URL to a GGML model to download it automatically
-    model_url=model_url,
     # optionally, you can set the path to a pre-downloaded model instead of model_url
-    model_path=None,
+    model_path="model.gguf",
     temperature=0.1,
     max_new_tokens=256,
     # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
-    context_window=3900,
+    context_window=4096,
     # kwargs to pass to __call__()
     generate_kwargs={},
     # kwargs to pass to __init__()
     # set to at least 1 to use GPU
-    model_kwargs={"n_gpu_layers": 1},
-    verbose=True,
+    model_kwargs={"n_gpu_layers": 0},
+    verbose=True
 )
 
 """HuggingFace Embeddings"""