aklai committed
Commit a81cab2 · 1 Parent(s): 0b8c276

Update space

Files changed (2)
  1. app.py +27 -7
  2. requirements.txt +1 -1
app.py CHANGED
@@ -11,6 +11,7 @@ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 from langchain_core.runnables import RunnableParallel
 from langchain_core.runnables import RunnablePassthrough
 from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
 from langchain_chroma import Chroma
 
 
@@ -18,12 +19,31 @@ from langchain_chroma import Chroma
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 
-# LLM Model#
-llm = HuggingFacePipeline.from_model_id(
-    model_id="llmware/bling-phi-3-gguf",
-    task="text-generation",
-    pipeline_kwargs={"max_new_tokens": 100},
-)
+from langchain.llms.base import LLM
+from typing import Optional, List, Mapping, Any
+import subprocess
+
+class LlamaCppLLM(LLM):
+    model_path: str  # Path to the GGUF model
+    n_ctx: int = 2048  # Context window size
+
+    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+        # Use llama.cpp to generate a response
+        command = [
+            "./main",  # Path to llama.cpp executable
+            "-m", self.model_path,
+            "-p", prompt,
+            "--ctx-size", str(self.n_ctx),
+        ]
+        result = subprocess.run(command, capture_output=True, text=True)
+        return result.stdout
+
+    @property
+    def _llm_type(self) -> str:
+        return "llama-cpp"
+
+# Initialize the custom LLM
+llm = LlamaCppLLM(model_path="path/to/bling-phi-3-gguf.bin")
 
 # Initialize embedding model "all-MiniLM-L6-v2"
 embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
@@ -32,7 +52,7 @@ embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 vector_store = Chroma(persist_directory="./chroma_db", embedding_function=embedding_model)
 
 # See full prompt at https://smith.langchain.com/hub/rlm/rag-prompt
-# Basically a solid prompt for RAG
+# Define the prompt
 prompt = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
 Question: {question}
 Context: {context}
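
The imports this commit keeps in place (RunnableParallel, RunnablePassthrough, StrOutputParser, ChatPromptTemplate) suggest the new LlamaCppLLM is meant to be composed into an LCEL-style RAG chain. Below is a minimal sketch of that wiring; it is an assumption based on the diff, not code from the repository, and the retriever and format_docs helper are illustrative names.

# Hypothetical wiring of the commit's pieces into a RAG chain (not in the diff).
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Assumption: expose the Chroma store as a retriever and flatten the hits to text.
retriever = vector_store.as_retriever()

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_prompt = ChatPromptTemplate.from_template(prompt)

rag_chain = (
    RunnableParallel(
        context=retriever | format_docs,  # retrieved chunks fill {context}
        question=RunnablePassthrough(),   # the raw query fills {question}
    )
    | rag_prompt
    | llm                # the subprocess-backed LlamaCppLLM from the diff
    | StrOutputParser()  # reduce the model output to a plain string
)

# Example: answer = rag_chain.invoke("What is covered in the indexed documents?")

One caveat with the subprocess approach: llama.cpp's main binary typically echoes the prompt before the completion, so result.stdout may need the prompt prefix stripped before it is returned.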
requirements.txt CHANGED
@@ -11,4 +11,4 @@ langchain-ollama
 chromadb
 pypdf
 bs4
-langchain-chroma
+langchain-chroma