aklai committed
Commit a81cab2 · 1 Parent(s): 0b8c276

Update space

Files changed (2)
  1. app.py +27 -7
  2. requirements.txt +1 -1
app.py CHANGED
@@ -11,6 +11,7 @@ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 from langchain_core.runnables import RunnableParallel
 from langchain_core.runnables import RunnablePassthrough
 from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
 from langchain_chroma import Chroma
 
 
@@ -18,12 +19,31 @@ from langchain_chroma import Chroma
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 
-# LLM Model#
-llm = HuggingFacePipeline.from_model_id(
-    model_id="llmware/bling-phi-3-gguf",
-    task="text-generation",
-    pipeline_kwargs={"max_new_tokens": 100},
-)
+from langchain.llms.base import LLM
+from typing import Optional, List, Mapping, Any
+import subprocess
+
+class LlamaCppLLM(LLM):
+    model_path: str  # Path to the GGUF model
+    n_ctx: int = 2048  # Context window size
+
+    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+        # Use llama.cpp to generate a response
+        command = [
+            "./main",  # Path to llama.cpp executable
+            "-m", self.model_path,
+            "-p", prompt,
+            "--ctx-size", str(self.n_ctx),
+        ]
+        result = subprocess.run(command, capture_output=True, text=True)
+        return result.stdout
+
+    @property
+    def _llm_type(self) -> str:
+        return "llama-cpp"
+
+# Initialize the custom LLM
+llm = LlamaCppLLM(model_path="path/to/bling-phi-3-gguf.bin")
 
 # Initialize embedding model "all-MiniLM-L6-v2"
 embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
@@ -32,7 +52,7 @@ embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 vector_store = Chroma(persist_directory="./chroma_db", embedding_function=embedding_model)
 
 # See full prompt at https://smith.langchain.com/hub/rlm/rag-prompt
-# Basically a solid prompt for RAG
+# Define the prompt
 prompt = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
 Question: {question}
 Context: {context}
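
The imports this commit keeps in place (RunnableParallel, RunnablePassthrough, StrOutputParser, ChatPromptTemplate) suggest the new LlamaCppLLM is meant to be composed into an LCEL-style RAG chain. Below is a minimal sketch of that wiring; it is an assumption based on the diff, not code from the repository, and the retriever and format_docs helper are illustrative names.

# Hypothetical wiring of the commit's pieces into a RAG chain (not in the diff).
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Assumption: expose the Chroma store as a retriever and flatten the hits to text.
retriever = vector_store.as_retriever()

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_prompt = ChatPromptTemplate.from_template(prompt)

rag_chain = (
    RunnableParallel(
        context=retriever | format_docs,  # retrieved chunks fill {context}
        question=RunnablePassthrough(),   # the raw query fills {question}
    )
    | rag_prompt
    | llm                # the subprocess-backed LlamaCppLLM from the diff
    | StrOutputParser()  # reduce the model output to a plain string
)

# Example: answer = rag_chain.invoke("What is covered in the indexed documents?")

One caveat with the subprocess approach: llama.cpp's main binary typically echoes the prompt before the completion, so result.stdout may need the prompt prefix stripped before it is returned.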
requirements.txt CHANGED
@@ -11,4 +11,4 @@ langchain-ollama
 chromadb
 pypdf
 bs4
-langchain-chroma
+langchain-chroma