aklai committed
Commit 0b8c276 · 1 Parent(s): 235cdce

Update space

Files changed (1)
1. app.py +11 -7
app.py CHANGED
@@ -7,7 +7,7 @@ from langchain_ollama.llms import OllamaLLM
 
 from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 
-from langchain import hub
+#from langchain import hub
 from langchain_core.runnables import RunnableParallel
 from langchain_core.runnables import RunnablePassthrough
 from langchain_core.output_parsers import StrOutputParser
@@ -20,9 +20,9 @@ For more information on `huggingface_hub` Inference API support, please check th
 
 # LLM Model#
 llm = HuggingFacePipeline.from_model_id(
-    model_id="meta-llama/Llama-3.2-3B",
+    model_id="llmware/bling-phi-3-gguf",
     task="text-generation",
-    pipeline_kwargs={"max_new_tokens": 10},
+    pipeline_kwargs={"max_new_tokens": 100},
 )
 
 # Initialize embedding model "all-MiniLM-L6-v2"
@@ -33,7 +33,10 @@ vector_store = Chroma(persist_directory="./chroma_db", embedding_function=embedd
 
 # See full prompt at https://smith.langchain.com/hub/rlm/rag-prompt
 # Basically a solid prompt for RAG
-prompt = hub.pull("rlm/rag-prompt")
+prompt = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
+Question: {question}
+Context: {context}
+Answer:"""
 
 # Define a new chain to return both the answer and the sources
 qa_chain_with_sources = (
@@ -52,13 +55,14 @@ qa_chain_with_sources = (
 # Function to call a RAG LLM query
 def rag_query(query, history):
     # Invoke the chain
-    response = qa_chain_with_sources.invoke(query)
+    r = qa_chain_with_sources.invoke(query)
 
-    answer = response["answer"]
-    unique_sources = list(set(response["sources"]))
+    answer = r["answer"]
+    unique_sources = list(set(r["sources"]))
 
     # Print answers + sources
     output = f"Answer: {answer}\n\nSources:\n" + "\n".join(unique_sources)
+
     return output
 
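
Note: the commit swaps the hub-pulled prompt for a raw template string and shortens the response variable, but the body of qa_chain_with_sources sits outside the diff. As a reading aid, here is a minimal sketch of how the pieces could fit together, assuming the string is wrapped in a PromptTemplate, the retriever comes from the Chroma vector_store shown in the hunk context, and a hypothetical format_docs helper joins the retrieved chunks. It follows the common LangChain answer-with-sources pattern and is not the actual code in this Space.

# Sketch only; retriever and format_docs are assumptions, not taken from this commit.
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

prompt_template = PromptTemplate.from_template(prompt)  # fills {question} and {context}
retriever = vector_store.as_retriever()                 # assumed: Chroma store defined earlier in app.py

def format_docs(docs):
    # Concatenate retrieved chunks into one context string for the prompt
    return "\n\n".join(doc.page_content for doc in docs)

# Produce the answer from the retrieved documents
answer_chain = (
    RunnablePassthrough.assign(context=lambda x: format_docs(x["context"]))
    | prompt_template
    | llm
    | StrOutputParser()
)

# Run retrieval once, then attach the answer and a list of source paths,
# matching the keys rag_query reads: r["answer"] and r["sources"]
qa_chain_with_sources = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(
    answer=answer_chain,
    sources=lambda x: [d.metadata.get("source", "") for d in x["context"]],
)

Keeping retrieval in a RunnableParallel and attaching answer and sources with .assign is what would let rag_query read both fields from a single invoke call.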