"""Chain factories for the Streamlit PDF question-answering app.

Builds either a retrieval-augmented QA chain over an uploaded PDF or a plain
prompt-to-LLM chain, both backed by an Ollama model.
"""

import streamlit as st
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import OllamaLLM

from services.pdf_processing import load_and_split_pdf
from services.vector_store import create_vector_store
# Prompt for the plain (non-retrieval) chain. Note that the single input
# variable is named "context" but receives the user's question.
PROMPT_TEMPLATE = """Question: {context}
Answer: Let's think step by step."""
@st.cache_resource
def initialize_qa_chain(filepath, model_name, temperature, top_p, max_tokens):
    """Build a retrieval-augmented QA chain over the PDF at `filepath`."""
    # Load the PDF, split it into chunks, and index the chunks in a vector store
    splits = load_and_split_pdf(filepath)
    vectordb = create_vector_store(splits)

    # Configure the Ollama-backed LLM
    llm = OllamaLLM(
        model=model_name,
        # base_url="https://deepak7376-ollama-server.hf.space",
        temperature=temperature,  # Controls randomness (0 = deterministic, 1 = max randomness)
        num_predict=max_tokens,   # Ollama's cap on generated tokens (its equivalent of max_tokens)
        top_p=top_p,              # Nucleus sampling for controlling diversity
    )
    # Retrieval-grounded prompt: answer only from the retrieved context
    system_prompt = (
        "Use the given context to answer the question. "
        "If you don't know the answer, say you don't know. "
        "Use three sentences maximum and keep the answer concise. "
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Stuff the retrieved documents into the prompt and wire in the retriever
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    chain = create_retrieval_chain(vectordb.as_retriever(), question_answer_chain)
    return chain
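
# Usage sketch (illustrative only; assumes a reachable Ollama server with the
# named model pulled — the file path, model name, and sampling values below are
# placeholders, not values used by the app):
#
#   qa_chain = initialize_qa_chain("docs/sample.pdf", "llama3", 0.1, 0.9, 256)
#   result = qa_chain.invoke({"input": "What is this document about?"})
#   print(result["answer"])  # create_retrieval_chain also returns the retrieved "context"
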
@st.cache_resource
def initialize_chain(model_name, temperature, top_p, max_tokens):
    """Build a plain prompt-to-LLM chain (no document retrieval)."""
    # Configure the Ollama-backed LLM
    llm = OllamaLLM(
        model=model_name,
        # base_url="https://deepak7376-ollama-server.hf.space",
        temperature=temperature,  # Controls randomness (0 = deterministic, 1 = max randomness)
        num_predict=max_tokens,   # Ollama's cap on generated tokens (its equivalent of max_tokens)
        top_p=top_p,              # Nucleus sampling for controlling diversity
    )

    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    chain = prompt | llm
    return chain
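

# Minimal smoke test (a sketch, not part of the app): assumes an Ollama server is
# reachable at its default address and that the "llama3" model has been pulled.
if __name__ == "__main__":
    demo_chain = initialize_chain("llama3", temperature=0.1, top_p=0.9, max_tokens=128)
    # The prompt's single variable is "context", so pass the question under that key
    print(demo_chain.invoke({"context": "What is retrieval-augmented generation?"}))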