import ollama
from langchain.chains import RetrievalQA
from langchain.chains import create_retrieval_chain
from langchain_ollama import OllamaLLM
from services.pdf_processing import load_and_split_pdf
from services.vector_store import create_vector_store
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
import streamlit as st
# NOTE: the only template variable is {context}, so the chain built in
# initialize_chain must be invoked with the user question under the "context" key.
PROMPT_TEMPLATE = """Question: {context}
Answer: Let's think step by step."""
def initialize_qa_chain(filepath, model_name, temperature, top_p, max_tokens):
    # Load the PDF, split it into chunks, and index the chunks in a vector store
    splits = load_and_split_pdf(filepath)
    vectordb = create_vector_store(splits)

    # Configure the Ollama LLM with sampling parameters
    llm = OllamaLLM(
        model=model_name,
        # base_url="https://deepak7376-ollama-server.hf.space",
        temperature=temperature,  # Controls randomness (0 = deterministic, 1 = max randomness)
        num_predict=max_tokens,   # Ollama's parameter for limiting the number of output tokens
        top_p=top_p               # Nucleus sampling for controlling diversity
    )
    # # Define strict retrieval-based prompting
    # prompt_template = PromptTemplate(
    #     template=(
    #         "You are an AI assistant that only answers questions based on the provided document. "
    #         "Do not use external knowledge. If you cannot find an answer in the document, respond with: 'I don't know.'\n\n"
    #         "Document Context:\n{context}\n\n"
    #         "User Question: {query}\n\n"
    #         "Assistant Answer:"
    #     ),
    #     input_variables=["context", "query"]
    # )
    system_prompt = (
        "Use the given context to answer the question. "
        "If you don't know the answer, say you don't know. "
        "Use three sentences maximum and keep the answer concise. "
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Stuff the retrieved documents into the prompt, then wrap it in a retrieval chain
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    chain = create_retrieval_chain(vectordb.as_retriever(), question_answer_chain)
    # return RetrievalQA.from_chain_type(
    #     llm=llm,
    #     chain_type="stuff",
    #     retriever=vectordb.as_retriever(),
    #     chain_type_kwargs={"prompt": prompt_template}
    # )
    return chain
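
# Usage sketch: the chain returned by create_retrieval_chain expects the question
# under the "input" key and returns a dict with an "answer" key alongside the
# retrieved "context" documents. The file path and model name below are
# illustrative placeholders, not values required by this module.
#
#   qa_chain = initialize_qa_chain("docs/sample.pdf", "llama3", temperature=0.1, top_p=0.9, max_tokens=256)
#   result = qa_chain.invoke({"input": "What is this document about?"})
#   print(result["answer"])
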
def initialize_chain(model_name, temperature, top_p, max_tokens):
    # Configure the Ollama LLM with sampling parameters
    llm = OllamaLLM(
        model=model_name,
        # base_url="https://deepak7376-ollama-server.hf.space",
        temperature=temperature,  # Controls randomness (0 = deterministic, 1 = max randomness)
        num_predict=max_tokens,   # Ollama's parameter for limiting the number of output tokens
        top_p=top_p               # Nucleus sampling for controlling diversity
    )
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    chain = prompt | llm
    return chain
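
# Usage sketch: because PROMPT_TEMPLATE's only variable is {context}, this plain
# prompt-to-LLM chain is invoked with the question under the "context" key and
# returns the model's reply as a string. The model name below is an illustrative
# placeholder.
#
#   chat_chain = initialize_chain("llama3", temperature=0.7, top_p=0.9, max_tokens=256)
#   reply = chat_chain.invoke({"context": "Why is the sky blue?"})
#   print(reply)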