import ollama
from langchain.chains import RetrievalQA
from langchain.chains import create_retrieval_chain
from langchain_ollama import OllamaLLM
from services.pdf_processing import load_and_split_pdf
from services.vector_store import create_vector_store
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
import streamlit as st
# NOTE: the only template variable is {context}, so the chain built in
# initialize_chain must be invoked with the user question under the "context" key.
PROMPT_TEMPLATE = """Question: {context}
Answer: Let's think step by step."""
def initialize_qa_chain(filepath, model_name, temperature, top_p, max_tokens):
    # Load the PDF, split it into chunks, and index the chunks in a vector store
    splits = load_and_split_pdf(filepath)
    vectordb = create_vector_store(splits)

    # Configure the Ollama LLM with sampling parameters
    llm = OllamaLLM(
        model=model_name,
        # base_url="https://deepak7376-ollama-server.hf.space",
        temperature=temperature,  # Controls randomness (0 = deterministic, 1 = max randomness)
        num_predict=max_tokens,   # Ollama's parameter for limiting the number of output tokens
        top_p=top_p               # Nucleus sampling for controlling diversity
    )
    # # Define strict retrieval-based prompting
    # prompt_template = PromptTemplate(
    #     template=(
    #         "You are an AI assistant that only answers questions based on the provided document. "
    #         "Do not use external knowledge. If you cannot find an answer in the document, respond with: 'I don't know.'\n\n"
    #         "Document Context:\n{context}\n\n"
    #         "User Question: {query}\n\n"
    #         "Assistant Answer:"
    #     ),
    #     input_variables=["context", "query"]
    # )
    system_prompt = (
        "Use the given context to answer the question. "
        "If you don't know the answer, say you don't know. "
        "Use three sentences maximum and keep the answer concise. "
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Stuff the retrieved documents into the prompt, then wrap it in a retrieval chain
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    chain = create_retrieval_chain(vectordb.as_retriever(), question_answer_chain)
    # return RetrievalQA.from_chain_type(
    #     llm=llm,
    #     chain_type="stuff",
    #     retriever=vectordb.as_retriever(),
    #     chain_type_kwargs={"prompt": prompt_template}
    # )
    return chain
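
# Usage sketch: the chain returned by create_retrieval_chain expects the question
# under the "input" key and returns a dict with an "answer" key alongside the
# retrieved "context" documents. The file path and model name below are
# illustrative placeholders, not values required by this module.
#
#   qa_chain = initialize_qa_chain("docs/sample.pdf", "llama3", temperature=0.1, top_p=0.9, max_tokens=256)
#   result = qa_chain.invoke({"input": "What is this document about?"})
#   print(result["answer"])
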
def initialize_chain(model_name, temperature, top_p, max_tokens):
    # Configure the Ollama LLM with sampling parameters
    llm = OllamaLLM(
        model=model_name,
        # base_url="https://deepak7376-ollama-server.hf.space",
        temperature=temperature,  # Controls randomness (0 = deterministic, 1 = max randomness)
        num_predict=max_tokens,   # Ollama's parameter for limiting the number of output tokens
        top_p=top_p               # Nucleus sampling for controlling diversity
    )
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    chain = prompt | llm
    return chain
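
# Usage sketch: because PROMPT_TEMPLATE's only variable is {context}, this plain
# prompt-to-LLM chain is invoked with the question under the "context" key and
# returns the model's reply as a string. The model name below is an illustrative
# placeholder.
#
#   chat_chain = initialize_chain("llama3", temperature=0.7, top_p=0.9, max_tokens=256)
#   reply = chat_chain.invoke({"context": "Why is the sky blue?"})
#   print(reply)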