"""Retrieval-augmented question answering backed by Pinecone and a local Llama model.

Importing this module builds the whole pipeline eagerly (embeddings, vector
store, prompt, LLM and the RetrievalQA chain) and exposes ``llama_call`` as
the single entry point for asking a question.
"""

import os

from langchain_pinecone import PineconeVectorStore
from langchain.prompts import PromptTemplate
from langchain_community.llms import CTransformers
from langchain.chains import RetrievalQA
from dotenv import load_dotenv

from src.helper import download_hugging_face_embeddings
from prompt import prompt_template

# Pull settings from a local .env file (if any) into the process environment
# before reading them below.
load_dotenv()

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_API_ENV = os.getenv("PINECONE_API_ENV")

embeddings = download_hugging_face_embeddings()

index_name = "llm-chatbot"

# Attach to the already-populated Pinecone index; nothing is created here.
docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

# The template is expected to contain the {context} and {question} slots.
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)
chain_type_kwargs = {"prompt": PROMPT}

# The model file is resolved against the current working directory, so the
# process must be launched from the directory that contains "saved_models".
current_dir = os.getcwd()
_model_path = os.path.join(
    current_dir, "saved_models/llama-2-7b-chat.ggmlv3.q4_0.bin"
)

llm = CTransformers(
    model=_model_path,
    model_type="llama",
    streaming=True,
    config={"max_new_tokens": 256, "temperature": 0.6, "context_length": -1},
)

# Retrieve the two closest documents per query and stuff them into the prompt.
_retriever = docsearch.as_retriever(search_kwargs={"k": 2})

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=_retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
    verbose=True,
)


def llama_call(input):
    """Answer a question through the module-level RetrievalQA chain.

    Args:
        input: The question, passed to the chain under the ``"query"`` key.

    Returns:
        The chain's ``"result"`` entry converted to ``str``. The source
        documents the chain also returns are discarded.
    """
    response = qa.invoke({"query": input})
    answer = response["result"]
    return str(answer)