Spaces:

UldisKK
/

TestRAGonPDFs

Sleeping

App Files Files Community

UldisKK committed on Jan 9, 2024

Commit

81e3d37

1 Parent(s): 874cdc5

add the rest of program

Browse files

Files changed (1) hide show

app.py +60 -0

app.py CHANGED Viewed

@@ -37,4 +37,64 @@ text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=51
 texts = text_splitter.split_documents(pdf_pages)  # chunk pdf_pages (defined outside this hunk) with the splitter from the hunk header
 st.write('total chunks from pages:', len(texts))  # report how many chunks the split produced

 texts = text_splitter.split_documents(pdf_pages)  # NOTE(review): repeats the split shown just above; likely a diff-view rendering artifact — confirm app.py splits only once
 st.write('total chunks from pages:', len(texts))  # report how many chunks the split produced
+st.write('loading chunks into vector db')  # progress message before the embedding / indexing step
+model_name = "hkunlp/instructor-large"  # embedding model id handed to HuggingFaceInstructEmbeddings
+hf_embeddings = HuggingFaceInstructEmbeddings(
+    model_name = model_name)
+db = Chroma.from_documents(texts, hf_embeddings)  # build the Chroma store from the chunks using hf_embeddings
+st.write('loading LLM')  # progress message before the tokenizer / model load
+model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"  # one repo id used for both the tokenizer and the quantized model
+tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+model_basename = "model"  # forwarded as model_basename= below; presumably the weight-file stem — TODO confirm
+use_triton = False  # forwarded as use_triton= below
+DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"  # first CUDA device when torch reports one, otherwise "cpu"
+model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
+        model_basename=model_basename,
+        use_safetensors=True,
+        trust_remote_code=True,  # NOTE(review): allows code shipped in the model repo to run — confirm the repo is trusted
+        device=DEVICE,  # NOTE(review): confirm the GPTQ model is actually usable on the "cpu" fallback
+        use_triton=use_triton,
+        quantize_config=None)
+st.write('setting up the chain')  # progress message before pipeline / chain construction
+streamer = TextStreamer(tokenizer, skip_prompt = True, skip_special_tokens = True)  # streamer configured to skip the prompt and special tokens
+text_pipeline = pipeline(task = 'text-generation', model = model, tokenizer = tokenizer, streamer = streamer)  # text-generation pipeline over the loaded model
+llm = HuggingFacePipeline(pipeline = text_pipeline)  # wrapper object later passed as llm= to RetrievalQA.from_chain_type
+def generate_prompt(prompt, sys_prompt):
+    return f"[INST] <<SYS>> {sys_prompt} <</SYS>> {prompt} [/INST]"
+sys_prompt = "Use following piece of context to answer the question in less than 20 words"  # system instruction; generate_prompt puts it in the <<SYS>> section
+template = generate_prompt(  # wrap the placeholder text below in the chat markers
+    """
+    {context}
+    Question : {question}
+    """
+    , sys_prompt)  # {context} and {question} remain literal placeholders for PromptTemplate
+prompt = PromptTemplate(template=template, input_variables=["context", "question"])  # declares the two placeholders as the template's inputs
+chain_type_kwargs = {"prompt": prompt}  # forwarded to RetrievalQA.from_chain_type as chain_type_kwargs
+qa_chain = RetrievalQA.from_chain_type(
+    llm=llm,
+    chain_type="stuff",
+    retriever=db.as_retriever(search_kwargs={"k": 2}),  # retriever over the Chroma store; k=2 presumably limits retrieval to two chunks — TODO confirm
+    return_source_documents = True,  # presumably includes the retrieved documents in each result — TODO confirm
+    chain_type_kwargs=chain_type_kwargs,
+)
+st.write('READY!!!')
+q1="what the author worked on ?"
+q2="where did author study?"
+q3="what author did ?"
+result = qa_chain(q1)
+st.write('question:', q1, 'result:', result)
+result = qa_chain(q2)
+st.write('question:', q2, 'result:', result)
+result = qa_chain(q3)
+st.write('question:', q3, 'result:', result)