mobinln committed on
Commit
3af157b
1 Parent(s): 7565566
Files changed (5)
  1. .gitignore +3 -0
  2. app.py +64 -0
  3. llm.py +61 -0
  4. requirements.txt +7 -0
  5. vector_store.py +40 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
+ /__pycache__
+ /temp
+ /models
app.py ADDED
@@ -0,0 +1,64 @@
+ import streamlit as st
+ from llm import load_llm, response_generator
+ from vector_store import load_vector_store, process_pdf
+ from uuid import uuid4
+
+ # repo_id = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
+ # filename = "qwen2.5-0.5b-instruct-q8_0.gguf"
+ repo_id = "MaziyarPanahi/Qwen2.5-7B-Instruct-GGUF"
+ filename = "Qwen2.5-7B-Instruct.Q4_K_S.gguf"
+
+ llm = load_llm(repo_id, filename)
+
+ st.title("PDF QA")
+ # Initialize chat history
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+
+ # Display chat messages from history on app rerun
+ for message in st.session_state.messages:
+     with st.chat_message(message["role"]):
+         if message["role"] == "user":
+             st.markdown(message["content"])
+         else:
+             st.code(message["content"])
+
+ # Accept user input
+ if prompt := st.chat_input("What is up?"):
+     # Add user message to chat history
+     st.session_state.messages.append({"role": "user", "content": prompt})
+     # Display user message in chat message container
+     with st.chat_message("user"):
+         st.markdown(prompt)
+
+     # Display assistant response in chat message container
+     with st.chat_message("assistant"):
+         vector_store = load_vector_store()
+         retriever = vector_store.as_retriever()
+         docs = retriever.get_relevant_documents(prompt)
+
+         response = response_generator(llm, st.session_state.messages, prompt, retriever)
+
+         st.markdown(response["answer"])
+
+     # Add assistant response to chat history
+     st.session_state.messages.append(
+         {"role": "assistant", "content": response["answer"]}
+     )
+
+ with st.sidebar:
+     st.title("PDFs")
+     st.write("Upload your pdfs here")
+     uploaded_files = st.file_uploader(
+         "Choose a PDF file", accept_multiple_files=True, type="pdf"
+     )
+     if uploaded_files is not None:
+         vector_store = load_vector_store()
+         for uploaded_file in uploaded_files:
+             temp_file = f"./temp/{uploaded_file.name}-{uuid4()}.pdf"
+             with open(temp_file, "wb") as file:
+                 file.write(uploaded_file.getvalue())
+
+             st.write("filename:", uploaded_file.name)
+             process_pdf(temp_file, vector_store)
+         st.success("PDFs uploaded successfully. ✅")
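
Taken together with llm.py and vector_store.py below, the assistant branch reduces to one retrieval call. The sketch below is a rough way to exercise that flow as a plain script rather than through the Streamlit UI (the @st.cache_resource decorators should still work there, just without a Streamlit session); the PDF path and question are made up for illustration.

from llm import load_llm, response_generator
from vector_store import load_vector_store, process_pdf

repo_id = "MaziyarPanahi/Qwen2.5-7B-Instruct-GGUF"
filename = "Qwen2.5-7B-Instruct.Q4_K_S.gguf"
llm = load_llm(repo_id, filename)

# Ingest one local PDF (hypothetical path), then ask a question against it.
vector_store = load_vector_store()
process_pdf("./temp/example.pdf", vector_store)

retriever = vector_store.as_retriever()
result = response_generator(llm, [], "What is this document about?", retriever)

# create_retrieval_chain returns a dict with "input", "context", and "answer".
print(result["answer"])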
llm.py ADDED
@@ -0,0 +1,61 @@
+ import streamlit as st
+ import pathlib
+
+ from huggingface_hub import hf_hub_download
+ from langchain_community.llms import LlamaCpp
+ from langchain.chains import create_retrieval_chain
+ from langchain.chains.combine_documents import create_stuff_documents_chain
+ from langchain_core.prompts import ChatPromptTemplate
+
+
+ @st.cache_resource()
+ def load_llm(repo_id, filename):
+     # Create a directory for models if it doesn't exist
+     models_folder = pathlib.Path("models")
+     models_folder.mkdir(exist_ok=True)
+
+     # Download the model
+     model_path = hf_hub_download(
+         repo_id=repo_id, filename=filename, local_dir=models_folder
+     )
+
+     llm = LlamaCpp(
+         model_path=model_path,
+         repo_id=repo_id,
+         filename=filename,
+         verbose=False,
+         use_mmap=True,
+         use_mlock=True,
+         n_threads=4,
+         n_threads_batch=4,
+         n_ctx=8000,
+     )
+     print(f"{repo_id} loaded successfully. ✅")
+     return llm
+
+
+ # Answer a question over retrieved context using a retrieval chain
+ def response_generator(llm, messages, question, retriever):
+     system_prompt = (
+         "You are an assistant for question-answering tasks. "
+         "Use the following pieces of retrieved context to answer "
+         "the question. If you don't know the answer, say that you "
+         "don't know. Use three sentences maximum and keep the "
+         "answer concise."
+         "\n\n"
+         "{context}"
+     )
+
+     prompt = ChatPromptTemplate.from_messages(
+         [
+             ("system", system_prompt),
+             ("user", "{input}"),
+         ]
+     )
+
+     question_answer_chain = create_stuff_documents_chain(llm, prompt)
+     rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+
+     results = rag_chain.invoke({"input": question})
+
+     return results
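
The dict returned by create_retrieval_chain carries the retrieved chunks under "context" alongside "answer", so a caller could also report which pages an answer drew on. A small sketch of such a helper, assuming the default "source" and "page" metadata keys that PyPDFLoader attaches to each chunk:

def cited_pages(results):
    """Collect (file, page) pairs from a create_retrieval_chain result."""
    pages = set()
    for doc in results["context"]:
        # PyPDFLoader normally stores the file path under "source" and the
        # zero-based page index under "page".
        pages.add((doc.metadata.get("source"), doc.metadata.get("page")))
    return sorted(pages, key=str)

# Usage: for source, page in cited_pages(response_generator(llm, messages, question, retriever)): ...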
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ huggingface_hub
+ llama-cpp-python
+ pypdf
+ langchain_community
+ chromadb
+ langchain-huggingface
+ langchain-chroma
vector_store.py ADDED
@@ -0,0 +1,40 @@
+ import streamlit as st
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+
+ @st.cache_resource()
+ def load_embedding_model(model):
+     """
+     sentence-transformers/all-mpnet-base-v2
+     sentence-transformers/all-MiniLM-L6-v2
+     """
+     model = HuggingFaceEmbeddings(model_name=model)
+     return model
+
+
+ def load_vector_store():
+     """
+     Loads a simple vector store.
+     Not wrapped in @st.cache because the store should be
+     loaded on every page load.
+     """
+     model = load_embedding_model("sentence-transformers/all-MiniLM-L6-v2")
+     vector_store = Chroma(
+         collection_name="main_store",
+         embedding_function=model,
+     )
+     return vector_store
+
+
+ def process_pdf(pdf, vector_store):
+     """
+     Loads a PDF, splits it into chunks, and adds them to the vector store.
+     """
+     loader = PyPDFLoader(pdf)
+     docs = loader.load()
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+     splits = text_splitter.split_documents(docs)
+     vector_store.add_documents(splits)
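
As written, Chroma here is an in-memory collection, so ingested PDFs only live for the current process and are gone after the app restarts. If on-disk persistence were wanted, langchain-chroma's Chroma also accepts a persist_directory; a minimal sketch, with ./chroma_db as an assumed location:

from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = Chroma(
    collection_name="main_store",
    embedding_function=embeddings,
    persist_directory="./chroma_db",  # assumed path; the collection is written to disk here
)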