Spaces:
Sleeping
Sleeping
Pratik Dwivedi
committed on
Commit
·
00062c3
1
Parent(s):
ca2a0f8
document loader
Browse files
- app.py +21 -30
- data/48lawsofpower.pdf +0 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,8 +1,11 @@
|
|
1 |
import streamlit as st
|
2 |
from llmware.prompts import Prompt
|
3 |
import requests
|
4 |
-
import io
|
5 |
import PyPDF2
|
|
|
|
|
|
|
6 |
|
7 |
def register_gguf_model():
|
8 |
|
@@ -29,48 +32,36 @@ def register_gguf_model():
|
|
29 |
# prompter.load_model(your_model_name)
|
30 |
return prompter
|
31 |
|
32 |
-
def load_pdf_from_url(url):
|
33 |
-
r = requests.get(url)
|
34 |
-
f = io.BytesIO(r.content)
|
35 |
-
file = PyPDF2.PdfReader(f)
|
36 |
-
return file
|
37 |
-
|
38 |
-
def load_pdf_content(pdf):
|
39 |
-
content = ""
|
40 |
-
for page in pdf.pages:
|
41 |
-
content += page.extract_text()
|
42 |
-
return content
|
43 |
-
|
44 |
-
|
45 |
def main():
|
46 |
st.title("BetterZila RAG Enabled LLM")
|
47 |
with st.spinner("Registering Models for use..."):
|
48 |
prompter = register_gguf_model()
|
49 |
-
|
|
|
|
|
|
|
|
|
50 |
model_name = st.sidebar.selectbox("Select Model", ["llama", "open_gpt4", "phi2", "mistral"])
|
51 |
-
with st.spinner("Loading
|
52 |
-
print(model_name)
|
53 |
prompter.load_model(model_name)
|
54 |
-
|
55 |
-
with st.spinner("Loading PDF content from the assignment URL..."):
|
56 |
-
url = "https://pgcag.files.wordpress.com/2010/01/48lawsofpower.pdf"
|
57 |
-
pdf = load_pdf_from_url(url)
|
58 |
-
content = load_pdf_content(pdf)
|
59 |
-
print("Loaded PDF content")
|
60 |
-
st.success("PDF content loaded!")
|
61 |
|
62 |
queries = ['Can you give me an example from history where the enemy was crushed totally from the book?', "What's the point of making myself less accessible?", "Can you tell me the story of Queen Elizabeth I from this 48 laws of power book?"]
|
63 |
|
64 |
for query in queries:
|
65 |
st.subheader(f"Query: {query}")
|
66 |
with st.spinner("Generating response..."):
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
72 |
st.success("Response generated!")
|
73 |
-
|
74 |
|
75 |
if __name__ == "__main__":
|
76 |
main()
|
|
|
1 |
import streamlit as st
|
2 |
from llmware.prompts import Prompt
|
3 |
import requests
|
4 |
+
import io, os, re
|
5 |
import PyPDF2
|
6 |
+
from langchain.text_splitter import CharacterTextSplitter
|
7 |
+
from langchain.embeddings import HuggingFaceInstructEmbeddings
|
8 |
+
from langchain.vectorstores import FAISS
|
9 |
|
10 |
def register_gguf_model():
|
11 |
|
|
|
32 |
# prompter.load_model(your_model_name)
|
33 |
return prompter
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
def main():
|
36 |
st.title("BetterZila RAG Enabled LLM")
|
37 |
with st.spinner("Registering Models for use..."):
|
38 |
prompter = register_gguf_model()
|
39 |
+
|
40 |
+
data_path = "data/"
|
41 |
+
|
42 |
+
# keep the select box to llama as default but give a button right below it that says select model after which the model will be loaded
|
43 |
+
st.sidebar.subheader("Select Model")
|
44 |
model_name = st.sidebar.selectbox("Select Model", ["llama", "open_gpt4", "phi2", "mistral"])
|
45 |
+
with st.spinner("Loading Model..."):
|
|
|
46 |
prompter.load_model(model_name)
|
47 |
+
st.success("Model Loaded!")
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
queries = ['Can you give me an example from history where the enemy was crushed totally from the book?', "What's the point of making myself less accessible?", "Can you tell me the story of Queen Elizabeth I from this 48 laws of power book?"]
|
50 |
|
51 |
for query in queries:
|
52 |
st.subheader(f"Query: {query}")
|
53 |
with st.spinner("Generating response..."):
|
54 |
+
for file in os.listdir(data_path):
|
55 |
+
if file.endswith(".pdf"):
|
56 |
+
source = prompter.add_source_document(data_path, file, query=None)
|
57 |
+
responses = prompter.prompt_with_source(query, prompt_name="just_the_facts", temperature=0.3)
|
58 |
+
for r, response in enumerate(responses):
|
59 |
+
print(query, ":", re.sub("[\n]"," ", response["llm_response"]).strip())
|
60 |
+
prompter.clear_source_materials()
|
61 |
+
st.write(query)
|
62 |
+
st.write(re.sub("[\n]"," ", response["llm_response"]).strip())
|
63 |
st.success("Response generated!")
|
64 |
+
|
65 |
|
66 |
if __name__ == "__main__":
|
67 |
main()
|
data/48lawsofpower.pdf
ADDED
Binary file (105 kB). View file
|
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
llmware
|
2 |
streamlit
|
3 |
requests
|
4 |
-
PyPDF2
|
|
|
|
1 |
llmware
|
2 |
streamlit
|
3 |
requests
|
4 |
+
PyPDF2
|
5 |
+
langchain
|