Pratik Dwivedi committed
Commit · 80effc2
1 Parent(s): c856835
test new version
- .gitignore +0 -2
- app_OG.py +77 -69
.gitignore DELETED
@@ -1,2 +0,0 @@
-.env
-.gitattributes
app_OG.py CHANGED
@@ -1,76 +1,84 @@
-from PyPDF2 import PdfReader
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.embeddings import HuggingFaceInstructEmbeddings
-from langchain.vectorstores import FAISS
-from langchain.chat_models import ChatOpenAI
-from langchain.memory import ConversationBufferMemory
-from langchain.chains import ConversationalRetrievalChain
-from langchain.llms import HuggingFaceHub

-def get_pdf_text(pdf_docs):
-    text = ""
-    for pdf in pdf_docs:
-        pdf_reader = PdfReader(pdf)
-        for page in pdf_reader.pages:
-            text += page.extract_text()
-    return text


-def get_text_chunks(text):
-    text_splitter = CharacterTextSplitter(
-        separator="\n",
-        chunk_size=1000,
-        chunk_overlap=200,
-        length_function=len
-    )
-    chunks = text_splitter.split_text(text)
-    return chunks
-
-
-def get_vectorstore(text_chunks):
-    # embeddings = OpenAIEmbeddings()
-    embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
-    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-    return vectorstore
-
-
-def get_conversation_chain(vectorstore):
-    # llm = ChatOpenAI()
-    llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
-
-    memory = ConversationBufferMemory(
-        # memory_key='chat_history',
-        return_messages=True)
-    conversation_chain = ConversationalRetrievalChain.from_llm(
-        llm=llm,
-        retriever=vectorstore.as_retriever(),
-        # memory=memory
-    )
-    return conversation_chain
-
-
-def main():
-
-    # if "conversation" not in st.session_state:
-    #     st.session_state.conversation = None
-    # if "chat_history" not in st.session_state:
-    #     st.session_state.chat_history = None
-
-    # st.header("Chat with multiple PDFs :books:")
-    user_question = input("Ask a question about your documents:")
-    if user_question:
-        print(user_question)

-
-

-
-
-
-
-
-


-if __name__ == "__main__":
-    main()
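The deleted version is a plain LangChain retrieval pipeline: extract text from the PDFs, chunk it, embed the chunks into a FAISS index, and wrap the index in a ConversationalRetrievalChain. A minimal sketch of how those helpers chain together (assuming the reconstructed helper names above, a HUGGINGFACEHUB_API_TOKEN in the environment, and a hypothetical sample path; this is not code from the commit):

    # Sketch only: wires the removed helpers end-to-end.
    pdf_docs = ["data/sample.pdf"]                # hypothetical input path
    raw_text = get_pdf_text(pdf_docs)             # PyPDF2 text extraction
    chunks = get_text_chunks(raw_text)            # ~1000-character chunks
    vectorstore = get_vectorstore(chunks)         # instructor-xl embeddings + FAISS
    chain = get_conversation_chain(vectorstore)   # flan-t5-xxl behind the retriever
    # memory is commented out in this version, so chat history is passed explicitly
    result = chain({"question": "What is this document about?", "chat_history": []})
    print(result["answer"])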
+# import streamlit as st
+# from llmware.prompts import Prompt
+# import io, os, re
+# import PyPDF2

+# def register_gguf_model():

+#     prompter = Prompt()
+#     your_model_name = "llama"
+#     hf_repo_name = "TheBloke/Llama-2-7B-Chat-GGUF"
+#     model_file = "llama-2-7b-chat.Q3_K_M.gguf"
+#     print("registering models")
+#     prompter.model_catalog.register_gguf_model(your_model_name,hf_repo_name, model_file, prompt_wrapper="open_chat")
+#     your_model_name = "open_gpt4"
+#     hf_repo_name = "TheBloke/Open_Gpt4_8x7B-GGUF"
+#     model_file = "open_gpt4_8x7b.Q3_K_M.gguf"
+#     prompter.model_catalog.register_gguf_model(your_model_name,hf_repo_name, model_file, prompt_wrapper="open_chat")
+#     your_model_name = "phi2"
+#     hf_repo_name = "TheBloke/phi-2-GGUF"
+#     model_file = "phi-2.Q3_K_M.gguf"
+#     prompter.model_catalog.register_gguf_model(your_model_name,hf_repo_name, model_file, prompt_wrapper="open_chat")
+#     your_model_name = "mistral"
+#     hf_repo_name = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+#     model_file = "mistral-7b-instruct-v0.2.Q3_K_M.gguf"
+#     prompter.model_catalog.register_gguf_model(your_model_name,hf_repo_name, model_file, prompt_wrapper="open_chat")
+#     return prompter

+# def main():
+#     st.title("BetterZila RAG Enabled LLM")
+#     with st.spinner("Registering Models for use..."):
+#         prompter = register_gguf_model()
+
+#     data_path = "data/"

+#     st.sidebar.subheader("Select Model")
+#     model_name = st.sidebar.selectbox("Select Model", ["llama", "open_gpt4", "phi2", "mistral"])
+#     with st.spinner("Loading Model..."):
+#         prompter.load_model(model_name)
+#     st.success("Model Loaded!")
+
+#     queries = ['Can you give me an example from history where the enemy was crushed totally from the book?', "What's the point of making myself less accessible?", "Can you tell me the story of Queen Elizabeth I from this 48 laws of power book?"]
+
+#     st.subheader("Query")
+
+#     with st.spinner("Loading PDF file..."):
+#         for file in os.listdir(data_path):
+#             if file.endswith(".pdf"):
+#                 print("Found PDF file: ", file)
+#                 pdf_file = file
+#                 break
+#         print("loading Source...")
+#         source = prompter.add_source_document(data_path, pdf_file, query=None)

+#     for query in queries:
+#         st.subheader(f"Query: {query}")
+#         with st.spinner("Generating response..."):
+#             responses = prompter.prompt_with_source(query, prompt_name="just_the_facts", temperature=0.3)
+
+#         for r, response in enumerate(responses):
+#             st.write(query)
+#             st.write(re.sub("[\n]", " ", response["llm_response"]).strip())

+#         st.success("Responses generated!")

+#     # for query in queries:
+#     #     st.subheader(f"Query: {query}")
+#     #     with st.spinner("Generating response..."):
+#     #         for file in os.listdir(data_path):
+#     #             if file.endswith(".pdf"):
+#     #                 print("Found PDF file: ", file)
+#     #                 print("loading Source...")
+#     #                 source = prompter.add_source_document(data_path, file, query=None)
+#     #                 print("generating response...")
+#     #                 responses = prompter.prompt_with_source(query, prompt_name="just_the_facts", temperature=0.3)
+#     #                 print("response generated!")
+#     #                 for r, response in enumerate(responses):
+#     #                     print(query, ":", re.sub("[\n]"," ", response["llm_response"]).strip())
+#     #             prompter.clear_source_materials()
+#     #         st.write(query)
+#     #         st.write(re.sub("[\n]"," ", response["llm_response"]).strip())
+#     #     st.success("Response generated!")
+
+# if __name__ == "__main__":
+#     main()
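The replacement version of app_OG.py arrives fully commented out, so it does nothing if executed as-is. Uncommented, the llmware flow it describes reduces to roughly the sketch below, built only from the calls that appear in the commented code (register a GGUF model in the catalog, load it, attach a PDF from data/ as a source, prompt against it, clear the source). run_query is a hypothetical wrapper name, the llama model choice and data/ path simply mirror the comments, and this is a sketch rather than a tested configuration:

    import os, re
    from llmware.prompts import Prompt

    def run_query(query, data_path="data/"):
        prompter = Prompt()
        # Register and load one of the GGUF models named in the commented code.
        prompter.model_catalog.register_gguf_model(
            "llama", "TheBloke/Llama-2-7B-Chat-GGUF",
            "llama-2-7b-chat.Q3_K_M.gguf", prompt_wrapper="open_chat")
        prompter.load_model("llama")
        # Attach the first PDF found under data/ as the retrieval source.
        pdf_file = next(f for f in os.listdir(data_path) if f.endswith(".pdf"))
        prompter.add_source_document(data_path, pdf_file, query=None)
        # Prompt with the source attached, then flatten newlines in each answer.
        responses = prompter.prompt_with_source(query, prompt_name="just_the_facts", temperature=0.3)
        prompter.clear_source_materials()
        return [re.sub("[\n]", " ", r["llm_response"]).strip() for r in responses]

The Streamlit pieces (st.title, st.sidebar.selectbox, st.spinner) would sit around such a wrapper exactly as the commented main() shows.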