Merge branch 'main' of https://huggingface.co/spaces/zhtet/RegBotBeta
Files changed:
- .gitattributes  +2 -0
- assets/pdf/CADWReg.pdf  +3 -0
- models/langOpen.py  +7 -5
- models/llamaCustom.py  +21 -18
- pages/langchain_demo.py  +1 -1
- pages/llama_custom_demo.py  +6 -3
- utils/chatbox.py  +0 -1
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 .*pdf filter=lfs diff=lfs merge=lfs -text
+assets/pdf/calregs.pdf filter=lfs diff=lfs merge=lfs -text
+assets/pdf/CADWReg.pdf filter=lfs diff=lfs merge=lfs -text
assets/pdf/CADWReg.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a80bdec9f2f6c15ca5d9181723b711bb7428fa48babaf87bab3f3b690055f54
+size 3724564
models/langOpen.py CHANGED
@@ -4,13 +4,15 @@ import openai
 from dotenv import load_dotenv
 from langchain.chains import LLMChain
 from langchain.chat_models import ChatOpenAI
+from langchain.document_loaders import PyPDFLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.prompts import PromptTemplate
 from langchain.vectorstores import FAISS
 
-load_dotenv()
+loader = PyPDFLoader("./assets/pdf/CADWReg.pdf")
+pages = loader.load_and_split()
 
-embeddings = OpenAIEmbeddings()
+load_dotenv()
 
 prompt_template = """Answer the question using the given context to the best of your ability.
 If you don't know, answer I don't know.
@@ -28,12 +30,12 @@ class LangOpen:
 
     def initialize_index(self, index_name):
         path = f"./vectorStores/{index_name}"
+        embeddings = OpenAIEmbeddings()
+
         if os.path.exists(path=path):
             return FAISS.load_local(folder_path=path, embeddings=embeddings)
         else:
-            faiss = FAISS.
-                "./assets/updated_calregs.txt", embedding=embeddings
-            )
+            faiss = FAISS.from_documents(pages, embeddings)
             faiss.save_local(path)
             return faiss
 
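Taken together, the langOpen.py changes swap the old text-file loading for the newly added PDF: the document is split into per-page documents once at import time, and the FAISS store is built from those pages on first use, then reloaded from ./vectorStores afterwards. A minimal standalone sketch of that flow, using the same API calls as the diff (the prompt template and LLMChain parts of LangOpen are omitted here):

import os

from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Split the newly added PDF into per-page documents once, at import time.
loader = PyPDFLoader("./assets/pdf/CADWReg.pdf")
pages = loader.load_and_split()

load_dotenv()  # exposes OPENAI_API_KEY for OpenAIEmbeddings


def initialize_index(index_name: str):
    """Reuse a persisted FAISS store if present, otherwise build it from the PDF pages."""
    path = f"./vectorStores/{index_name}"
    embeddings = OpenAIEmbeddings()
    if os.path.exists(path):
        return FAISS.load_local(folder_path=path, embeddings=embeddings)
    faiss = FAISS.from_documents(pages, embeddings)
    faiss.save_local(path)
    return faiss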
models/llamaCustom.py CHANGED
@@ -5,6 +5,7 @@ from typing import Any, List, Mapping, Optional
 
 import numpy as np
 import openai
+import streamlit as st
 import pandas as pd
 from dotenv import load_dotenv
 from huggingface_hub import HfFileSystem
@@ -35,12 +36,6 @@ NUM_OUTPUT = 525
 # set maximum chunk overlap
 CHUNK_OVERLAP_RATION = 0.2
 
-prompt_helper = PromptHelper(
-    context_window=CONTEXT_WINDOW,
-    num_output=NUM_OUTPUT,
-    chunk_overlap_ratio=CHUNK_OVERLAP_RATION,
-)
-
 llm_model_name = "bigscience/bloom-560m"
 tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
 model = AutoModelForCausalLM.from_pretrained(llm_model_name, config="T5Config")
@@ -70,24 +65,21 @@ class CustomLLM(LLM):
 
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
-        return {"name_of_model":
+        return {"name_of_model": llm_model_name}
 
     @property
     def _llm_type(self) -> str:
         return "custom"
 
-
+@st.cache_resource
 class LlamaCustom:
-    # define llm
-    llm_predictor = LLMPredictor(llm=CustomLLM())
-    service_context = ServiceContext.from_defaults(
-        llm_predictor=llm_predictor, prompt_helper=prompt_helper
-    )
 
-    def __init__(self,
-        self.vector_index = self.initialize_index(
+    def __init__(self, model_name: str) -> None:
+        self.vector_index = self.initialize_index(model_name=model_name)
 
-    def initialize_index(self,
+    def initialize_index(self, model_name: str):
+        index_name = model_name.split("/")[-1]
+
         file_path = f"./vectorStores/{index_name}"
         if os.path.exists(path=file_path):
             # rebuild storage context
@@ -101,11 +93,22 @@ class LlamaCustom:
             # index = pickle.loads(file.readlines())
             return index
         else:
+            # define llm
+            prompt_helper = PromptHelper(
+                context_window=CONTEXT_WINDOW,
+                num_output=NUM_OUTPUT,
+                chunk_overlap_ratio=CHUNK_OVERLAP_RATION,
+            )
+            llm_predictor = LLMPredictor(llm=CustomLLM())
+            service_context = ServiceContext.from_defaults(
+                llm_predictor=llm_predictor, prompt_helper=prompt_helper
+            )
+
             # documents = prepare_data(r"./assets/regItems.json")
             documents = SimpleDirectoryReader(input_dir="./assets/pdf").load_data()
 
             index = GPTVectorStoreIndex.from_documents(
-                documents, service_context=
+                documents, service_context=service_context
             )
 
             # local write access
@@ -120,4 +123,4 @@ class LlamaCustom:
         print("query_str: ", query_str)
         query_engine = self.vector_index.as_query_engine()
         response = query_engine.query(query_str)
-        return str(response)
+        return str(response)
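The llamaCustom.py changes move all llama-index service setup out of module and class scope into initialize_index, so nothing heavy is constructed unless a vector store actually has to be built, and the whole LlamaCustom instance is memoized per model name via @st.cache_resource. A condensed sketch of the resulting class, assuming the legacy llama_index 0.x API the file already uses; the CustomLLM wrapper, the persisted-index branch, and the query method's real name are abbreviated or illustrative:

import os

import streamlit as st
from llama_index import (
    GPTVectorStoreIndex,
    LLMPredictor,
    PromptHelper,
    ServiceContext,
    SimpleDirectoryReader,
)

NUM_OUTPUT = 525
CONTEXT_WINDOW = 2048        # assumed value; the real constant is defined earlier in the file
CHUNK_OVERLAP_RATION = 0.2   # identifier kept exactly as spelled in the source


@st.cache_resource            # one cached instance per model_name across Streamlit reruns
class LlamaCustom:
    def __init__(self, model_name: str) -> None:
        self.vector_index = self.initialize_index(model_name=model_name)

    def initialize_index(self, model_name: str):
        index_name = model_name.split("/")[-1]     # "bigscience/bloom-560m" -> "bloom-560m"
        file_path = f"./vectorStores/{index_name}"
        if os.path.exists(file_path):
            ...  # the real file reloads and returns the persisted index here (unchanged by this commit)
        # Service objects are now created lazily, only when the index must be built.
        prompt_helper = PromptHelper(
            context_window=CONTEXT_WINDOW,
            num_output=NUM_OUTPUT,
            chunk_overlap_ratio=CHUNK_OVERLAP_RATION,
        )
        llm_predictor = LLMPredictor(llm=CustomLLM())   # CustomLLM is the wrapper defined earlier in the file
        service_context = ServiceContext.from_defaults(
            llm_predictor=llm_predictor, prompt_helper=prompt_helper
        )
        documents = SimpleDirectoryReader(input_dir="./assets/pdf").load_data()
        return GPTVectorStoreIndex.from_documents(
            documents, service_context=service_context
        )

    def get_response(self, query_str: str) -> str:  # method name illustrative; body matches the diff
        query_engine = self.vector_index.as_query_engine()
        return str(query_engine.query(query_str))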
pages/langchain_demo.py CHANGED
@@ -17,7 +17,7 @@ if "openai_api_key" not in st.session_state:
     st.info("Enter your openai key to access the chatbot.")
 else:
     option = st.selectbox(
-        label="Select your model:", options=("gpt-3.5-turbo", "gpt-4")
+        label="Select your model:", options=("gpt-3.5-turbo", "gpt-4")
     )
 
     with st.spinner(f"Initializing {option} ..."):
pages/llama_custom_demo.py CHANGED
@@ -1,11 +1,10 @@
 import os
-import time
 
 import openai
 import streamlit as st
 
 from models.llamaCustom import LlamaCustom
-from utils.chatbox import
+from utils.chatbox import chatbox
 
 st.set_page_config(page_title="Llama", page_icon="🦙")
 
@@ -17,7 +16,11 @@ if "messages" not in st.session_state:
 if "openai_api_key" not in st.session_state:
     st.info("Enter your openai key to access the chatbot.")
 else:
+    option = st.selectbox(
+        label="Select your model:", options=("bigscience/bloom-560m",)
+    )
+
     with st.spinner("Initializing vector index"):
-        model = LlamaCustom(
+        model = LlamaCustom(model_name=option)
 
     chatbox("llama_custom", model)
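With the selectbox added, the demo page now passes the chosen checkpoint name through to LlamaCustom, so @st.cache_resource keys the cached index on that name. A stripped-down sketch of the page flow, assuming the chatbox(model_name, model) helper from utils/chatbox.py and omitting the openai_api_key session-state gate:

import streamlit as st

from models.llamaCustom import LlamaCustom
from utils.chatbox import chatbox

st.set_page_config(page_title="Llama", page_icon="🦙")

# Only one checkpoint is offered for now; the tuple makes it easy to add more later.
option = st.selectbox(
    label="Select your model:", options=("bigscience/bloom-560m",)
)

with st.spinner("Initializing vector index"):
    # Cached per model name, so reruns and other sessions reuse the same index.
    model = LlamaCustom(model_name=option)

chatbox("llama_custom", model)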
utils/chatbox.py CHANGED
@@ -40,7 +40,6 @@ def display_bot_msg(model_name: str, bot_response: str):
         {"model_name": model_name, "role": "assistant", "content": full_response}
     )
 
-# @st.cache_data
 def chatbox(model_name: str, model: None):
     # Display chat messages from history on app rerun
     for message in st.session_state.messages: