Zwea Htet committed
Commit b17ddeb · 2 Parent(s): 54d209f ad2d0d7

Merge branch 'main' of https://huggingface.co/spaces/zhtet/RegBotBeta

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 .*pdf filter=lfs diff=lfs merge=lfs -text
+assets/pdf/calregs.pdf filter=lfs diff=lfs merge=lfs -text
+assets/pdf/CADWReg.pdf filter=lfs diff=lfs merge=lfs -text
assets/pdf/CADWReg.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a80bdec9f2f6c15ca5d9181723b711bb7428fa48babaf87bab3f3b690055f54
+size 3724564
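
Files tracked by Git LFS are committed as small pointer files like the one above: oid is the SHA-256 of the actual content and size its length in bytes, while the PDF itself lives in LFS storage. The two rules added to .gitattributes route both new PDFs through LFS.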
models/langOpen.py CHANGED
@@ -4,13 +4,15 @@ import openai
 from dotenv import load_dotenv
 from langchain.chains import LLMChain
 from langchain.chat_models import ChatOpenAI
+from langchain.document_loaders import PyPDFLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.prompts import PromptTemplate
 from langchain.vectorstores import FAISS
 
-load_dotenv()
+loader = PyPDFLoader("./assets/pdf/CADWReg.pdf")
+pages = loader.load_and_split()
 
-embeddings = OpenAIEmbeddings()
+load_dotenv()
 
 prompt_template = """Answer the question using the given context to the best of your ability.
 If you don't know, answer I don't know.
@@ -28,12 +30,12 @@ class LangOpen:
 
     def initialize_index(self, index_name):
         path = f"./vectorStores/{index_name}"
+        embeddings = OpenAIEmbeddings()
+
         if os.path.exists(path=path):
             return FAISS.load_local(folder_path=path, embeddings=embeddings)
         else:
-            faiss = FAISS.from_texts(
-                "./assets/updated_calregs.txt", embedding=embeddings
-            )
+            faiss = FAISS.from_documents(pages, embeddings)
             faiss.save_local(path)
             return faiss
 
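For context, a minimal sketch of the new loading path in models/langOpen.py (assuming the pre-0.1 langchain API this commit uses; the "langOpen" index name and the pypdf dependency are assumptions, not part of the diff):

import os

from langchain.document_loaders import PyPDFLoader  # requires the pypdf package
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Load the regulation PDF and split it into chunked Documents.
loader = PyPDFLoader("./assets/pdf/CADWReg.pdf")
pages = loader.load_and_split()

embeddings = OpenAIEmbeddings()  # reads OPENAI_API_KEY from the environment

path = "./vectorStores/langOpen"  # assumed index name, for illustration only
if os.path.exists(path):
    index = FAISS.load_local(folder_path=path, embeddings=embeddings)
else:
    # from_documents embeds the Documents' contents; the old from_texts call
    # received a bare file path, so the path string (not the file's text)
    # was what got embedded.
    index = FAISS.from_documents(pages, embeddings)
    index.save_local(path)

Moving embeddings = OpenAIEmbeddings() inside initialize_index also defers OpenAI client construction from import time to first use.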
models/llamaCustom.py CHANGED
@@ -5,6 +5,7 @@ from typing import Any, List, Mapping, Optional
 
 import numpy as np
 import openai
+import streamlit as st
 import pandas as pd
 from dotenv import load_dotenv
 from huggingface_hub import HfFileSystem
@@ -35,12 +36,6 @@ NUM_OUTPUT = 525
 # set maximum chunk overlap
 CHUNK_OVERLAP_RATION = 0.2
 
-prompt_helper = PromptHelper(
-    context_window=CONTEXT_WINDOW,
-    num_output=NUM_OUTPUT,
-    chunk_overlap_ratio=CHUNK_OVERLAP_RATION,
-)
-
 llm_model_name = "bigscience/bloom-560m"
 tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
 model = AutoModelForCausalLM.from_pretrained(llm_model_name, config="T5Config")
@@ -70,24 +65,21 @@ class CustomLLM(LLM):
 
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
-        return {"name_of_model": self.model_name}
+        return {"name_of_model": llm_model_name}
 
     @property
     def _llm_type(self) -> str:
         return "custom"
 
-
+@st.cache_resource
 class LlamaCustom:
-    # define llm
-    llm_predictor = LLMPredictor(llm=CustomLLM())
-    service_context = ServiceContext.from_defaults(
-        llm_predictor=llm_predictor, prompt_helper=prompt_helper
-    )
 
-    def __init__(self, name: str) -> None:
-        self.vector_index = self.initialize_index(index_name=name)
+    def __init__(self, model_name: str) -> None:
+        self.vector_index = self.initialize_index(model_name=model_name)
 
-    def initialize_index(self, index_name):
+    def initialize_index(self, model_name: str):
+        index_name = model_name.split("/")[-1]
+
         file_path = f"./vectorStores/{index_name}"
         if os.path.exists(path=file_path):
             # rebuild storage context
@@ -101,11 +93,22 @@ class LlamaCustom:
             # index = pickle.loads(file.readlines())
             return index
         else:
+            # define llm
+            prompt_helper = PromptHelper(
+                context_window=CONTEXT_WINDOW,
+                num_output=NUM_OUTPUT,
+                chunk_overlap_ratio=CHUNK_OVERLAP_RATION,
+            )
+            llm_predictor = LLMPredictor(llm=CustomLLM())
+            service_context = ServiceContext.from_defaults(
+                llm_predictor=llm_predictor, prompt_helper=prompt_helper
+            )
+
             # documents = prepare_data(r"./assets/regItems.json")
             documents = SimpleDirectoryReader(input_dir="./assets/pdf").load_data()
 
             index = GPTVectorStoreIndex.from_documents(
-                documents, service_context=self.service_context
+                documents, service_context=service_context
             )
 
             # local write access
@@ -120,4 +123,4 @@ class LlamaCustom:
         print("query_str: ", query_str)
         query_engine = self.vector_index.as_query_engine()
         response = query_engine.query(query_str)
-        return str(response)
+        return str(response)
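
The PromptHelper / LLMPredictor / ServiceContext setup moves from module and class scope into initialize_index, so it only runs when a new index must be built, and @st.cache_resource keeps the constructed LlamaCustom alive across Streamlit reruns. A minimal sketch of the caching pattern (get_index is a hypothetical name; the commit decorates the class itself, whereas Streamlit's docs usually show a cached factory function):

import streamlit as st

from models.llamaCustom import LlamaCustom

@st.cache_resource  # one cached instance per distinct model_name
def get_index(model_name: str) -> LlamaCustom:
    # The expensive work (building the ServiceContext, reading ./assets/pdf,
    # constructing the vector index) happens inside the cached call, so
    # widget interactions reuse the object instead of rebuilding the index.
    return LlamaCustom(model_name=model_name)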
pages/langchain_demo.py CHANGED
@@ -17,7 +17,7 @@ if "openai_api_key" not in st.session_state:
     st.info("Enter your openai key to access the chatbot.")
 else:
     option = st.selectbox(
-        label="Select your model:", options=("gpt-3.5-turbo", "gpt-4"), index=0
+        label="Select your model:", options=("gpt-3.5-turbo", "gpt-4")
     )
 
     with st.spinner(f"Initializing {option} ..."):
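
st.selectbox selects the first option by default (index=0), so dropping the explicit argument is behavior-preserving:

import streamlit as st

# The two calls below render identically; the commit removes the redundant arg.
choice_a = st.selectbox("Select your model:", ("gpt-3.5-turbo", "gpt-4"), index=0)
choice_b = st.selectbox("Select your model:", ("gpt-3.5-turbo", "gpt-4"))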
pages/llama_custom_demo.py CHANGED
@@ -1,11 +1,10 @@
 import os
-import time
 
 import openai
 import streamlit as st
 
 from models.llamaCustom import LlamaCustom
-from utils.chatbox import *
+from utils.chatbox import chatbox
 
 st.set_page_config(page_title="Llama", page_icon="🦙")
 
@@ -17,7 +16,11 @@ if "messages" not in st.session_state:
 if "openai_api_key" not in st.session_state:
     st.info("Enter your openai key to access the chatbot.")
 else:
+    option = st.selectbox(
+        label="Select your model:", options=("bigscience/bloom-560m",)
+    )
+
     with st.spinner("Initializing vector index"):
-        model = LlamaCustom(name="llamaCustom")
+        model = LlamaCustom(model_name=option)
 
     chatbox("llama_custom", model)
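
Two changes here: the wildcard import is narrowed to the one symbol the page uses, and the selected Hugging Face model id now flows into LlamaCustom instead of a hard-coded name. A sketch of the resulting flow (the index path follows from the split("/") logic in initialize_index above):

import streamlit as st

from models.llamaCustom import LlamaCustom
from utils.chatbox import chatbox

option = st.selectbox(label="Select your model:", options=("bigscience/bloom-560m",))
model = LlamaCustom(model_name=option)  # index stored at ./vectorStores/bloom-560m
chatbox("llama_custom", model)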
utils/chatbox.py CHANGED
@@ -40,7 +40,6 @@ def display_bot_msg(model_name: str, bot_response: str):
         {"model_name": model_name, "role": "assistant", "content": full_response}
     )
 
-    # @st.cache_data
 def chatbox(model_name: str, model: None):
     # Display chat messages from history on app rerun
     for message in st.session_state.messages:
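
With caching now handled by @st.cache_resource on LlamaCustom, the stale # @st.cache_data comment goes away; st.cache_data is meant for serializable return values (it hands back copies), while st.cache_resource is the appropriate choice for live objects such as models and vector indexes.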