andreped committed
Commit 612f625 · 1 Parent(s): 59fea76

Reformated code

Files changed (3)
  1. app.py +4 -3
  2. chatbot/data.py +8 -7
  3. chatbot/redirect.py +55 -23
app.py CHANGED
@@ -15,11 +15,12 @@ st.header("Chat with André's research 💬 📚")
 if "messages" not in st.session_state.keys(): # Initialize the chat message history
     st.session_state.messages = [{"role": "assistant", "content": "Ask me a question about André's research!"}]

+
 def main():
     # setup logger sidebar
-    #st.sidebar.text("Standard output log:")
-    #_sidebar_out = st.sidebar.empty()
-    #with rd.stdout(to=_sidebar_out, format='text'):
+    # st.sidebar.text("Standard output log:")
+    # _sidebar_out = st.sidebar.empty()
+    # with rd.stdout(to=_sidebar_out, format='text'):
     #     print("test")

     # setup dataset
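
A minimal sketch of what main() would look like with the commented-out logger sidebar enabled, assuming chatbot/redirect.py exposes the stdout helper exactly as the commented call suggests (the names below are taken from those comments and the print is only a placeholder):

import streamlit as st

from chatbot import redirect as rd


def main():
    # mirror anything printed inside the block to a sidebar placeholder
    st.sidebar.text("Standard output log:")
    _sidebar_out = st.sidebar.empty()
    with rd.stdout(to=_sidebar_out, format="text"):
        print("test")
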
chatbot/data.py CHANGED
@@ -1,7 +1,5 @@
 import os

-from chatbot import redirect as rd
-
 import streamlit as st
 from gdown import download_folder
 from llama_index import ServiceContext
@@ -11,21 +9,24 @@ from llama_index import set_global_service_context
 from llama_index.embeddings import OpenAIEmbedding
 from llama_index.llms import AzureOpenAI

+from chatbot import redirect as rd
+

 @st.cache_resource(show_spinner=False)
 def download_test_data():
     # url = f"https://drive.google.com/drive/folders/uc?export=download&confirm=pbef&id={file_id}"
     url = "https://drive.google.com/drive/folders/1uDSAWtLvp1YPzfXUsK_v6DeWta16pq6y"
     with st.spinner(text="Downloading test data. This might take a minute."):
-        # @TODO: replace gown solution with a custom solution compatible with GitHub and
+        # @TODO: replace gown solution with a custom solution compatible with GitHub and
         # use st.progress to get more verbose during download
         download_folder(url=url, quiet=False, use_cookies=False, output="./data/")

+
 @st.cache_resource(show_spinner=False)
 def load_data():
     with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
         documents = SimpleDirectoryReader(input_dir="./data", recursive=True).load_data()
-
+
     with st.spinner(text="Setting up Azure OpenAI..."):
         llm = AzureOpenAI(
             model="gpt-3.5-turbo",
@@ -50,11 +51,11 @@ def load_data():
             api_base=st.secrets["OPENAI_API_BASE"],
             api_type="azure",
             api_version=st.secrets["OPENAI_API_VERSION"],
-            embed_batch_size=10, # set to one to reduce rate limit -> may degrade response runtime
+            embed_batch_size=10, # set to low value to reduce rate limit -> may degrade response runtime
         )
-
+
     with st.spinner(text="Setting up Vector Store Index..."):
-        service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
+        service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model) # , chunk_size=512)
         set_global_service_context(service_context)
         index = VectorStoreIndex.from_documents(documents) # , service_context=service_context)
         return index
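
The @TODO in download_test_data() asks for replacing the gdown call with a custom download that reports progress via st.progress. A rough sketch of such a helper, assuming a plain HTTP file URL (the function name and URL handling are hypothetical and not part of this repository):

import os

import requests
import streamlit as st


def download_file_with_progress(url: str, output_path: str):
    # Stream the file to disk and update a Streamlit progress bar instead of
    # relying on gdown's console output.
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    response = requests.get(url, stream=True, timeout=60)
    response.raise_for_status()
    total = int(response.headers.get("content-length", 0))
    progress = st.progress(0)
    downloaded = 0
    with open(output_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
            downloaded += len(chunk)
            if total:
                progress.progress(min(downloaded / total, 1.0))
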
chatbot/redirect.py CHANGED
@@ -1,8 +1,9 @@
-import streamlit as st
-import io
 import contextlib
-import sys
+import io
 import re
+import sys
+
+import streamlit as st


 class _Redirect:
@@ -10,6 +11,7 @@ class _Redirect:
     Based on: https://gist.github.com/schaumb/037f139035d93cff3ad9f4f7e5f739ce
     Also see: https://github.com/streamlit/streamlit/issues/268#issuecomment-810478208
     """
+
     class IOStuff(io.StringIO):
         def __init__(self, trigger, max_buffer, buffer_separator, regex, dup=None):
             super().__init__()
@@ -23,7 +25,7 @@ class _Redirect:
             if self._max_buffer:
                 concatenated_len = super().tell() + len(__s)
                 if concatenated_len > self._max_buffer:
-                    rest = self.get_filtered_output()[concatenated_len - self._max_buffer:]
+                    rest = self.get_filtered_output()[concatenated_len - self._max_buffer :]
                     if self._buffer_separator is not None:
                         rest = rest.split(self._buffer_separator, 1)[-1]
                     super().seek(0)
@@ -39,20 +41,35 @@ class _Redirect:
             if self._regex is None or self._buffer_separator is None:
                 return self.getvalue()

-            return self._buffer_separator.join(filter(self._regex.search, self.getvalue().split(self._buffer_separator)))
+            return self._buffer_separator.join(
+                filter(self._regex.search, self.getvalue().split(self._buffer_separator))
+            )

         def print_at_end(self):
             self._trigger(self.get_filtered_output())

-    def __init__(self, stdout=None, stderr=False, format=None, to=None, max_buffer=None, buffer_separator='\n',
-                 regex=None, duplicate_out=False):
-        self.io_args = {'trigger': self._write, 'max_buffer': max_buffer, 'buffer_separator': buffer_separator,
-                        'regex': regex}
+    def __init__(
+        self,
+        stdout=None,
+        stderr=False,
+        format=None,
+        to=None,
+        max_buffer=None,
+        buffer_separator="\n",
+        regex=None,
+        duplicate_out=False,
+    ):
+        self.io_args = {
+            "trigger": self._write,
+            "max_buffer": max_buffer,
+            "buffer_separator": buffer_separator,
+            "regex": regex,
+        }
         self.redirections = []
         self.st = None
         self.stderr = stderr is True
         self.stdout = stdout is True or (stdout is None and not self.stderr)
-        self.format = format or 'code'
+        self.format = format or "code"
         self.to = to
         self.fun = None
         self.duplicate_out = duplicate_out or None
@@ -61,29 +78,36 @@
         if not self.stdout and not self.stderr:
             raise ValueError("one of stdout or stderr must be True")

-        if self.format not in ['text', 'markdown', 'latex', 'code', 'write']:
+        if self.format not in ["text", "markdown", "latex", "code", "write"]:
             raise ValueError(
-                f"format need oneof the following: {', '.join(['text', 'markdown', 'latex', 'code', 'write'])}")
+                f"format need oneof the following: {', '.join(['text', 'markdown', 'latex', 'code', 'write'])}"
+            )

-        if self.to and (not hasattr(self.to, 'text') or not hasattr(self.to, 'empty')):
+        if self.to and (not hasattr(self.to, "text") or not hasattr(self.to, "empty")):
             raise ValueError(f"'to' is not a streamlit container object")

     def __enter__(self):
         if self.st is not None:
             if self.to is None:
                 if self.active_nested is None:
-                    self.active_nested = self(format=self.format, max_buffer=self.io_args['max_buffer'],
-                                              buffer_separator=self.io_args['buffer_separator'],
-                                              regex=self.io_args['regex'], duplicate_out=self.duplicate_out)
+                    self.active_nested = self(
+                        format=self.format,
+                        max_buffer=self.io_args["max_buffer"],
+                        buffer_separator=self.io_args["buffer_separator"],
+                        regex=self.io_args["regex"],
+                        duplicate_out=self.duplicate_out,
+                    )
                 return self.active_nested.__enter__()
             else:
                 raise Exception("Already entered")
         to = self.to or st

-        to.text(f"Redirected output from "
-                f"{'stdout and stderr' if self.stdout and self.stderr else 'stdout' if self.stdout else 'stderr'}"
-                f"{' [' + self.io_args['regex'] + ']' if self.io_args['regex'] else ''}"
-                f":")
+        to.text(
+            f"Redirected output from "
+            f"{'stdout and stderr' if self.stdout and self.stderr else 'stdout' if self.stdout else 'stderr'}"
+            f"{' [' + self.io_args['regex'] + ']' if self.io_args['regex'] else ''}"
+            f":"
+        )
         self.st = to.empty()
         self.fun = getattr(self.st, self.format)

@@ -103,9 +127,17 @@

         return io_obj

-    def __call__(self, to=None, format=None, max_buffer=None, buffer_separator='\n', regex=None, duplicate_out=False):
-        return _Redirect(self.stdout, self.stderr, format=format, to=to, max_buffer=max_buffer,
-                         buffer_separator=buffer_separator, regex=regex, duplicate_out=duplicate_out)
+    def __call__(self, to=None, format=None, max_buffer=None, buffer_separator="\n", regex=None, duplicate_out=False):
+        return _Redirect(
+            self.stdout,
+            self.stderr,
+            format=format,
+            to=to,
+            max_buffer=max_buffer,
+            buffer_separator=buffer_separator,
+            regex=regex,
+            duplicate_out=duplicate_out,
+        )

     def __exit__(self, *exc):
         if self.active_nested is not None:
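
_Redirect is a context manager that captures writes to stdout and/or stderr in an io.StringIO buffer and renders the captured text into a Streamlit element using one of the formats 'text', 'markdown', 'latex', 'code' or 'write'. A short usage sketch, assuming the module also exposes a ready-made stdout instance as the commented-out block in app.py implies:

import streamlit as st

from chatbot import redirect as rd

log_box = st.sidebar.empty()
# Anything printed inside the block is mirrored into log_box; with max_buffer set,
# only roughly the last 5000 characters are kept, trimmed on line boundaries.
with rd.stdout(to=log_box, format="text", max_buffer=5000, buffer_separator="\n"):
    print("Indexing documents...")
    print("Done.")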