xu-song committed on
Commit
7cb27ea
·
1 Parent(s): 7a8d6d6
Files changed (2) hide show
  1. util.py +2 -8
  2. vocab/__init__.py +1 -1
util.py CHANGED
@@ -72,6 +72,7 @@ def tokenize_pair(text, tokenizer_type_1, tokenizer_type_2):
72
  return pos_tokens_1, table_df_1, pos_tokens_2, table_df_2
73
 
74
 
 
75
  def basic_count(tokenizer_type):
76
  tokenizer = load_tokener(tokenizer_type)
77
  stats = iter_vocab(tokenizer, tokenizer_type)
@@ -113,7 +114,6 @@ def on_load(url_params, request: gr.Request):
113
  """
114
  onLoad
115
  """
116
-
117
  text = None
118
  tokenizer_type_1 = None
119
  tokenizer_type_2 = None
@@ -122,11 +122,7 @@ def on_load(url_params, request: gr.Request):
122
  except:
123
  url_params = {}
124
  if request:
125
- try:
126
- logger.info(str(request.headers))
127
- logger.info(str(request.query_params))
128
- except:
129
- pass
130
  client_ip = request.client.host
131
  # local_ip = socket.gethostbyname(socket.gethostbyname(""))
132
  # headers = request.kwargs['headers']
@@ -139,8 +135,6 @@ def on_load(url_params, request: gr.Request):
139
  tokenizer_type_1 = url_params.get("tokenizer1", default_tokenizer_type_1)
140
  tokenizer_type_2 = url_params.get("tokenizer2", default_tokenizer_type_2)
141
  text = url_params.get("text", default_user_input)
142
-
143
-
144
  logger.info(f"client_ip: {client_ip}; params: {url_params}")
145
  return text, tokenizer_type_1, tokenizer_type_2
146
 
 
72
  return pos_tokens_1, table_df_1, pos_tokens_2, table_df_2
73
 
74
 
75
+ @lru_cache
76
  def basic_count(tokenizer_type):
77
  tokenizer = load_tokener(tokenizer_type)
78
  stats = iter_vocab(tokenizer, tokenizer_type)
 
114
  """
115
  onLoad
116
  """
 
117
  text = None
118
  tokenizer_type_1 = None
119
  tokenizer_type_2 = None
 
122
  except:
123
  url_params = {}
124
  if request:
125
+ logger.info(str(request.headers))
 
 
 
 
126
  client_ip = request.client.host
127
  # local_ip = socket.gethostbyname(socket.gethostbyname(""))
128
  # headers = request.kwargs['headers']
 
135
  tokenizer_type_1 = url_params.get("tokenizer1", default_tokenizer_type_1)
136
  tokenizer_type_2 = url_params.get("tokenizer2", default_tokenizer_type_2)
137
  text = url_params.get("text", default_user_input)
 
 
138
  logger.info(f"client_ip: {client_ip}; params: {url_params}")
139
  return text, tokenizer_type_1, tokenizer_type_2
140
 
vocab/__init__.py CHANGED
@@ -75,7 +75,7 @@ all_tokenizers = [
75
  # ##### glm系列
76
  "glm_chinese",
77
  "chatglm_6b",
78
- "chatglm2-6b",
79
  #
80
  # #### llama alpaca系列
81
  "llama", # '中文单字': 700, '中文多字': 0
 
75
  # ##### glm系列
76
  "glm_chinese",
77
  "chatglm_6b",
78
+ "chatglm2_6b",
79
  #
80
  # #### llama alpaca系列
81
  "llama", # '中文单字': 700, '中文多字': 0