Ritvik19 committed on
Commit
143152b
·
verified ·
1 Parent(s): 713f6e4

Update process_documents.py

Browse files
Files changed (1) hide show
  1. process_documents.py +1 -1
process_documents.py CHANGED
@@ -157,4 +157,4 @@ def get_pdf_semantic_snippets(filtered_snippets, median_font_size):
157
 
158
 
159
  def num_tokens(string):
160
- return len(tiktoken.get_encoding("cl100k_base", disallowed_special=()).encode(string))
 
157
 
158
 
159
  def num_tokens(string):
160
+ return len(tiktoken.get_encoding("cl100k_base").encode(string, disallowed_special=()))