Spaces:
Sleeping
Sleeping
ubuntu
committed on
Commit
•
90620f9
1
Parent(s):
6d46819
fix bug
Browse files
- app.py +0 -3
- clip/simple_tokenizer.py +3 -1
app.py
CHANGED
@@ -17,9 +17,6 @@ def _handle_fd_solve(img_path: str):
|
|
17 |
if img_path is None:
|
18 |
raise gr.Error("Please upload file completely!")
|
19 |
|
20 |
-
# gzip
|
21 |
-
os.system("gzip clip/bpe_simple_vocab_16e6.txt")
|
22 |
-
|
23 |
# Begin solve and record the solving time
|
24 |
start_time = time.time()
|
25 |
detect(
|
|
|
17 |
if img_path is None:
|
18 |
raise gr.Error("Please upload file completely!")
|
19 |
|
|
|
|
|
|
|
20 |
# Begin solve and record the solving time
|
21 |
start_time = time.time()
|
22 |
detect(
|
clip/simple_tokenizer.py
CHANGED
@@ -6,7 +6,6 @@ from functools import lru_cache
|
|
6 |
import ftfy
|
7 |
import regex as re
|
8 |
|
9 |
-
|
10 |
@lru_cache()
|
11 |
def default_bpe():
|
12 |
return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz")
|
@@ -63,6 +62,9 @@ class SimpleTokenizer(object):
|
|
63 |
def __init__(self, bpe_path: str = default_bpe()):
|
64 |
self.byte_encoder = bytes_to_unicode()
|
65 |
self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
|
|
|
|
|
|
|
66 |
merges = gzip.open(bpe_path).read().decode("utf-8").split('\n')
|
67 |
merges = merges[1:49152-256-2+1]
|
68 |
merges = [tuple(merge.split()) for merge in merges]
|
|
|
6 |
import ftfy
|
7 |
import regex as re
|
8 |
|
|
|
9 |
@lru_cache()
|
10 |
def default_bpe():
|
11 |
return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz")
|
|
|
62 |
def __init__(self, bpe_path: str = default_bpe()):
|
63 |
self.byte_encoder = bytes_to_unicode()
|
64 |
self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
|
65 |
+
if not os.system(bpe_path):
|
66 |
+
txt_path = bpe_path.replace(".gz", ".txt")
|
67 |
+
os.system(f"gzip {txt_path}")
|
68 |
merges = gzip.open(bpe_path).read().decode("utf-8").split('\n')
|
69 |
merges = merges[1:49152-256-2+1]
|
70 |
merges = [tuple(merge.split()) for merge in merges]
|