Spaces:
Runtime error
Runtime error
Commit
·
cc99942
1
Parent(s):
8907e41
hashtag segmentation
Browse files
app.py
CHANGED
@@ -11,14 +11,16 @@ Hashtag segmentation is the task of automatically adding spaces between the word
|
|
11 |
This app uses the <a href=\"https://github.com/ruanchaves/hashformers\">Hashformers library</a> to suggest segmentations for hashtags.
|
12 |
|
13 |
Enter a hashtag or pick one from the examples below. The app will suggest the best segmentation for the hashtag.
|
|
|
|
|
14 |
"""
|
15 |
|
16 |
app_examples = [
|
17 |
-
["#cristianoronaldo", "
|
18 |
-
["#madridsinfiltros", "
|
19 |
-
["#kuenstlicheintelligenz", "
|
20 |
-
["#dadscare", "
|
21 |
-
["#nowthatcherisdead", "
|
22 |
]
|
23 |
|
24 |
output_json_component_description = {"": ""}
|
@@ -86,7 +88,7 @@ def parse_candidates(candidates):
|
|
86 |
candidates = [c.strip() for c in candidates]
|
87 |
return candidates
|
88 |
|
89 |
-
def predict(s1, candidates, language, use_reranker, topk, steps):
|
90 |
hashtag_list = [s1]
|
91 |
if language:
|
92 |
chosen_model = model_dict[language]
|
@@ -100,13 +102,16 @@ def predict(s1, candidates, language, use_reranker, topk, steps):
|
|
100 |
segmenter_df = format_dataframe(segmentation.segmenter_rank)
|
101 |
reranker_df = format_dataframe(segmentation.reranker_rank)
|
102 |
|
|
|
|
|
|
|
|
|
103 |
|
104 |
top_segmentation = segmentation.output[0]
|
105 |
segmenter_score_dict = convert_to_score_dict(segmenter_df)
|
106 |
reranker_score_dict = convert_to_score_dict(reranker_df)
|
107 |
top_segmentation_df = get_candidates_df([top_segmentation], segmenter_score_dict, reranker_score_dict)
|
108 |
-
|
109 |
-
candidates_list = parse_candidates(candidates)
|
110 |
|
111 |
candidates_df = get_candidates_df(candidates_list, segmenter_score_dict, reranker_score_dict)
|
112 |
output_df = pd.concat([top_segmentation_df, candidates_df], axis=0)
|
@@ -123,16 +128,15 @@ def predict(s1, candidates, language, use_reranker, topk, steps):
|
|
123 |
|
124 |
inputs = [
|
125 |
gr.Textbox(label="Hashtag"),
|
126 |
-
gr.Textbox(label="Candidate segmentations"),
|
127 |
gr.Dropdown(language_list, label="Language", value="english (fast)"),
|
128 |
gr.Checkbox(label="Use reranker", value=True),
|
129 |
-
gr.Slider(0, 100, value=20, label="Advanced setting - Beamsearch
|
130 |
-
gr.Slider(0, 100, value=13, label="Advanced setting -
|
131 |
]
|
132 |
|
133 |
outputs = [
|
134 |
gr.Textbox(label="Suggested segmentation"),
|
135 |
-
gr.DataFrame(label="
|
136 |
]
|
137 |
|
138 |
|
|
|
11 |
This app uses the <a href=\"https://github.com/ruanchaves/hashformers\">Hashformers library</a> to suggest segmentations for hashtags.
|
12 |
|
13 |
Enter a hashtag or pick one from the examples below. The app will suggest the best segmentation for the hashtag.
|
14 |
+
|
15 |
+
In the advanced settings, decreasing the slider values will make the app faster, but it may also reduce its accuracy.
|
16 |
"""
|
17 |
|
18 |
app_examples = [
|
19 |
+
["#cristianoronaldo", "portuguese"],
|
20 |
+
["#madridsinfiltros", "spanish"],
|
21 |
+
["#kuenstlicheintelligenz", "german"],
|
22 |
+
["#dadscare", "english (fast)"],
|
23 |
+
["#nowthatcherisdead", "english"],
|
24 |
]
|
25 |
|
26 |
output_json_component_description = {"": ""}
|
|
|
88 |
candidates = [c.strip() for c in candidates]
|
89 |
return candidates
|
90 |
|
91 |
+
def predict(s1, language, use_reranker, topk, steps):
|
92 |
hashtag_list = [s1]
|
93 |
if language:
|
94 |
chosen_model = model_dict[language]
|
|
|
102 |
segmenter_df = format_dataframe(segmentation.segmenter_rank)
|
103 |
reranker_df = format_dataframe(segmentation.reranker_rank)
|
104 |
|
105 |
+
if not use_reranker:
|
106 |
+
candidates_list = segmenter_df.head(3)["segmentation"].tolist()
|
107 |
+
else:
|
108 |
+
candidates_list = reranker_df.head(3)["segmentation"].tolist()
|
109 |
|
110 |
top_segmentation = segmentation.output[0]
|
111 |
segmenter_score_dict = convert_to_score_dict(segmenter_df)
|
112 |
reranker_score_dict = convert_to_score_dict(reranker_df)
|
113 |
top_segmentation_df = get_candidates_df([top_segmentation], segmenter_score_dict, reranker_score_dict)
|
114 |
+
|
|
|
115 |
|
116 |
candidates_df = get_candidates_df(candidates_list, segmenter_score_dict, reranker_score_dict)
|
117 |
output_df = pd.concat([top_segmentation_df, candidates_df], axis=0)
|
|
|
128 |
|
129 |
inputs = [
|
130 |
gr.Textbox(label="Hashtag"),
|
|
|
131 |
gr.Dropdown(language_list, label="Language", value="english (fast)"),
|
132 |
gr.Checkbox(label="Use reranker", value=True),
|
133 |
+
gr.Slider(0, 100, value=20, label="Advanced setting - Beamsearch: Number of beams"),
|
134 |
+
gr.Slider(0, 100, value=13, label="Advanced setting - Maximum number of spaces allowed")
|
135 |
]
|
136 |
|
137 |
outputs = [
|
138 |
gr.Textbox(label="Suggested segmentation"),
|
139 |
+
gr.DataFrame(label="Top alternatives"),
|
140 |
]
|
141 |
|
142 |
|