Yehor committed on
Commit
07b19d7
·
1 Parent(s): c6f8aa3

Don't use joblib

Browse files
Files changed (2) hide show
  1. app.py +13 -19
  2. requirements.txt +0 -2
app.py CHANGED
@@ -5,7 +5,6 @@ from importlib.metadata import version
5
  import evaluate
6
  import polars as pl
7
  import gradio as gr
8
- from joblib import Parallel, delayed
9
 
10
  # Load evaluators
11
  wer = evaluate.load("wer")
@@ -62,6 +61,7 @@ tech_libraries = f"""
62
  - gradio: {version("gradio")}
63
  - jiwer: {version("jiwer")}
64
  - evaluate: {version("evaluate")}
 
65
  - polars: {version("polars")}
66
  """.strip()
67
 
@@ -128,21 +128,18 @@ def inference(file_name, _batch_mode, _calculate_metrics):
128
  df = df.with_columns(pl.col("inference_total").round(2).alias("elapsed"))
129
  df = df.drop(["inference_total"])
130
 
 
 
131
  # reassign columns
132
  if _batch_mode:
133
  if _calculate_metrics:
134
- wer_values = Parallel(n_jobs=-1)(
135
- delayed(compute_batch_wer)(row["predictions"], row["references"])
136
- for row in df.iter_rows(named=True)
137
  )
138
- cer_values = Parallel(n_jobs=-1)(
139
- delayed(compute_batch_cer)(row["predictions"], row["references"])
140
- for row in df.iter_rows(named=True)
141
  )
142
 
143
- df.insert_column(2, pl.Series("wer", wer_values))
144
- df.insert_column(3, pl.Series("cer", cer_values))
145
-
146
  fields = [
147
  "elapsed",
148
  "durations",
@@ -160,18 +157,13 @@ def inference(file_name, _batch_mode, _calculate_metrics):
160
  ]
161
  else:
162
  if _calculate_metrics:
163
- wer_values = Parallel(n_jobs=-1)(
164
- delayed(compute_wer)(row["prediction"], row["reference"])
165
- for row in df.iter_rows(named=True)
166
  )
167
- cer_values = Parallel(n_jobs=-1)(
168
- delayed(compute_cer)(row["prediction"], row["reference"])
169
- for row in df.iter_rows(named=True)
170
  )
171
 
172
- df.insert_column(2, pl.Series("wer", wer_values))
173
- df.insert_column(3, pl.Series("cer", cer_values))
174
-
175
  fields = [
176
  "elapsed",
177
  "duration",
@@ -188,6 +180,8 @@ def inference(file_name, _batch_mode, _calculate_metrics):
188
  "reference",
189
  ]
190
 
 
 
191
  return df.select(fields)
192
 
193
 
 
5
  import evaluate
6
  import polars as pl
7
  import gradio as gr
 
8
 
9
  # Load evaluators
10
  wer = evaluate.load("wer")
 
61
  - gradio: {version("gradio")}
62
  - jiwer: {version("jiwer")}
63
  - evaluate: {version("evaluate")}
64
+ - pandas: {version("pandas")}
65
  - polars: {version("polars")}
66
  """.strip()
67
 
 
128
  df = df.with_columns(pl.col("inference_total").round(2).alias("elapsed"))
129
  df = df.drop(["inference_total"])
130
 
131
+ df_pd = df.to_pandas()
132
+
133
  # reassign columns
134
  if _batch_mode:
135
  if _calculate_metrics:
136
+ df_pd["wer"] = df_pd.apply(
137
+ lambda row: compute_batch_wer(row["predictions"], row["references"]), axis=1,
 
138
  )
139
+ df_pd["cer"] = df_pd.apply(
140
+ lambda row: compute_batch_cer(row["predictions"], row["references"]), axis=1,
 
141
  )
142
 
 
 
 
143
  fields = [
144
  "elapsed",
145
  "durations",
 
157
  ]
158
  else:
159
  if _calculate_metrics:
160
+ df_pd["wer"] = df_pd.apply(
161
+ lambda row: compute_wer(row["prediction"], row["reference"]), axis=1,
 
162
  )
163
+ df_pd["cer"] = df_pd.apply(
164
+ lambda row: compute_cer(row["prediction"], row["reference"]), axis=1,
 
165
  )
166
 
 
 
 
167
  fields = [
168
  "elapsed",
169
  "duration",
 
180
  "reference",
181
  ]
182
 
183
+ df = pl.DataFrame(df_pd)
184
+
185
  return df.select(fields)
186
 
187
 
requirements.txt CHANGED
@@ -3,5 +3,3 @@ gradio==5.23.0
3
  polars==1.26.0
4
  evaluate==0.4.3
5
  jiwer==3.1.0
6
-
7
- joblib==1.4.2
 
3
  polars==1.26.0
4
  evaluate==0.4.3
5
  jiwer==3.1.0