Refactor app.py: reorganize imports, remove unused code, and enhance readability
Browse files
app.py
CHANGED
@@ -1,17 +1,24 @@
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import pickle
|
3 |
import re
|
|
|
4 |
from collections import Counter
|
5 |
from dataclasses import dataclass
|
6 |
-
from typing import Callable, Dict, Iterable, List, Optional, Type, TypeVar
|
7 |
|
|
|
8 |
import nltk
|
9 |
import tqdm
|
|
|
10 |
|
11 |
from nlp4web_codebase.ir.data_loaders.dm import Document
|
|
|
|
|
12 |
|
13 |
nltk.download("stopwords", quiet=True)
|
14 |
-
from nltk.corpus import stopwords as nltk_stopwords
|
15 |
|
16 |
LANGUAGE = "english"
|
17 |
word_splitter = re.compile(r"(?u)\b\w\w+\b").findall
|
@@ -144,19 +151,9 @@ def run_counting(
|
|
144 |
)
|
145 |
|
146 |
|
147 |
-
from nlp4web_codebase.ir.data_loaders.sciq import load_sciq
|
148 |
-
|
149 |
sciq = load_sciq()
|
150 |
counting = run_counting(documents=iter(sciq.corpus), ndocs=len(sciq.corpus))
|
151 |
|
152 |
-
from __future__ import annotations
|
153 |
-
|
154 |
-
import math
|
155 |
-
from dataclasses import dataclass
|
156 |
-
from typing import Iterable, List, Optional, Type
|
157 |
-
|
158 |
-
from nlp4web_codebase.ir.data_loaders.dm import Document
|
159 |
-
|
160 |
|
161 |
@dataclass
|
162 |
class BM25Index(InvertedIndex):
|
@@ -251,11 +248,6 @@ bm25_index = BM25Index.build_from_documents(
|
|
251 |
)
|
252 |
bm25_index.save("output/bm25_index")
|
253 |
|
254 |
-
from abc import abstractmethod
|
255 |
-
from typing import Type
|
256 |
-
|
257 |
-
from nlp4web_codebase.ir.models import BaseRetriever
|
258 |
-
|
259 |
|
260 |
class BaseInvertedIndexRetriever(BaseRetriever):
|
261 |
@property
|
@@ -362,10 +354,6 @@ bm25_index = BM25Index.build_from_documents(
|
|
362 |
b=best_b,
|
363 |
)
|
364 |
|
365 |
-
from typing import TypedDict
|
366 |
-
|
367 |
-
import gradio as gr
|
368 |
-
|
369 |
|
370 |
class Hit(TypedDict):
|
371 |
cid: str
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import math
|
4 |
import os
|
5 |
import pickle
|
6 |
import re
|
7 |
+
from abc import abstractmethod
|
8 |
from collections import Counter
|
9 |
from dataclasses import dataclass
|
10 |
+
from typing import Callable, Dict, Iterable, List, Optional, Type, TypedDict, TypeVar
|
11 |
|
12 |
+
import gradio as gr
|
13 |
import nltk
|
14 |
import tqdm
|
15 |
+
from nltk.corpus import stopwords as nltk_stopwords
|
16 |
|
17 |
from nlp4web_codebase.ir.data_loaders.dm import Document
|
18 |
+
from nlp4web_codebase.ir.data_loaders.sciq import load_sciq
|
19 |
+
from nlp4web_codebase.ir.models import BaseRetriever
|
20 |
|
21 |
nltk.download("stopwords", quiet=True)
|
|
|
22 |
|
23 |
LANGUAGE = "english"
|
24 |
word_splitter = re.compile(r"(?u)\b\w\w+\b").findall
|
|
|
151 |
)
|
152 |
|
153 |
|
|
|
|
|
154 |
sciq = load_sciq()
|
155 |
counting = run_counting(documents=iter(sciq.corpus), ndocs=len(sciq.corpus))
|
156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
@dataclass
|
159 |
class BM25Index(InvertedIndex):
|
|
|
248 |
)
|
249 |
bm25_index.save("output/bm25_index")
|
250 |
|
|
|
|
|
|
|
|
|
|
|
251 |
|
252 |
class BaseInvertedIndexRetriever(BaseRetriever):
|
253 |
@property
|
|
|
354 |
b=best_b,
|
355 |
)
|
356 |
|
|
|
|
|
|
|
|
|
357 |
|
358 |
class Hit(TypedDict):
|
359 |
cid: str
|