Spaces:
Running
Running
aliasgerovs
committed on
Commit
•
f6a92d7
1
Parent(s):
40d9ac8
Update app.py
Browse files
app.py
CHANGED
@@ -25,6 +25,7 @@ import time
|
|
25 |
from utils import cos_sim_torch, embed_text
|
26 |
import multiprocessing
|
27 |
from functools import partial
|
|
|
28 |
|
29 |
nltk.download('punkt')
|
30 |
|
@@ -124,28 +125,30 @@ def plagiarism_check(
|
|
124 |
# # score = matchingScoreWithTimeout(sent, page_content)
|
125 |
# score = cos_sim_torch(embed_text(sent), source_embeddings[i])
|
126 |
# ScoreArray[i][j] = score
|
127 |
-
|
128 |
-
def calculate_score(params):
    """Score one sentence against one source embedding.

    Args:
        params: A 3-item sequence ``(i, sent, source_embedding)``. The
            leading index is unpacked but not used in the computation.

    Returns:
        Whatever ``cos_sim_torch`` returns for the embedded sentence and
        the given source embedding.
    """
    _index, sentence, embedding = params
    sentence_embedding = embed_text(sentence)
    return cos_sim_torch(sentence_embedding, embedding)
|
131 |
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
146 |
ScoreArray[i][j] = score
|
|
|
|
|
|
|
|
|
147 |
|
148 |
-
worker(soups, sentences, source_embeddings)
|
149 |
|
150 |
print(f"Time for matching score: {time.perf_counter()-time1}")
|
151 |
time1 = time.perf_counter()
|
|
|
25 |
from utils import cos_sim_torch, embed_text
|
26 |
import multiprocessing
|
27 |
from functools import partial
|
28 |
+
import concurrent.futures
|
29 |
|
30 |
nltk.download('punkt')
|
31 |
|
|
|
125 |
# # score = matchingScoreWithTimeout(sent, page_content)
|
126 |
# score = cos_sim_torch(embed_text(sent), source_embeddings[i])
|
127 |
# ScoreArray[i][j] = score
|
|
|
|
|
|
|
|
|
128 |
|
129 |
+
|
130 |
+
def compute_cosine_similarity(args):
    """Score one (sentence, source) pair and echo back its grid position.

    Takes a single packed argument so it can be used directly as the
    callable for ``executor.map``.

    Args:
        args: A 4-item sequence ``(sent, source_embedding, i, j)``.

    Returns:
        A tuple ``(i, j, score)`` where ``score`` is the result of
        ``cos_sim_torch(embed_text(sent), source_embedding)``.
    """
    sentence, embedding, row, col = args
    return row, col, cos_sim_torch(embed_text(sentence), embedding)
|
134 |
+
|
135 |
+
def main(soups, sentences):
    """Build the soups-by-sentences score grid using a process pool.

    Every entry of ``soups`` is passed through ``preprocess``; for each
    truthy soup, every sentence is scored against that soup's embedding
    via ``compute_cosine_similarity``. Cells belonging to falsy soups keep
    their initial value of 0.

    Args:
        soups: Sequence of parsed sources; falsy entries are not scored.
        sentences: Sentences to compare against each source.

    Returns:
        A list of lists where element ``[i][j]`` holds the score of
        ``sentences[j]`` against ``soups[i]``.
    """
    # NOTE(review): embeddings are computed for falsy soups too, although
    # they are never used below -- confirm preprocess() tolerates them.
    source_embeddings = [preprocess(soup) for soup in soups]
    score_grid = [[0 for _ in sentences] for _ in soups]

    # One task per (truthy soup, sentence) pair, carrying its grid indices
    # so results can be written back regardless of which worker ran them.
    tasks = [
        (sent, source_embeddings[row], row, col)
        for row, soup in enumerate(soups)
        if soup
        for col, sent in enumerate(sentences)
    ]

    # NOTE(review): ProcessPoolExecutor must pickle the callable and its
    # arguments; confirm compute_cosine_similarity is importable at module
    # level from the worker processes.
    with concurrent.futures.ProcessPoolExecutor() as pool:
        for row, col, score in pool.map(compute_cosine_similarity, tasks):
            score_grid[row][col] = score

    return score_grid
|
148 |
+
|
149 |
+
ScoreArray = main(soups, sentences)
|
150 |
+
|
151 |
|
|
|
152 |
|
153 |
print(f"Time for matching score: {time.perf_counter()-time1}")
|
154 |
time1 = time.perf_counter()
|