aliasgerovs committed on
Commit
f6a92d7
1 Parent(s): 40d9ac8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -19
app.py CHANGED
@@ -25,6 +25,7 @@ import time
25
  from utils import cos_sim_torch, embed_text
26
  import multiprocessing
27
  from functools import partial
 
28
 
29
  nltk.download('punkt')
30
 
@@ -124,28 +125,30 @@ def plagiarism_check(
124
  # # score = matchingScoreWithTimeout(sent, page_content)
125
  # score = cos_sim_torch(embed_text(sent), source_embeddings[i])
126
  # ScoreArray[i][j] = score
127
-
128
- def calculate_score(params):
129
- i, sent, source_embedding = params
130
- return cos_sim_torch(embed_text(sent), source_embedding)
131
 
132
-
133
- def worker(soups, sentences, source_embeddings):
134
- params_list = []
135
- for i, soup in enumerate(soups):
136
- if soup:
137
- for j, sent in enumerate(sentences):
138
- params_list.append((i, sent, source_embeddings[i]))
139
- num_processes = multiprocessing.cpu_count()
140
- pool = multiprocessing.Pool(processes=num_processes)
141
- scores = pool.map(calculate_score, params_list)
142
- pool.close()
143
- pool.join()
144
- for k, score in enumerate(scores):
145
- i, j = divmod(k, len(sentences))
 
 
 
146
  ScoreArray[i][j] = score
 
 
 
 
147
 
148
- worker(soups, sentences, source_embeddings)
149
 
150
  print(f"Time for matching score: {time.perf_counter()-time1}")
151
  time1 = time.perf_counter()
 
25
  from utils import cos_sim_torch, embed_text
26
  import multiprocessing
27
  from functools import partial
28
+ import concurrent.futures
29
 
30
  nltk.download('punkt')
31
 
 
125
  # # score = matchingScoreWithTimeout(sent, page_content)
126
  # score = cos_sim_torch(embed_text(sent), source_embeddings[i])
127
  # ScoreArray[i][j] = score
 
 
 
 
128
 
129
+
130
+ def compute_cosine_similarity(args):
131
+ sent, source_embedding, i, j = args
132
+ score = cos_sim_torch(embed_text(sent), source_embedding)
133
+ return i, j, score
134
+
135
+ def main(soups, sentences):
136
+ source_embeddings = [preprocess(soup) for soup in soups]
137
+ ScoreArray = [[0 for _ in sentences] for _ in soups]
138
+ args_list = []
139
+ for i, soup in enumerate(soups):
140
+ if soup:
141
+ for j, sent in enumerate(sentences):
142
+ args_list.append((sent, source_embeddings[i], i, j))
143
+ with concurrent.futures.ProcessPoolExecutor() as executor:
144
+ results = executor.map(compute_cosine_similarity, args_list)
145
+ for i, j, score in results:
146
  ScoreArray[i][j] = score
147
+ return ScoreArray
148
+
149
+ ScoreArray = main(soups, sentences)
150
+
151
 
 
152
 
153
  print(f"Time for matching score: {time.perf_counter()-time1}")
154
  time1 = time.perf_counter()