minko186 committed on
Commit
5e1c28a
1 Parent(s): cca0df0

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +30 -14
utils.py CHANGED
@@ -10,6 +10,7 @@ import numpy as np
10
  import asyncio
11
  import nltk
12
  from sentence_transformers import SentenceTransformer, util
 
13
 
14
  nltk.download('punkt')
15
 
@@ -189,21 +190,36 @@ async def parallel_scrap(urls):
189
  return results
190
 
191
 
 
 
 
 
 
 
 
 
192
  def matchingScore(sentence, content):
193
- if sentence in content:
194
- return 1
195
- sentence = removePunc(sentence)
196
- content = removePunc(content)
197
- if sentence in content:
198
- return 1
199
- else:
200
- n = 5
201
- ngrams = getQueries(sentence, n)
202
- print("ngrams done.......")
203
- if len(ngrams) == 0:
204
- return 0
205
- matched = [x for x in ngrams if " ".join(x) in content]
206
- return len(matched) / len(ngrams)
 
 
 
 
 
 
 
207
 
208
 
209
  async def matchingScoreAsync(sentences, content, content_idx, ScoreArray):
 
10
  import asyncio
11
  import nltk
12
  from sentence_transformers import SentenceTransformer, util
13
+ import signal
14
 
15
  nltk.download('punkt')
16
 
 
190
  return results
191
 
192
 
193
class TimeoutError(Exception):
    """Raised by ``signal_handler`` when the SIGALRM timer armed in ``matchingScore`` expires.

    NOTE(review): this name shadows the built-in ``TimeoutError`` for the rest
    of the module. It is kept unchanged so that any existing code catching
    this class keeps working.
    """


def signal_handler(signum, frame):
    """Signal handler that aborts the interrupted code by raising ``TimeoutError``.

    Args:
        signum: Number of the delivered signal (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        TimeoutError: Always, with the message "Function timed out".
    """
    raise TimeoutError("Function timed out")


def matchingScore(sentence, content):
    """Score how much of ``sentence`` appears in ``content``, with a 10 second limit.

    The score is:

    * ``1`` if ``sentence`` is a substring of ``content``, either as given or
      after both have been passed through ``removePunc``;
    * otherwise the fraction of the items returned by
      ``getQueries(sentence, 5)`` whose space-joined form is a substring of
      the cleaned ``content``;
    * ``0`` if ``getQueries`` returns nothing, or if the time limit expires.

    Args:
        sentence: Text to look for.
        content: Text to search in.

    Returns:
        ``1``, ``0`` or a float in ``[0, 1]`` as described above.

    Notes:
        The time limit relies on ``signal.SIGALRM``, which is only available
        on Unix, and ``signal.signal`` may only be called from the main thread
        of the main interpreter. ``signal_handler`` stays installed as the
        SIGALRM handler after the call returns.
    """
    signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(10)  # Set alarm for 10 seconds

    try:
        if sentence in content:
            return 1
        # presumably strips punctuation so the comparison is more lenient --
        # removePunc is defined elsewhere in this file; verify there.
        sentence = removePunc(sentence)
        content = removePunc(content)
        if sentence in content:
            return 1

        n = 5
        ngrams = getQueries(sentence, n)
        print("ngrams done.......")
        if len(ngrams) == 0:
            return 0
        # Each item is joined with spaces, so it is expected to be a sequence
        # of strings.
        matched = [x for x in ngrams if " ".join(x) in content]
        return len(matched) / len(ngrams)
    except TimeoutError:
        return 0
    finally:
        # Cancel the alarm on EVERY exit path. Previously only the final
        # return cancelled it, so the early returns left a live timer that
        # fired up to 10 seconds later inside unrelated code.
        signal.alarm(0)
223
 
224
 
225
  async def matchingScoreAsync(sentences, content, content_idx, ScoreArray):