Spaces:
Running
Running
Update utils.py
Browse files
utils.py
CHANGED
@@ -10,6 +10,7 @@ import numpy as np
|
|
10 |
import asyncio
|
11 |
import nltk
|
12 |
from sentence_transformers import SentenceTransformer, util
|
|
|
13 |
|
14 |
nltk.download('punkt')
|
15 |
|
@@ -189,21 +190,36 @@ async def parallel_scrap(urls):
|
|
189 |
return results
|
190 |
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
def matchingScore(sentence, content):
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
|
208 |
|
209 |
async def matchingScoreAsync(sentences, content, content_idx, ScoreArray):
|
|
|
10 |
import asyncio
|
11 |
import nltk
|
12 |
from sentence_transformers import SentenceTransformer, util
|
13 |
+
import signal
|
14 |
|
15 |
nltk.download('punkt')
|
16 |
|
|
|
190 |
return results
|
191 |
|
192 |
|
193 |
+
class TimeoutError(Exception):
    """Raised by ``signal_handler`` when the SIGALRM watchdog expires.

    NOTE: the name shadows the builtin ``TimeoutError`` inside this module;
    it is kept unchanged so existing ``except TimeoutError`` clauses that
    refer to this class keep working.
    """


def signal_handler(signum, frame):
    """SIGALRM handler that aborts the running computation.

    Args:
        signum: Number of the delivered signal (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        TimeoutError: always, with the message ``"Function timed out"``.
    """
    raise TimeoutError("Function timed out")


def matchingScore(sentence, content):
    """Score how much of ``sentence`` occurs in ``content``.

    The computation is guarded by a 10-second SIGALRM watchdog.

    Args:
        sentence: Text to look for.
        content: Text to search in.

    Returns:
        ``1`` if ``sentence`` is a substring of ``content`` (before or after
        both are passed through ``removePunc``); otherwise the fraction of
        the 5-grams produced by ``getQueries`` whose space-joined form occurs
        in ``content``. ``0`` if there are no n-grams or the watchdog fires.

    Note:
        ``signal.signal`` may only be called from the main thread (it raises
        ``ValueError`` elsewhere) and ``SIGALRM`` exists only on Unix.
    """
    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(10)  # Set alarm for 10 seconds

    try:
        if sentence in content:
            return 1
        sentence = removePunc(sentence)
        content = removePunc(content)
        if sentence in content:
            return 1

        n = 5
        ngrams = getQueries(sentence, n)
        print("ngrams done.......")
        if len(ngrams) == 0:
            return 0
        matched = [x for x in ngrams if " ".join(x) in content]
        return len(matched) / len(ngrams)
    except TimeoutError:
        return 0
    finally:
        # Always disarm the watchdog: every early return above used to leave
        # the alarm pending, so it fired later inside unrelated code.
        signal.alarm(0)
        # signal.signal() returns None when the old handler was not installed
        # from Python; such a handler cannot be re-installed, so skip it.
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)
|
223 |
|
224 |
|
225 |
async def matchingScoreAsync(sentences, content, content_idx, ScoreArray):
|