Spaces:
Running
Running
aliasgerovs
committed on
Commit
•
416ca10
1
Parent(s):
c78ec74
Update plagiarism.py
Browse files- plagiarism.py +5 -2
plagiarism.py
CHANGED
@@ -19,6 +19,7 @@ model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
|
19 |
# returns cosine similarity of two vectors
|
20 |
# input: two vectors
|
21 |
# output: float between 0 and 1.
|
|
|
22 |
def get_cosine(vec1, vec2):
|
23 |
intersection = set(vec1.keys()) & set(vec2.keys())
|
24 |
|
@@ -93,7 +94,7 @@ def google_search(
|
|
93 |
if "items" in results and len(results["items"]) > 0:
|
94 |
for count, link in enumerate(results["items"]):
|
95 |
# stop after 3 pages
|
96 |
-
if count >=
|
97 |
break
|
98 |
# skip user selected domains
|
99 |
if any(
|
@@ -312,17 +313,19 @@ def plagiarism_check(
|
|
312 |
urlMap = {}
|
313 |
for count, i in enumerate(index_descending):
|
314 |
urlMap[i] = count + 1
|
|
|
315 |
for i, sent in enumerate(sentences):
|
316 |
formatted_tokens.append(
|
317 |
(sent, "[" + str(urlMap[sentenceToMaxURL[i]]) + "]")
|
318 |
)
|
|
|
319 |
for ind in index_descending:
|
320 |
formatted_tokens.append(
|
321 |
(
|
322 |
urlList[ind]
|
323 |
+ " --- Matching Score: "
|
324 |
+ f"{str(round(urlScore[ind] * 100, 2))}%",
|
325 |
-
"[" + str(urlMap[ind]) + "]",
|
326 |
)
|
327 |
)
|
328 |
formatted_tokens.append(("\n", None))
|
|
|
19 |
# returns cosine similarity of two vectors
|
20 |
# input: two vectors
|
21 |
# output: float between 0 and 1.
|
22 |
+
|
23 |
def get_cosine(vec1, vec2):
|
24 |
intersection = set(vec1.keys()) & set(vec2.keys())
|
25 |
|
|
|
94 |
if "items" in results and len(results["items"]) > 0:
|
95 |
for count, link in enumerate(results["items"]):
|
96 |
# stop after 4 pages
|
97 |
+
if count >= 4:
|
98 |
break
|
99 |
# skip user selected domains
|
100 |
if any(
|
|
|
313 |
urlMap = {}
|
314 |
for count, i in enumerate(index_descending):
|
315 |
urlMap[i] = count + 1
|
316 |
+
|
317 |
for i, sent in enumerate(sentences):
|
318 |
formatted_tokens.append(
|
319 |
(sent, "[" + str(urlMap[sentenceToMaxURL[i]]) + "]")
|
320 |
)
|
321 |
+
formatted_tokens.append("\n\n\n\")
|
322 |
for ind in index_descending:
|
323 |
formatted_tokens.append(
|
324 |
(
|
325 |
urlList[ind]
|
326 |
+ " --- Matching Score: "
|
327 |
+ f"{str(round(urlScore[ind] * 100, 2))}%",
|
328 |
+
"[" + str(urlMap[ind]) + "]" + "\n",
|
329 |
)
|
330 |
)
|
331 |
formatted_tokens.append(("\n", None))
|