aliasgerovs committed on
Commit
416ca10
1 Parent(s): c78ec74

Update plagiarism.py

Browse files
Files changed (1) hide show
  1. plagiarism.py +5 -2
plagiarism.py CHANGED
@@ -19,6 +19,7 @@ model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
19
  # returns cosine similarity of two vectors
20
  # input: two vectors
21
  # output: float between 0 and 1.
 
22
  def get_cosine(vec1, vec2):
23
  intersection = set(vec1.keys()) & set(vec2.keys())
24
 
@@ -93,7 +94,7 @@ def google_search(
93
  if "items" in results and len(results["items"]) > 0:
94
  for count, link in enumerate(results["items"]):
95
  # stop after 3 pages
96
- if count >= 3:
97
  break
98
  # skip user selected domains
99
  if any(
@@ -312,17 +313,19 @@ def plagiarism_check(
312
  urlMap = {}
313
  for count, i in enumerate(index_descending):
314
  urlMap[i] = count + 1
 
315
  for i, sent in enumerate(sentences):
316
  formatted_tokens.append(
317
  (sent, "[" + str(urlMap[sentenceToMaxURL[i]]) + "]")
318
  )
 
319
  for ind in index_descending:
320
  formatted_tokens.append(
321
  (
322
  urlList[ind]
323
  + " --- Matching Score: "
324
  + f"{str(round(urlScore[ind] * 100, 2))}%",
325
- "[" + str(urlMap[ind]) + "]",
326
  )
327
  )
328
  formatted_tokens.append(("\n", None))
 
19
  # returns cosine similarity of two vectors
20
  # input: two vectors
21
  # output: float between 0 and 1.
22
+
23
  def get_cosine(vec1, vec2):
24
  intersection = set(vec1.keys()) & set(vec2.keys())
25
 
 
94
  if "items" in results and len(results["items"]) > 0:
95
  for count, link in enumerate(results["items"]):
96
  # stop after 4 pages
97
+ if count >= 4:
98
  break
99
  # skip user selected domains
100
  if any(
 
313
  urlMap = {}
314
  for count, i in enumerate(index_descending):
315
  urlMap[i] = count + 1
316
+
317
  for i, sent in enumerate(sentences):
318
  formatted_tokens.append(
319
  (sent, "[" + str(urlMap[sentenceToMaxURL[i]]) + "]")
320
  )
321
+ formatted_tokens.append("\n\n\n\")
322
  for ind in index_descending:
323
  formatted_tokens.append(
324
  (
325
  urlList[ind]
326
  + " --- Matching Score: "
327
  + f"{str(round(urlScore[ind] * 100, 2))}%",
328
+ "[" + str(urlMap[ind]) + "]" + "\n",
329
  )
330
  )
331
  formatted_tokens.append(("\n", None))