Spaces:
Sleeping
Sleeping
aliasgerovs
committed on
Commit
·
8fb8d86
1
Parent(s):
0eaca07
Added fix for plagiarism
Browse files
- plagiarism.py +6 -5
- predictors.py +1 -0
plagiarism.py
CHANGED
@@ -290,9 +290,8 @@ def plagiarism_check(
|
|
290 |
# api_key = "AIzaSyCLyCCpOPLZWuptuPAPSg8cUIZhdEMVf6g"
|
291 |
# api_key = "AIzaSyCS1WQDMl1IMjaXtwSd_2rA195-Yc4psQE"
|
292 |
# api_key = "AIzaSyCB61O70B8AC3l5Kk3KMoLb6DN37B7nqIk"
|
293 |
-
|
294 |
# api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
|
295 |
-
api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
|
296 |
cse_id = "851813e81162b4ed4"
|
297 |
|
298 |
url_scores = []
|
@@ -374,7 +373,7 @@ def plagiarism_check(
|
|
374 |
print("SNIPPETS: ", snippets)
|
375 |
snippets = [[item for item in sublist if item] for sublist in snippets]
|
376 |
for ind in index_descending:
|
377 |
-
if url_source[ind] > 0.
|
378 |
matched_sentence_array = [
|
379 |
[item for item in sublist if item]
|
380 |
for sublist in matched_sentence_array
|
@@ -470,15 +469,16 @@ def html_highlight(
|
|
470 |
combined_sentence = ""
|
471 |
total_score = 0
|
472 |
total_count = 0
|
473 |
-
category_scores = defaultdict(
|
474 |
for sentence, score, url, idx in sentence_scores:
|
475 |
category = check_url_category(url)
|
476 |
if score is None:
|
477 |
total_score += 0
|
478 |
else:
|
479 |
total_score += score
|
|
|
480 |
total_count += 1
|
481 |
-
|
482 |
if idx != prev_idx and prev_idx is not None:
|
483 |
color = color_map[prev_idx - 1]
|
484 |
index_part = f"<span>[{prev_idx}]</span>"
|
@@ -488,6 +488,7 @@ def html_highlight(
|
|
488 |
combined_sentence += " " + sentence
|
489 |
prev_idx = idx
|
490 |
|
|
|
491 |
total_average_score = round(total_score / total_count, 2)
|
492 |
category_averages = {
|
493 |
category: round((sum(scores) / len(scores)), 2)
|
|
|
290 |
# api_key = "AIzaSyCLyCCpOPLZWuptuPAPSg8cUIZhdEMVf6g"
|
291 |
# api_key = "AIzaSyCS1WQDMl1IMjaXtwSd_2rA195-Yc4psQE"
|
292 |
# api_key = "AIzaSyCB61O70B8AC3l5Kk3KMoLb6DN37B7nqIk"
|
293 |
+
api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
|
294 |
# api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
|
|
|
295 |
cse_id = "851813e81162b4ed4"
|
296 |
|
297 |
url_scores = []
|
|
|
373 |
print("SNIPPETS: ", snippets)
|
374 |
snippets = [[item for item in sublist if item] for sublist in snippets]
|
375 |
for ind in index_descending:
|
376 |
+
if url_source[ind] > 0.1:
|
377 |
matched_sentence_array = [
|
378 |
[item for item in sublist if item]
|
379 |
for sublist in matched_sentence_array
|
|
|
469 |
combined_sentence = ""
|
470 |
total_score = 0
|
471 |
total_count = 0
|
472 |
+
category_scores = defaultdict(set)
|
473 |
for sentence, score, url, idx in sentence_scores:
|
474 |
category = check_url_category(url)
|
475 |
if score is None:
|
476 |
total_score += 0
|
477 |
else:
|
478 |
total_score += score
|
479 |
+
category_scores[category].add(score)
|
480 |
total_count += 1
|
481 |
+
|
482 |
if idx != prev_idx and prev_idx is not None:
|
483 |
color = color_map[prev_idx - 1]
|
484 |
index_part = f"<span>[{prev_idx}]</span>"
|
|
|
488 |
combined_sentence += " " + sentence
|
489 |
prev_idx = idx
|
490 |
|
491 |
+
print(category_scores)
|
492 |
total_average_score = round(total_score / total_count, 2)
|
493 |
category_averages = {
|
494 |
category: round((sum(scores) / len(scores)), 2)
|
predictors.py
CHANGED
@@ -26,6 +26,7 @@ nltk.download("punkt")
|
|
26 |
nltk.download("stopwords")
|
27 |
device_needed = "cuda" if torch.cuda.is_available() else "cpu"
|
28 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
29 |
|
30 |
text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
|
31 |
text_mc_model_path = params["TEXT_MC_MODEL_PATH"]
|
|
|
26 |
nltk.download("stopwords")
|
27 |
device_needed = "cuda" if torch.cuda.is_available() else "cpu"
|
28 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
29 |
+
print('DEVICE IS :' , device)
|
30 |
|
31 |
text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
|
32 |
text_mc_model_path = params["TEXT_MC_MODEL_PATH"]
|