Spaces:
Running
Running
aliasgerovs
committed on
Commit
·
6d6d84c
1
Parent(s):
caa635d
Added options to the AI and plagiarism checkers.
Browse files
- __pycache__/utils.cpython-310.pyc +0 -0
- __pycache__/writing_analysis.cpython-310.pyc +0 -0
- app.py +31 -8
- utils.py +7 -2
__pycache__/utils.cpython-310.pyc
ADDED
Binary file (7.22 kB). View file
|
|
__pycache__/writing_analysis.cpython-310.pyc
ADDED
Binary file (4.6 kB). View file
|
|
app.py
CHANGED
@@ -34,6 +34,7 @@ np.set_printoptions(suppress=True)
|
|
34 |
|
35 |
|
36 |
def plagiarism_check(
|
|
|
37 |
input,
|
38 |
year_from,
|
39 |
month_from,
|
@@ -58,8 +59,10 @@ def plagiarism_check(
|
|
58 |
date_from = build_date(year_from, month_from, day_from)
|
59 |
date_to = build_date(year_to, month_to, day_to)
|
60 |
sort_date = f"date:r:{date_from}:{date_to}"
|
|
|
61 |
# get list of URLS to check
|
62 |
urlCount, ScoreArray = googleSearch(
|
|
|
63 |
sentences,
|
64 |
urlCount,
|
65 |
ScoreArray,
|
@@ -210,7 +213,7 @@ def predict_mc(model, tokenizer, text):
|
|
210 |
mc_score[label.upper()] = score.item()
|
211 |
return mc_score
|
212 |
|
213 |
-
def ai_generated_test(input):
|
214 |
|
215 |
cleaned_text = remove_special_characters(input)
|
216 |
bc_score = predict_bc(text_bc_model, text_bc_tokenizer, cleaned_text)
|
@@ -220,7 +223,10 @@ def ai_generated_test(input):
|
|
220 |
for key, value in mc_score.items():
|
221 |
mc_score[key] = value * sum_prob
|
222 |
|
223 |
-
if
|
|
|
|
|
|
|
224 |
mc_score = {}
|
225 |
return bc_score, mc_score
|
226 |
else:
|
@@ -228,6 +234,8 @@ def ai_generated_test(input):
|
|
228 |
|
229 |
# COMBINED
|
230 |
def main(
|
|
|
|
|
231 |
input,
|
232 |
# models,
|
233 |
year_from,
|
@@ -240,6 +248,7 @@ def main(
|
|
240 |
):
|
241 |
|
242 |
formatted_tokens = plagiarism_check(
|
|
|
243 |
input,
|
244 |
year_from,
|
245 |
month_from,
|
@@ -250,7 +259,7 @@ def main(
|
|
250 |
domains_to_skip,
|
251 |
)
|
252 |
depth_analysis_plot = depth_analysis(input)
|
253 |
-
bc_score, mc_score = ai_generated_test(input)
|
254 |
|
255 |
return (
|
256 |
bc_score,
|
@@ -402,13 +411,24 @@ with gr.Blocks() as demo:
|
|
402 |
|
403 |
with gr.Row():
|
404 |
with gr.Column():
|
405 |
-
|
406 |
with gr.Column():
|
407 |
-
|
|
|
|
|
408 |
with gr.Column():
|
409 |
-
|
|
|
410 |
with gr.Column():
|
411 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
412 |
gr.Markdown(
|
413 |
"""
|
414 |
## Output
|
@@ -479,6 +499,8 @@ with gr.Blocks() as demo:
|
|
479 |
full_check_btn.click(
|
480 |
fn=main,
|
481 |
inputs=[
|
|
|
|
|
482 |
input_text,
|
483 |
# models,
|
484 |
year_from,
|
@@ -500,7 +522,7 @@ with gr.Blocks() as demo:
|
|
500 |
|
501 |
only_ai_btn.click(
|
502 |
fn=ai_generated_test,
|
503 |
-
inputs=[input_text],
|
504 |
outputs=[
|
505 |
bcLabel,
|
506 |
mcLabel,
|
@@ -511,6 +533,7 @@ with gr.Blocks() as demo:
|
|
511 |
only_plagiarism_btn.click(
|
512 |
fn=plagiarism_check,
|
513 |
inputs=[
|
|
|
514 |
input_text,
|
515 |
year_from,
|
516 |
month_from,
|
|
|
34 |
|
35 |
|
36 |
def plagiarism_check(
|
37 |
+
plag_option,
|
38 |
input,
|
39 |
year_from,
|
40 |
month_from,
|
|
|
59 |
date_from = build_date(year_from, month_from, day_from)
|
60 |
date_to = build_date(year_to, month_to, day_to)
|
61 |
sort_date = f"date:r:{date_from}:{date_to}"
|
62 |
+
|
63 |
# get list of URLS to check
|
64 |
urlCount, ScoreArray = googleSearch(
|
65 |
+
plag_option,
|
66 |
sentences,
|
67 |
urlCount,
|
68 |
ScoreArray,
|
|
|
213 |
mc_score[label.upper()] = score.item()
|
214 |
return mc_score
|
215 |
|
216 |
+
def ai_generated_test(ai_option, input):
|
217 |
|
218 |
cleaned_text = remove_special_characters(input)
|
219 |
bc_score = predict_bc(text_bc_model, text_bc_tokenizer, cleaned_text)
|
|
|
223 |
for key, value in mc_score.items():
|
224 |
mc_score[key] = value * sum_prob
|
225 |
|
226 |
+
if ai_option == "Human vs AI":
|
227 |
+
mc_score = {}
|
228 |
+
|
229 |
+
if sum_prob < 0.01 :
|
230 |
mc_score = {}
|
231 |
return bc_score, mc_score
|
232 |
else:
|
|
|
234 |
|
235 |
# COMBINED
|
236 |
def main(
|
237 |
+
ai_option,
|
238 |
+
plag_option,
|
239 |
input,
|
240 |
# models,
|
241 |
year_from,
|
|
|
248 |
):
|
249 |
|
250 |
formatted_tokens = plagiarism_check(
|
251 |
+
plag_option,
|
252 |
input,
|
253 |
year_from,
|
254 |
month_from,
|
|
|
259 |
domains_to_skip,
|
260 |
)
|
261 |
depth_analysis_plot = depth_analysis(input)
|
262 |
+
bc_score, mc_score = ai_generated_test(ai_option,input)
|
263 |
|
264 |
return (
|
265 |
bc_score,
|
|
|
411 |
|
412 |
with gr.Row():
|
413 |
with gr.Column():
|
414 |
+
ai_option = gr.Radio(["Human vs AI", "Human vs AI Source Models"], label="Choose an option please.")
|
415 |
with gr.Column():
|
416 |
+
plag_option = gr.Radio(["Standard", "Advanced"], label="Choose an option please.")
|
417 |
+
|
418 |
+
with gr.Row():
|
419 |
with gr.Column():
|
420 |
+
only_ai_btn = gr.Button("AI Check")
|
421 |
+
|
422 |
with gr.Column():
|
423 |
+
only_plagiarism_btn = gr.Button("Plagiarism Check")
|
424 |
+
with gr.Row():
|
425 |
+
|
426 |
+
with gr.Row():
|
427 |
+
depth_analysis_btn = gr.Button("Detailed Writing Analysis")
|
428 |
+
|
429 |
+
with gr.Row():
|
430 |
+
full_check_btn = gr.Button("Full Check")
|
431 |
+
|
432 |
gr.Markdown(
|
433 |
"""
|
434 |
## Output
|
|
|
499 |
full_check_btn.click(
|
500 |
fn=main,
|
501 |
inputs=[
|
502 |
+
ai_option,
|
503 |
+
plag_option,
|
504 |
input_text,
|
505 |
# models,
|
506 |
year_from,
|
|
|
522 |
|
523 |
only_ai_btn.click(
|
524 |
fn=ai_generated_test,
|
525 |
+
inputs=[ai_option, input_text],
|
526 |
outputs=[
|
527 |
bcLabel,
|
528 |
mcLabel,
|
|
|
533 |
only_plagiarism_btn.click(
|
534 |
fn=plagiarism_check,
|
535 |
inputs=[
|
536 |
+
plag_option,
|
537 |
input_text,
|
538 |
year_from,
|
539 |
month_from,
|
utils.py
CHANGED
@@ -96,6 +96,7 @@ def getSentences(text):
|
|
96 |
|
97 |
|
98 |
def googleSearch(
|
|
|
99 |
sentences,
|
100 |
urlCount,
|
101 |
scoreArray,
|
@@ -139,8 +140,12 @@ def googleSearch(
|
|
139 |
urlList.append(url)
|
140 |
scoreArray.append([0] * len(sentences))
|
141 |
urlCount[url] = urlCount[url] + 1 if url in urlCount else 1
|
142 |
-
|
143 |
-
|
|
|
|
|
|
|
|
|
144 |
)
|
145 |
else:
|
146 |
print("Google Search failed")
|
|
|
96 |
|
97 |
|
98 |
def googleSearch(
|
99 |
+
plag_option,
|
100 |
sentences,
|
101 |
urlCount,
|
102 |
scoreArray,
|
|
|
140 |
urlList.append(url)
|
141 |
scoreArray.append([0] * len(sentences))
|
142 |
urlCount[url] = urlCount[url] + 1 if url in urlCount else 1
|
143 |
+
if plag_option == 'Standard':
|
144 |
+
scoreArray[urlList.index(url)][i] = cosineSim(
|
145 |
+
sentence, snippet)
|
146 |
+
else :
|
147 |
+
scoreArray[urlList.index(url)][i] = sentence_similarity(
|
148 |
+
sentence, snippet
|
149 |
)
|
150 |
else:
|
151 |
print("Google Search failed")
|