Spaces:

polygraf-ai
/

copyright_checker

Running

App Files Files Community

copyright_checker / analysis.py

aliasgerovs

Updated

d9962e3 8 months ago

raw

history blame

2.98 kB

	import requests
	import httpx
	import torch
	import re
	from bs4 import BeautifulSoup
	import numpy as np
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import asyncio
	from scipy.special import softmax
	from evaluate import load
	from datetime import date
	import nltk
	import fitz
	from transformers import GPT2LMHeadModel, GPT2TokenizerFast
	import nltk, spacy, subprocess, torch
	import plotly.graph_objects as go
	import torch.nn.functional as F
	import nltk
	from unidecode import unidecode
	import time
	import yaml
	import nltk
	import os
	from explainability import *
	import subprocess

	# Fetch the NLTK "punkt" and "stopwords" resources at import time
	# (presumably needed by the text helpers imported from explainability).
	nltk.download("punkt")
	nltk.download("stopwords")

	# Load runtime settings; the context manager closes the file once parsed.
	with open("config.yaml", "r", encoding="utf-8") as file:
	    params = yaml.safe_load(file)

	# Prefer the GPU when torch reports one, otherwise run on the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Language model and tokenizer handed to calculate_perplexity() below.
	readability_model_id = params["READABILITY_MODEL_ID"]
	gpt2_model = GPT2LMHeadModel.from_pretrained(readability_model_id).to(device)
	gpt2_tokenizer = GPT2TokenizerFast.from_pretrained(readability_model_id)

	# Load the spaCy pipeline, downloading it only when it is not installed.
	# Running the download unconditionally costs a network round-trip and a
	# pip invocation on every import of this module.
	command = ["python3", "-m", "spacy", "download", "en_core_web_sm"]
	try:
	    nlp = spacy.load("en_core_web_sm")
	except OSError:
	    # spacy.load raises OSError when the model package cannot be found.
	    # check=True surfaces a failed download right here instead of letting
	    # it show up later as a less specific load error.
	    subprocess.run(command, check=True)
	    nlp = spacy.load("en_core_web_sm")


	def depth_analysis(input_text):
	    """Build a radar chart of text-complexity features for ``input_text``.

	    Six features are computed through the helper functions available in
	    this module: type-token ratio, Gunning fog, average sentence length,
	    average word length, syntactic tree depth and perplexity under the
	    module-level language model. Every feature except the type-token
	    ratio is passed through ``normalize`` with a fixed upper bound.

	    Args:
	        input_text: Text to analyse; handed unchanged to each helper.

	    Returns:
	        A plotly ``Figure`` containing a single filled ``Scatterpolar``
	        trace, with the radial axis fixed to the range 0-100 and the
	        legend hidden.
	    """

	    def _scaled(raw_value, upper_bound):
	        # Every normalized feature shares a lower bound of 0.
	        return normalize(raw_value, min_value=0, max_value=upper_bound)

	    # Vocabulary richness is plotted as returned, without normalization.
	    tokens = preprocess_text1(input_text)
	    richness = vocabulary_richness_ttr(tokens)

	    fog_raw = calculate_gunning_fog(input_text)
	    fog_score = _scaled(fog_raw, 20)

	    # Second preprocessing pass yields both word and sentence lists.
	    word_list, sentence_list = preprocess_text2(input_text)
	    sentence_len_raw = calculate_average_sentence_length(sentence_list)
	    word_len_raw = calculate_average_word_length(word_list)
	    sentence_len_score = _scaled(sentence_len_raw, 40)
	    word_len_score = _scaled(word_len_raw, 8)

	    depth_raw = calculate_syntactic_tree_depth(nlp, input_text)
	    depth_score = _scaled(depth_raw, 10)

	    perplexity_raw = calculate_perplexity(
	        input_text, gpt2_model, gpt2_tokenizer, device
	    )
	    perplexity_score = _scaled(perplexity_raw, 30)

	    # Insertion order fixes the order of the axes around the chart.
	    features = {
	        "readability": fog_score,
	        "syntactic tree depth": depth_score,
	        "vocabulary richness": richness,
	        "perplexity": perplexity_score,
	        "average sentence length": sentence_len_score,
	        "average word length": word_len_score,
	    }

	    radar_trace = go.Scatterpolar(
	        r=list(features.values()),
	        theta=list(features.keys()),
	        fill="toself",
	        name="Radar Plot",
	    )

	    fig = go.Figure()
	    fig.add_trace(radar_trace)
	    fig.update_layout(
	        polar={"radialaxis": {"visible": True, "range": [0, 100]}},
	        showlegend=False,
	        margin={"l": 10, "r": 20, "b": 10, "t": 10},
	    )
	    return fig