# NOTE: imports keep their original relative order (duplicates removed).
# `from explainability import *` deliberately stays ahead of the imports
# that followed it originally, so any same-named exports it carries are
# rebound exactly as before.
import requests
import httpx
import torch
import re
from bs4 import BeautifulSoup
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import asyncio
from scipy.special import softmax
from evaluate import load
from datetime import date
import nltk
import fitz
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import spacy
import plotly.graph_objects as go
import torch.nn.functional as F
from unidecode import unidecode
import time
import yaml
import os
from explainability import *
from dotenv import load_dotenv
import subprocess
import importlib
import sys

# NLTK resources fetched at import time.
nltk.download("punkt")
nltk.download("stopwords")

# Load environment variables from a .env file, if one is present.
load_dotenv()

# Runtime parameters are read from config.yaml in the current working
# directory.
with open("config.yaml", "r") as file:
    params = yaml.safe_load(file)

# Use the GPU when available; the GPT-2 model used for the perplexity
# feature is moved to this device.
device = "cuda" if torch.cuda.is_available() else "cpu"
readability_model_id = params["READABILITY_MODEL_ID"]
gpt2_model = GPT2LMHeadModel.from_pretrained(readability_model_id).to(device)
gpt2_tokenizer = GPT2TokenizerFast.from_pretrained(readability_model_id)

# spaCy pipeline used for the syntactic-tree-depth feature.
# The model is downloaded only when it is not already installed, instead of
# on every import. `sys.executable` guarantees the download targets the
# interpreter that is actually running; a bare "python" may resolve to a
# different environment, leaving the model missing here.
command = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # The return code is intentionally not checked (as before): if the
    # download fails, the retry below raises the same OSError callers saw
    # previously.
    subprocess.run(command)
    # The failed load above may have cached "not found" results in the
    # import machinery; clear them so the freshly installed package is seen.
    importlib.invalidate_caches()
    nlp = spacy.load("en_core_web_sm")


def _build_radar_figure(features):
    """Return a filled radar (polar scatter) chart for a feature mapping.

    Args:
        features: Mapping of axis label -> numeric value. Insertion order
            determines the order of the axes.

    Returns:
        A ``plotly.graph_objects.Figure`` with a single ``Scatterpolar``
        trace, no legend, and a radial axis fixed to ``[0, 100]``.
    """
    fig = go.Figure()
    fig.add_trace(
        go.Scatterpolar(
            r=list(features.values()),
            theta=list(features.keys()),
            fill="toself",
            name="Radar Plot",
        )
    )
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                # NOTE(review): assumes every feature value is on a 0-100
                # scale; `normalize` and `vocabulary_richness_ttr` are not
                # visible here -- TODO confirm.
                range=[0, 100],
            )
        ),
        showlegend=False,
        margin=dict(
            l=10,
            r=20,
            b=10,
            t=10,
        ),
    )
    return fig


def depth_analysis(input_text):
    """Compute text-complexity features for ``input_text`` and plot them.

    Six features are computed and shown on a radar chart: Gunning Fog
    ("readability"), syntactic tree depth, vocabulary richness (TTR),
    GPT-2 perplexity, average sentence length and average word length.
    All but vocabulary richness are passed through ``normalize`` with a
    fixed ``min_value``/``max_value`` pair before plotting.

    The feature helpers (``preprocess_text1``, ``preprocess_text2``,
    ``normalize``, ``calculate_*``, ``vocabulary_richness_ttr``) are not
    defined in this file; presumably they come from the
    ``from explainability import *`` star import -- verify there.

    Args:
        input_text: The text to analyse. Presumably a ``str``; it is handed
            unchanged to the helper functions.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the radar chart.
    """
    # Vocabulary richness is computed on the first preprocessing variant.
    processed_words = preprocess_text1(input_text)
    ttr_value = vocabulary_richness_ttr(processed_words)

    # Readability (Gunning Fog index), normalized against a 0-20 range.
    gunning_fog = calculate_gunning_fog(input_text)
    gunning_fog_norm = normalize(gunning_fog, min_value=0, max_value=20)

    # Surface statistics use the second preprocessing variant, which yields
    # both words and sentences.
    words, sentences = preprocess_text2(input_text)
    average_sentence_length = calculate_average_sentence_length(sentences)
    average_word_length = calculate_average_word_length(words)
    average_sentence_length_norm = normalize(
        average_sentence_length, min_value=0, max_value=40
    )
    average_word_length_norm = normalize(
        average_word_length, min_value=0, max_value=8
    )

    # Syntactic complexity from the spaCy pipeline loaded at module level.
    average_tree_depth = calculate_syntactic_tree_depth(nlp, input_text)
    average_tree_depth_norm = normalize(
        average_tree_depth, min_value=0, max_value=10
    )

    # Language-model perplexity from the module-level GPT-2 model/tokenizer.
    perplexity = calculate_perplexity(
        input_text, gpt2_model, gpt2_tokenizer, device
    )
    perplexity_norm = normalize(perplexity, min_value=0, max_value=30)

    # Key order here is the axis order on the chart.
    features = {
        "readability": gunning_fog_norm,
        "syntactic tree depth": average_tree_depth_norm,
        "vocabulary richness": ttr_value,
        "perplexity": perplexity_norm,
        "average sentence length": average_sentence_length_norm,
        "average word length": average_word_length_norm,
    }
    return _build_radar_figure(features)