minko186 committed on
Commit
2aee0ff
·
1 Parent(s): 69ebf10

Create analysis.py

Browse files
Files changed (1) hide show
  1. analysis.py +78 -0
analysis.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import httpx
3
+ import torch
4
+ import re
5
+ from bs4 import BeautifulSoup
6
+ import numpy as np
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
+ import asyncio
9
+ from scipy.special import softmax
10
+ from evaluate import load
11
+ from datetime import date
12
+ import nltk
13
+ import fitz
14
+ from transformers import GPT2LMHeadModel, GPT2TokenizerFast
15
+ import nltk, spacy, subprocess, torch
16
+ import plotly.graph_objects as go
17
+ import torch.nn.functional as F
18
+ import nltk
19
+ from unidecode import unidecode
20
+ import time
21
+ import yaml
22
+ import nltk
23
+ import os
24
+ from explainability import *
25
+ from dotenv import load_dotenv
26
# Module-level setup: runs once at import time.

# Fetch the NLTK resources named 'punkt' and 'stopwords'.
nltk.download('punkt')
nltk.download('stopwords')

# Load variables from a .env file (if present) into the process environment
# before READABILITY_MODEL_ID is read below.
load_dotenv()

# Run the language model on the GPU when one is available, otherwise on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

readability_model_id = os.getenv('READABILITY_MODEL_ID')
if not readability_model_id:
    # os.getenv returns None for an unset variable; passing that on to
    # from_pretrained fails with an unrelated-looking loader error, so stop
    # here with a message that names the actual misconfiguration.
    raise RuntimeError(
        "Environment variable READABILITY_MODEL_ID is not set; it must name "
        "the GPT-2 checkpoint used for perplexity scoring."
    )

# Model and tokenizer are loaded from the same checkpoint identifier.
gpt2_model = GPT2LMHeadModel.from_pretrained(readability_model_id).to(device)
gpt2_tokenizer = GPT2TokenizerFast.from_pretrained(readability_model_id)
33
+
34
def depth_analysis(input_text):
    """Build a radar chart of text-complexity metrics for ``input_text``.

    Six metrics are computed with helpers defined outside this block and
    plotted on a polar chart whose radial axis is fixed to the range 0-100:
    readability (Gunning fog), syntactic tree depth, vocabulary richness
    (TTR), perplexity, average sentence length and average word length.

    Args:
        input_text: The text to analyse.

    Returns:
        A plotly ``Figure`` containing a single filled ``Scatterpolar`` trace.
    """
    # Vocabulary richness is plotted exactly as returned by the helper.
    # NOTE(review): unlike the other metrics it is not passed through
    # normalize() -- confirm its scale matches the 0-100 radial axis.
    ttr_value = vocabulary_richness_ttr(preprocess_text1(input_text))

    # Each remaining metric goes through normalize() with its own bounds;
    # presumably this rescales onto the plotted range -- verify in the helper.
    fog_score = normalize(
        calculate_gunning_fog(input_text), min_value=0, max_value=20
    )

    tokens, sents = preprocess_text2(input_text)
    mean_sentence_len = calculate_average_sentence_length(sents)
    mean_word_len = calculate_average_word_length(tokens)
    sentence_len_score = normalize(mean_sentence_len, min_value=0, max_value=40)
    word_len_score = normalize(mean_word_len, min_value=0, max_value=8)

    # `nlp` is not defined in this block; presumably it is supplied by the
    # star import from `explainability` -- TODO confirm.
    tree_depth_score = normalize(
        calculate_syntactic_tree_depth(nlp, input_text),
        min_value=0,
        max_value=10,
    )

    perplexity_score = normalize(
        calculate_perplexity(input_text, gpt2_model, gpt2_tokenizer, device),
        min_value=0,
        max_value=30,
    )

    # (axis label, value) pairs in the order they appear around the chart.
    axes = [
        ("readability", fog_score),
        ("syntactic tree depth", tree_depth_score),
        ("vocabulary richness", ttr_value),
        ("perplexity", perplexity_score),
        ("average sentence length", sentence_len_score),
        ("average word length", word_len_score),
    ]
    radar_trace = go.Scatterpolar(
        r=[value for _, value in axes],
        theta=[label for label, _ in axes],
        fill='toself',
        name='Radar Plot',
    )

    fig = go.Figure()
    fig.add_trace(radar_trace)
    fig.update_layout(
        polar={"radialaxis": {"visible": True, "range": [0, 100]}},
        showlegend=False,
        margin={"l": 10, "r": 20, "b": 10, "t": 10},
    )
    return fig