Spaces:

polygraf-ai
/

copyright_checker

Runtime error

App Files Files Community

minko186 committed on Mar 6, 2024

Commit

2aee0ff

1 Parent(s): 69ebf10

Create analysis.py

Browse files

Files changed (1) hide show

analysis.py +78 -0

analysis.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import requests
+import httpx
+import torch
+import re
+from bs4 import BeautifulSoup
+import numpy as np
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import asyncio
+from scipy.special import softmax
+from evaluate import load
+from datetime import date
+import nltk
+import fitz
+from transformers import GPT2LMHeadModel, GPT2TokenizerFast
+import nltk, spacy, subprocess, torch
+import plotly.graph_objects as go
+import torch.nn.functional as F
+import nltk
+from unidecode import unidecode
+import time
+import yaml
+import nltk
+import os
+from explainability import *
+from dotenv import load_dotenv
+nltk.download('punkt')
+nltk.download('stopwords')
+load_dotenv()
+device = "cuda" if torch.cuda.is_available() else "cpu"
+readability_model_id = os.getenv('READABILITY_MODEL_ID')
+gpt2_model = GPT2LMHeadModel.from_pretrained(readability_model_id).to(device)
+gpt2_tokenizer = GPT2TokenizerFast.from_pretrained(readability_model_id)
+def depth_analysis(input_text):
+    processed_words = preprocess_text1(input_text)
+    ttr_value = vocabulary_richness_ttr(processed_words)
+    gunning_fog = calculate_gunning_fog(input_text)
+    gunning_fog_norm = normalize(gunning_fog, min_value=0, max_value=20)
+    words, sentences = preprocess_text2(input_text)
+    average_sentence_length = calculate_average_sentence_length(sentences)
+    average_word_length = calculate_average_word_length(words)
+    average_sentence_length_norm = normalize(average_sentence_length, min_value=0, max_value=40)
+    average_word_length_norm = normalize(average_word_length, min_value=0, max_value=8)
+    average_tree_depth = calculate_syntactic_tree_depth(nlp, input_text)
+    average_tree_depth_norm = normalize(average_tree_depth, min_value=0, max_value=10)
+    perplexity = calculate_perplexity(input_text, gpt2_model, gpt2_tokenizer, device)
+    perplexity_norm = normalize(perplexity, min_value=0, max_value=30)
+    features = {
+        "readability": gunning_fog_norm,
+        "syntactic tree depth": average_tree_depth_norm,
+        "vocabulary richness": ttr_value,
+        "perplexity": perplexity_norm,
+        "average sentence length": average_sentence_length_norm,
+        "average word length": average_word_length_norm,
+    }
+    fig = go.Figure()
+    fig.add_trace(go.Scatterpolar(
+        r=list(features.values()),
+        theta=list(features.keys()),
+        fill='toself',
+        name='Radar Plot'
+    ))
+    fig.update_layout(
+        polar=dict(
+            radialaxis=dict(
+                visible=True,
+                range=[0, 100],
+            )),
+        showlegend=False,
+        margin=dict(
+            l=10,
+            r=20,
+            b=10,
+            t=10,
+        ),
+    )
+    return fig