gloignon committed on
Commit
b12f6d8
·
verified ·
1 Parent(s): 54240b2

Update app.R

Browse files
Files changed (1) hide show
  1. app.R +42 -13
app.R CHANGED
@@ -1,21 +1,24 @@
1
  library(shiny)
2
  library(udpipe)
 
3
 
4
  # Load the French GSD model (ensure it's downloaded and adjust the path if necessary)
5
  model <- udpipe_load_model("french-gsd-ud-2.5-191206.udpipe")
6
 
7
  # Define UI for the application
8
  ui <- fluidPage(
9
- titlePanel("French Readability Analyzer with UDPipe"),
10
 
11
  sidebarLayout(
12
  sidebarPanel(
13
- textInput("text", "Enter French text:", value = "", placeholder = "Type or paste French text here"),
 
 
14
  actionButton("analyze", "Analyze")
15
  ),
16
 
17
  mainPanel(
18
- h3("Readability Features"),
19
  tableOutput("results")
20
  )
21
  )
@@ -31,27 +34,53 @@ server <- function(input, output) {
31
  annotated <- udpipe_annotate(model, x = text)
32
  annotated_df <- as.data.frame(annotated)
33
 
34
- # Calculate readability metrics
35
- word_count <- nrow(annotated_df[annotated_df$upos == "NOUN" | annotated_df$upos == "VERB" |
36
- annotated_df$upos == "ADJ" | annotated_df$upos == "ADV", ])
37
 
 
 
38
  sentence_count <- length(unique(annotated_df$sentence_id))
39
 
40
  # Syllable count - count vowels in each token as an approximation
41
  syllable_count <- sum(sapply(gregexpr("[aeiouyAEIOUY]", annotated_df$token), function(x) max(0, length(x))))
42
-
43
- # Average sentence length (words per sentence)
44
  avg_sentence_length <- ifelse(sentence_count > 0, word_count / sentence_count, 0)
45
-
46
- # Average syllables per word
47
  avg_syllables_per_word <- ifelse(word_count > 0, syllable_count / word_count, 0)
48
 
49
- # Compile results into a data frame
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  results <- data.frame(
51
  Metric = c("Word Count", "Sentence Count", "Syllable Count",
52
- "Average Sentence Length", "Average Syllables per Word"),
 
 
 
53
  Value = c(word_count, sentence_count, syllable_count,
54
- round(avg_sentence_length, 2), round(avg_syllables_per_word, 2))
 
 
 
55
  )
56
 
57
  return(results)
 
1
  library(shiny)
2
  library(udpipe)
3
+ library(stringr)
4
 
5
  # Load the French GSD model (ensure it's downloaded and adjust the path if necessary)
6
  model <- udpipe_load_model("french-gsd-ud-2.5-191206.udpipe")
7
 
8
  # Define UI for the application
9
  ui <- fluidPage(
10
+ titlePanel("French Readability and Cohesion Analyzer with UDPipe"),
11
 
12
  sidebarLayout(
13
  sidebarPanel(
14
+ textAreaInput("text", "Enter French text:", value = "",
15
+ placeholder = "Type or paste French text here",
16
+ width = '100%', height = '200px', resize = "both"),
17
  actionButton("analyze", "Analyze")
18
  ),
19
 
20
  mainPanel(
21
+ h3("Readability and Cohesion Features"),
22
  tableOutput("results")
23
  )
24
  )
 
34
  annotated <- udpipe_annotate(model, x = text)
35
  annotated_df <- as.data.frame(annotated)
36
 
37
+ # Calculate readability and cohesion metrics
 
 
38
 
39
+ # Basic Metrics
40
+ word_count <- nrow(annotated_df[annotated_df$upos %in% c("NOUN", "VERB", "ADJ", "ADV"), ])
41
  sentence_count <- length(unique(annotated_df$sentence_id))
42
 
43
  # Syllable count - count vowels in each token as an approximation
44
  syllable_count <- sum(sapply(gregexpr("[aeiouyAEIOUY]", annotated_df$token), function(x) max(0, length(x))))
 
 
45
  avg_sentence_length <- ifelse(sentence_count > 0, word_count / sentence_count, 0)
 
 
46
  avg_syllables_per_word <- ifelse(word_count > 0, syllable_count / word_count, 0)
47
 
48
+ # Lexical Cohesion Metrics
49
+ # Sentence-to-Sentence Lexical Cohesion (proportion of each sentence's lemmas shared with the previous sentence)
50
+ sentence_ids <- unique(annotated_df$sentence_id)
51
+ cohesion_values <- c()
52
+ for (i in 2:length(sentence_ids)) {
53
+ current_sentence <- annotated_df[annotated_df$sentence_id == sentence_ids[i], "lemma"]
54
+ previous_sentence <- annotated_df[annotated_df$sentence_id == sentence_ids[i - 1], "lemma"]
55
+ shared_words <- length(intersect(current_sentence, previous_sentence))
56
+ cohesion_values <- c(cohesion_values, shared_words / length(current_sentence))
57
+ }
58
+ avg_sentence_to_sentence_cohesion <- ifelse(length(cohesion_values) > 0, mean(cohesion_values, na.rm = TRUE), 0)
59
+
60
+ # Text-to-Sentence Lexical Cohesion (proportion of each sentence's lemmas shared with the entire text)
61
+ text_words <- unique(annotated_df$lemma)
62
+ text_sentence_cohesion <- sapply(sentence_ids, function(sid) {
63
+ sentence_words <- annotated_df[annotated_df$sentence_id == sid, "lemma"]
64
+ shared_words <- length(intersect(sentence_words, text_words))
65
+ shared_words / length(sentence_words)
66
+ })
67
+ avg_text_to_sentence_cohesion <- mean(text_sentence_cohesion, na.rm = TRUE)
68
+
69
+ # Type-Token Ratio (vocabulary diversity)
70
+ type_token_ratio <- length(unique(annotated_df$lemma)) / word_count
71
+
72
+ # Results Data Frame
73
  results <- data.frame(
74
  Metric = c("Word Count", "Sentence Count", "Syllable Count",
75
+ "Average Sentence Length", "Average Syllables per Word",
76
+ "Avg Sentence-to-Sentence Lexical Cohesion",
77
+ "Avg Text-to-Sentence Lexical Cohesion",
78
+ "Type-Token Ratio (Vocabulary Diversity)"),
79
  Value = c(word_count, sentence_count, syllable_count,
80
+ round(avg_sentence_length, 2), round(avg_syllables_per_word, 2),
81
+ round(avg_sentence_to_sentence_cohesion, 2),
82
+ round(avg_text_to_sentence_cohesion, 2),
83
+ round(type_token_ratio, 2))
84
  )
85
 
86
  return(results)