gloignon committed on
Commit
b1a7266
·
verified ·
1 Parent(s): b12f6d8

Update app.R

Browse files
Files changed (1) hide show
  1. app.R +77 -32
app.R CHANGED
@@ -1,6 +1,7 @@
1
  library(shiny)
2
  library(udpipe)
3
  library(stringr)
 
4
 
5
  # Load the French GSD model (ensure it's downloaded and adjust the path if necessary)
6
  model <- udpipe_load_model("french-gsd-ud-2.5-191206.udpipe")
@@ -11,42 +12,42 @@ ui <- fluidPage(
11
 
12
  sidebarLayout(
13
  sidebarPanel(
14
- textAreaInput("text", "Enter French text:", value = "",
 
 
15
  placeholder = "Type or paste French text here",
16
- width = '100%', height = '200px', resize = "both"),
17
  actionButton("analyze", "Analyze")
18
  ),
19
 
20
  mainPanel(
21
  h3("Readability and Cohesion Features"),
22
- tableOutput("results")
 
 
 
 
 
 
 
23
  )
24
  )
25
  )
26
 
27
  # Define server logic
28
- server <- function(input, output) {
29
 
30
- analyze_text <- eventReactive(input$analyze, {
31
- text <- input$text
32
-
33
- # Annotate text using udpipe with the French model
34
  annotated <- udpipe_annotate(model, x = text)
35
  annotated_df <- as.data.frame(annotated)
36
 
37
- # Calculate readability and cohesion metrics
38
-
39
- # Basic Metrics
40
  word_count <- nrow(annotated_df[annotated_df$upos %in% c("NOUN", "VERB", "ADJ", "ADV"), ])
41
  sentence_count <- length(unique(annotated_df$sentence_id))
42
-
43
- # Syllable count - count vowels in each token as an approximation
44
  syllable_count <- sum(sapply(gregexpr("[aeiouyAEIOUY]", annotated_df$token), function(x) max(0, length(x))))
45
  avg_sentence_length <- ifelse(sentence_count > 0, word_count / sentence_count, 0)
46
  avg_syllables_per_word <- ifelse(word_count > 0, syllable_count / word_count, 0)
47
 
48
- # Lexical Cohesion Metrics
49
- # Sentence-to-Sentence Lexical Cohesion (percentage of words shared between sentences)
50
  sentence_ids <- unique(annotated_df$sentence_id)
51
  cohesion_values <- c()
52
  for (i in 2:length(sentence_ids)) {
@@ -57,7 +58,6 @@ server <- function(input, output) {
57
  }
58
  avg_sentence_to_sentence_cohesion <- ifelse(length(cohesion_values) > 0, mean(cohesion_values, na.rm = TRUE), 0)
59
 
60
- # Text-to-Sentence Lexical Cohesion (percentage of words in each sentence shared with entire text)
61
  text_words <- unique(annotated_df$lemma)
62
  text_sentence_cohesion <- sapply(sentence_ids, function(sid) {
63
  sentence_words <- annotated_df[annotated_df$sentence_id == sid, "lemma"]
@@ -66,29 +66,74 @@ server <- function(input, output) {
66
  })
67
  avg_text_to_sentence_cohesion <- mean(text_sentence_cohesion, na.rm = TRUE)
68
 
69
- # Type-Token Ratio (vocabulary diversity)
70
  type_token_ratio <- length(unique(annotated_df$lemma)) / word_count
71
 
72
- # Results Data Frame
73
- results <- data.frame(
74
- Metric = c("Word Count", "Sentence Count", "Syllable Count",
75
- "Average Sentence Length", "Average Syllables per Word",
76
- "Avg Sentence-to-Sentence Lexical Cohesion",
77
- "Avg Text-to-Sentence Lexical Cohesion",
78
- "Type-Token Ratio (Vocabulary Diversity)"),
79
- Value = c(word_count, sentence_count, syllable_count,
80
- round(avg_sentence_length, 2), round(avg_syllables_per_word, 2),
81
- round(avg_sentence_to_sentence_cohesion, 2),
82
- round(avg_text_to_sentence_cohesion, 2),
83
- round(type_token_ratio, 2))
84
  )
85
-
86
- return(results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  })
88
 
 
89
  output$results <- renderTable({
90
- analyze_text()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  })
 
92
  }
93
 
94
  # Run the application
 
1
  library(shiny)
2
  library(udpipe)
3
  library(stringr)
4
+ library(ggplot2)
5
 
6
  # Load the French GSD model (ensure it's downloaded and adjust the path if necessary)
7
  model <- udpipe_load_model("french-gsd-ud-2.5-191206.udpipe")
 
12
 
13
  sidebarLayout(
14
  sidebarPanel(
15
+ fileInput("corpus_zip", "Upload ZIP with TXT files (optional)",
16
+ accept = c(".zip")),
17
+ textAreaInput("text", "Or enter French text directly:", value = "",
18
  placeholder = "Type or paste French text here",
19
+ width = '100%', height = '200px', resize = "both"),
20
  actionButton("analyze", "Analyze")
21
  ),
22
 
23
  mainPanel(
24
  h3("Readability and Cohesion Features"),
25
+ conditionalPanel(
26
+ condition = "output.isCorpus == false",
27
+ tableOutput("results")
28
+ ),
29
+ conditionalPanel(
30
+ condition = "output.isCorpus == true",
31
+ plotOutput("corpusPlots")
32
+ )
33
  )
34
  )
35
  )
36
 
37
  # Define server logic
38
+ server <- function(input, output, session) {
39
 
40
+ # Helper function to calculate metrics for a given text
41
+ calculate_metrics <- function(text) {
 
 
42
  annotated <- udpipe_annotate(model, x = text)
43
  annotated_df <- as.data.frame(annotated)
44
 
 
 
 
45
  word_count <- nrow(annotated_df[annotated_df$upos %in% c("NOUN", "VERB", "ADJ", "ADV"), ])
46
  sentence_count <- length(unique(annotated_df$sentence_id))
 
 
47
  syllable_count <- sum(sapply(gregexpr("[aeiouyAEIOUY]", annotated_df$token), function(x) max(0, length(x))))
48
  avg_sentence_length <- ifelse(sentence_count > 0, word_count / sentence_count, 0)
49
  avg_syllables_per_word <- ifelse(word_count > 0, syllable_count / word_count, 0)
50
 
 
 
51
  sentence_ids <- unique(annotated_df$sentence_id)
52
  cohesion_values <- c()
53
  for (i in 2:length(sentence_ids)) {
 
58
  }
59
  avg_sentence_to_sentence_cohesion <- ifelse(length(cohesion_values) > 0, mean(cohesion_values, na.rm = TRUE), 0)
60
 
 
61
  text_words <- unique(annotated_df$lemma)
62
  text_sentence_cohesion <- sapply(sentence_ids, function(sid) {
63
  sentence_words <- annotated_df[annotated_df$sentence_id == sid, "lemma"]
 
66
  })
67
  avg_text_to_sentence_cohesion <- mean(text_sentence_cohesion, na.rm = TRUE)
68
 
 
69
  type_token_ratio <- length(unique(annotated_df$lemma)) / word_count
70
 
71
+ data.frame(
72
+ "Word Count" = word_count,
73
+ "Sentence Count" = sentence_count,
74
+ "Syllable Count" = syllable_count,
75
+ "Average Sentence Length" = round(avg_sentence_length, 2),
76
+ "Average Syllables per Word" = round(avg_syllables_per_word, 2),
77
+ "Sentence-to-Sentence Lexical Cohesion" = round(avg_sentence_to_sentence_cohesion, 2),
78
+ "Text-to-Sentence Lexical Cohesion" = round(avg_text_to_sentence_cohesion, 2),
79
+ "Type-Token Ratio" = round(type_token_ratio, 2)
 
 
 
80
  )
81
+ }
82
+
83
# Reactive that runs the analysis each time the "Analyze" button is clicked.
#
# Returns NULL when there is nothing to analyse (blank text, or a ZIP that
# contains no .txt files); otherwise a list with two elements:
#   data     - data frame of metrics as returned by calculate_metrics(),
#              one row per analysed text
#   isCorpus - TRUE when the rows come from an uploaded ZIP, FALSE for the
#              text typed into the text area
results <- eventReactive(input$analyze, {
  if (is.null(input$corpus_zip)) {
    # Single text mode
    text <- input$text
    # Treat whitespace-only input like empty input: there is nothing to
    # annotate, so report "no result" instead of calling the model.
    if (isTRUE(nzchar(trimws(text)))) {
      list(data = calculate_metrics(text), isCorpus = FALSE)
    } else {
      NULL
    }
  } else {
    # Corpus mode: analyze each file in the uploaded ZIP.
    #
    # Extract into a fresh, private directory. tempdir() is shared by the
    # whole R session, so extracting straight into it would make .txt files
    # left over from earlier uploads show up in this analysis as well.
    extract_dir <- tempfile(pattern = "corpus_")
    dir.create(extract_dir)
    # Remove the extracted files once the metrics have been computed.
    on.exit(unlink(extract_dir, recursive = TRUE), add = TRUE)

    unzip(input$corpus_zip$datapath, exdir = extract_dir)

    # recursive = TRUE: archives often wrap their files in a top-level folder.
    txt_files <- list.files(
      extract_dir,
      pattern = "\\.txt$",
      full.names = TRUE,
      recursive = TRUE
    )

    # No text files: behave like the empty single-text case rather than
    # handing an empty data set to the plotting code.
    if (length(txt_files) == 0) {
      return(NULL)
    }

    # Calculate metrics for each text file and store in a list
    corpus_metrics <- lapply(txt_files, function(file) {
      # NOTE(review): files are assumed to be UTF-8, which is what udpipe
      # expects for its input text - confirm for the target corpora.
      text <- readLines(file, warn = FALSE, encoding = "UTF-8")
      calculate_metrics(paste(text, collapse = " "))
    })

    # Combine metrics into a data frame (one row per file)
    corpus_metrics_df <- do.call(rbind, corpus_metrics)
    list(data = corpus_metrics_df, isCorpus = TRUE)
  }
})
110
 
111
# Metrics table for single-text mode. Renders nothing when no analysis
# result is available or when the result belongs to a corpus.
output$results <- renderTable({
  res <- results()
  if (is.null(res) || res$isCorpus) {
    return(NULL)
  }
  res$data
})
117
+
118
# Box plots for corpus mode: one box per metric, drawn across all files of
# the uploaded corpus. Renders nothing outside corpus mode.
output$corpusPlots <- renderPlot({
  res <- results()
  if (is.null(res) || !res$isCorpus) {
    return(NULL)
  }

  # Reshape to long format: one (variable, value) row per metric and file.
  long_metrics <- reshape2::melt(res$data)

  metric_plot <- ggplot(long_metrics, aes(x = variable, y = value)) +
    geom_boxplot() +
    labs(
      x = "Metric",
      y = "Value",
      title = "Corpus Analysis - Readability and Cohesion Metrics"
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

  metric_plot
})
131
+
132
# Flag read by the conditionalPanel() conditions in the UI: TRUE only when
# the latest analysis result comes from an uploaded corpus.
output$isCorpus <- reactive({
  res <- results()
  if (is.null(res)) {
    FALSE
  } else {
    res$isCorpus
  }
})
# The flag has no visible UI element of its own, so keep it updating even
# though Shiny considers it hidden.
outputOptions(output, "isCorpus", suspendWhenHidden = FALSE)
137
  }
138
 
139
  # Run the application