File size: 5,236 Bytes
fe4a810
39cd34f
b12f6d8
b1a7266
fe4a810
39cd34f
 
fe4a810
39cd34f
 
b12f6d8
39cd34f
 
 
b1a7266
 
 
b12f6d8
b1a7266
39cd34f
fe4a810
39cd34f
 
b12f6d8
b1a7266
 
 
 
 
 
 
 
39cd34f
 
fe4a810
 
39cd34f
b1a7266
39cd34f
b1a7266
 
39cd34f
 
 
b12f6d8
39cd34f
 
 
 
 
b12f6d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1a7266
 
 
 
 
 
 
 
 
39cd34f
b1a7266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39cd34f
 
b1a7266
39cd34f
b1a7266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe4a810
b1a7266
fe4a810
 
39cd34f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
library(shiny)
library(udpipe)
library(stringr)
library(ggplot2)

# Load the pre-trained French GSD UDPipe model once, at start-up. The model
# file must already be downloaded; the path below is relative, so adjust it if
# the file lives somewhere other than the working directory.
model <- udpipe_load_model(file = "french-gsd-ud-2.5-191206.udpipe")

# User interface: a sidebar offering the two input routes (ZIP upload or pasted
# text) plus the trigger button, and a main panel that shows either the
# single-text table or the corpus plots depending on `output.isCorpus`.
ui <- fluidPage(
  titlePanel(title = "French Readability and Cohesion Analyzer with UDPipe"),
  sidebarLayout(
    sidebarPanel = sidebarPanel(
      fileInput(
        inputId = "corpus_zip",
        label = "Upload ZIP with TXT files (optional)",
        accept = ".zip"
      ),
      textAreaInput(
        inputId = "text",
        label = "Or enter French text directly:",
        value = "",
        width = "100%",
        height = "200px",
        placeholder = "Type or paste French text here",
        resize = "both"
      ),
      actionButton(inputId = "analyze", label = "Analyze")
    ),
    mainPanel = mainPanel(
      h3("Readability and Cohesion Features"),
      # Shown when the last analysis was a single text.
      conditionalPanel(
        condition = "output.isCorpus == false",
        tableOutput(outputId = "results")
      ),
      # Shown when the last analysis was an uploaded corpus.
      conditionalPanel(
        condition = "output.isCorpus == true",
        plotOutput(outputId = "corpusPlots")
      )
    )
  )
)

# Define server logic
#
# Shiny server function. Wires the "Analyze" button to either single-text or
# corpus (ZIP of .txt files) analysis and exposes:
#   output$results     - metrics table (single-text mode)
#   output$corpusPlots - box plots of the per-file metrics (corpus mode)
#   output$isCorpus    - logical flag read by the UI's conditionalPanel()s
# Uses the file-level `model` object for UDPipe annotation.
server <- function(input, output, session) {

  # Universal POS tags whose tokens are counted as "words" by the metrics.
  content_upos <- c("NOUN", "VERB", "ADJ", "ADV")

  # Lower-case vowel letters, including the accented forms used in French.
  # Written with \u escapes so the pattern does not depend on how this source
  # file is encoded.
  vowel_pattern <- paste0(
    "[aeiouy",
    "\u00e0\u00e2\u00e4\u00e6\u00e9\u00e8\u00ea\u00eb",
    "\u00ee\u00ef\u00f4\u00f6\u0153\u00f9\u00fb\u00fc\u00ff]"
  )

  # Count vowel letters per token (a rough proxy for syllables: every vowel
  # letter counts, vowel groups are not merged). Tokens without any vowel
  # yield 0; regmatches() is used because gregexpr() alone reports "no match"
  # as a length-1 result (-1), which would otherwise be counted as one hit.
  count_vowels <- function(tokens) {
    tokens <- tolower(as.character(tokens))
    lengths(regmatches(tokens, gregexpr(vowel_pattern, tokens)))
  }

  # Helper function to calculate metrics for a given text.
  #
  # text: a single character string.
  # Returns a one-row data.frame with human-readable column names
  # ("Word Count", "Sentence Count", ...). Every ratio falls back to 0 when its
  # denominator is 0, so empty or punctuation-only input never yields NaN/Inf.
  calculate_metrics <- function(text) {
    annotated_df <- as.data.frame(udpipe_annotate(model, x = text))

    is_word <- annotated_df$upos %in% content_upos
    word_count <- sum(is_word)
    sentence_ids <- unique(annotated_df$sentence_id)
    sentence_count <- length(sentence_ids)

    vowels_per_token <- count_vowels(annotated_df$token)
    syllable_count <- sum(vowels_per_token)

    avg_sentence_length <- if (sentence_count > 0) {
      word_count / sentence_count
    } else {
      0
    }
    # Numerator and denominator refer to the same (content-word) tokens.
    avg_syllables_per_word <- if (word_count > 0) {
      sum(vowels_per_token[is_word]) / word_count
    } else {
      0
    }

    # Lemmas grouped by sentence, in order of first appearance.
    lemmas_by_sentence <- if (sentence_count > 0) {
      split(
        annotated_df$lemma,
        factor(annotated_df$sentence_id, levels = sentence_ids)
      )
    } else {
      list()
    }

    # Share of each sentence's lemmas that also occur in the preceding
    # sentence. seq_len() keeps this empty for texts with fewer than two
    # sentences (2:length(x) would count backwards in that case).
    adjacent_cohesion <- vapply(
      seq_len(max(sentence_count - 1L, 0L)),
      function(i) {
        current <- lemmas_by_sentence[[i + 1L]]
        previous <- lemmas_by_sentence[[i]]
        length(intersect(current, previous)) / length(current)
      },
      numeric(1)
    )
    avg_sentence_to_sentence_cohesion <- if (length(adjacent_cohesion) > 0) {
      mean(adjacent_cohesion, na.rm = TRUE)
    } else {
      0
    }

    # NOTE(review): every sentence lemma is by construction part of
    # `text_lemmas`, so this ratio equals distinct-lemmas / tokens per
    # sentence. Formula kept as-is; confirm whether a comparison against the
    # *other* sentences was intended.
    text_lemmas <- unique(annotated_df$lemma)
    text_sentence_cohesion <- vapply(
      lemmas_by_sentence,
      function(lemmas) length(intersect(lemmas, text_lemmas)) / length(lemmas),
      numeric(1)
    )
    avg_text_to_sentence_cohesion <- if (length(text_sentence_cohesion) > 0) {
      mean(text_sentence_cohesion, na.rm = TRUE)
    } else {
      0
    }

    # Types and tokens are both restricted to the counted words, which keeps
    # the ratio within [0, 1].
    type_token_ratio <- if (word_count > 0) {
      length(unique(annotated_df$lemma[is_word])) / word_count
    } else {
      0
    }

    # check.names = FALSE keeps the labels readable ("Word Count" instead of
    # "Word.Count") in the table and on the plot axis.
    data.frame(
      "Word Count" = word_count,
      "Sentence Count" = sentence_count,
      "Syllable Count" = syllable_count,
      "Average Sentence Length" = round(avg_sentence_length, 2),
      "Average Syllables per Word" = round(avg_syllables_per_word, 2),
      "Sentence-to-Sentence Lexical Cohesion" =
        round(avg_sentence_to_sentence_cohesion, 2),
      "Text-to-Sentence Lexical Cohesion" =
        round(avg_text_to_sentence_cohesion, 2),
      "Type-Token Ratio" = round(type_token_ratio, 2),
      check.names = FALSE
    )
  }

  # Extract a ZIP archive and compute the metrics of every .txt file in it.
  #
  # zip_path: path of the uploaded archive.
  # Returns a data.frame with one row per text file, or NULL when the archive
  # contains no .txt file.
  analyze_corpus <- function(zip_path) {
    # Extract into a directory of its own: the session-wide tempdir() is
    # shared, so listing it would also pick up files from earlier uploads.
    corpus_dir <- tempfile("corpus_")
    dir.create(corpus_dir)
    on.exit(unlink(corpus_dir, recursive = TRUE), add = TRUE)

    unzip(zip_path, exdir = corpus_dir)
    txt_files <- list.files(
      corpus_dir,
      pattern = "\\.txt$",
      full.names = TRUE,
      recursive = TRUE  # archives often wrap the texts in a folder
    )
    # Skip "._*" companion entries that some archivers add next to real files.
    txt_files <- txt_files[!startsWith(basename(txt_files), "._")]
    if (length(txt_files) == 0) {
      return(NULL)
    }

    per_file_metrics <- lapply(txt_files, function(file) {
      # Declare the encoding so accented characters are handed to UDPipe as
      # UTF-8 regardless of the platform's native encoding.
      lines <- readLines(file, warn = FALSE, encoding = "UTF-8")
      calculate_metrics(paste(lines, collapse = " "))
    })
    do.call(rbind, per_file_metrics)
  }

  # Reactive to handle single text or corpus input. Yields
  # list(data = <data.frame>, isCorpus = <logical>), or NULL when there is
  # nothing to analyze.
  results <- eventReactive(input$analyze, {
    if (is.null(input$corpus_zip)) {
      # Single text mode
      text <- input$text
      if (is.null(text) || !nzchar(trimws(text))) {
        NULL
      } else {
        list(data = calculate_metrics(text), isCorpus = FALSE)
      }
    } else {
      # Corpus mode: analyze each file in the uploaded ZIP
      corpus_metrics_df <- analyze_corpus(input$corpus_zip$datapath)
      if (is.null(corpus_metrics_df)) {
        showNotification(
          "No .txt files were found in the uploaded ZIP archive.",
          type = "warning"
        )
        NULL
      } else {
        list(data = corpus_metrics_df, isCorpus = TRUE)
      }
    }
  })

  # Display results table for single text mode
  output$results <- renderTable({
    res <- results()
    if (!is.null(res) && !res$isCorpus) {
      res$data
    }
  })

  # Display box plots for corpus mode
  output$corpusPlots <- renderPlot({
    res <- results()
    if (!is.null(res) && res$isCorpus) {
      # Long format: one row per (file, metric) pair.
      melted_df <- reshape2::melt(res$data)

      ggplot(melted_df, aes(x = variable, y = value)) +
        geom_boxplot() +
        labs(
          x = "Metric",
          y = "Value",
          title = "Corpus Analysis - Readability and Cohesion Metrics"
        ) +
        theme_minimal() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))
    }
  })

  # Boolean for UI conditionals
  output$isCorpus <- reactive({
    res <- results()
    !is.null(res) && isTRUE(res$isCorpus)
  })
  # The flag is never rendered itself, so keep it updating while "hidden".
  outputOptions(output, "isCorpus", suspendWhenHidden = FALSE)
}

# Launch the app: combine the UI definition and the server function.
shinyApp(ui, server)