TheBobBob committed on
Commit
966efc7
·
verified ·
1 Parent(s): cf7935a

Update BioinformaticsDashboardv0.0..Rmd

Browse files
Files changed (1) hide show
  1. BioinformaticsDashboardv0.0..Rmd +891 -885
BioinformaticsDashboardv0.0..Rmd CHANGED
@@ -1,886 +1,892 @@
1
- ---
2
- title: "Bioinformatics Dashboard v0.0 🧬 🦠 🧫"
3
- output:
4
- flexdashboard::flex_dashboard:
5
- orientation: columns
6
- vertical_layout: fill
7
- runtime: shiny
8
- ---
9
-
10
- ```{r setup, include=FALSE}
11
- library(flexdashboard)
12
- library(shiny)
13
- library(ggplot2)
14
- library(plotly)
15
- library(clusterProfiler)
16
- library(readxl)
17
- library(tidyverse)
18
- library(DESeq2)
19
- library(biomaRt)
20
- library(tidyr)
21
- library(shinyjs)
22
- library(rentrez)
23
- library(dplyr)
24
- library(ggtext)
25
- library(cowplot)
26
- library(UpSetR)
27
- ```
28
-
29
- Column {.tabset}
30
- -----------------------------------------------------------------------
31
-
32
- ### RNAseq analysis
33
- Analyzes RNA-seq data using DESeq2 and GSEA, then visualizes the results with a volcano plot and additional plots of the GSEA output. The uploaded file must be in xlsx format, with the gene names in the first column followed by an even number of data columns: all control samples in the first half of the data columns and all mutant samples in the second half.
34
- ```{r}
35
-
36
- ui <- fluidPage(
37
- titlePanel("Interactive Volcano Plot with Gene and GO Term Search"),
38
- useShinyjs(),
39
-
40
- passwordInput("password", "Enter Password:", value = "", placeholder = "Password"),
41
- actionButton("submit_password", "Submit"),
42
-
43
- uiOutput("main_ui"),
44
-
45
- #if you define a side_ui --> absolute panel, it would have to be defined here
46
- )
47
-
48
- # Server logic
49
- server <- function(input, output, session) {
50
- # Password handling
51
- correct_password <- "my_secret_password"
52
-
53
- observeEvent(input$submit_password, {
54
- if (input$password == correct_password) {
55
- showModal(modalDialog(
56
- title = "Access Granted",
57
- "Welcome! You can now search for genes and view the volcano plot.",
58
- easyClose = TRUE,
59
- footer = NULL
60
- ))
61
-
62
- # Hide the password input and button after validation
63
- shinyjs::hide("password")
64
- shinyjs::hide("submit_password")
65
-
66
- # Main UI appears after password is correct
67
- output$main_ui <- renderUI({
68
- sidebarLayout(
69
- sidebarPanel(
70
- fluidRow(
71
- fileInput("file", "Choose XLSX File", multiple = FALSE, accept = c(".xlsx", "text/xlsx")),
72
- actionButton("analyze_button", "Analyze"),
73
- tags$hr(),
74
- textInput("gene_search", "Search for a gene or keyword (separate multiple genes with ';'):", ""),
75
- actionButton("search_gene", "Search Gene"),
76
- tags$hr(),
77
- selectInput("GO_search", "Select a GO term:", choices = NULL),
78
- actionButton("search_GO_term", "Search GO Term"),
79
- tags$hr(),
80
- selectInput("description_search", "Search for the name of a pathway:", choices = NULL),
81
- actionButton("search_description", "Search Description"),
82
- tags$hr(),
83
- sliderInput("pvalue", "P-value: ",
84
- min = 0, max = 1,
85
- value = 0.01, step = 0.00001),
86
- tags$hr(),
87
- sliderInput("log2fc", "Log2FoldChange: ",
88
- min = 0.0001, max = 100,
89
- value = 2.5, step = 0.05),
90
- tags$hr(),
91
- actionButton("visualize_gse", "Visualize the GSEGO Results:")
92
- )
93
- ),
94
- mainPanel(
95
- plotlyOutput("volcanoPlot"),
96
- plotOutput("dotPlotTitle", width = "100%", height = "100px"),
97
- plotOutput("dotPlot", width = "100%", height = "1000px"),
98
- plotOutput("conceptNetworkTitle", width = "100%", height = "100px"),
99
- plotOutput("conceptNetwork", width = "100%", height = "600px"),
100
- plotOutput("heatMapTitle", width = "100%", height = "100px"),
101
- plotOutput("heatMap", width = "100%", height = "400px"),
102
- plotOutput("upsetPlotTitle", width = "100%", height = "100px"),
103
- plotOutput("upsetPlot", width = "100%", height = "1000px"),
104
- plotOutput("pubmedPathwayPlotTitle", width = "100%", height = "100px"),
105
- plotOutput("pubmedPathwayPlot", width = "100%", height = "1500px")
106
- )
107
- )
108
- })
109
-
110
- # Reactive values to store results and search criteria
111
- searchValues <- reactiveValues(
112
- gene_search = "",
113
- GO_search = "All",
114
- description_search = "",
115
- df_inverted = NULL # Store df_inverted here
116
- )
117
-
118
- # Process uploaded file and perform DESeq2 analysis
119
- observeEvent(input$analyze_button, {
120
- req(input$file) # Ensure a file is uploaded
121
-
122
- # Read in gene counts data
123
- genecounts <- tryCatch({
124
- read_excel(input$file$datapath, sheet = 1, col_names = TRUE)
125
- }, error = function(e) {
126
- showModal(modalDialog(title = "Error", "Could not read the Excel file.", easyClose = TRUE))
127
- return(NULL)
128
- })
129
-
130
- if (is.null(genecounts)) return(NULL) # Stop further processing if reading failed
131
-
132
- genecounts <- as.data.frame(genecounts)
133
- rownames(genecounts) <- genecounts[, 1]
134
- genecounts$Gene_Name <- NULL
135
- genecounts <- genecounts[, -1]
136
-
137
- num_samples <- ncol(genecounts)
138
-
139
- # Check if the number of samples is even
140
- if (num_samples %% 2 != 0) {
141
- showModal(modalDialog(
142
- title = "Error",
143
- "The number of samples must be even for proper grouping.",
144
- easyClose = TRUE
145
- ))
146
- return(NULL)
147
- }
148
-
149
- # Create the condition data frame
150
- condition <- data.frame(genotype = rep(c('C', 'R'), each = num_samples / 2), row.names = colnames(genecounts))
151
-
152
- # Create DESeq2 dataset
153
- dds <- DESeqDataSetFromMatrix(countData = genecounts, colData = condition, design = ~genotype)
154
- de <- DESeq(dds)
155
- res_reactive <- reactiveVal()
156
- res_reactive(results(de))
157
- res <<- results(de)
158
-
159
- # Create additional columns for plotting
160
- res$pvalue_log10 <- -log10(res$pvalue)
161
- pvalue_threshold <- 0.05
162
- fold_change_threshold <- 2
163
-
164
- res$significance <- ifelse(res$pvalue < pvalue_threshold, "Significant", "Not Significant")
165
- res$new_column <- rownames(res)
166
- res$diffexpressed <- ifelse(res$log2FoldChange > 0, "UP", ifelse(res$log2FoldChange < 0, "DOWN", "NO_CHANGE"))
167
-
168
- # Generate gene list for GSEA
169
- organism = "org.Hs.eg.db"
170
- original_gene_list <- res$log2FoldChange
171
- names(original_gene_list) <- res$new_column
172
- gene_list <<- na.omit(original_gene_list)
173
- gene_list = sort(gene_list, decreasing = TRUE)
174
-
175
- # Perform GO enrichment analysis
176
- gse <<- gseGO(geneList = gene_list,
177
- ont = "ALL",
178
- keyType = "SYMBOL",
179
- minGSSize = 3,
180
- maxGSSize = 800,
181
- pvalueCutoff = 0.05,
182
- verbose = TRUE,
183
- OrgDb = organism,
184
- pAdjustMethod = "none")
185
-
186
- # Store inverted results for GO terms in reactive values
187
- searchValues$df_inverted <- gse@result %>% separate_rows(core_enrichment, sep = "/")
188
-
189
- # Update GO term and description choices in UI
190
- updateSelectInput(session, "GO_search", choices = unique(searchValues$df_inverted$ID))
191
- updateSelectInput(session, "description_search", choices = unique(searchValues$df_inverted$Description))
192
-
193
- # Reactive filtering of results based on user input
194
- filteredRes <- reactive({
195
- data <- as.data.frame(res)
196
-
197
- # Apply gene search filter
198
- if (searchValues$gene_search != "") {
199
- genes <- strsplit(searchValues$gene_search, ";")[[1]]
200
- genes <- trimws(genes)
201
- data <- data %>%
202
- filter(rowSums(sapply(genes, function(gene) grepl(gene, new_column, ignore.case = TRUE))) > 0)
203
- }
204
-
205
- # Apply GO term filter
206
- if (searchValues$GO_search != "All") {
207
- selected_genes <- searchValues$df_inverted %>%
208
- filter(ID == searchValues$GO_search) %>%
209
- pull(core_enrichment)
210
- data <- data %>%
211
- filter(new_column %in% selected_genes)
212
- }
213
-
214
- # Apply description search filter
215
- if (searchValues$description_search != "") {
216
- selected_genes <- searchValues$df_inverted %>%
217
- filter(Description == searchValues$description_search) %>%
218
- pull(core_enrichment)
219
- data <- data %>%
220
- filter(new_column %in% selected_genes)
221
- }
222
-
223
- data
224
- })
225
-
226
- # Render volcano plot based on filtered results
227
- output$volcanoPlot <- renderPlotly({
228
- data_res <- filteredRes()
229
- p_value <- input$pvalue #need to add slides here
230
- log2fc <- input$log2fc #need to add slider here
231
-
232
- p <- ggplot(data_res, aes(x = log2FoldChange, y = pvalue_log10,
233
- text = paste("Gene:", new_column, "<br>Log2 Fold Change:", log2FoldChange,
234
- "<br>P-value:", pvalue, "<br>Significance:", significance,
235
- "<br>Differentially Expressed:", diffexpressed, "<br>-log10 Values:", pvalue_log10))) +
236
- geom_point(aes(color = log2FoldChange, shape = diffexpressed)) +
237
- geom_hline(yintercept = -log10(p_value), linetype = "dotted", color = "red") +
238
- geom_vline(xintercept = c(-log2fc, log2fc), linetype = "dotted", color = "darkblue") +
239
- xlim(-5, 5) +
240
- xlab("Log2 Fold Change") +
241
- ylab("-log10(P-value)") +
242
- ggtitle("Volcano Plot") +
243
- scale_color_gradient2(low = "green", mid = "pink", high = "blue", midpoint = 0,
244
- name = "Log2 Fold Change") # Add a custom color scale for the color legends
245
-
246
- ggplotly(p, tooltip = "text")
247
- })
248
- })
249
-
250
- # Update search criteria based on user actions
251
- observeEvent(input$search_gene, {
252
- searchValues$gene_search <- input$gene_search
253
- searchValues$GO_search <- "All"
254
- searchValues$description_search <- ""
255
- })
256
-
257
- observeEvent(input$search_GO_term, {
258
- searchValues$GO_search <- input$GO_search
259
- searchValues$gene_search <- ""
260
- searchValues$description_search <- ""
261
- })
262
-
263
- observeEvent(input$search_description, {
264
- searchValues$description_search <- input$description_search
265
- searchValues$GO_search <- "All"
266
- searchValues$gene_search <- ""
267
- })
268
-
269
- observeEvent(input$visualize_gse, {
270
- library(ggplot2)
271
- library(ggtext)
272
- library(gridExtra)
273
-
274
- output$dotPlotTitle <- renderPlot({
275
- txt <- "Dot Plot with 10 Pathways"
276
- title_plot <- ggplot() +
277
- geom_textbox(
278
- aes(x = 0, y = 0, label = txt),
279
- size = 18 / .pt,
280
- width = unit(6, "inches")
281
- ) +
282
- theme_void()
283
- print(title_plot)
284
- })
285
-
286
- output$dotPlot <- renderPlot({
287
- dot_plot <- dotplot(gse, showCategory = 10)
288
- print(dot_plot)
289
-
290
- })
291
-
292
- output$conceptNetworkTitle <- renderPlot({
293
- txt <- "Gene Concept Network"
294
-
295
- title_plot <- ggplot() +
296
- geom_textbox(
297
- aes(x = 0, y = 0, label = txt),
298
- size = 18 / .pt,
299
- width = unit(6, "inches")
300
- ) +
301
- theme_void()
302
-
303
- print(title_plot)
304
- })
305
-
306
- output$conceptNetwork <- renderPlot({
307
- gsex <- setReadable(gse, 'org.Hs.eg.db', 'ENTREZID')
308
- geneList <- gse@geneList
309
-
310
- p1 <- cnetplot(gsex, foldChange = geneList, max.overlaps = 100)
311
- p2 <- cnetplot(gsex, categorySize = "pvalue", foldChange = geneList, max.overlaps = 100)
312
- p3 <- cnetplot(gsex, foldChange = geneList, circular = TRUE, colorEdge = TRUE, max.overlaps = 100)
313
-
314
- maingene_plot <- cowplot::plot_grid(p1, p2, p3, ncol = 3, labels = LETTERS[1:3], rel_widths = c(.8, .8, 1.2))
315
- print(maingene_plot)
316
-
317
- })
318
-
319
- output$heatMapTitle <- renderPlot({
320
- txt <- "Heatmap-Like Functional Classification"
321
-
322
- title_plot <- ggplot() +
323
- geom_textbox(
324
- aes(x = 0, y = 0, label = txt),
325
- size = 18 / .pt,
326
- width = unit(6, "inches")
327
- ) +
328
- theme_void()
329
-
330
- print(title_plot)
331
- })
332
-
333
- output$heatMap <- renderPlot({
334
-
335
- gsex <- setReadable(gse, 'org.Hs.eg.db', 'ENTREZID')
336
- geneList <- gse@geneList
337
-
338
- p1 <- heatplot(gsex, showCategory=5)
339
- p2 <- heatplot(gsex, foldChange=geneList, showCategory=5)
340
- mainheatmap_plot <- cowplot::plot_grid(p1, p2, ncol=1, labels=LETTERS[1:2])
341
-
342
- print(mainheatmap_plot)
343
- })
344
-
345
- output$upsetPlotTitle <- renderPlot({
346
- txt <- "UpSet Plot"
347
-
348
- title_plot <- ggplot() +
349
- geom_textbox(
350
- aes(x = 0, y = 0, label = txt),
351
- size = 18 / .pt,
352
- width = unit(6, "inches")
353
- ) +
354
- theme_void()
355
-
356
- print(title_plot)
357
- })
358
-
359
- output$upsetPlot <- renderPlot({
360
- gse_df <- gse@result
361
- top_terms <- gse_df %>% arrange(pvalue) %>% head(10)
362
- top_gene_sets <- strsplit(top_terms$core_enrichment, "/")
363
- gene_sets_list <- lapply(top_gene_sets, function(x) unique(trimws(x)))
364
-
365
- gene_sets_df <- fromList(setNames(gene_sets_list, top_terms$ID))
366
-
367
- # Create the UpSet plot
368
- upset_plot <- upset(gene_sets_df,
369
- sets = names(gene_sets_df),
370
- main.bar.color = "steelblue",
371
- sets.bar.color = "darkred",
372
- order.by = "freq",
373
- matrix.color = "gray",
374
- keep.order = TRUE)
375
-
376
- print(upset_plot)
377
- })
378
-
379
- output$pubmedPathwayPlotTitle <- renderPlot({
380
- txt <- "PubMed Pathway Enrichment"
381
-
382
- title_plot <- ggplot() +
383
- geom_textbox(
384
- aes(x = 0, y = 0, label = txt),
385
- size = 18 / .pt,
386
- width = unit(6, "inches")
387
- ) +
388
- theme_void()
389
-
390
- print(title_plot)
391
- })
392
-
393
- output$pubmedPathwayPlot <- renderPlot({
394
- results <- data.frame(Term = character(), Year = integer(), Count = integer(), stringsAsFactors = FALSE)
395
-
396
- terms <- tail(gse$Description, n = 10)
397
-
398
- results <- data.frame()
399
- titles_2024 <- data.frame()
400
-
401
- for (term in terms) {
402
- for (year in 2014:2024) {
403
- query <- paste(term, "[Title/Abstract] AND", year, "[PDAT]")
404
-
405
- # Count results for each term and year
406
- search_results <- entrez_search(db = "pubmed", term = query, retmax = 0)
407
- results <- rbind(results, data.frame(Term = term, Year = year, Count = search_results$count))
408
-
409
- # If the year is 2024, retrieve the first 10 article titles
410
- if (year == 2024) {
411
- search_results_2024 <- entrez_search(db = "pubmed", term = query, retmax = 10)
412
- if (search_results_2024$count > 0) {
413
- article_ids <- search_results_2024$ids
414
- articles <- entrez_fetch(db = "pubmed", id = article_ids, rettype = "abstract", retmode = "text")
415
- titles <- sapply(strsplit(articles, "\n"), function(x) x[1])
416
- titles_2024 <- reactive({rbind(titles_2024, data.frame(Term = term, Title = titles, stringsAsFactors = FALSE))})
417
- }
418
- }
419
- }
420
- }
421
-
422
- total_counts <- results %>%
423
- group_by(Year) %>%
424
- summarize(Total_Count = sum(Count), .groups = 'drop')
425
-
426
- results <- results %>%
427
- left_join(total_counts, by = "Year")
428
-
429
- results <- results %>%
430
- mutate(Ratio = Count / Total_Count)
431
-
432
- print(results)
433
-
434
- mainpubmed_plot <- ggplot(results, aes(x = Year, y = Ratio, color = Term)) +
435
- geom_line() +
436
- geom_point(size = 3, shape = 20, fill = "white", stroke = 1) + # Bolded dots
437
- scale_x_continuous(limits = c(2013, 2025), breaks = seq(2013, 2025, by = 2.5)) + # 2.5-year breaks
438
- labs(title = "Publication Ratio for Enriched Terms", x = "Year", y = "Publication Ratio") +
439
- theme_minimal()
440
-
441
- print(mainpubmed_plot)
442
- })
443
- })
444
-
445
- } else {
446
- showModal(modalDialog(
447
- title = "Access Denied",
448
- "Incorrect password. Please try again.",
449
- easyClose = TRUE,
450
- footer = NULL
451
- ))
452
- }
453
- })
454
- }
455
-
456
- shinyApp(ui = ui, server = server)
457
- ```
458
-
459
- ### Proteomics analysis
460
- Analyzes proteomics data using DESeq2 and GSEA, then visualizes the results with a volcano plot and additional plots of the GSEA output. The uploaded file must be in xlsx format, with the gene names in the first column, the expanded name of each gene in the second column, and an even number of data columns after that: all control samples in the first half of the data columns and all mutant samples in the second half.
461
- ```{r}
462
-
463
- ui <- fluidPage(
464
- titlePanel("Interactive Volcano Plot with Gene and GO Term Search"),
465
- useShinyjs(),
466
-
467
- passwordInput("password", "Enter Password:", value = "", placeholder = "Password"),
468
- actionButton("submit_password", "Submit"),
469
-
470
- uiOutput("main_ui"),
471
-
472
- #if you define a side_ui --> absolute panel, it would have to be defined here
473
- )
474
-
475
- # Server logic
476
- server <- function(input, output, session) {
477
- # Password handling
478
- correct_password <- "my_secret_password"
479
-
480
- observeEvent(input$submit_password, {
481
- if (input$password == correct_password) {
482
- showModal(modalDialog(
483
- title = "Access Granted",
484
- "Welcome! You can now search for genes and view the volcano plot.",
485
- easyClose = TRUE,
486
- footer = NULL
487
- ))
488
-
489
- # Hide the password input and button after validation
490
- shinyjs::hide("password")
491
- shinyjs::hide("submit_password")
492
-
493
- # Main UI appears after password is correct
494
- output$main_ui <- renderUI({
495
- sidebarLayout(
496
- sidebarPanel(
497
- fluidRow(
498
- fileInput("file", "Choose XLSX File", multiple = FALSE, accept = c(".xlsx", "text/xlsx")),
499
- actionButton("analyze_button", "Analyze"),
500
- tags$hr(),
501
- textInput("gene_search", "Search for a gene or keyword (separate multiple genes with ';'):", ""),
502
- actionButton("search_gene", "Search Gene"),
503
- tags$hr(),
504
- selectInput("GO_search", "Select a GO term:", choices = NULL),
505
- actionButton("search_GO_term", "Search GO Term"),
506
- tags$hr(),
507
- selectInput("description_search", "Search for the name of a pathway:", choices = NULL),
508
- actionButton("search_description", "Search Description"),
509
- tags$hr(),
510
- sliderInput("pvalue", "P-value: ",
511
- min = 0, max = 1,
512
- value = 0.01, step = 0.00001),
513
- tags$hr(),
514
- sliderInput("log2fc", "Log2FoldChange: ",
515
- min = 0.0001, max = 100,
516
- value = 2.5, step = 0.05),
517
- tags$hr(),
518
- actionButton("visualize_gse", "Visualize the GSEGO Results:")
519
- )
520
- ),
521
- mainPanel(
522
- plotlyOutput("volcanoPlot"),
523
- plotOutput("dotPlotTitle", width = "100%", height = "100px"),
524
- plotOutput("dotPlot", width = "100%", height = "1000px"),
525
- plotOutput("conceptNetworkTitle", width = "100%", height = "100px"),
526
- plotOutput("conceptNetwork", width = "100%", height = "600px"),
527
- plotOutput("heatMapTitle", width = "100%", height = "100px"),
528
- plotOutput("heatMap", width = "100%", height = "400px"),
529
- plotOutput("upsetPlotTitle", width = "100%", height = "100px"),
530
- plotOutput("upsetPlot", width = "100%", height = "1000px"),
531
- plotOutput("pubmedPathwayPlotTitle", width = "100%", height = "100px"),
532
- plotOutput("pubmedPathwayPlot", width = "100%", height = "1500px")
533
- )
534
- )
535
- })
536
-
537
- # Reactive values to store results and search criteria
538
- searchValues <- reactiveValues(
539
- gene_search = "",
540
- GO_search = "All",
541
- description_search = "",
542
- df_inverted = NULL # Store df_inverted here
543
- )
544
-
545
- # Process uploaded file and perform DESeq2 analysis
546
- observeEvent(input$analyze_button, {
547
- req(input$file) # Ensure a file is uploaded
548
-
549
- # Read in gene counts data
550
- genecounts <- tryCatch({
551
- read_excel(input$file$datapath, sheet = 1, col_names = TRUE)
552
- }, error = function(e) {
553
- showModal(modalDialog(title = "Error", "Could not read the Excel file.", easyClose = TRUE))
554
- return(NULL)
555
- })
556
-
557
- if (is.null(genecounts)) return(NULL) # Stop further processing if reading failed
558
-
559
- genecounts <- as.data.frame(genecounts)
560
- rownames(genecounts) <- genecounts[, 1]
561
- genecounts <- genecounts[, -1]
562
- descriptions <<- data.frame(Description = genecounts[, 1])
563
- rownames(descriptions) <- rownames(genecounts)
564
- genecounts <- genecounts[, -1]
565
- num_samples <- ncol(genecounts)
566
-
567
- num_samples <- ncol(genecounts)
568
-
569
- # Check if the number of samples is even
570
- if (num_samples %% 2 != 0) {
571
- showModal(modalDialog(
572
- title = "Error",
573
- "The number of samples must be even for proper grouping.",
574
- easyClose = TRUE
575
- ))
576
- return(NULL)
577
- }
578
-
579
- # Create the condition data frame
580
- condition <- data.frame(genotype = rep(c('C', 'R'), each = num_samples / 2), row.names = colnames(genecounts))
581
-
582
- # Create DESeq2 dataset
583
- dds <- DESeqDataSetFromMatrix(countData = genecounts, colData = condition, design = ~genotype)
584
- de <- DESeq(dds)
585
- res_reactive <- reactiveVal()
586
- res_reactive(results(de))
587
- res <<- results(de)
588
-
589
- # Create additional columns for plotting
590
- res$pvalue_log10 <- -log10(res$pvalue)
591
- pvalue_threshold <- 0.05
592
- fold_change_threshold <- 2
593
-
594
- res$significance <- ifelse(res$pvalue < pvalue_threshold, "Significant", "Not Significant")
595
- res$new_column <- rownames(res)
596
- res$diffexpressed <- ifelse(res$log2FoldChange > 0, "UP", ifelse(res$log2FoldChange < 0, "DOWN", "NO_CHANGE"))
597
-
598
- # Generate gene list for GSEA
599
- organism = "org.Hs.eg.db"
600
- original_gene_list <- res$log2FoldChange
601
- names(original_gene_list) <- res$new_column
602
- gene_list <<- na.omit(original_gene_list)
603
- gene_list = sort(gene_list, decreasing = TRUE)
604
-
605
- # Perform GO enrichment analysis
606
- gse <<- gseGO(geneList = gene_list,
607
- ont = "ALL",
608
- keyType = "SYMBOL",
609
- minGSSize = 3,
610
- maxGSSize = 800,
611
- pvalueCutoff = 0.05,
612
- verbose = TRUE,
613
- OrgDb = organism,
614
- pAdjustMethod = "none")
615
-
616
- # Store inverted results for GO terms in reactive values
617
- searchValues$df_inverted <- gse@result %>% separate_rows(core_enrichment, sep = "/")
618
-
619
- # Update GO term and description choices in UI
620
- updateSelectInput(session, "GO_search", choices = unique(searchValues$df_inverted$ID))
621
- updateSelectInput(session, "description_search", choices = unique(searchValues$df_inverted$Description))
622
-
623
- # Reactive filtering of results based on user input
624
- filteredRes <- reactive({
625
- data <- as.data.frame(res)
626
-
627
- # Apply gene search filter
628
- if (searchValues$gene_search != "") {
629
- genes <- strsplit(searchValues$gene_search, ";")[[1]]
630
- genes <- trimws(genes)
631
- data <- data %>%
632
- filter(rowSums(sapply(genes, function(gene) grepl(gene, new_column, ignore.case = TRUE))) > 0)
633
- }
634
-
635
- # Apply GO term filter
636
- if (searchValues$GO_search != "All") {
637
- selected_genes <- searchValues$df_inverted %>%
638
- filter(ID == searchValues$GO_search) %>%
639
- pull(core_enrichment)
640
- data <- data %>%
641
- filter(new_column %in% selected_genes)
642
- }
643
-
644
- # Apply description search filter
645
- if (searchValues$description_search != "") {
646
- selected_genes <- searchValues$df_inverted %>%
647
- filter(Description == searchValues$description_search) %>%
648
- pull(core_enrichment)
649
- data <- data %>%
650
- filter(new_column %in% selected_genes)
651
- }
652
-
653
- data
654
- })
655
-
656
- # Render volcano plot based on filtered results
657
- output$volcanoPlot <- renderPlotly({
658
- data_res <- filteredRes()
659
- p_value <- input$pvalue #need to add slides here
660
- log2fc <- input$log2fc #need to add slider here
661
-
662
- p <- ggplot(data_res, aes(x = log2FoldChange, y = pvalue_log10,
663
- text = paste("Gene:", new_column, "<br>Log2 Fold Change:", log2FoldChange,
664
- "<br>P-value:", pvalue, "<br>Significance:", significance,
665
- "<br>Differentially Expressed:", diffexpressed, "<br>-log10 Values:", pvalue_log10))) +
666
- geom_point(aes(color = log2FoldChange, shape = diffexpressed)) +
667
- geom_hline(yintercept = -log10(p_value), linetype = "dotted", color = "red") +
668
- geom_vline(xintercept = c(-log2fc, log2fc), linetype = "dotted", color = "darkblue") +
669
- xlim(-5, 5) +
670
- xlab("Log2 Fold Change") +
671
- ylab("-log10(P-value)") +
672
- ggtitle("Volcano Plot") +
673
- scale_color_gradient2(low = "green", mid = "pink", high = "blue", midpoint = 0,
674
- name = "Log2 Fold Change") # Add a custom color scale for the color legends
675
-
676
- ggplotly(p, tooltip = "text")
677
- })
678
- })
679
-
680
- # Update search criteria based on user actions
681
- observeEvent(input$search_gene, {
682
- searchValues$gene_search <- input$gene_search
683
- searchValues$GO_search <- "All"
684
- searchValues$description_search <- ""
685
- })
686
-
687
- observeEvent(input$search_GO_term, {
688
- searchValues$GO_search <- input$GO_search
689
- searchValues$gene_search <- ""
690
- searchValues$description_search <- ""
691
- })
692
-
693
- observeEvent(input$search_description, {
694
- searchValues$description_search <- input$description_search
695
- searchValues$GO_search <- "All"
696
- searchValues$gene_search <- ""
697
- })
698
-
699
- observeEvent(input$visualize_gse, {
700
- library(ggplot2)
701
- library(ggtext)
702
- library(gridExtra)
703
-
704
- output$dotPlotTitle <- renderPlot({
705
- txt <- "Dot Plot with 10 Pathways"
706
- title_plot <- ggplot() +
707
- geom_textbox(
708
- aes(x = 0, y = 0, label = txt),
709
- size = 18 / .pt,
710
- width = unit(6, "inches")
711
- ) +
712
- theme_void()
713
- print(title_plot)
714
- })
715
-
716
- output$dotPlot <- renderPlot({
717
- dot_plot <- dotplot(gse, showCategory = 10)
718
- print(dot_plot)
719
- })
720
-
721
- output$conceptNetworkTitle <- renderPlot({
722
- txt <- "Gene Concept Network"
723
-
724
- title_plot <- ggplot() +
725
- geom_textbox(
726
- aes(x = 0, y = 0, label = txt),
727
- size = 18 / .pt,
728
- width = unit(6, "inches")
729
- ) +
730
- theme_void()
731
-
732
- print(title_plot)
733
- })
734
-
735
- output$conceptNetwork <- renderPlot({
736
- gsex <- setReadable(gse, 'org.Hs.eg.db', 'ENTREZID')
737
- geneList <- gse@geneList
738
-
739
- p1 <- cnetplot(gsex, foldChange = geneList, max.overlaps = 100)
740
- p2 <- cnetplot(gsex, categorySize = "pvalue", foldChange = geneList, max.overlaps = 100)
741
- p3 <- cnetplot(gsex, foldChange = geneList, circular = TRUE, colorEdge = TRUE, max.overlaps = 100)
742
-
743
- maingene_plot <- cowplot::plot_grid(p1, p2, p3, ncol = 3, labels = LETTERS[1:3], rel_widths = c(.8, .8, 1.2))
744
- print(maingene_plot)
745
-
746
- })
747
-
748
- output$heatMapTitle <- renderPlot({
749
- txt <- "Heatmap-Like Functional Classification"
750
-
751
- title_plot <- ggplot() +
752
- geom_textbox(
753
- aes(x = 0, y = 0, label = txt),
754
- size = 18 / .pt,
755
- width = unit(6, "inches")
756
- ) +
757
- theme_void()
758
-
759
- print(title_plot)
760
- })
761
-
762
- output$heatMap <- renderPlot({
763
-
764
- gsex <- setReadable(gse, 'org.Hs.eg.db', 'ENTREZID')
765
- geneList <- gse@geneList
766
-
767
- p1 <- heatplot(gsex, showCategory=5)
768
- p2 <- heatplot(gsex, foldChange=geneList, showCategory=5)
769
- mainheatmap_plot <- cowplot::plot_grid(p1, p2, ncol=1, labels=LETTERS[1:2])
770
-
771
- print(mainheatmap_plot)
772
- })
773
-
774
- output$upsetPlotTitle <- renderPlot({
775
- txt <- "UpSet Plot"
776
-
777
- title_plot <- ggplot() +
778
- geom_textbox(
779
- aes(x = 0, y = 0, label = txt),
780
- size = 18 / .pt,
781
- width = unit(6, "inches")
782
- ) +
783
- theme_void()
784
-
785
- print(title_plot)
786
- })
787
-
788
- output$upsetPlot <- renderPlot({
789
- gse_df <- gse@result
790
- top_terms <- gse_df %>% arrange(pvalue) %>% head(10)
791
- top_gene_sets <- strsplit(top_terms$core_enrichment, "/")
792
- gene_sets_list <- lapply(top_gene_sets, function(x) unique(trimws(x)))
793
-
794
- gene_sets_df <- fromList(setNames(gene_sets_list, top_terms$ID))
795
-
796
- # Create the UpSet plot
797
- upset_plot <- upset(gene_sets_df,
798
- sets = names(gene_sets_df),
799
- main.bar.color = "steelblue",
800
- sets.bar.color = "darkred",
801
- order.by = "freq",
802
- matrix.color = "gray",
803
- keep.order = TRUE)
804
-
805
- print(upset_plot)
806
- })
807
-
808
- output$pubmedPathwayPlotTitle <- renderPlot({
809
- txt <- "PubMed Pathway Enrichment"
810
-
811
- title_plot <- ggplot() +
812
- geom_textbox(
813
- aes(x = 0, y = 0, label = txt),
814
- size = 18 / .pt,
815
- width = unit(6, "inches")
816
- ) +
817
- theme_void()
818
-
819
- print(title_plot)
820
- })
821
-
822
- output$pubmedPathwayPlot <- renderPlot({
823
- results <- data.frame(Term = character(), Year = integer(), Count = integer(), stringsAsFactors = FALSE)
824
-
825
- terms <- tail(gse$Description, n = 10)
826
-
827
- results <- data.frame()
828
- titles_2024 <- data.frame()
829
-
830
- for (term in terms) {
831
- for (year in 2014:2024) {
832
- query <- paste(term, "[Title/Abstract] AND", year, "[PDAT]")
833
-
834
- # Count results for each term and year
835
- search_results <- entrez_search(db = "pubmed", term = query, retmax = 0)
836
- results <- rbind(results, data.frame(Term = term, Year = year, Count = search_results$count))
837
-
838
- # If the year is 2024, retrieve the first 10 article titles
839
- if (year == 2024) {
840
- search_results_2024 <- entrez_search(db = "pubmed", term = query, retmax = 10)
841
- if (search_results_2024$count > 0) {
842
- article_ids <- search_results_2024$ids
843
- articles <- entrez_fetch(db = "pubmed", id = article_ids, rettype = "abstract", retmode = "text")
844
- titles <- sapply(strsplit(articles, "\n"), function(x) x[1])
845
- titles_2024 <- reactive({rbind(titles_2024, data.frame(Term = term, Title = titles, stringsAsFactors = FALSE))})
846
- }
847
- }
848
- }
849
- }
850
-
851
- total_counts <- results %>%
852
- group_by(Year) %>%
853
- summarize(Total_Count = sum(Count), .groups = 'drop')
854
-
855
- results <- results %>%
856
- left_join(total_counts, by = "Year")
857
-
858
- results <- results %>%
859
- mutate(Ratio = Count / Total_Count)
860
-
861
- print(results)
862
-
863
- mainpubmed_plot <- ggplot(results, aes(x = Year, y = Ratio, color = Term)) +
864
- geom_line() +
865
- geom_point(size = 3, shape = 20, fill = "white", stroke = 1) + # Bolded dots
866
- scale_x_continuous(limits = c(2013, 2025), breaks = seq(2013, 2025, by = 2.5)) + # 2.5-year breaks
867
- labs(title = "Publication Ratio for Enriched Terms", x = "Year", y = "Publication Ratio") +
868
- theme_minimal()
869
-
870
- print(mainpubmed_plot)
871
- })
872
- })
873
-
874
- } else {
875
- showModal(modalDialog(
876
- title = "Access Denied",
877
- "Incorrect password. Please try again.",
878
- easyClose = TRUE,
879
- footer = NULL
880
- ))
881
- }
882
- })
883
- }
884
-
885
- shinyApp(ui = ui, server = server)
 
 
 
 
 
 
886
  ```
 
1
+ ---
2
+ title: "Bioinformatics Dashboard v0.0 🧬 🦠 🧫"
3
+ output:
4
+ flexdashboard::flex_dashboard:
5
+ orientation: columns
6
+ vertical_layout: fill
7
+ runtime: shiny
8
+ ---
9
+
10
+ ```{r setup, include=FALSE}
11
+ if (!requireNamespace("pacman", quietly = TRUE)) {
12
+ install.packages("pacman")
13
+ }
14
+
15
+ pacman::p_load(flexdashboard, shiny, ggplot2, plotly, clusterProfiler, readxl, tidyverse, DESeq2, biomaRt, tidyr, shinyjs, rentrez, dplyr, ggtext, cowplot, UpSetR)
16
+
17
+ library(flexdashboard)
18
+ library(shiny)
19
+ library(ggplot2)
20
+ library(plotly)
21
+ library(clusterProfiler)
22
+ library(readxl)
23
+ library(tidyverse)
24
+ library(DESeq2)
25
+ library(biomaRt)
26
+ library(tidyr)
27
+ library(shinyjs)
28
+ library(rentrez)
29
+ library(dplyr)
30
+ library(ggtext)
31
+ library(cowplot)
32
+ library(UpSetR)
33
+ ```
34
+
35
+ Column {.tabset}
36
+ -----------------------------------------------------------------------
37
+
38
+ ### RNAseq analysis
39
+ Analyzes RNA-seq data using DESeq2 and GSEA, then visualizes the results with a volcano plot and additional plots of the GSEA output. The uploaded file must be in xlsx format, with the gene names in the first column followed by an even number of data columns: all control samples in the first half of the data columns and all mutant samples in the second half.
40
+ ```{r}
41
+
42
# Page layout for the RNAseq tab: a password prompt plus a placeholder
# (`main_ui`) that the server fills with the full dashboard once the password
# has been accepted.
ui <- fluidPage(
  titlePanel("Interactive Volcano Plot with Gene and GO Term Search"),
  useShinyjs(),

  passwordInput("password", "Enter Password:", value = "", placeholder = "Password"),
  actionButton("submit_password", "Submit"),

  # If you define a side_ui (absolute panel), it has to be added after this
  # element. Kept as the last argument WITHOUT a trailing comma: an empty
  # trailing argument is only accepted by recent htmltools/rlang versions.
  uiOutput("main_ui")
)
53
+
54
# Server logic for the RNAseq tab.
#
# Unlocks the dashboard after a password check, runs DESeq2 and GO gene set
# enrichment (gseGO) on the uploaded count table, and renders the volcano plot
# plus the GSEA summary plots.
#
# All analysis results live in session-local reactive values instead of
# globals assigned with `<<-`, so sessions served by the same R process cannot
# overwrite each other's results.
#
# @param input,output,session Standard Shiny server arguments.
server <- function(input, output, session) {
  # NOTE(review): the password is hard-coded in the source, so anyone who can
  # read this file can read it. Consider loading it from a secret store.
  correct_password <- "my_secret_password"

  # ---- Session state ----
  authenticated <- reactiveVal(FALSE) # TRUE once the password was accepted
  de_table <- reactiveVal(NULL)       # DESeq2 results as a plotting data frame
  gse_result <- reactiveVal(NULL)     # gseGO result of the latest analysis
  searchValues <- reactiveValues(
    gene_search = "",
    GO_search = "All",
    description_search = "",
    df_inverted = NULL # gseGO result table, one row per (term, core gene)
  )

  # ---- Helpers ----

  # Show a dismissible error dialog.
  show_error <- function(message_text) {
    showModal(modalDialog(title = "Error", message_text, easyClose = TRUE))
  }

  # Return `value`, or `default` when the input is still NULL / empty.
  or_default <- function(value, default) {
    if (is.null(value) || !nzchar(value)) default else value
  }

  # Case-insensitive regex match of one search pattern against gene names.
  # Falls back to a literal match when the pattern is not a valid regex.
  match_gene <- function(pattern, gene_names) {
    tryCatch(
      grepl(pattern, gene_names, ignore.case = TRUE),
      error = function(e) {
        grepl(tolower(pattern), tolower(gene_names), fixed = TRUE)
      }
    )
  }

  # Run DESeq2 (first half of the columns = "C", second half = "R") followed
  # by gseGO. `counts` must have gene symbols as row names.
  # Returns list(table = <data frame for plotting>, gse = <gseGO result>).
  run_analysis <- function(counts) {
    condition <- data.frame(
      genotype = factor(rep(c("C", "R"), each = ncol(counts) / 2), levels = c("C", "R")),
      row.names = colnames(counts)
    )
    dds <- DESeqDataSetFromMatrix(countData = counts, colData = condition, design = ~genotype)
    res <- as.data.frame(results(DESeq(dds)))

    # Additional columns used by the volcano plot and its tooltip.
    pvalue_threshold <- 0.05
    res$pvalue_log10 <- -log10(res$pvalue)
    res$significance <- ifelse(res$pvalue < pvalue_threshold, "Significant", "Not Significant")
    res$new_column <- rownames(res)
    res$diffexpressed <- ifelse(
      res$log2FoldChange > 0, "UP",
      ifelse(res$log2FoldChange < 0, "DOWN", "NO_CHANGE")
    )

    # Ranked gene list for GSEA: log2 fold changes, NA removed, descending.
    gene_list <- res$log2FoldChange
    names(gene_list) <- res$new_column
    gene_list <- sort(gene_list[!is.na(gene_list)], decreasing = TRUE)

    gse <- gseGO(
      geneList = gene_list,
      ont = "ALL",
      keyType = "SYMBOL",
      minGSSize = 3,
      maxGSSize = 800,
      pvalueCutoff = 0.05,
      verbose = TRUE,
      OrgDb = "org.Hs.eg.db",
      pAdjustMethod = "none"
    )

    list(table = res, gse = gse)
  }

  # ---- Password handling ----
  observeEvent(input$submit_password, {
    if (!isTRUE(input$password == correct_password)) {
      showModal(modalDialog(
        title = "Access Denied",
        "Incorrect password. Please try again.",
        easyClose = TRUE,
        footer = NULL
      ))
      return(NULL)
    }

    showModal(modalDialog(
      title = "Access Granted",
      "Welcome! You can now search for genes and view the volcano plot.",
      easyClose = TRUE,
      footer = NULL
    ))

    # Hide the password input and button after validation.
    shinyjs::hide("password")
    shinyjs::hide("submit_password")
    authenticated(TRUE)
  })

  # Main UI appears only after the password is correct.
  output$main_ui <- renderUI({
    req(authenticated())
    sidebarLayout(
      sidebarPanel(
        fluidRow(
          fileInput("file", "Choose XLSX File", multiple = FALSE, accept = c(".xlsx", "text/xlsx")),
          actionButton("analyze_button", "Analyze"),
          tags$hr(),
          textInput("gene_search", "Search for a gene or keyword (separate multiple genes with ';'):", ""),
          actionButton("search_gene", "Search Gene"),
          tags$hr(),
          selectInput("GO_search", "Select a GO term:", choices = NULL),
          actionButton("search_GO_term", "Search GO Term"),
          tags$hr(),
          selectInput("description_search", "Search for the name of a pathway:", choices = NULL),
          actionButton("search_description", "Search Description"),
          tags$hr(),
          sliderInput("pvalue", "P-value: ",
                      min = 0, max = 1,
                      value = 0.01, step = 0.00001),
          tags$hr(),
          sliderInput("log2fc", "Log2FoldChange: ",
                      min = 0.0001, max = 100,
                      value = 2.5, step = 0.05),
          tags$hr(),
          actionButton("visualize_gse", "Visualize the GSEGO Results:")
        )
      ),
      mainPanel(
        plotlyOutput("volcanoPlot"),
        plotOutput("dotPlotTitle", width = "100%", height = "100px"),
        plotOutput("dotPlot", width = "100%", height = "1000px"),
        plotOutput("conceptNetworkTitle", width = "100%", height = "100px"),
        plotOutput("conceptNetwork", width = "100%", height = "600px"),
        plotOutput("heatMapTitle", width = "100%", height = "100px"),
        plotOutput("heatMap", width = "100%", height = "400px"),
        plotOutput("upsetPlotTitle", width = "100%", height = "100px"),
        plotOutput("upsetPlot", width = "100%", height = "1000px"),
        plotOutput("pubmedPathwayPlotTitle", width = "100%", height = "100px"),
        plotOutput("pubmedPathwayPlot", width = "100%", height = "1500px")
      )
    )
  })

  # ---- Analysis: read the upload, run DESeq2 + gseGO ----
  observeEvent(input$analyze_button, {
    req(authenticated(), input$file)

    raw <- tryCatch(
      as.data.frame(read_excel(input$file$datapath, sheet = 1, col_names = TRUE)),
      error = function(e) NULL
    )
    if (is.null(raw)) {
      show_error("Could not read the Excel file.")
      return(NULL)
    }
    if (ncol(raw) < 1) {
      show_error("The uploaded sheet is empty.")
      return(NULL)
    }

    # The first column holds the gene names. Drop it exactly once, and also
    # drop a separate `Gene_Name` column if one exists. (Dropping by name and
    # then by position removed a real sample column whenever the first column
    # itself was called `Gene_Name`.)
    gene_ids <- raw[[1]]
    is_sample <- seq_along(raw) != 1 & names(raw) != "Gene_Name"
    genecounts <- raw[, is_sample, drop = FALSE]

    if (anyNA(gene_ids) || anyDuplicated(gene_ids) > 0) {
      show_error("Gene names in the first column must be unique and non-missing.")
      return(NULL)
    }
    rownames(genecounts) <- gene_ids

    num_samples <- ncol(genecounts)
    if (num_samples < 2) {
      show_error("The file must contain at least two sample columns.")
      return(NULL)
    }
    if (num_samples %% 2 != 0) {
      show_error("The number of samples must be even for proper grouping.")
      return(NULL)
    }

    # DESeq2 / gseGO stop on malformed input (e.g. non-integer counts); report
    # that to the user instead of letting the observer fail.
    analysis <- tryCatch(
      run_analysis(genecounts),
      error = function(e) {
        show_error(paste("Analysis failed:", conditionMessage(e)))
        NULL
      }
    )
    if (is.null(analysis)) return(NULL)

    de_table(analysis$table)
    gse_result(analysis$gse)

    # Store inverted results (one row per core-enrichment gene) for filtering.
    searchValues$df_inverted <- analysis$gse@result %>%
      separate_rows(core_enrichment, sep = "/")

    # Update GO term and description choices in the UI.
    updateSelectInput(session, "GO_search", choices = unique(searchValues$df_inverted$ID))
    updateSelectInput(session, "description_search", choices = unique(searchValues$df_inverted$Description))
  })

  # ---- Volcano plot ----

  # DESeq2 results filtered by the active search criterion.
  filtered_results <- reactive({
    data <- req(de_table())

    # Gene search: keep rows matching ANY of the ';'-separated patterns.
    gene_query <- searchValues$gene_search
    if (nzchar(gene_query)) {
      patterns <- trimws(strsplit(gene_query, ";", fixed = TRUE)[[1]])
      patterns <- patterns[nzchar(patterns)] # an empty pattern would match everything
      if (length(patterns) > 0) {
        hits <- Reduce(`|`, lapply(patterns, match_gene, gene_names = data$new_column))
        data <- data[hits, , drop = FALSE]
      }
    }

    # GO term filter.
    go_id <- searchValues$GO_search
    if (go_id != "All") {
      selected_genes <- searchValues$df_inverted %>%
        filter(ID == go_id) %>%
        pull(core_enrichment)
      data <- data %>% filter(new_column %in% selected_genes)
    }

    # Pathway description filter.
    description <- searchValues$description_search
    if (description != "") {
      selected_genes <- searchValues$df_inverted %>%
        filter(Description == description) %>%
        pull(core_enrichment)
      data <- data %>% filter(new_column %in% selected_genes)
    }

    data
  })

  output$volcanoPlot <- renderPlotly({
    data_res <- filtered_results()
    p_value <- input$pvalue
    log2fc <- input$log2fc

    p <- ggplot(data_res, aes(x = log2FoldChange, y = pvalue_log10,
                              text = paste("Gene:", new_column, "<br>Log2 Fold Change:", log2FoldChange,
                                           "<br>P-value:", pvalue, "<br>Significance:", significance,
                                           "<br>Differentially Expressed:", diffexpressed, "<br>-log10 Values:", pvalue_log10))) +
      geom_point(aes(color = log2FoldChange, shape = diffexpressed)) +
      geom_hline(yintercept = -log10(p_value), linetype = "dotted", color = "red") +
      geom_vline(xintercept = c(-log2fc, log2fc), linetype = "dotted", color = "darkblue") +
      xlim(-5, 5) +
      xlab("Log2 Fold Change") +
      ylab("-log10(P-value)") +
      ggtitle("Volcano Plot") +
      scale_color_gradient2(low = "green", mid = "pink", high = "blue", midpoint = 0,
                            name = "Log2 Fold Change")

    ggplotly(p, tooltip = "text")
  })

  # ---- Search criteria (only one criterion is active at a time) ----
  observeEvent(input$search_gene, {
    req(authenticated())
    searchValues$gene_search <- or_default(input$gene_search, "")
    searchValues$GO_search <- "All"
    searchValues$description_search <- ""
  })

  observeEvent(input$search_GO_term, {
    req(authenticated())
    # The select input is NULL until an analysis has filled its choices.
    searchValues$GO_search <- or_default(input$GO_search, "All")
    searchValues$gene_search <- ""
    searchValues$description_search <- ""
  })

  observeEvent(input$search_description, {
    req(authenticated())
    searchValues$description_search <- or_default(input$description_search, "")
    searchValues$GO_search <- "All"
    searchValues$gene_search <- ""
  })

  # ---- GSEA plots (drawn on every click of "Visualize") ----

  # Snapshot of the gseGO result taken when the button is clicked, so that the
  # expensive plots are not redrawn automatically after each new analysis.
  plotted_gse <- eventReactive(input$visualize_gse, {
    req(authenticated())
    req(gse_result())
  })

  # The snapshot, with a readable message when no term was enriched.
  enriched_gse <- function() {
    gse <- plotted_gse()
    validate(need(nrow(gse@result) > 0, "No enriched GO terms were found."))
    gse
  }

  # Render a section heading as a text-box plot.
  render_title <- function(txt) {
    force(txt)
    renderPlot({
      plotted_gse() # headings appear together with the plots
      title_plot <- ggplot() +
        geom_textbox(
          aes(x = 0, y = 0, label = txt),
          size = 18 / .pt,
          width = unit(6, "inches")
        ) +
        theme_void()
      print(title_plot)
    })
  }

  output$dotPlotTitle <- render_title("Dot Plot with 10 Pathways")
  output$conceptNetworkTitle <- render_title("Gene Concept Network")
  output$heatMapTitle <- render_title("Heatmap-Like Functional Classification")
  output$upsetPlotTitle <- render_title("UpSet Plot")
  output$pubmedPathwayPlotTitle <- render_title("PubMed Pathway Enrichment")

  output$dotPlot <- renderPlot({
    print(dotplot(enriched_gse(), showCategory = 10))
  })

  output$conceptNetwork <- renderPlot({
    gse <- enriched_gse()
    gsex <- setReadable(gse, "org.Hs.eg.db", "ENTREZID")
    geneList <- gse@geneList

    p1 <- cnetplot(gsex, foldChange = geneList, max.overlaps = 100)
    p2 <- cnetplot(gsex, categorySize = "pvalue", foldChange = geneList, max.overlaps = 100)
    p3 <- cnetplot(gsex, foldChange = geneList, circular = TRUE, colorEdge = TRUE, max.overlaps = 100)

    print(cowplot::plot_grid(p1, p2, p3, ncol = 3, labels = LETTERS[1:3], rel_widths = c(.8, .8, 1.2)))
  })

  output$heatMap <- renderPlot({
    gse <- enriched_gse()
    gsex <- setReadable(gse, "org.Hs.eg.db", "ENTREZID")
    geneList <- gse@geneList

    p1 <- heatplot(gsex, showCategory = 5)
    p2 <- heatplot(gsex, foldChange = geneList, showCategory = 5)

    print(cowplot::plot_grid(p1, p2, ncol = 1, labels = LETTERS[1:2]))
  })

  output$upsetPlot <- renderPlot({
    gse <- enriched_gse()
    top_terms <- gse@result %>% arrange(pvalue) %>% head(10)
    validate(need(nrow(top_terms) >= 2, "At least two enriched terms are needed for an UpSet plot."))

    gene_sets_list <- lapply(
      strsplit(top_terms$core_enrichment, "/"),
      function(x) unique(trimws(x))
    )
    gene_sets_df <- fromList(setNames(gene_sets_list, top_terms$ID))

    upset_plot <- upset(gene_sets_df,
                        sets = names(gene_sets_df),
                        main.bar.color = "steelblue",
                        sets.bar.color = "darkred",
                        order.by = "freq",
                        matrix.color = "gray",
                        keep.order = TRUE)

    print(upset_plot)
  })

  output$pubmedPathwayPlot <- renderPlot({
    gse <- enriched_gse()
    terms <- tail(gse$Description, n = 10)

    # One PubMed count query per (term, year). The former retrieval of 2024
    # article titles was removed: its result was wrapped in a never-evaluated
    # `reactive()` and not used anywhere, so it only cost extra requests.
    results <- expand.grid(Year = 2014:2024, Term = terms, stringsAsFactors = FALSE)
    results$Count <- vapply(
      seq_len(nrow(results)),
      function(i) {
        query <- paste(results$Term[i], "[Title/Abstract] AND", results$Year[i], "[PDAT]")
        as.numeric(entrez_search(db = "pubmed", term = query, retmax = 0)$count)
      },
      numeric(1)
    )

    # Share of each term among all queried terms within the same year.
    results <- results %>%
      group_by(Year) %>%
      mutate(Total_Count = sum(Count), Ratio = Count / Total_Count) %>%
      ungroup()

    mainpubmed_plot <- ggplot(results, aes(x = Year, y = Ratio, color = Term)) +
      geom_line() +
      geom_point(size = 3, shape = 20, fill = "white", stroke = 1) +
      scale_x_continuous(limits = c(2013, 2025), breaks = seq(2013, 2025, by = 2.5)) + # 2.5-year breaks
      labs(title = "Publication Ratio for Enriched Terms", x = "Year", y = "Publication Ratio") +
      theme_minimal()

    print(mainpubmed_plot)
  })
}

shinyApp(ui = ui, server = server)
463
+ ```
464
+
465
+ ### Proteomics analysis
466
+ Analyzes proteomics data with DESeq2 and GSEA. The results are shown as an interactive volcano plot, followed by additional plots that summarize the GSEA results. Please make sure that the uploaded file is in .xlsx format, that its first column contains the gene names, that its second column contains the expanded name (description) of each gene, and that it has an even number of data (sample) columns: the control samples must come first, followed by the same number of mutant samples.
467
+ ```{r}
468
+
469
# Page layout for the Proteomics tab: a password prompt plus a placeholder
# (`main_ui`) that the server fills with the full dashboard once the password
# has been accepted.
ui <- fluidPage(
  titlePanel("Interactive Volcano Plot with Gene and GO Term Search"),
  useShinyjs(),

  passwordInput("password", "Enter Password:", value = "", placeholder = "Password"),
  actionButton("submit_password", "Submit"),

  # If you define a side_ui (absolute panel), it has to be added after this
  # element. Kept as the last argument WITHOUT a trailing comma: an empty
  # trailing argument is only accepted by recent htmltools/rlang versions.
  uiOutput("main_ui")
)
480
+
481
# Server logic for the Proteomics tab.
#
# Unlocks the dashboard after a password check, runs DESeq2 and GO gene set
# enrichment (gseGO) on the uploaded table, and renders the volcano plot plus
# the GSEA summary plots.
#
# All analysis results live in session-local reactive values instead of
# globals assigned with `<<-`, so sessions served by the same R process cannot
# overwrite each other's results.
#
# @param input,output,session Standard Shiny server arguments.
server <- function(input, output, session) {
  # NOTE(review): the password is hard-coded in the source, so anyone who can
  # read this file can read it. Consider loading it from a secret store.
  correct_password <- "my_secret_password"

  # ---- Session state ----
  authenticated <- reactiveVal(FALSE)   # TRUE once the password was accepted
  de_table <- reactiveVal(NULL)         # DESeq2 results as a plotting data frame
  gse_result <- reactiveVal(NULL)       # gseGO result of the latest analysis
  gene_descriptions <- reactiveVal(NULL) # second upload column; not used by any output yet
  searchValues <- reactiveValues(
    gene_search = "",
    GO_search = "All",
    description_search = "",
    df_inverted = NULL # gseGO result table, one row per (term, core gene)
  )

  # ---- Helpers ----

  # Show a dismissible error dialog.
  show_error <- function(message_text) {
    showModal(modalDialog(title = "Error", message_text, easyClose = TRUE))
  }

  # Return `value`, or `default` when the input is still NULL / empty.
  or_default <- function(value, default) {
    if (is.null(value) || !nzchar(value)) default else value
  }

  # Case-insensitive regex match of one search pattern against gene names.
  # Falls back to a literal match when the pattern is not a valid regex.
  match_gene <- function(pattern, gene_names) {
    tryCatch(
      grepl(pattern, gene_names, ignore.case = TRUE),
      error = function(e) {
        grepl(tolower(pattern), tolower(gene_names), fixed = TRUE)
      }
    )
  }

  # Run DESeq2 (first half of the columns = "C", second half = "R") followed
  # by gseGO. `counts` must have gene symbols as row names.
  # Returns list(table = <data frame for plotting>, gse = <gseGO result>).
  run_analysis <- function(counts) {
    condition <- data.frame(
      genotype = factor(rep(c("C", "R"), each = ncol(counts) / 2), levels = c("C", "R")),
      row.names = colnames(counts)
    )
    dds <- DESeqDataSetFromMatrix(countData = counts, colData = condition, design = ~genotype)
    res <- as.data.frame(results(DESeq(dds)))

    # Additional columns used by the volcano plot and its tooltip.
    pvalue_threshold <- 0.05
    res$pvalue_log10 <- -log10(res$pvalue)
    res$significance <- ifelse(res$pvalue < pvalue_threshold, "Significant", "Not Significant")
    res$new_column <- rownames(res)
    res$diffexpressed <- ifelse(
      res$log2FoldChange > 0, "UP",
      ifelse(res$log2FoldChange < 0, "DOWN", "NO_CHANGE")
    )

    # Ranked gene list for GSEA: log2 fold changes, NA removed, descending.
    gene_list <- res$log2FoldChange
    names(gene_list) <- res$new_column
    gene_list <- sort(gene_list[!is.na(gene_list)], decreasing = TRUE)

    gse <- gseGO(
      geneList = gene_list,
      ont = "ALL",
      keyType = "SYMBOL",
      minGSSize = 3,
      maxGSSize = 800,
      pvalueCutoff = 0.05,
      verbose = TRUE,
      OrgDb = "org.Hs.eg.db",
      pAdjustMethod = "none"
    )

    list(table = res, gse = gse)
  }

  # ---- Password handling ----
  observeEvent(input$submit_password, {
    if (!isTRUE(input$password == correct_password)) {
      showModal(modalDialog(
        title = "Access Denied",
        "Incorrect password. Please try again.",
        easyClose = TRUE,
        footer = NULL
      ))
      return(NULL)
    }

    showModal(modalDialog(
      title = "Access Granted",
      "Welcome! You can now search for genes and view the volcano plot.",
      easyClose = TRUE,
      footer = NULL
    ))

    # Hide the password input and button after validation.
    shinyjs::hide("password")
    shinyjs::hide("submit_password")
    authenticated(TRUE)
  })

  # Main UI appears only after the password is correct.
  output$main_ui <- renderUI({
    req(authenticated())
    sidebarLayout(
      sidebarPanel(
        fluidRow(
          fileInput("file", "Choose XLSX File", multiple = FALSE, accept = c(".xlsx", "text/xlsx")),
          actionButton("analyze_button", "Analyze"),
          tags$hr(),
          textInput("gene_search", "Search for a gene or keyword (separate multiple genes with ';'):", ""),
          actionButton("search_gene", "Search Gene"),
          tags$hr(),
          selectInput("GO_search", "Select a GO term:", choices = NULL),
          actionButton("search_GO_term", "Search GO Term"),
          tags$hr(),
          selectInput("description_search", "Search for the name of a pathway:", choices = NULL),
          actionButton("search_description", "Search Description"),
          tags$hr(),
          sliderInput("pvalue", "P-value: ",
                      min = 0, max = 1,
                      value = 0.01, step = 0.00001),
          tags$hr(),
          sliderInput("log2fc", "Log2FoldChange: ",
                      min = 0.0001, max = 100,
                      value = 2.5, step = 0.05),
          tags$hr(),
          actionButton("visualize_gse", "Visualize the GSEGO Results:")
        )
      ),
      mainPanel(
        plotlyOutput("volcanoPlot"),
        plotOutput("dotPlotTitle", width = "100%", height = "100px"),
        plotOutput("dotPlot", width = "100%", height = "1000px"),
        plotOutput("conceptNetworkTitle", width = "100%", height = "100px"),
        plotOutput("conceptNetwork", width = "100%", height = "600px"),
        plotOutput("heatMapTitle", width = "100%", height = "100px"),
        plotOutput("heatMap", width = "100%", height = "400px"),
        plotOutput("upsetPlotTitle", width = "100%", height = "100px"),
        plotOutput("upsetPlot", width = "100%", height = "1000px"),
        plotOutput("pubmedPathwayPlotTitle", width = "100%", height = "100px"),
        plotOutput("pubmedPathwayPlot", width = "100%", height = "1500px")
      )
    )
  })

  # ---- Analysis: read the upload, run DESeq2 + gseGO ----
  observeEvent(input$analyze_button, {
    req(authenticated(), input$file)

    raw <- tryCatch(
      as.data.frame(read_excel(input$file$datapath, sheet = 1, col_names = TRUE)),
      error = function(e) NULL
    )
    if (is.null(raw)) {
      show_error("Could not read the Excel file.")
      return(NULL)
    }

    # Expected layout: column 1 = gene names, column 2 = expanded gene names,
    # remaining columns = samples.
    if (ncol(raw) < 4) {
      show_error("The file must contain a gene name column, a description column and at least two sample columns.")
      return(NULL)
    }

    gene_ids <- raw[[1]]
    if (anyNA(gene_ids) || anyDuplicated(gene_ids) > 0) {
      show_error("Gene names in the first column must be unique and non-missing.")
      return(NULL)
    }

    genecounts <- raw[, -(1:2), drop = FALSE]
    rownames(genecounts) <- gene_ids

    # Check if the number of samples is even.
    num_samples <- ncol(genecounts)
    if (num_samples %% 2 != 0) {
      show_error("The number of samples must be even for proper grouping.")
      return(NULL)
    }

    # DESeq2 / gseGO stop on malformed input (e.g. non-integer values); report
    # that to the user instead of letting the observer fail.
    analysis <- tryCatch(
      run_analysis(genecounts),
      error = function(e) {
        show_error(paste("Analysis failed:", conditionMessage(e)))
        NULL
      }
    )
    if (is.null(analysis)) return(NULL)

    # Keep the per-gene descriptions for this session only (formerly a global).
    gene_descriptions(data.frame(Description = raw[[2]], row.names = gene_ids))
    de_table(analysis$table)
    gse_result(analysis$gse)

    # Store inverted results (one row per core-enrichment gene) for filtering.
    searchValues$df_inverted <- analysis$gse@result %>%
      separate_rows(core_enrichment, sep = "/")

    # Update GO term and description choices in the UI.
    updateSelectInput(session, "GO_search", choices = unique(searchValues$df_inverted$ID))
    updateSelectInput(session, "description_search", choices = unique(searchValues$df_inverted$Description))
  })

  # ---- Volcano plot ----

  # DESeq2 results filtered by the active search criterion.
  filtered_results <- reactive({
    data <- req(de_table())

    # Gene search: keep rows matching ANY of the ';'-separated patterns.
    gene_query <- searchValues$gene_search
    if (nzchar(gene_query)) {
      patterns <- trimws(strsplit(gene_query, ";", fixed = TRUE)[[1]])
      patterns <- patterns[nzchar(patterns)] # an empty pattern would match everything
      if (length(patterns) > 0) {
        hits <- Reduce(`|`, lapply(patterns, match_gene, gene_names = data$new_column))
        data <- data[hits, , drop = FALSE]
      }
    }

    # GO term filter.
    go_id <- searchValues$GO_search
    if (go_id != "All") {
      selected_genes <- searchValues$df_inverted %>%
        filter(ID == go_id) %>%
        pull(core_enrichment)
      data <- data %>% filter(new_column %in% selected_genes)
    }

    # Pathway description filter.
    description <- searchValues$description_search
    if (description != "") {
      selected_genes <- searchValues$df_inverted %>%
        filter(Description == description) %>%
        pull(core_enrichment)
      data <- data %>% filter(new_column %in% selected_genes)
    }

    data
  })

  output$volcanoPlot <- renderPlotly({
    data_res <- filtered_results()
    p_value <- input$pvalue
    log2fc <- input$log2fc

    p <- ggplot(data_res, aes(x = log2FoldChange, y = pvalue_log10,
                              text = paste("Gene:", new_column, "<br>Log2 Fold Change:", log2FoldChange,
                                           "<br>P-value:", pvalue, "<br>Significance:", significance,
                                           "<br>Differentially Expressed:", diffexpressed, "<br>-log10 Values:", pvalue_log10))) +
      geom_point(aes(color = log2FoldChange, shape = diffexpressed)) +
      geom_hline(yintercept = -log10(p_value), linetype = "dotted", color = "red") +
      geom_vline(xintercept = c(-log2fc, log2fc), linetype = "dotted", color = "darkblue") +
      xlim(-5, 5) +
      xlab("Log2 Fold Change") +
      ylab("-log10(P-value)") +
      ggtitle("Volcano Plot") +
      scale_color_gradient2(low = "green", mid = "pink", high = "blue", midpoint = 0,
                            name = "Log2 Fold Change")

    ggplotly(p, tooltip = "text")
  })

  # ---- Search criteria (only one criterion is active at a time) ----
  observeEvent(input$search_gene, {
    req(authenticated())
    searchValues$gene_search <- or_default(input$gene_search, "")
    searchValues$GO_search <- "All"
    searchValues$description_search <- ""
  })

  observeEvent(input$search_GO_term, {
    req(authenticated())
    # The select input is NULL until an analysis has filled its choices.
    searchValues$GO_search <- or_default(input$GO_search, "All")
    searchValues$gene_search <- ""
    searchValues$description_search <- ""
  })

  observeEvent(input$search_description, {
    req(authenticated())
    searchValues$description_search <- or_default(input$description_search, "")
    searchValues$GO_search <- "All"
    searchValues$gene_search <- ""
  })

  # ---- GSEA plots (drawn on every click of "Visualize") ----

  # Snapshot of the gseGO result taken when the button is clicked, so that the
  # expensive plots are not redrawn automatically after each new analysis.
  plotted_gse <- eventReactive(input$visualize_gse, {
    req(authenticated())
    req(gse_result())
  })

  # The snapshot, with a readable message when no term was enriched.
  enriched_gse <- function() {
    gse <- plotted_gse()
    validate(need(nrow(gse@result) > 0, "No enriched GO terms were found."))
    gse
  }

  # Render a section heading as a text-box plot.
  render_title <- function(txt) {
    force(txt)
    renderPlot({
      plotted_gse() # headings appear together with the plots
      title_plot <- ggplot() +
        geom_textbox(
          aes(x = 0, y = 0, label = txt),
          size = 18 / .pt,
          width = unit(6, "inches")
        ) +
        theme_void()
      print(title_plot)
    })
  }

  output$dotPlotTitle <- render_title("Dot Plot with 10 Pathways")
  output$conceptNetworkTitle <- render_title("Gene Concept Network")
  output$heatMapTitle <- render_title("Heatmap-Like Functional Classification")
  output$upsetPlotTitle <- render_title("UpSet Plot")
  output$pubmedPathwayPlotTitle <- render_title("PubMed Pathway Enrichment")

  output$dotPlot <- renderPlot({
    print(dotplot(enriched_gse(), showCategory = 10))
  })

  output$conceptNetwork <- renderPlot({
    gse <- enriched_gse()
    gsex <- setReadable(gse, "org.Hs.eg.db", "ENTREZID")
    geneList <- gse@geneList

    p1 <- cnetplot(gsex, foldChange = geneList, max.overlaps = 100)
    p2 <- cnetplot(gsex, categorySize = "pvalue", foldChange = geneList, max.overlaps = 100)
    p3 <- cnetplot(gsex, foldChange = geneList, circular = TRUE, colorEdge = TRUE, max.overlaps = 100)

    print(cowplot::plot_grid(p1, p2, p3, ncol = 3, labels = LETTERS[1:3], rel_widths = c(.8, .8, 1.2)))
  })

  output$heatMap <- renderPlot({
    gse <- enriched_gse()
    gsex <- setReadable(gse, "org.Hs.eg.db", "ENTREZID")
    geneList <- gse@geneList

    p1 <- heatplot(gsex, showCategory = 5)
    p2 <- heatplot(gsex, foldChange = geneList, showCategory = 5)

    print(cowplot::plot_grid(p1, p2, ncol = 1, labels = LETTERS[1:2]))
  })

  output$upsetPlot <- renderPlot({
    gse <- enriched_gse()
    top_terms <- gse@result %>% arrange(pvalue) %>% head(10)
    validate(need(nrow(top_terms) >= 2, "At least two enriched terms are needed for an UpSet plot."))

    gene_sets_list <- lapply(
      strsplit(top_terms$core_enrichment, "/"),
      function(x) unique(trimws(x))
    )
    gene_sets_df <- fromList(setNames(gene_sets_list, top_terms$ID))

    upset_plot <- upset(gene_sets_df,
                        sets = names(gene_sets_df),
                        main.bar.color = "steelblue",
                        sets.bar.color = "darkred",
                        order.by = "freq",
                        matrix.color = "gray",
                        keep.order = TRUE)

    print(upset_plot)
  })

  output$pubmedPathwayPlot <- renderPlot({
    gse <- enriched_gse()
    terms <- tail(gse$Description, n = 10)

    # One PubMed count query per (term, year). The former retrieval of 2024
    # article titles was removed: its result was wrapped in a never-evaluated
    # `reactive()` and not used anywhere, so it only cost extra requests.
    results <- expand.grid(Year = 2014:2024, Term = terms, stringsAsFactors = FALSE)
    results$Count <- vapply(
      seq_len(nrow(results)),
      function(i) {
        query <- paste(results$Term[i], "[Title/Abstract] AND", results$Year[i], "[PDAT]")
        as.numeric(entrez_search(db = "pubmed", term = query, retmax = 0)$count)
      },
      numeric(1)
    )

    # Share of each term among all queried terms within the same year.
    results <- results %>%
      group_by(Year) %>%
      mutate(Total_Count = sum(Count), Ratio = Count / Total_Count) %>%
      ungroup()

    mainpubmed_plot <- ggplot(results, aes(x = Year, y = Ratio, color = Term)) +
      geom_line() +
      geom_point(size = 3, shape = 20, fill = "white", stroke = 1) +
      scale_x_continuous(limits = c(2013, 2025), breaks = seq(2013, 2025, by = 2.5)) + # 2.5-year breaks
      labs(title = "Publication Ratio for Enriched Terms", x = "Year", y = "Publication Ratio") +
      theme_minimal()

    print(mainpubmed_plot)
  })
}

shinyApp(ui = ui, server = server)
892
  ```