Hack90 committed on
Commit
42c997a
·
verified ·
1 Parent(s): 61488b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -62
app.py CHANGED
@@ -1,46 +1,56 @@
1
  import pandas as pd
2
  import numpy as np
 
3
  import matplotlib.pyplot as plt
4
  from scipy.interpolate import interp1d
5
  from shiny import render
6
  from shiny.express import input, output, ui
7
- from utils import (
8
- generate_2d_sequence,
9
- plot_seq_full_label
10
- )
11
  import os
12
  import matplotlib as mpl
13
  import seaborn as sns
14
  mpl.rcParams.update(mpl.rcParamsDefault)
15
 
16
- df_gene_varient = pd.read_parquet("gene_varient.parquet")
17
- df_histone = pd.read_parquet("histone.parquet")
18
- df_gene_len = len(df_gene_varient)
19
- df_histone_len = len(df_histone)
20
- df_enhancer_annotation = pd.read_parquet('enhancer_annotation.parquet')
21
- df_enhancer_annotation_len = len(df_enhancer_annotation)
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  ui.page_opts(fillable=True)
23
 
24
  with ui.navset_card_tab(id="tab"):
25
- with ui.nav_panel("Gene Varient"):
26
- ui.panel_title("Is there a pattern to gene varient location?")
27
- with ui.layout_columns():
28
- with ui.card():
29
- ui.input_slider("sample", "sample", 0, df_gene_len, 40)
30
 
31
- def plot_loss_rates(df, sample, enhancer=False):
32
- y_values = generate_2d_sequence(df['seq'].iloc[sample])[0]
33
- x_values = generate_2d_sequence(df['seq'].iloc[sample])[1]
 
 
34
 
35
- integers = df['labels'].iloc[sample]
36
- if enhancer:
37
- K= 128
38
- res = []
39
- for i in integers:
40
- res.extend([i]*K)
41
- integers = res
42
- # Create a DataFrame with the x values, y values, and integers
43
- data = {'x': x_values, 'y': y_values, 'color': integers}
44
 
45
  # fig, ax = plt.subplots()
46
 
@@ -49,7 +59,7 @@ with ui.navset_card_tab(id="tab"):
49
  fig, ax = plt.subplots()
50
 
51
  # Create the scatter plot
52
- scatter = ax.scatter(data['x'], data['y'], c=data['color'], cmap='tab20', s=0.5)
53
 
54
  # Add a colorbar
55
  cbar = fig.colorbar(scatter, ax=ax)
@@ -65,46 +75,46 @@ with ui.navset_card_tab(id="tab"):
65
 
66
  @render.plot()
67
  def plot_context_size_scaling():
68
- fig = plot_loss_rates(df_gene_varient,input.sample() )
69
  if fig:
70
  return fig
71
- with ui.nav_panel("Histone Modification"):
72
- ui.panel_title("Is there a pattern to histone modification?")
73
- with ui.layout_columns():
74
- with ui.card():
75
- ui.input_slider("sample_histone", "sample", 0, df_histone_len, 40)
76
 
77
 
78
- def plot_histone(df,sample):
79
- y_values = generate_2d_sequence(df['seq'].iloc[sample])[0]
80
- x_values = generate_2d_sequence(df['seq'].iloc[sample])[1]
81
 
82
- integers = str((np.argwhere(df['labels'][sample] == np.amax(df['labels'][sample]))).flatten().tolist())
83
- # Create a DataFrame with the x values, y values, and integers
84
- data = {'x': x_values, 'y': y_values, 'color': integers}
85
 
86
- fig, ax = plt.subplots()
87
 
88
- sns.scatterplot(x='x', y='y', hue='color', data=data, palette='viridis', ax=ax)
89
- ax.legend()
90
- # ax.set_title(f"Loss ra")
91
- # ax.set_xlabel("Training steps")
92
- # ax.set_ylabel("Loss rate")
93
- return fig
94
- @render.plot()
95
- def plot_histones_two():
96
- fig = plot_histone(df_histone,input.sample_histone() )
97
- if fig:
98
- return fig
99
- with ui.nav_panel("Enhancer Annontations"):
100
- ui.panel_title("Is there a pattern to enhancer annotations?")
101
- with ui.layout_columns():
102
- with ui.card():
103
- ui.input_slider("sample_enhancer", "sample", 0, df_enhancer_annotation_len, 40)
104
- @render.plot()
105
- def plot_enhancer():
106
- fig = plot_loss_rates(df_enhancer_annotation,input.sample_enhancer() , True)
107
- if fig:
108
- return fig
109
 
110
 
 
1
  import pandas as pd
2
  import numpy as np
3
+ from datasets import load_dataset
4
  import matplotlib.pyplot as plt
5
  from scipy.interpolate import interp1d
6
  from shiny import render
7
  from shiny.express import input, output, ui
8
+ # from utils import (
9
+ # generate_2d_sequence,
10
+ # plot_seq_full_label
11
+ # )
12
  import os
13
  import matplotlib as mpl
14
  import seaborn as sns
15
  mpl.rcParams.update(mpl.rcParamsDefault)
16
 
17
+ ds = load_dataset('Hack90/virus_tiny')
18
+ df_virus = pd.DataFrame(ds['train'])
19
+
20
def shannon_entropy(seq):
    """Return a ``(c_h, shann)`` complexity pair for a nucleotide sequence.

    Only upper-case ``A``, ``T``, ``C`` and ``G`` are counted; every other
    character is ignored.  The four bases are pooled into two classes
    (A/T and G/C) and ``p`` is the A/T fraction of the counted bases.

    With ``H = -p*ln(p) - (1-p)*ln(1-p)`` (natural-log binary entropy):

    * ``c_h   = H * ln((1 - p) / p) / (8.69 - 8.31)``
    * ``shann = H / 2``

    Args:
        seq: Sequence string to analyse.

    Returns:
        A ``(c_h, shann)`` tuple of floats.  ``(nan, nan)`` is returned when
        the sequence contains no A/T/C/G at all (the fraction is undefined),
        and ``(0.0, 0.0)`` when only one base class is present (the limit of
        both expressions as ``p`` approaches 0 or 1).
    """
    # Local import: the file does not import ``math`` at module level, so the
    # original body failed with NameError on its first call.
    import math

    # Counting directly is equivalent to the original strip / replace /
    # count('T') / len() chain, without building intermediate strings.
    at_count = seq.count('A') + seq.count('T')
    gc_count = seq.count('G') + seq.count('C')
    total = at_count + gc_count

    if total == 0:
        # Nothing to measure; the original divided by zero here.
        return math.nan, math.nan
    if at_count == 0 or gc_count == 0:
        # p is exactly 0 or 1: log(0) and (1 - p) / p are undefined, but both
        # outputs tend to 0, so return the limit instead of raising.
        return 0.0, 0.0

    p = at_count / total
    # Normalisation constant kept exactly as in the original; its derivation
    # is not documented in this file.
    e = 8.69 - 8.31

    entropy = (-p * math.log(p)) - (1 - p) * math.log(1 - p)
    c_h = entropy * math.log((1 - p) / p) / e
    shann = entropy / 2
    return c_h, shann
34
+
35
+
36
  ui.page_opts(fillable=True)
37
 
38
  with ui.navset_card_tab(id="tab"):
39
+ with ui.nav_panel("Species View"):
40
+ ui.panel_title("What is the distribution of complexity across viral species?")
41
+ with ui.card():
42
+ ui.input_slider("sample", "samples", 0, len(df_virus), 40)
 
43
 
44
+ def plot_loss_rates(df,samples enhancer=False):
45
+ for
46
+ complexity = []
47
+ for k in range(len(df.iloc[:df_virus])):
48
+ complexity.append(shannon_entropy(df['sequence'].iloc[k]))
49
 
50
+ df_nana = pd.DataFrame(complexity)
51
+ df_nana['x'] = df_nana[1] * 2
52
+ df_nana['y'] = df_nana[0]
53
+
 
 
 
 
 
54
 
55
  # fig, ax = plt.subplots()
56
 
 
59
  fig, ax = plt.subplots()
60
 
61
  # Create the scatter plot
62
+ scatter = ax.scatter(df_nana['x'], df_nana['y'], s=0.5)
63
 
64
  # Add a colorbar
65
  cbar = fig.colorbar(scatter, ax=ax)
 
75
 
76
  @render.plot()
77
  def plot_context_size_scaling():
78
+ fig = plot_loss_rates(df_virus,input.sample() )
79
  if fig:
80
  return fig
81
+ # with ui.nav_panel("Histone Modification"):
82
+ # ui.panel_title("Is there a pattern to histone modification?")
83
+ # with ui.layout_columns():
84
+ # with ui.card():
85
+ # ui.input_slider("sample_histone", "sample", 0, df_histone_len, 40)
86
 
87
 
88
+ # def plot_histone(df,sample):
89
+ # y_values = generate_2d_sequence(df['seq'].iloc[sample])[0]
90
+ # x_values = generate_2d_sequence(df['seq'].iloc[sample])[1]
91
 
92
+ # integers = str((np.argwhere(df['labels'][sample] == np.amax(df['labels'][sample]))).flatten().tolist())
93
+ # # Create a DataFrame with the x values, y values, and integers
94
+ # data = {'x': x_values, 'y': y_values, 'color': integers}
95
 
96
+ # fig, ax = plt.subplots()
97
 
98
+ # sns.scatterplot(x='x', y='y', hue='color', data=data, palette='viridis', ax=ax)
99
+ # ax.legend()
100
+ # # ax.set_title(f"Loss ra")
101
+ # # ax.set_xlabel("Training steps")
102
+ # # ax.set_ylabel("Loss rate")
103
+ # return fig
104
+ # @render.plot()
105
+ # def plot_histones_two():
106
+ # fig = plot_histone(df_histone,input.sample_histone() )
107
+ # if fig:
108
+ # return fig
109
+ # with ui.nav_panel("Enhancer Annontations"):
110
+ # ui.panel_title("Is there a pattern to enhancer annotations?")
111
+ # with ui.layout_columns():
112
+ # with ui.card():
113
+ # ui.input_slider("sample_enhancer", "sample", 0, df_enhancer_annotation_len, 40)
114
+ # @render.plot()
115
+ # def plot_enhancer():
116
+ # fig = plot_loss_rates(df_enhancer_annotation,input.sample_enhancer() , True)
117
+ # if fig:
118
+ # return fig
119
 
120