File size: 4,129 Bytes
aa04930
6a306c0
42c997a
6a306c0
 
 
 
42c997a
 
 
 
6a306c0
 
 
 
 
42c997a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a306c0
 
 
42c997a
 
 
 
6a306c0
7b6f4c7
42c997a
2e02671
42c997a
6a306c0
42c997a
 
 
 
6a306c0
 
 
 
 
 
 
 
42c997a
6a306c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42c997a
6a306c0
 
42c997a
 
 
 
 
6a306c0
 
42c997a
 
 
6a306c0
42c997a
 
 
6a306c0
42c997a
6a306c0
42c997a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a306c0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import math
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from datasets import load_dataset
from scipy.interpolate import interp1d
from shiny import render
from shiny.express import input, output, ui
# from utils import (
#     generate_2d_sequence,
#     plot_seq_full_label
# )
mpl.rcParams.update(mpl.rcParamsDefault)

# Load the 'Hack90/virus_tiny' dataset when the module is imported and copy
# its 'train' split into a pandas DataFrame. The 'sequence' column of this
# frame is what the "Species View" plot below reads.
# NOTE(review): presumably fetched from the Hugging Face Hub on first run —
# confirm whether the deployment environment has network access / a cache.
ds = load_dataset('Hack90/virus_tiny')
df_virus = pd.DataFrame(ds['train'])

def shannon_entropy(seq):
    """Return a complexity score and the halved binary entropy of a DNA string.

    Only the uppercase characters ``A``, ``T``, ``C`` and ``G`` are counted;
    every other character (including lowercase bases) is ignored. The
    sequence is reduced to two classes, A/T versus G/C, and ``p`` is the
    fraction of counted characters that are A or T.

    Args:
        seq: Nucleotide sequence as a string.

    Returns:
        A ``(c_h, shann)`` tuple of floats. With
        ``H = -p*ln(p) - (1-p)*ln(1-p)``:

        * ``c_h`` is ``H * ln((1-p)/p) / (8.69 - 8.31)``; it is negative
          when A/T is the majority class.
        * ``shann`` is ``H / 2``.

        ``(0.0, 0.0)`` is returned when only one of the two classes is
        present (``p`` is 0 or 1), which is the limit of both expressions.
        ``(nan, nan)`` is returned when the sequence contains no A/T/C/G at
        all, because ``p`` is undefined.
    """
    # Counting directly gives the same p as stripping non-ATCG characters
    # and collapsing A->T / G->C, without building intermediate strings.
    at_count = seq.count('A') + seq.count('T')
    gc_count = seq.count('G') + seq.count('C')
    total = at_count + gc_count

    if total == 0:
        # Nothing to measure; NaN lets plotting code skip the point instead
        # of failing with ZeroDivisionError.
        return math.nan, math.nan
    if at_count == 0 or gc_count == 0:
        # p is exactly 0 or 1: log(0) is undefined, but both quantities
        # tend to 0 as p approaches either end.
        return 0.0, 0.0

    p = at_count / total
    q = 1 - p
    entropy = (-p * math.log(p)) - q * math.log(q)

    # Normalising constant carried over unchanged from the original code;
    # its origin is not documented in this file.
    scale = 8.69 - 8.31
    c_h = entropy * math.log(q / p) / scale
    shann = entropy / 2
    return c_h, shann


# Page-level option for the Shiny Express app.
ui.page_opts(fillable=True)

# Tabbed card container. "Species View" is the only active panel; the other
# panels further down in this file are commented out.
with ui.navset_card_tab(id="tab"):
    with ui.nav_panel("Species View"):
        ui.panel_title("What is the distribution of complexity across viral species?")
        with ui.card():
            # Slider with id "sample" (read back as input.sample()) running
            # from 0 to the number of rows in df_virus, initially 40.
            # NOTE(review): the minimum of 0 means the plot can be asked for
            # zero sequences, and the initial value assumes df_virus has at
            # least 40 rows — confirm both are intended.
            ui.input_slider("sample", "samples", 0, len(df_virus), 40)
                
        def plot_loss_rates(df, samples):
            """Scatter-plot the complexity scores of the first ``samples`` rows.

            Each selected sequence is scored with ``shannon_entropy``, which
            returns a ``(c_h, shann)`` pair. The x coordinate is ``shann * 2``
            and the y coordinate is ``c_h``.

            Args:
                df: DataFrame with a ``sequence`` column of nucleotide strings.
                samples: Number of leading rows of ``df`` to score and plot.
                    ``0`` yields an empty plot.

            Returns:
                The matplotlib figure holding the scatter plot.
            """
            scores = [
                shannon_entropy(seq) for seq in df['sequence'].iloc[:samples]
            ]

            # Unpack into plain lists rather than indexing DataFrame columns
            # 0 and 1: an empty selection would produce a frame with no
            # columns and the lookup would raise KeyError.
            x_values = [shann * 2 for _, shann in scores]
            y_values = [c_h for c_h, _ in scores]

            fig, ax = plt.subplots()
            scatter = ax.scatter(x_values, y_values, s=0.5)

            # NOTE(review): scatter() is given no colour values, so this
            # colorbar does not encode any data. Kept so the rendered layout
            # is unchanged — confirm whether it should be removed or fed a
            # real variable.
            cbar = fig.colorbar(scatter, ax=ax)
            cbar.set_label('Label')
            return fig

        @render.plot()
        def plot_context_size_scaling():
            """Render the complexity scatter for the slider-selected row count."""
            sample_count = input.sample()
            figure = plot_loss_rates(df_virus, sample_count)
            # Hand the figure back only when it is truthy; otherwise None.
            return figure if figure else None
    # with ui.nav_panel("Histone Modification"):
    #     ui.panel_title("Is there a pattern to histone modification?")
    #     with ui.layout_columns():
    #         with ui.card():
    #             ui.input_slider("sample_histone", "sample", 0, df_histone_len, 40)
        
        
    #     def plot_histone(df,sample):
    #         y_values = generate_2d_sequence(df['seq'].iloc[sample])[0]
    #         x_values = generate_2d_sequence(df['seq'].iloc[sample])[1]
            
    #         integers = str((np.argwhere(df['labels'][sample] == np.amax(df['labels'][sample]))).flatten().tolist())
    #         # Create a DataFrame with the x values, y values, and integers
    #         data = {'x': x_values, 'y': y_values, 'color': integers}

    #         fig, ax = plt.subplots()

    #         sns.scatterplot(x='x', y='y', hue='color', data=data, palette='viridis', ax=ax)
    #         ax.legend()
    #         # ax.set_title(f"Loss ra")
    #         # ax.set_xlabel("Training steps")
    #         # ax.set_ylabel("Loss rate")
    #         return fig      
    #     @render.plot()
    #     def plot_histones_two():
    #         fig = plot_histone(df_histone,input.sample_histone() )
    #         if fig:
    #             return fig
    # with ui.nav_panel("Enhancer Annotations"):
    #     ui.panel_title("Is there a pattern to enhancer annotations?")
    #     with ui.layout_columns():
    #         with ui.card():
    #             ui.input_slider("sample_enhancer", "sample", 0, df_enhancer_annotation_len, 40)
    #     @render.plot()
    #     def plot_enhancer():
    #         fig = plot_loss_rates(df_enhancer_annotation,input.sample_enhancer() , True)
    #         if fig:
    #             return fig