# Shiny Express app: scatter plot of sequence-composition complexity for viral
# genomes taken from the `Hack90/virus_tiny` dataset.
import math
import os
import re

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from datasets import load_dataset
from scipy.interpolate import interp1d
from shiny import render
from shiny.express import input, output, ui

# from utils import (
#     generate_2d_sequence,
#     plot_seq_full_label
# )

# Reset matplotlib styling to its defaults.
mpl.rcParams.update(mpl.rcParamsDefault)

# The dataset is loaded once at start-up and kept in memory for every session.
ds = load_dataset('Hack90/virus_tiny', keep_in_memory=True, cache_dir=None)
df_virus = pd.DataFrame(ds['train'])

# Anything that is not an upper-case A/T/C/G is discarded before counting.
_NON_NUCLEOTIDE = re.compile(r"[^ATCG]")

# Divisor applied to the complexity score.
# NOTE(review): the origin of 8.69 and 8.31 is not documented in this file;
# the expression is kept verbatim -- confirm its meaning with the author.
_COMPLEXITY_SCALE = 8.69 - 8.31


def shannon_entropy(seq):
    """Score the two-class (A/T vs. G/C) composition of a nucleotide string.

    Characters other than upper-case ``A``, ``T``, ``C`` and ``G`` are
    ignored. With ``p`` the fraction of remaining characters that are ``A``
    or ``T`` and ``H = -(p*ln(p) + (1-p)*ln(1-p))``:

    * ``c_h   = H * ln((1 - p) / p) / (8.69 - 8.31)``
    * ``shann = H / 2``

    Args:
        seq: Sequence text.

    Returns:
        A ``(c_h, shann)`` tuple of floats. ``(nan, nan)`` is returned when
        no A/T/C/G characters remain, and ``(0.0, 0.0)`` when the sequence
        is entirely A/T or entirely G/C (the limit of both expressions as
        ``p`` approaches 0 or 1), instead of raising from ``log(0)`` or a
        division by zero.
    """
    cleaned = _NON_NUCLEOTIDE.sub("", seq)
    if not cleaned:
        return float("nan"), float("nan")

    p = (cleaned.count('A') + cleaned.count('T')) / len(cleaned)
    if p in (0.0, 1.0):
        return 0.0, 0.0

    entropy = -(p * math.log(p) + (1 - p) * math.log(1 - p))
    c_h = entropy * math.log((1 - p) / p) / _COMPLEXITY_SCALE
    return c_h, entropy / 2


def plot_loss_rates(df, samples):
    """Scatter-plot the complexity scores of the first ``samples`` sequences.

    Args:
        df: DataFrame with a ``'sequence'`` column of sequence strings.
        samples: Number of leading rows to score; ``0`` yields an empty plot.

    Returns:
        The matplotlib ``Figure``. The x axis is ``2 * shann`` and the y axis
        is ``c_h``, both as returned by ``shannon_entropy``.
    """
    scores = [shannon_entropy(seq) for seq in df['sequence'].iloc[:samples]]
    # Explicit column names and dtype keep this working when `scores` is
    # empty (the slider's minimum is 0).
    df_nana = pd.DataFrame(scores, columns=['c_h', 'shann'], dtype=float)
    df_nana['x'] = df_nana['shann'] * 2
    df_nana['y'] = df_nana['c_h']

    fig, ax = plt.subplots()
    scatter = ax.scatter(df_nana['x'], df_nana['y'], s=0.5)

    # NOTE(review): the scatter has no colour mapping (`c=` is not passed),
    # so this colorbar does not encode any data; kept to preserve the
    # existing appearance.
    cbar = fig.colorbar(scatter, ax=ax)
    cbar.set_label('Label')

    # Set labels and title
    # ax.set_xlabel('X')
    # ax.set_ylabel('Y')
    # ax.set_title(f"Loss ra")
    # ax.set_xlabel("Training steps")
    # ax.set_ylabel("Loss rate")
    return fig


ui.page_opts(fillable=True)

with ui.navset_card_tab(id="tab"):
    with ui.nav_panel("Species View"):
        ui.panel_title("What is the distribution of complexity across viral species?")
        with ui.card():
            ui.input_slider("sample", "samples", 0, len(df_virus), 40)

        # Re-rendered whenever the "sample" slider changes.
        @render.plot()
        def plot_context_size_scaling():
            return plot_loss_rates(df_virus, input.sample())

    # with ui.nav_panel("Histone Modification"):
    #     ui.panel_title("Is there a pattern to histone modification?")
    #     with ui.layout_columns():
    #         with ui.card():
    #             ui.input_slider("sample_histone", "sample", 0, df_histone_len, 40)
    #     def plot_histone(df,sample):
    #         y_values = generate_2d_sequence(df['seq'].iloc[sample])[0]
    #         x_values = generate_2d_sequence(df['seq'].iloc[sample])[1]
    #         integers = str((np.argwhere(df['labels'][sample] == np.amax(df['labels'][sample]))).flatten().tolist())
    #         # Create a DataFrame with the x values, y values, and integers
    #         data = {'x': x_values, 'y': y_values, 'color': integers}
    #         fig, ax = plt.subplots()
    #         sns.scatterplot(x='x', y='y', hue='color', data=data, palette='viridis', ax=ax)
    #         ax.legend()
    #         # ax.set_title(f"Loss ra")
    #         # ax.set_xlabel("Training steps")
    #         # ax.set_ylabel("Loss rate")
    #         return fig
    #     @render.plot()
    #     def plot_histones_two():
    #         fig = plot_histone(df_histone,input.sample_histone() )
    #         if fig:
    #             return fig

    # with ui.nav_panel("Enhancer Annontations"):
    #     ui.panel_title("Is there a pattern to enhancer annotations?")
    #     with ui.layout_columns():
    #         with ui.card():
    #             ui.input_slider("sample_enhancer", "sample", 0, df_enhancer_annotation_len, 40)
    #     @render.plot()
    #     def plot_enhancer():
    #         fig = plot_loss_rates(df_enhancer_annotation,input.sample_enhancer() , True)
    #         if fig:
    #             return fig