Spaces:
Runtime error
Runtime error
File size: 4,129 Bytes
aa04930 6a306c0 42c997a 6a306c0 42c997a 6a306c0 42c997a 6a306c0 42c997a 6a306c0 7b6f4c7 42c997a 2e02671 42c997a 6a306c0 42c997a 6a306c0 42c997a 6a306c0 42c997a 6a306c0 42c997a 6a306c0 42c997a 6a306c0 42c997a 6a306c0 42c997a 6a306c0 42c997a 6a306c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import math
import os
import re

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from datasets import load_dataset
from scipy.interpolate import interp1d
from shiny import render
from shiny.express import input, output, ui

# from utils import (
# generate_2d_sequence,
# plot_seq_full_label
# )
# Reset every matplotlib rcParam to the library default before any figure is drawn.
mpl.rcParams.update(mpl.rcParamsDefault)
# Load the 'Hack90/virus_tiny' dataset through the `datasets` library.
# NOTE(review): this runs at import time, so app start-up depends on the
# dataset being reachable (or already cached) -- confirm for the deployment.
ds = load_dataset('Hack90/virus_tiny')
# Materialise the 'train' split as a DataFrame; the plotting code reads its
# 'sequence' column and uses len(df_virus) as the slider's upper bound.
df_virus = pd.DataFrame(ds['train'])
def shannon_entropy(seq):
    """Score the A/T versus G/C composition of a nucleotide sequence.

    The sequence is reduced to a two-letter alphabet (A and T count as one
    symbol, G and C as the other) and two values are derived from ``p``, the
    fraction of A/T bases:

    * ``c_h``   -- the binary entropy ``H(p)`` (natural log) multiplied by
      ``log((1 - p) / p)`` and divided by the constant ``8.69 - 8.31``.
    * ``shann`` -- half of the binary entropy ``H(p)``.

    Args:
        seq: Nucleotide string. Every character other than upper-case
            ``A``, ``T``, ``C`` or ``G`` is discarded before scoring
            (lower-case bases are discarded too).

    Returns:
        A ``(c_h, shann)`` tuple of floats. ``(0.0, 0.0)`` is returned when no
        valid bases remain or when the sequence is made only of A/T or only of
        G/C: both expressions tend to 0 as ``p`` approaches 0 or 1, whereas
        evaluating them directly would hit ``log(0)`` or a division by zero.
    """
    seq = re.sub("[^ATCG]", "", seq)
    if not seq:
        # Nothing left to score; avoid dividing by a zero length.
        return 0.0, 0.0

    # Fraction of A/T bases in the filtered sequence.
    p = (seq.count('A') + seq.count('T')) / len(seq)
    if p == 0.0 or p == 1.0:
        # Single-symbol sequence: use the limiting value instead of log(0).
        return 0.0, 0.0

    # Binary Shannon entropy in nats.
    entropy = -(p * math.log(p) + (1 - p) * math.log(1 - p))

    # NOTE(review): normalising constant kept exactly as written in the
    # original code; its origin is not documented here -- confirm.
    e = 8.69 - 8.31
    c_h = entropy * math.log((1 - p) / p) / e
    shann = entropy / 2
    return c_h, shann
ui.page_opts(fillable=True)

with ui.navset_card_tab(id="tab"):
    with ui.nav_panel("Species View"):
        ui.panel_title("What is the distribution of complexity across viral species?")

        with ui.card():
            # Number of leading rows of df_virus to score and plot.
            ui.input_slider("sample", "samples", 0, len(df_virus), 40)

        def plot_loss_rates(df, samples):
            """Scatter-plot the composition scores of the first ``samples`` rows.

            Args:
                df: DataFrame with a 'sequence' column of nucleotide strings.
                samples: How many leading rows to score; 0 yields an empty plot.

            Returns:
                The matplotlib Figure holding the scatter plot. The x axis is
                twice the second value returned by ``shannon_entropy`` and the
                y axis is the first value.
            """
            scores = [shannon_entropy(seq) for seq in df['sequence'].iloc[:samples]]
            # Plain lists keep the empty case (slider at 0) valid; building a
            # DataFrame from an empty list yields no columns to index.
            x_values = [2 * shann for _, shann in scores]
            y_values = [c_h for c_h, _ in scores]

            fig, ax = plt.subplots()
            scatter = ax.scatter(x_values, y_values, s=0.5)
            # NOTE(review): the scatter has no colour array, so this colorbar
            # is not tied to any data and 'Label' is a placeholder -- confirm
            # whether it should be removed or mapped to a real quantity.
            cbar = fig.colorbar(scatter, ax=ax)
            cbar.set_label('Label')
            return fig

        @render.plot()
        def plot_context_size_scaling():
            """Render the scatter plot for the number of samples on the slider."""
            return plot_loss_rates(df_virus, input.sample())
# with ui.nav_panel("Histone Modification"):
# ui.panel_title("Is there a pattern to histone modification?")
# with ui.layout_columns():
# with ui.card():
# ui.input_slider("sample_histone", "sample", 0, df_histone_len, 40)
# def plot_histone(df,sample):
# y_values = generate_2d_sequence(df['seq'].iloc[sample])[0]
# x_values = generate_2d_sequence(df['seq'].iloc[sample])[1]
# integers = str((np.argwhere(df['labels'][sample] == np.amax(df['labels'][sample]))).flatten().tolist())
# # Create a DataFrame with the x values, y values, and integers
# data = {'x': x_values, 'y': y_values, 'color': integers}
# fig, ax = plt.subplots()
# sns.scatterplot(x='x', y='y', hue='color', data=data, palette='viridis', ax=ax)
# ax.legend()
# # ax.set_title(f"Loss ra")
# # ax.set_xlabel("Training steps")
# # ax.set_ylabel("Loss rate")
# return fig
# @render.plot()
# def plot_histones_two():
# fig = plot_histone(df_histone,input.sample_histone() )
# if fig:
# return fig
# with ui.nav_panel("Enhancer Annotations"):
# ui.panel_title("Is there a pattern to enhancer annotations?")
# with ui.layout_columns():
# with ui.card():
# ui.input_slider("sample_enhancer", "sample", 0, df_enhancer_annotation_len, 40)
# @render.plot()
# def plot_enhancer():
# fig = plot_loss_rates(df_enhancer_annotation,input.sample_enhancer() , True)
# if fig:
# return fig
|