Spaces:

DNA-LLM
/

viral_complexity

Runtime error

App Files Files Community

viral_complexity / app.py

Hack90

Update app.py

6f72e57 verified about 1 year ago

raw

history blame contribute delete

4.17 kB

	import math
	import os
	import re

	import matplotlib as mpl
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import seaborn as sns
	from datasets import load_dataset
	from scipy.interpolate import interp1d
	from shiny import render
	from shiny.express import input, output, ui

	# from utils import (
	# generate_2d_sequence,
	# plot_seq_full_label
	# )
	# Put every matplotlib rc setting back to the library defaults before
	# any figure is created.
	mpl.rcParams.update(mpl.rcParamsDefault)

	# Load the dataset once at import time; every plot reads from the
	# resulting DataFrame of the 'train' split.
	ds = load_dataset(
	    'Hack90/virus_tiny',
	    keep_in_memory=True,
	    cache_dir=None,
	)
	df_virus = pd.DataFrame(ds['train'])

	def shannon_entropy(seq):
	    """Return ``(c_h, shann)`` complexity scores for a nucleotide sequence.

	    Every character other than upper-case ``A``, ``T``, ``C`` or ``G`` is
	    discarded (the filter is case-sensitive). The remaining bases are
	    collapsed into two classes, A/T versus C/G, and ``p`` is the fraction
	    of A/T bases. With ``H = -(p*ln(p) + (1-p)*ln(1-p))``:

	    * ``c_h   = H * ln((1 - p) / p) / (8.69 - 8.31)``
	    * ``shann = H / 2``

	    Args:
	        seq: Sequence string.

	    Returns:
	        A ``(c_h, shann)`` tuple of floats. ``(nan, nan)`` when no A/T/C/G
	        characters remain, because ``p`` is undefined. ``(0.0, 0.0)`` when
	        the sequence is entirely A/T or entirely C/G, which is the limit
	        of both formulas as ``p`` approaches 0 or 1.
	    """
	    bases = re.sub("[^ATCG]", "", seq)
	    if not bases:
	        # Nothing to measure; avoid dividing by a zero length.
	        return math.nan, math.nan

	    p = (bases.count('A') + bases.count('T')) / len(bases)
	    if p == 0.0 or p == 1.0:
	        # math.log(0) raises ValueError; both scores tend to 0 here.
	        return 0.0, 0.0

	    entropy = (-p * math.log(p)) - (1 - p) * math.log(1 - p)

	    # NOTE(review): 8.69 and 8.31 are unexplained constants carried over
	    # unchanged from the original code - confirm where they come from.
	    scale = 8.69 - 8.31
	    c_h = entropy * math.log((1 - p) / p) / scale

	    return c_h, entropy / 2


	ui.page_opts(fillable=True)


	def plot_loss_rates(df, samples):
	    """Scatter-plot the complexity scores of the first ``samples`` sequences.

	    Despite its name, this plots the two values returned by
	    ``shannon_entropy`` and has nothing to do with loss rates.

	    Args:
	        df: DataFrame with a ``'sequence'`` column of strings.
	        samples: Number of leading rows to score; ``0`` gives an empty plot.

	    Returns:
	        The matplotlib figure. The x axis is twice the second score and
	        the y axis is the first score.
	    """
	    scores = [shannon_entropy(seq) for seq in df['sequence'].iloc[:samples]]

	    # Name the columns and force float dtype so an empty selection still
	    # yields a frame with both columns instead of raising KeyError.
	    points = pd.DataFrame(scores, columns=['c_h', 'shann'], dtype=float)

	    fig, ax = plt.subplots()
	    scatter = ax.scatter(points['shann'] * 2, points['c_h'], s=0.5)

	    # NOTE(review): the scatter has no colour data, so this colorbar does
	    # not describe the points; kept to preserve the existing figure.
	    cbar = fig.colorbar(scatter, ax=ax)
	    cbar.set_label('Label')

	    return fig


	with ui.navset_card_tab(id="tab"):
	    with ui.nav_panel("Species View"):
	        ui.panel_title("What is the distribution of complexity across viral species?")
	        with ui.card():
	            ui.input_slider("sample", "samples", 0, len(df_virus), 40)

	        @render.plot()
	        def plot_context_size_scaling():
	            """Redraw the complexity scatter for the current slider value."""
	            return plot_loss_rates(df_virus, input.sample())
	# with ui.nav_panel("Histone Modification"):
	# ui.panel_title("Is there a pattern to histone modification?")
	# with ui.layout_columns():
	# with ui.card():
	# ui.input_slider("sample_histone", "sample", 0, df_histone_len, 40)


	# def plot_histone(df,sample):
	# y_values = generate_2d_sequence(df['seq'].iloc[sample])[0]
	# x_values = generate_2d_sequence(df['seq'].iloc[sample])[1]

	# integers = str((np.argwhere(df['labels'][sample] == np.amax(df['labels'][sample]))).flatten().tolist())
	# # Create a DataFrame with the x values, y values, and integers
	# data = {'x': x_values, 'y': y_values, 'color': integers}

	# fig, ax = plt.subplots()

	# sns.scatterplot(x='x', y='y', hue='color', data=data, palette='viridis', ax=ax)
	# ax.legend()
	# # ax.set_title(f"Loss ra")
	# # ax.set_xlabel("Training steps")
	# # ax.set_ylabel("Loss rate")
	# return fig
	# @render.plot()
	# def plot_histones_two():
	# fig = plot_histone(df_histone,input.sample_histone() )
	# if fig:
	# return fig
	# with ui.nav_panel("Enhancer Annotations"):
	# ui.panel_title("Is there a pattern to enhancer annotations?")
	# with ui.layout_columns():
	# with ui.card():
	# ui.input_slider("sample_enhancer", "sample", 0, df_enhancer_annotation_len, 40)
	# @render.plot()
	# def plot_enhancer():
	# fig = plot_loss_rates(df_enhancer_annotation,input.sample_enhancer() , True)
	# if fig:
	# return fig