jennzhuge committed on
Commit
fa1b7c0
·
1 Parent(s): 2b207de
Files changed (1) hide show
  1. app.py +15 -5
app.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
  import gradio as gr
4
  from transformers import PreTrainedTokenizerFast, BertForMaskedLM
5
  from datasets import load_dataset
6
- import xgboost_infer
7
 
8
  embeddings_train = load_dataset("LofiAmazon/BOLD-Embeddings-Ecolayers-Amazon", split='train').to_pandas()
9
 
@@ -43,20 +43,23 @@ def preprocess():
43
 
44
  def predict_genus():
45
  data = preprocess()
46
- out = xgboost_infer.infer_dna(data)
47
 
48
  results = []
49
 
50
- genuses = xgboost_infer.infer()
51
 
52
  results.append({
53
  "sequence": dna_df['nucraw'],
54
  # "predictions": pd.concat([dna_genuses, envdna_genuses], axis=0)
55
- 'predictions': genuses
56
- })
57
 
58
  return results
59
 
 
 
 
 
60
 
61
  with gr.Blocks() as demo:
62
  # Header section
@@ -88,6 +91,13 @@ with gr.Blocks() as demo:
88
 
89
  with gr.Tab('DNA Embedding Space Similarity Visualizer'):
90
  gr.Markdown("If the highest genus probability is very low for your DNA sequence, we can still examine the DNA embedding of the sequence in relation to known samples for clues.")
 
 
 
 
 
 
 
91
 
92
 
93
  demo.launch()
 
3
  import gradio as gr
4
  from transformers import PreTrainedTokenizerFast, BertForMaskedLM
5
  from datasets import load_dataset
6
+ import infer
7
 
8
  embeddings_train = load_dataset("LofiAmazon/BOLD-Embeddings-Ecolayers-Amazon", split='train').to_pandas()
9
 
 
43
 
44
  def predict_genus():
45
  data = preprocess()
46
+ out = infer.infer_dna(data)
47
 
48
  results = []
49
 
50
+ genuses = infer.infer()
51
 
52
  results.append({
53
  "sequence": dna_df['nucraw'],
54
  # "predictions": pd.concat([dna_genuses, envdna_genuses], axis=0)
55
+ 'predictions': genuses})
 
56
 
57
  return results
58
 
59
+ def tsne():
60
+
61
+ return plots
62
+
63
 
64
  with gr.Blocks() as demo:
65
  # Header section
 
91
 
92
  with gr.Tab('DNA Embedding Space Similarity Visualizer'):
93
  gr.Markdown("If the highest genus probability is very low for your DNA sequence, we can still examine the DNA embedding of the sequence in relation to known samples for clues.")
94
+ with gr.Column():
95
+ gr.Markdown("Plot of your DNA sequence among other known species clusters.")
96
+
97
+ with gr.Column():
98
+ gr.Markdown("Plot of the five most common species at your sample coordinate.")
99
+
100
+
101
 
102
 
103
  demo.launch()