Spaces:

Zhaohan-Meng
/

FusionGDA

Sleeping

App Files Files Community

Zhaohan Meng commited on Aug 20, 2024

Commit

c54bfd4

verified ·

1 Parent(s): 8760168

Delete .ipynb_checkpoints

Browse files

Files changed (3) hide show

.ipynb_checkpoints/app-checkpoint.py +0 -64
.ipynb_checkpoints/model-checkpoint.sh +0 -24
.ipynb_checkpoints/requirements-checkpoint.txt +0 -7

.ipynb_checkpoints/app-checkpoint.py DELETED Viewed

@@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-import gradio as gr
-import pandas as pd
-import os
-import subprocess
-def predict_top_100_genes(disease_id):
-    # Initialize paths
-    input_csv_path = 'data/downstream/{}_disease.csv'.format(disease_id)
-    output_csv_path = 'data/downstream/{}_top100.csv'.format(disease_id)
-    # Check if the output CSV already exists
-    if not os.path.exists(output_csv_path):
-        # Proceed with your existing code if the output file doesn't exist
-        df = pd.read_csv('data/pretrain/disgenet_latest.csv')
-        df = df[df['proteinSeq'].notna()]
-        # Check if the disease_id is present in the dataframe
-        if disease_id not in df['diseaseId'].values:
-            return f"Error: Disease ID '{disease_id}' not found in the database. Please check the ID and try again."
-        desired_diseaseDes = df[df['diseaseId'] == disease_id]['diseaseDes'].iloc[0]
-        related_proteins = df[df['diseaseDes'] == desired_diseaseDes]['proteinSeq'].unique()
-        df['score'] = df['proteinSeq'].isin(related_proteins).astype(int)
-        new_df = pd.DataFrame({
-            'diseaseId': disease_id,
-            'diseaseDes': desired_diseaseDes,
-            'geneSymbol': df['geneSymbol'],
-            'proteinSeq': df['proteinSeq'],
-            'score': df['score']
-        }).drop_duplicates().reset_index(drop=True)
-        new_df.to_csv(input_csv_path, index=False)
-        # Call the model script only if the output CSV does not exist
-        script_path = 'model.sh'
-        subprocess.run(['bash', script_path, input_csv_path, output_csv_path], check=True)
-    # Read the model output file or the existing file to get the top 100 genes
-    output_df = pd.read_csv(output_csv_path)
-    # Update here to select only the required columns and rename them
-    result_df = output_df[['geneSymbol', 'Prediction_score']].rename(columns={'geneSymbol': 'Gene', 'Prediction_score': 'Score'}).head(100)
-    return result_df
-iface = gr.Interface(
-    fn=predict_top_100_genes,
-    inputs=gr.Textbox(lines=1, placeholder="Enter Disease ID Here...", label="Disease ID"),
-    outputs=gr.Dataframe(label="Predicted Top 100 Related Genes"),
-    title="Gene Disease Association Prediction",
-    description = (
-    "This AI model predicts the top 100 genes associated with a given disease based on 16,733 genes."
-    " To get started, you need a Disease ID (UMLS CUI), which can be obtained from the DisGeNET database. "
-    "\n\n**Steps to Obtain a Disease ID from DisGeNET:**\n"
-    "1. Visit the DisGeNET website: [https://www.disgenet.org/search](https://www.disgenet.org/search).\n"
-    "2. Use the search bar to enter your disease of interest. For instance, if you're interested in 'Alzheimer's Disease', type 'Alzheimer's Disease' into the search bar.\n"
-    "3. From the search results, identify the disease you're researching. The Disease ID (UMLS CUI) is listed alongside each disease name, e.g. C0002395.\n"
-    "4. Enter the Disease ID into the input box below and submit.\n\n"
-    "The DisGeNET database contains all known gene-disease associations and associated evidence. In addition, it is able to find the corresponding diseases based on a gene.\n"
-    "\n**The model will take about 18 minutes to inference a new disease.**\n"
-    )
-)
-iface.launch(share=True)

.ipynb_checkpoints/model-checkpoint.sh DELETED Viewed

@@ -1,24 +0,0 @@
-#!/bin/bash
-input_csv_path="$1"
-output_csv_path="$2"
-max_depth=6
-device='cuda:0'
-model_path_list=(
-    "../../save_model_ckp/gda_infoNCE_2024_GPU3090" \
-)
-cd ../src/finetune/
-for save_model_path in ${model_path_list[@]}; do
-        num_leaves=$((2**($max_depth-1)))
-        python finetune.py \
-                --input_csv_path $input_csv_path \
-                --output_csv_path $output_csv_path \
-                --save_model_path $save_model_path \
-                --device $device \
-                --batch_size 128 \
-                --step "300" \
-                --use_pooled \
-                --num_leaves $num_leaves \
-                --max_depth $max_depth
-done

.ipynb_checkpoints/requirements-checkpoint.txt DELETED Viewed

@@ -1,7 +0,0 @@
-lightgbm
-pytorch-metric-learning
-torch
-transformers
-PyTDC
-gradio
-numpy