#!/usr/bin/env python
# coding: utf-8
# In[1]:
from datasets import Dataset, ClassLabel, Sequence, load_dataset, load_metric
import numpy as np
import pandas as pd
import bioc
from spacy import displacy
import transformers
#import evaluate
from transformers import (AutoModelForTokenClassification,
AutoTokenizer,
DataCollatorForTokenClassification,
pipeline,
TrainingArguments,
Trainer)
# In[2]:
# Tag names the classifier can emit: 'O' plus B-/I- prefixed tags for DRUG,
# DISEASE and GENE. The order is significant: visualize_entities looks labels
# up in this list by integer position.
label_list = ['O', 'B-DRUG', 'I-DRUG', 'B-DISEASE', 'I-DISEASE', 'B-GENE', 'I-GENE']
# Local path that both the tokenizer and the model weights are loaded from.
model_checkpoint = './trainedSB2'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# num_labels is derived from label_list so the classification head size and
# the label table cannot drift apart.
model = AutoModelForTokenClassification.from_pretrained(model_checkpoint, num_labels=len(label_list))
# Token-classification ("ner") pipeline combining the model and tokenizer
# loaded above; called with raw text by visualize_entities.
effect_ner_model = pipeline(task="ner", model=model, tokenizer=tokenizer)
# In[21]:
def visualize_entities(sentence):
    """Render the model's entity predictions for ``sentence`` as displaCy markup.

    Runs ``effect_ner_model`` on the sentence, drops every predicted token
    whose label is the first entry of ``label_list`` ('O'), attaches the
    resolved label name to the remaining tokens and passes them to
    ``displacy.render`` in manual ``ent`` mode.

    Args:
        sentence: Raw input text to tag.

    Returns:
        The value returned by ``displacy.render`` for the highlighted sentence.

    Raises:
        ValueError: If a predicted tag is neither a name from ``label_list``
            nor ends in an integer id.
        IndexError: If the parsed integer id is outside ``label_list``.
    """
    outside_label = label_list[0]
    entities = []
    # label_list order: ['O', 'B-DRUG', 'I-DRUG', 'B-DISEASE', 'I-DISEASE', 'B-GENE', 'I-GENE']
    for token in effect_ner_model(sentence):
        tag = token["entity"]
        if tag in label_list:
            # The checkpoint already reports real label names.
            label = tag
        else:
            # Generic "LABEL_<id>" style tag: parse the whole numeric suffix,
            # not just the last character, so ids >= 10 resolve correctly.
            label = label_list[int(tag.rsplit("_", 1)[-1])]
        if label == outside_label:
            continue
        # Copy instead of mutating the dict returned by the pipeline.
        entities.append({**token, "label": label})

    params = [{"text": sentence,
               "ents": entities,
               "title": None}]

    html = displacy.render(params, style="ent", manual=True, options={
        "colors": {
            "B-DRUG": "#f08080",
            "I-DRUG": "#f08080",
            "B-DISEASE": "#9bddff",
            "I-DISEASE": "#9bddff",
            "B-GENE": "#008080",
            "I-GENE": "#008080",
        },
    })
    return html
# In[25]:
import gradio as gr
# Sample biomedical sentences mentioning drugs, genes/proteins and diseases.
# NOTE(review): presumably offered as example inputs in the Gradio UI —
# confirm against the interface definition further down the file.
exampleList = [
    "The BCR::ABL1 tyrosine kinase protein is significant in the context of Imatinib therapy for chronic myeloid leukemia (CML) due to its role as the target of Imatinib, which inhibits its activity and helps control the progression of CML.",
    "Famotidine is a histamine H2-receptor antagonist used in inpatient settings for the prevention of stress ulcers and is gaining popularity due to its low cost and effectiveness in reducing gastric acid secretion.",
    "A randomized Phase III trial demonstrated noninferiority of APF530 500 mg SC (granisetron 10 mg) to intravenous palonosetron 0.25 mg in preventing chemotherapy-induced nausea and vomiting (CINV) in patients receiving moderately or highly emetogenic chemotherapy (MEC or HEC) in both acute (0 - 24 hours) and delayed (24 - 120 hours) settings, with sustained activity over 120 hours.",
    "The known interactions between Aspirin and the COX-1 enzyme involve the irreversible inhibition of COX-1, leading to reduced production of prostaglandins and thromboxane A2, which are involved in platelet aggregation and inflammation.",
    "The mechanism of action of Metformin involves the activation of AMP-activated protein kinase (AMPK) in liver cells, which leads to decreased glucose production and increased glucose uptake in peripheral tissues, resulting in improved insulin sensitivity and reduced blood glucose levels.",
    "Genetic variations in the CYP2C9 gene may influence the response to Warfarin therapy by affecting the metabolism of Warfarin in the liver, leading to variations in the drug's anticoagulant effect.",
    "Herceptin targets the HER2/neu protein in breast cancer treatment and works by binding to the HER2 receptors on cancer cells, blocking their growth signals, and promoting immune-mediated destruction of the cancer cells.",
    "The common side effects associated with Lisinopril, an angiotensin-converting enzyme (ACE) inhibitor, include dizziness, dry cough, hypotension, and an increased risk of hyperkalemia.",
    "Ibuprofen affects the COX-2 enzyme differently compared to COX-1 by selectively inhibiting COX-2, which is responsible for inflammation, without significantly affecting COX-1, which plays a role in protecting the stomach lining.",
    # Drug name spelling corrected from "Pembrolizab".
    "Recent studies explore the use of Pembrolizumab as an immune checkpoint inhibitor targeting PD-1 in various types of cancer, showing promising results in improving the antitumor immune response and increasing survival rates.",
    "The SLC6A4 gene is associated with serotonin reuptake inhibitors (SSRIs) like Fluoxetine, and genetic variations in this gene may influence individual responses to SSRIs and affect their efficacy in treating depression and other psychiatric disorders.",
    "Insights into the BRAF mutation and its relevance in response to Vemurafenib treatment in melanoma patients indicate that Vemurafenib, a BRAF inhibitor, is particularly effective in patients with the BRAF V600E mutation, leading to tumor regression and improved outcomes.",
]
footer = """
LLMGeneLinker uses a domain-specific transformer like SciBERT finetuned on AllenAI drug dataset, BC5CDR disease, NCBI disease, DrugProt and GeneTAG datasets. The resulting SciBERT model performs Named Entity Recognition to tag drug, protein, gene, diseases in input text. Sentence embedding of SciBERT is then fed into BERT
This was made during the LLMs for Bio Hackathon organised by 4Catalyzer and SGInnovate.
Made by Team GeneLink (Nicholas, Yew Chong, Ting Wei, Brendan)