{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "AIMERS" ], "metadata": { "id": "D1-ngpe5C5_X" } }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FU57l9-06L5O", "outputId": "66c73c45-6609-4209-d235-c5164cdc2f44" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.9/dist-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML\n", " warnings.warn(\"Can't initialize NVML\")\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Accuracy: 0.0\n", "Classification Report:\n", " precision recall f1-score support\n", "\n", " Acanthosis nigricans 0.00 0.00 0.00 0.0\n", " Acariasis 0.00 0.00 0.00 0.0\n", " Acne 0.00 0.00 0.00 0.0\n", " Acute bronchitis 0.00 0.00 0.00 1.0\n", " Acute bronchospasm 0.00 0.00 0.00 1.0\n", " Acute glaucoma 0.00 0.00 0.00 1.0\n", " Acute pancreatitis 0.00 0.00 0.00 0.0\n", " Acute stress reaction 0.00 0.00 0.00 1.0\n", " Adjustment reaction 0.00 0.00 0.00 1.0\n", " Alcohol intoxication 0.00 0.00 0.00 0.0\n", " Alcohol withdrawal 0.00 0.00 0.00 1.0\n", " Alcoholic liver disease 0.00 0.00 0.00 0.0\n", " Allergy 0.00 0.00 0.00 0.0\n", " Allergy to animals 0.00 0.00 0.00 1.0\n", " Anemia due to chronic kidney disease 0.00 0.00 0.00 1.0\n", " Anemia of chronic disease 0.00 0.00 0.00 1.0\n", " Angina 0.00 0.00 0.00 0.0\n", " Ankylosing spondylitis 0.00 0.00 0.00 0.0\n", " Aphakia 0.00 0.00 0.00 0.0\n", " Aphthous ulcer 0.00 0.00 0.00 1.0\n", " Arthritis of the hip 0.00 0.00 0.00 1.0\n", " Asthma 0.00 0.00 0.00 0.0\n", " Atelectasis 0.00 0.00 0.00 0.0\n", " Athlete's foot 0.00 0.00 0.00 1.0\n", " Atonic bladder 0.00 0.00 0.00 0.0\n", " Atrial fibrillation 0.00 0.00 0.00 0.0\n", " Benign vaginal discharge (leukorrhea) 0.00 0.00 
0.00 0.0\n", " Bipolar disorder 0.00 0.00 0.00 1.0\n", " Birth trauma 0.00 0.00 0.00 0.0\n", " Bladder cancer 0.00 0.00 0.00 0.0\n", " Breast cancer 0.00 0.00 0.00 1.0\n", " Breast cyst 0.00 0.00 0.00 0.0\n", " Bursitis 0.00 0.00 0.00 1.0\n", " Carbon monoxide poisoning 0.00 0.00 0.00 0.0\n", " Cellulitis or abscess of mouth 0.00 0.00 0.00 1.0\n", " Cervicitis 0.00 0.00 0.00 0.0\n", " Chalazion 0.00 0.00 0.00 0.0\n", " Cholecystitis 0.00 0.00 0.00 0.0\n", " Choledocholithiasis 0.00 0.00 0.00 0.0\n", " Cholesteatoma 0.00 0.00 0.00 0.0\n", " Chondromalacia of the patella 0.00 0.00 0.00 0.0\n", " Chronic back pain 0.00 0.00 0.00 0.0\n", " Chronic glaucoma 0.00 0.00 0.00 1.0\n", " Chronic kidney disease 0.00 0.00 0.00 0.0\n", " Chronic obstructive pulmonary disease (COPD) 0.00 0.00 0.00 0.0\n", " Chronic otitis media 0.00 0.00 0.00 1.0\n", " Chronic pain disorder 0.00 0.00 0.00 1.0\n", " Chronic pancreatitis 0.00 0.00 0.00 1.0\n", " Chronic rheumatic fever 0.00 0.00 0.00 0.0\n", " Chronic ulcer 0.00 0.00 0.00 0.0\n", " Cirrhosis 0.00 0.00 0.00 1.0\n", " Cold sore 0.00 0.00 0.00 0.0\n", " Colorectal cancer 0.00 0.00 0.00 0.0\n", " Congenital rubella 0.00 0.00 0.00 1.0\n", " Conjunctivitis due to allergy 0.00 0.00 0.00 1.0\n", " Coronary atherosclerosis 0.00 0.00 0.00 1.0\n", " Croup 0.00 0.00 0.00 0.0\n", " Crushing injury 0.00 0.00 0.00 1.0\n", " Cyst of the eyelid 0.00 0.00 0.00 1.0\n", " Cystic Fibrosis 0.00 0.00 0.00 1.0\n", " Cytomegalovirus infection 0.00 0.00 0.00 1.0\n", " De Quervain disease 0.00 0.00 0.00 1.0\n", " Degenerative disc disease 0.00 0.00 0.00 1.0\n", " Dengue fever 0.00 0.00 0.00 0.0\n", " Depression 0.00 0.00 0.00 0.0\n", " Diabetes insipidus 0.00 0.00 0.00 1.0\n", " Diaper rash 0.00 0.00 0.00 0.0\n", " Dislocation of the ankle 0.00 0.00 0.00 0.0\n", " Dislocation of the finger 0.00 0.00 0.00 1.0\n", " Dislocation of the foot 0.00 0.00 0.00 1.0\n", " Dislocation of the hip 0.00 0.00 0.00 1.0\n", " Dislocation of the shoulder 0.00 0.00 0.00 
0.0\n", " Dissociative disorder 0.00 0.00 0.00 1.0\n", " Down syndrome 0.00 0.00 0.00 1.0\n", " Drug abuse (cocaine) 0.00 0.00 0.00 0.0\n", " Drug reaction 0.00 0.00 0.00 1.0\n", " Dry eye of unknown cause 0.00 0.00 0.00 0.0\n", " Dyshidrosis 0.00 0.00 0.00 1.0\n", " Ear drum damage 0.00 0.00 0.00 0.0\n", " Ear wax impaction 0.00 0.00 0.00 1.0\n", " Emphysema 0.00 0.00 0.00 0.0\n", " Empyema 0.00 0.00 0.00 1.0\n", " Encephalitis 0.00 0.00 0.00 0.0\n", " Endocarditis 0.00 0.00 0.00 1.0\n", " Endometrial hyperplasia 0.00 0.00 0.00 1.0\n", " Esophageal cancer 0.00 0.00 0.00 0.0\n", " Essential tremor 0.00 0.00 0.00 1.0\n", " Factitious disorder 0.00 0.00 0.00 1.0\n", " Fat embolism 0.00 0.00 0.00 1.0\n", " Female genitalia infection 0.00 0.00 0.00 1.0\n", " Fibroadenoma 0.00 0.00 0.00 1.0\n", " Fibromyalgia 0.00 0.00 0.00 0.0\n", " Floaters 0.00 0.00 0.00 0.0\n", " Fluid overload 0.00 0.00 0.00 1.0\n", " Foreign body in the eye 0.00 0.00 0.00 0.0\n", " Foreign body in the throat 0.00 0.00 0.00 0.0\n", " Foreign body in the vagina 0.00 0.00 0.00 0.0\n", " Fracture of the ankle 0.00 0.00 0.00 1.0\n", " Fracture of the arm 0.00 0.00 0.00 1.0\n", " Fracture of the finger 0.00 0.00 0.00 0.0\n", " Fracture of the hand 0.00 0.00 0.00 0.0\n", " Fracture of the jaw 0.00 0.00 0.00 1.0\n", " Fracture of the leg 0.00 0.00 0.00 0.0\n", " Fracture of the patella 0.00 0.00 0.00 1.0\n", " G6PD enzyme deficiency 0.00 0.00 0.00 0.0\n", " Galactorrhea of unknown cause 0.00 0.00 0.00 0.0\n", " Gallstone 0.00 0.00 0.00 0.0\n", " Gastritis 0.00 0.00 0.00 0.0\n", " Gastroduodenal ulcer 0.00 0.00 0.00 1.0\n", " Gout 0.00 0.00 0.00 0.0\n", " Granuloma inguinale 0.00 0.00 0.00 0.0\n", " Gynecomastia 0.00 0.00 0.00 0.0\n", " Hashimoto thyroiditis 0.00 0.00 0.00 1.0\n", " Head and neck cancer 0.00 0.00 0.00 1.0\n", " Heart attack 0.00 0.00 0.00 1.0\n", " Heart contusion 0.00 0.00 0.00 0.0\n", " Heart failure 0.00 0.00 0.00 1.0\n", " Hemarthrosis 0.00 0.00 0.00 1.0\n", " Hematoma 0.00 0.00 0.00 
1.0\n", " Hemolytic anemia 0.00 0.00 0.00 1.0\n", " High blood pressure 0.00 0.00 0.00 0.0\n", " Hirsutism 0.00 0.00 0.00 1.0\n", " Human immunodeficiency virus infection (HIV) 0.00 0.00 0.00 1.0\n", " Hydatidiform mole 0.00 0.00 0.00 1.0\n", " Hydrocele of the testicle 0.00 0.00 0.00 0.0\n", " Hydronephrosis 0.00 0.00 0.00 1.0\n", " Hyperemesis gravidarum 0.00 0.00 0.00 0.0\n", " Hypergammaglobulinemia 0.00 0.00 0.00 1.0\n", " Hyperkalemia 0.00 0.00 0.00 0.0\n", " Hypernatremia 0.00 0.00 0.00 1.0\n", "Hypertrophic obstructive cardiomyopathy (HOCM) 0.00 0.00 0.00 1.0\n", " Hyponatremia 0.00 0.00 0.00 0.0\n", " Impetigo 0.00 0.00 0.00 1.0\n", " Indigestion 0.00 0.00 0.00 1.0\n", " Infectious gastroenteritis 0.00 0.00 0.00 1.0\n", " Ingrown toe nail 0.00 0.00 0.00 1.0\n", " Inguinal hernia 0.00 0.00 0.00 0.0\n", " Injury of the ankle 0.00 0.00 0.00 0.0\n", " Injury to the abdomen 0.00 0.00 0.00 1.0\n", " Injury to the finger 0.00 0.00 0.00 1.0\n", " Injury to the hip 0.00 0.00 0.00 1.0\n", " Injury to the knee 0.00 0.00 0.00 0.0\n", " Insect bite 0.00 0.00 0.00 0.0\n", " Intestinal cancer 0.00 0.00 0.00 1.0\n", " Intestinal malabsorption 0.00 0.00 0.00 1.0\n", " Intestinal obstruction 0.00 0.00 0.00 0.0\n", " Intracranial abscess 0.00 0.00 0.00 1.0\n", " Irritable bowel syndrome 0.00 0.00 0.00 0.0\n", " Kaposi sarcoma 0.00 0.00 0.00 1.0\n", " Kidney cancer 0.00 0.00 0.00 1.0\n", " Kidney stone 0.00 0.00 0.00 1.0\n", " Knee ligament or meniscus tear 0.00 0.00 0.00 1.0\n", " Lactose intolerance 0.00 0.00 0.00 1.0\n", " Leishmaniasis 0.00 0.00 0.00 1.0\n", " Lichen planus 0.00 0.00 0.00 1.0\n", " Lipoma 0.00 0.00 0.00 1.0\n", " Lung cancer 0.00 0.00 0.00 1.0\n", " Lymphadenitis 0.00 0.00 0.00 0.0\n", " Lymphangitis 0.00 0.00 0.00 1.0\n", " Lymphogranuloma venereum 0.00 0.00 0.00 1.0\n", " Magnesium deficiency 0.00 0.00 0.00 1.0\n", " Malignant hypertension 0.00 0.00 0.00 1.0\n", " Marijuana abuse 0.00 0.00 0.00 0.0\n", " Mastoiditis 0.00 0.00 0.00 1.0\n", " Meckel 
diverticulum 0.00 0.00 0.00 0.0\n", " Migraine 0.00 0.00 0.00 1.0\n", " Mitral valve disease 0.00 0.00 0.00 1.0\n", " Molluscum contagiosum 0.00 0.00 0.00 1.0\n", " Mononucleosis 0.00 0.00 0.00 0.0\n", " Moyamoya disease 0.00 0.00 0.00 0.0\n", " Mucositis 0.00 0.00 0.00 0.0\n", " Mumps 0.00 0.00 0.00 1.0\n", " Muscle spasm 0.00 0.00 0.00 1.0\n", " Narcolepsy 0.00 0.00 0.00 0.0\n", " Neonatal jaundice 0.00 0.00 0.00 1.0\n", " Neurosis 0.00 0.00 0.00 0.0\n", " Noninfectious gastroenteritis 0.00 0.00 0.00 0.0\n", " Obstructive sleep apnea (OSA) 0.00 0.00 0.00 1.0\n", " Onychomycosis 0.00 0.00 0.00 0.0\n", " Open wound of the cheek 0.00 0.00 0.00 1.0\n", " Open wound of the finger 0.00 0.00 0.00 0.0\n", " Open wound of the hand 0.00 0.00 0.00 1.0\n", " Open wound of the head 0.00 0.00 0.00 1.0\n", " Open wound of the hip 0.00 0.00 0.00 0.0\n", " Open wound of the mouth 0.00 0.00 0.00 1.0\n", " Open wound of the neck 0.00 0.00 0.00 1.0\n", " Open wound of the shoulder 0.00 0.00 0.00 0.0\n", " Oral leukoplakia 0.00 0.00 0.00 0.0\n", " Oral mucosal lesion 0.00 0.00 0.00 0.0\n", " Oral thrush (yeast infection) 0.00 0.00 0.00 1.0\n", " Osteoarthritis 0.00 0.00 0.00 0.0\n", " Otitis externa (swimmer's ear) 0.00 0.00 0.00 0.0\n", " Pancreatic cancer 0.00 0.00 0.00 1.0\n", " Panic disorder 0.00 0.00 0.00 0.0\n", " Parkinson disease 0.00 0.00 0.00 0.0\n", " Paronychia 0.00 0.00 0.00 0.0\n", " Patau syndrome 0.00 0.00 0.00 0.0\n", " Pelvic fistula 0.00 0.00 0.00 1.0\n", " Pelvic organ prolapse 0.00 0.00 0.00 0.0\n", " Pemphigus 0.00 0.00 0.00 0.0\n", " Pericarditis 0.00 0.00 0.00 1.0\n", " Perirectal infection 0.00 0.00 0.00 1.0\n", " Peritonsillar abscess 0.00 0.00 0.00 1.0\n", " Personality disorder 0.00 0.00 0.00 0.0\n", " Phimosis 0.00 0.00 0.00 1.0\n", " Pilonidal cyst 0.00 0.00 0.00 1.0\n", " Placental abruption 0.00 0.00 0.00 1.0\n", " Pleural effusion 0.00 0.00 0.00 1.0\n", " Pneumonia 0.00 0.00 0.00 0.0\n", " Pneumothorax 0.00 0.00 0.00 1.0\n", " Poisoning due to 
analgesics 0.00 0.00 0.00 1.0\n", " Poisoning due to antidepressants 0.00 0.00 0.00 0.0\n", " Polycystic ovarian syndrome (PCOS) 0.00 0.00 0.00 0.0\n", " Premature ovarian failure 0.00 0.00 0.00 1.0\n", " Premenstrual tension syndrome 0.00 0.00 0.00 0.0\n", " Problem during pregnancy 0.00 0.00 0.00 0.0\n", " Protein deficiency 0.00 0.00 0.00 0.0\n", " Pseudohypoparathyroidism 0.00 0.00 0.00 1.0\n", " Psoriasis 0.00 0.00 0.00 0.0\n", " Psychotic disorder 0.00 0.00 0.00 1.0\n", " Pulmonary embolism 0.00 0.00 0.00 0.0\n", " Pulmonary eosinophilia 0.00 0.00 0.00 1.0\n", " Pulmonary fibrosis 0.00 0.00 0.00 0.0\n", " Pyelonephritis 0.00 0.00 0.00 0.0\n", " Pyloric stenosis 0.00 0.00 0.00 1.0\n", " Rabies 0.00 0.00 0.00 0.0\n", " Reactive arthritis 0.00 0.00 0.00 1.0\n", " Sarcoidosis 0.00 0.00 0.00 1.0\n", " Scarlet fever 0.00 0.00 0.00 1.0\n", " Sciatica 0.00 0.00 0.00 0.0\n", " Scoliosis 0.00 0.00 0.00 1.0\n", " Scurvy 0.00 0.00 0.00 1.0\n", " Sebaceous cyst 0.00 0.00 0.00 0.0\n", " Sepsis 0.00 0.00 0.00 1.0\n", " Septic arthritis 0.00 0.00 0.00 1.0\n", " Shingles (herpes zoster) 0.00 0.00 0.00 0.0\n", " Sickle cell crisis 0.00 0.00 0.00 1.0\n", " Sjogren syndrome 0.00 0.00 0.00 1.0\n", " Skin pigmentation disorder 0.00 0.00 0.00 1.0\n", " Smoking or tobacco addiction 0.00 0.00 0.00 1.0\n", " Spermatocele 0.00 0.00 0.00 1.0\n", " Spondylitis 0.00 0.00 0.00 0.0\n", " Spondylolisthesis 0.00 0.00 0.00 1.0\n", " Spondylosis 0.00 0.00 0.00 0.0\n", " Sporotrichosis 0.00 0.00 0.00 1.0\n", " Sprain or strain 0.00 0.00 0.00 0.0\n", " Stenosis of the tear duct 0.00 0.00 0.00 1.0\n", " Strep throat 0.00 0.00 0.00 1.0\n", " Stress incontinence 0.00 0.00 0.00 1.0\n", " Stroke 0.00 0.00 0.00 1.0\n", " Subarachnoid hemorrhage 0.00 0.00 0.00 1.0\n", " Subconjunctival hemorrhage 0.00 0.00 0.00 1.0\n", " Tendinitis 0.00 0.00 0.00 1.0\n", " Testicular torsion 0.00 0.00 0.00 1.0\n", " Thoracic aortic aneurysm 0.00 0.00 0.00 1.0\n", " Tietze syndrome 0.00 0.00 0.00 0.0\n", " Tonsillar 
hypertrophy 0.00 0.00 0.00 1.0\n", " Tonsillitis 0.00 0.00 0.00 0.0\n", " Tooth abscess 0.00 0.00 0.00 0.0\n", " Tooth disorder 0.00 0.00 0.00 0.0\n", " Torticollis 0.00 0.00 0.00 1.0\n", " Tourette syndrome 0.00 0.00 0.00 1.0\n", " Toxoplasmosis 0.00 0.00 0.00 1.0\n", " Tracheitis 0.00 0.00 0.00 1.0\n", " Transient ischemic attack 0.00 0.00 0.00 0.0\n", " Trichinosis 0.00 0.00 0.00 1.0\n", " Trichomonas infection 0.00 0.00 0.00 1.0\n", " Tricuspid valve disease 0.00 0.00 0.00 1.0\n", " Turner syndrome 0.00 0.00 0.00 1.0\n", " Urethral stricture 0.00 0.00 0.00 0.0\n", " Urge incontinence 0.00 0.00 0.00 1.0\n", " Urinary tract obstruction 0.00 0.00 0.00 0.0\n", " Vaginal yeast infection 0.00 0.00 0.00 0.0\n", " Vaginitis 0.00 0.00 0.00 0.0\n", " Varicocele of the testicles 0.00 0.00 0.00 1.0\n", " Viral exanthem 0.00 0.00 0.00 1.0\n", " Viral warts 0.00 0.00 0.00 0.0\n", " Vitamin A deficiency 0.00 0.00 0.00 1.0\n", " Vitreous degeneration 0.00 0.00 0.00 0.0\n", " Vulvar cancer 0.00 0.00 0.00 1.0\n", " Vulvar disorder 0.00 0.00 0.00 1.0\n", " Vulvodynia 0.00 0.00 0.00 1.0\n", " West Nile virus 0.00 0.00 0.00 1.0\n", " Whooping cough 0.00 0.00 0.00 0.0\n", " Wilson disease 0.00 0.00 0.00 0.0\n", "\n", " accuracy 0.00 160.0\n", " macro avg 0.00 0.00 0.00 160.0\n", " weighted avg 0.00 0.00 0.00 160.0\n", "\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n" ] } ], "source": [
 "import re\n",
 "\n",
 "import joblib\n",
 "import pandas as pd\n",
 "import spacy\n",
 "from sklearn.feature_extraction.text import TfidfVectorizer\n",
 "from sklearn.linear_model import LogisticRegression\n",
 "from sklearn.metrics import accuracy_score, classification_report\n",
 "from sklearn.model_selection import train_test_split\n",
 "from sklearn.pipeline import Pipeline\n",
 "\n",
 "# Configuration: the values a reader is most likely to change.\n",
 "DATA_PATH = 'symptomssingle.csv'\n",
 "MODEL_PATH = 'DiseasePredictionBasedonSymptoms.joblib'\n",
 "SPACY_MODEL = 'en_core_web_sm'\n",
 "TEST_SIZE = 0.2\n",
 "SEED = 42\n",
 "\n",
 "# Compiled once here rather than on every row.\n",
 "SYMPTOM_PATTERN = re.compile(r'{\"symptoms\":\"(.*?)\"}')\n",
 "SYMPTOM_RUN_PATTERN = re.compile(r'(?:{\"symptoms\":\".*?\"},?)+')\n",
 "\n",
 "\n",
 "def separate_symptoms_and_diseases(text):\n",
 "    \"\"\"Split one raw record into its symptom list and its disease name.\n",
 "\n",
 "    Args:\n",
 "        text: Raw record string holding zero or more '{\"symptoms\":\"...\"}'\n",
 "            entries together with the disease name.\n",
 "\n",
 "    Returns:\n",
 "        A (symptoms, disease) tuple: the list of captured symptom strings,\n",
 "        and the text left once the symptom entries and any '],' separator\n",
 "        have been removed.\n",
 "    \"\"\"\n",
 "    symptoms = SYMPTOM_PATTERN.findall(text)\n",
 "    disease = SYMPTOM_RUN_PATTERN.sub('', text).strip()\n",
 "    disease = disease.replace('],', '').strip()  # Remove '],' from the disease name\n",
 "    return symptoms, disease\n",
 "\n",
 "\n",
 "def preprocess(symptoms):\n",
 "    \"\"\"Turn symptom strings into one lemmatised, space-separated text.\n",
 "\n",
 "    Args:\n",
 "        symptoms: List of symptom strings. A single string is accepted too\n",
 "            and treated as a one-item list instead of being iterated\n",
 "            character by character.\n",
 "\n",
 "    Returns:\n",
 "        The lower-cased lemmas of every alphabetic, non-stop-word token,\n",
 "        joined with spaces (symptom by symptom).\n",
 "    \"\"\"\n",
 "    if isinstance(symptoms, str):\n",
 "        symptoms = [symptoms]\n",
 "    # nlp.pipe processes the texts as a stream instead of one nlp() call each.\n",
 "    return ' '.join(\n",
 "        ' '.join(\n",
 "            token.lemma_.lower()\n",
 "            for token in doc\n",
 "            if not token.is_stop and token.is_alpha\n",
 "        )\n",
 "        for doc in nlp.pipe(symptoms)\n",
 "    )\n",
 "\n",
 "\n",
 "# Load the data and drop rows with missing values\n",
 "raw_records = pd.read_csv(DATA_PATH).dropna()\n",
 "\n",
 "# Parse every record into a symptom list and a disease label\n",
 "parsed = pd.DataFrame(\n",
 "    raw_records['data'].apply(separate_symptoms_and_diseases).tolist(),\n",
 "    columns=['symptoms', 'disease'],\n",
 "    index=raw_records.index,\n",
 ")\n",
 "data = raw_records.drop(columns=['data']).assign(\n",
 "    symptoms=parsed['symptoms'],\n",
 "    disease=parsed['disease'],\n",
 ")\n",
 "\n",
 "# Load the spaCy model. Only lemmas and lexical flags are used below, so the\n",
 "# dependency parser and entity recogniser are switched off.\n",
 "nlp = spacy.load(SPACY_MODEL, disable=['parser', 'ner'])\n",
 "\n",
 "# Preprocess the symptoms column\n",
 "data['symptoms_preprocessed'] = data['symptoms'].apply(preprocess)\n",
 "\n",
 "# Split the data into train and test sets\n",
 "X_train, X_test, y_train, y_test = train_test_split(\n",
 "    data['symptoms_preprocessed'],\n",
 "    data['disease'],\n",
 "    test_size=TEST_SIZE,\n",
 "    random_state=SEED,\n",
 ")\n",
 "\n",
 "# Create a pipeline for text classification\n",
 "pipeline = Pipeline([\n",
 "    ('tfidf', TfidfVectorizer(ngram_range=(1, 2))),\n",
 "    ('classifier', LogisticRegression(solver='liblinear', C=10)),\n",
 "])\n",
 "\n",
 "# Train the model\n",
 "pipeline.fit(X_train, y_train)\n",
 "\n",
 "# Make predictions\n",
 "y_pred = pipeline.predict(X_test)\n",
 "\n",
 "# A disease that never occurs in y_train cannot be predicted, so count how\n",
 "# many test rows that affects before reading the scores below.\n",
 "unseen_count = int((~y_test.isin(y_train)).sum())\n",
 "print(f\"Test rows whose disease is absent from the training split: {unseen_count} of {len(y_test)}\")\n",
 "\n",
 "# Evaluate the model (zero_division=0 keeps the 0.0 scores but silences the\n",
 "# UndefinedMetricWarning for labels without predicted or true samples)\n",
 "print(\"Accuracy: \", accuracy_score(y_test, y_pred))\n",
 "print(\"Classification Report:\\n\", classification_report(y_test, y_pred, zero_division=0))\n"
 ] },
 { "cell_type": "markdown", "source": [ "## Save the trained model" ], "metadata": {} },
 { "cell_type": "code", "source": [
 "# Save the trained model\n",
 "joblib.dump(pipeline, MODEL_PATH)\n"
 ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "emwnJJVwAupA", "outputId": "c7d92a0a-5cbe-4e33-b48c-47f98c6cb2ca" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['DiseasePredictionBasedonSymptoms.joblib']" ] }, "metadata": {}, "execution_count": 2 } ] },
 { "cell_type": "markdown", "source": [
 "## Predict a disease from user input\n",
 "\n",
 "This cell reuses `preprocess`, `nlp` and `MODEL_PATH` from the training cell, so run that cell first."
 ], "metadata": {} },
 { "cell_type": "code", "source": [
 "# Load the saved model.\n",
 "# NOTE(review): joblib.load unpickles the file, so only load files you trust.\n",
 "loaded_pipeline = joblib.load(MODEL_PATH)\n",
 "\n",
 "# Make predictions using the loaded model (example)\n",
 "sample_symptom = input(\"Enter the symptom: \")\n",
 "processed_symptom = preprocess([sample_symptom])\n",
 "\n",
 "# When none of the entered words are in the TF-IDF vocabulary the feature row\n",
 "# is all zeros and the predicted label would not depend on the input.\n",
 "features = loaded_pipeline.named_steps['tfidf'].transform([processed_symptom])\n",
 "if features.nnz == 0:\n",
 "    print(\"No known symptom terms found in the input; no prediction made.\")\n",
 "else:\n",
 "    prediction = loaded_pipeline.predict([processed_symptom])\n",
 "    print(\"Predicted disease:\", prediction[0])\n"
 ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Tu4fmj1bBYNw", "outputId": "e73e7e54-2e72-433a-a3d9-9577bdd774a3" },
"execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Enter the symptom: head ache\n", "Predicted disease: Protein deficiency\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "CY5qrRCkBGuJ" }, "execution_count": null, "outputs": [] } ] }