import pandas as pd

# First source: one-hot symptom columns plus a 'prognosis' label
dataset_1 = pd.read_csv("training_data.csv")

# Create a new column that merges the names of all symptom columns whose value is 1
dataset_1['symptoms_text'] = dataset_1.apply(lambda row: ','.join([col for col in dataset_1.columns if row[col] == 1]), axis=1)
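# Example of the merged format (assuming the usual one-hot schema of training_data.csv):
# a row with itching = 1 and skin_rash = 1 yields symptoms_text == "itching,skin_rash".
# The string-valued 'prognosis' column never equals 1, so the join skips it.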
#print("Original DataFrame:") | |
#print(dataset_1) | |
#dataset_1.to_csv("training_data_after_changes.csv") | |
final_dataset = pd.DataFrame(dataset_1[["prognosis","symptoms_text"]]) | |
final_dataset.columns = ['label', 'text'] | |
#final_dataset.to_csv("final_dataset.csv") | |
#final_dataset | |
###############
# Second source: free-text symptom descriptions already paired with a disease label
dataset_2 = pd.read_csv("Symptom2Disease.csv")
dataset_2 = dataset_2[["label", "text"]]
#################
# Stack the two sources; both now share the same 'label'/'text' schema
df_combined = pd.concat([final_dataset, dataset_2], axis=0, ignore_index=True)
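# Optional sanity check (not in the original flow): the stacked frame should keep
# every row from both sources and only the shared 'label'/'text' columns.
assert list(df_combined.columns) == ['label', 'text']
assert len(df_combined) == len(final_dataset) + len(dataset_2)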
################
import re
import string
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer  # imported but not used in this version

# Download the NLTK data files needed by the preprocessing below
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
def preprocess_text(text):
    # Convert to lowercase
    text = text.lower()
    # Replace everything except letters, digits, whitespace, and commas with a space
    cleaned_text = re.sub(r'[^a-zA-Z0-9\s,]', ' ', text)
    # Tokenize text
    tokens = word_tokenize(cleaned_text)
    # Remove stop words
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]
    # Rejoin tokens into a single string
    cleaned_text = ' '.join(tokens)
    return cleaned_text
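# Rough usage example (assumed input; note that commas survive as their own tokens):
#   preprocess_text("I have a fever, and chills!")  ->  "fever , chills"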
df_combined["cleaned_text"] = df_combined["text"].apply(preprocess_text) | |
#print(df_combined) | |
########### | |
#df_combined.to_csv("final_dataset_llms.csv") | |
########### | |
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset written above
data = pd.read_csv('final_dataset_llms.csv')  # Replace with your file path if it differs
X = data['cleaned_text']
y = data['label']

# Convert text data to numerical data (bag-of-words counts)
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)
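# fit_transform returns a sparse document-term matrix: one row per symptom description,
# one column per vocabulary word, each cell a raw count. vectorizer.get_feature_names_out()
# lists the vocabulary in column order, which helps when inspecting model coefficients.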
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

# Train the model; a higher iteration cap helps the solver converge on sparse bag-of-words features
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))
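# Optional sketch (not part of the original flow): persist the fitted vectorizer and
# model so the Streamlit app below could load them instead of retraining on every run.
# The file names here are assumptions; joblib ships alongside scikit-learn.
import joblib
joblib.dump(vectorizer, "vectorizer.joblib")
joblib.dump(model, "disease_model.joblib")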
###########################################
print("Distinct disease labels:", data['label'].nunique())
#############################################
def precaution(label):
    # Look up precautions for a disease; returns a list of lists (one inner list of up
    # to four precautions per matching row), or an empty list if no label matches.
    dataset_precau = pd.read_csv("disease_precaution.csv", encoding='latin1')
    # Normalize both sides of the comparison to lowercase strings
    label = str(label).lower()
    dataset_precau["Disease"] = dataset_precau["Disease"].str.lower()
    # Filter the DataFrame for the given label
    filtered_precautions = dataset_precau[dataset_precau["Disease"] == label]
    # Extract precaution columns
    precautions = filtered_precautions[["Precaution_1", "Precaution_2", "Precaution_3", "Precaution_4"]]
    return precautions.values.tolist()  # Convert DataFrame to a list of lists
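# Hypothetical usage (actual strings depend on disease_precaution.csv):
#   precaution("Malaria")        ->  [["precaution 1", "precaution 2", "precaution 3", "precaution 4"]]
#   precaution("unknown label")  ->  []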
def occurance(label):
    # Look up occurrence/risk-factor text for a disease (name kept as spelled, since the
    # Streamlit code below calls it); returns a list of strings, empty if no match.
    dataset_occur = pd.read_csv("disease_riskFactors.csv", encoding='latin1')
    label = str(label).lower()
    dataset_occur["DNAME"] = dataset_occur["DNAME"].str.lower()
    # Filter the DataFrame for the given label
    filtered_occurrence = dataset_occur[dataset_occur["DNAME"] == label]
    occurrences = filtered_occurrence["OCCUR"].tolist()  # Convert Series to list
    return occurrences
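# Hypothetical usage (actual text depends on disease_riskFactors.csv):
#   occurance("malaria")  ->  ["<occurrence/risk-factor text from the OCCUR column>"]
#   occurance("unknown")  ->  []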
################################################################################
import streamlit as st

st.title("SYMPTOM DETECTION, PRECAUTIONS AND OCCURRENCE")

# Predict a disease from a free-text symptom description, reusing the vectorizer
# and model fitted above
def predict_symptoms(new_symptoms):
    preprocessed_text = preprocess_text(new_symptoms)
    # Vectorize the cleaned symptoms (transform expects an iterable of documents)
    new_symptoms_vectorized = vectorizer.transform([preprocessed_text])
    # Make predictions
    prediction = model.predict(new_symptoms_vectorized)
    return prediction

# Get user input
symptoms = st.text_area("Enter your symptoms (comma-separated):")

if symptoms.strip():
    # Convert the input string to a list of symptoms
    symptoms_list = [symptom.strip() for symptom in symptoms.split(',')]

    # Make a prediction; model.predict returns an array, so take its first element
    prediction = predict_symptoms(' '.join(symptoms_list))
    predicted_label = prediction[0]

    st.write("Disease:")
    st.write(predicted_label)

    st.write("Precautions:")
    st.write(precaution(predicted_label))

    st.write("Occurrence:")
    st.write(occurance(predicted_label))
else:
    st.write("Please enter symptoms to get the disease.")