Spaces:

Moditha24
/

ML_project

Sleeping

App Files Files Community

ML_project / app.py

Moditha24

Update app.py

e8182c9 verified 9 months ago

raw

history blame contribute delete

6.03 kB

	import gradio as gr
	import numpy as np
	from joblib import load
	from tensorflow.keras.models import load_model
	import tensorflow as tf
	import pickle
	import pandas as pd # Ensure pandas is imported for DataFrame operations
	from sklearn.preprocessing import OneHotEncoder
	from sklearn.compose import ColumnTransformer
	# Dataset used both to fit the preprocessing below and to populate the UI choices.
	df = pd.read_csv('processed_data.csv')  # adjust the path if the dataset lives elsewhere

	# Restore the pickled LabelEncoder that predict_dosage applies to the Age value.
	# NOTE: unpickling runs arbitrary code, so only a trusted 'label_encoder.pkl' may be shipped.
	with open('label_encoder.pkl', 'rb') as encoder_file:
	    label_encoder = pickle.load(encoder_file)

	# Column order the ColumnTransformer is fitted on; predict_dosage builds its
	# input row in exactly this order.
	feature_columns = [
	    'Gender',
	    'Race (Reported)',
	    'Age',
	    'Height (cm)',
	    'Weight (kg)',
	    'Diabetes',
	    'Simvastatin (Zocor)',
	    'Amiodarone (Cordarone)',
	    'INR on Reported Therapeutic Dose of Warfarin',
	    'Cyp2C9 genotypes',
	    'VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T',
	]

	# Positions inside feature_columns of the one-hot encoded columns
	# (Gender, Race, Cyp2C9, VKORC1). Must be updated if the column order changes.
	categorical_features = [0, 1, 9, 10]

	one_hot_encoder = OneHotEncoder(sparse_output=False, drop="first")
	ct = ColumnTransformer(
	    transformers=[('encoder', one_hot_encoder, categorical_features)],
	    remainder="passthrough",
	)

	# The transformer is not loaded from disk: it is re-fitted on the dataset every
	# time the app starts, so 'processed_data.csv' must match the training data.
	ct.fit(df[feature_columns])

	# Gradio widgets, one per model feature plus the model selector.
	def _dataset_choices(column):
	    """Return the distinct values of df[column] in value_counts() order."""
	    return list(df[column].value_counts().keys())


	input_Gender = gr.Radio(["male", "female"], label="Gender")
	input_Race = gr.Dropdown(_dataset_choices('Race (Reported)'), label="Race")
	input_Age = gr.Dropdown(_dataset_choices('Age'), label='Age')
	input_Height = gr.Number(label='Height (cm)')
	input_Weight = gr.Number(label='Weight (kg)')
	input_Diabetes = gr.Radio([0.0, 1.0], label='Diabetes')
	input_Simvastatin = gr.Radio([0.0, 1.0], label='Simvastatin (Zocor)')
	input_Amiodarone = gr.Radio([0.0, 1.0], label='Amiodarone (Cordarone)')
	input_INR_reported = gr.Number(label='INR on Reported Therapeutic Dose of Warfarin')
	input_Cyp2C9_genotypes = gr.Dropdown(_dataset_choices('Cyp2C9 genotypes'), label='Cyp2C9 genotypes')
	input_VKORC1_genotypes = gr.Radio(
	    _dataset_choices('VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T'),
	    label='VKORC1 genotypes',
	)
	input_model = gr.Dropdown(
	    [
	        'Decision Tree Regression',
	        'Support Vector Regression',
	        'Random Forest Regression',
	        'Deep Learning',
	    ],
	    label='Model Selection',
	)

	# Textbox that shows the predicted dose (or the error text returned by predict_dosage).
	output_warfarin_dosage = gr.Textbox(label='Therapeutic Dose of Warfarin')

	# Prediction function with renamed input variables
	def predict_dosage(gender, race, age, height, weight, diabetes, simvastatin, amiodarone, inr, cyp2c9, vkorc1, selected_model):
	    """Predict the therapeutic warfarin dose for one patient.

	    The inputs are assembled into a single row in the column order the
	    module-level ``ct`` was fitted on, one-hot encoded by ``ct``, and the Age
	    slot is overwritten with the value produced by the module-level
	    ``label_encoder``. The selected model is loaded from disk on every call.

	    Args:
	        gender, race, cyp2c9, vkorc1: Categorical values; passed to ``ct`` as strings.
	        age: Age label; encoded with ``label_encoder``. A label the encoder has
	            not seen falls back to the encoding of the encoder's first class.
	        height, weight, inr: Numeric values; ``None`` is replaced by ``0.0``.
	        diabetes, simvastatin, amiodarone: Values convertible with ``float()``.
	        selected_model: 'Deep Learning', 'Support Vector Regression' or
	            'Random Forest Regression'; any other value selects the decision tree.

	    Returns:
	        The predicted dose as a ``float``, or a string starting with
	        ``"Error in prediction: "`` if anything in the pipeline raised.
	    """
	    # Redundant with the file-level imports; kept so the function is self-contained.
	    import numpy as np
	    from joblib import load
	    from tensorflow.keras.models import load_model
	    import tensorflow as tf

	    # Optional debug function to inspect data before prediction
	    def print_input_debug(transformed_input, final_array):
	        print("Transformed input shape:", transformed_input.shape)
	        print("Final input shape:", final_array.shape)
	        print("Input data type:", final_array.dtype)

	    def safe_transform_label(encoder, label, default=None):
	        """Encode ``label``; on an unseen label return ``default``.

	        ``default`` must already be an encoded value. When it is ``None`` the
	        encoding of the encoder's first class is returned instead.
	        """
	        try:
	            return encoder.transform([label])[0]
	        except ValueError:
	            if default is not None:
	                return default
	            return encoder.transform([encoder.classes_[0]])[0]

	    try:
	        # Load the selected model
	        if selected_model == 'Deep Learning':
	            model = load_model('best_DeepLearning_model (2).h5')
	        elif selected_model == 'Support Vector Regression':
	            model = load('SVR_optimized.joblib')
	        elif selected_model == 'Random Forest Regression':
	            model = load('RandomForestRegressor_optimized.joblib')
	        else:
	            model = load("DecisionTreeRegressor_optimized.joblib")

	        # Encode Age with the LabelEncoder. No explicit default is passed: the
	        # previous code passed the raw class label (label_encoder.classes_[0]) as
	        # the fallback, which put an un-encoded label into the feature row and
	        # broke the float32 conversion below for unseen ages.
	        encoded_age = safe_transform_label(label_encoder, age)

	        # Ensure numerical inputs are valid floats (an empty Number field arrives as None)
	        height = float(height) if height is not None else 0.0
	        weight = float(weight) if weight is not None else 0.0
	        inr = float(inr) if inr is not None else 0.0

	        # Assemble the row in the column order ct was fitted on
	        raw_inputs = [
	            str(gender),
	            str(race),
	            str(age),
	            height,
	            weight,
	            float(diabetes),
	            float(simvastatin),
	            float(amiodarone),
	            inr,
	            str(cyp2c9),
	            str(vkorc1),
	        ]

	        # Apply the preprocessing pipeline (module-level ct)
	        transformed_input = ct.transform([raw_inputs])
	        # ct emits the one-hot columns first and then the passthrough columns in
	        # their original order (Age, Height, Weight, Diabetes, Simvastatin,
	        # Amiodarone, INR), so index -7 is the Age slot. Assumes ct keeps the
	        # 11-column layout with categorical positions [0, 1, 9, 10].
	        transformed_input[0][-7] = encoded_age

	        # Convert to a float32 NumPy array for model input
	        input_array = np.array(transformed_input, dtype=np.float32)
	        print_input_debug(transformed_input, input_array)

	        # Keras returns a 2-D array, the joblib-loaded regressors a 1-D array
	        if selected_model == 'Deep Learning':
	            tensor_input = tf.convert_to_tensor(input_array)
	            prediction = model.predict(tensor_input, verbose=0)
	            return float(prediction[0][0])
	        prediction = model.predict(input_array)
	        return float(prediction[0])

	    except Exception as e:
	        # Top-level UI boundary: report the failure in the output textbox
	        # instead of letting the request crash.
	        print(f"Error in prediction: {str(e)}")
	        return f"Error in prediction: {str(e)}"

	# Wire the widgets to predict_dosage and start the Gradio server.
	# The order of interface_inputs must match predict_dosage's parameter order.
	interface_inputs = [
	    input_Gender,
	    input_Race,
	    input_Age,
	    input_Height,
	    input_Weight,
	    input_Diabetes,
	    input_Simvastatin,
	    input_Amiodarone,
	    input_INR_reported,
	    input_Cyp2C9_genotypes,
	    input_VKORC1_genotypes,
	    input_model,
	]
	demo = gr.Interface(
	    fn=predict_dosage,
	    inputs=interface_inputs,
	    outputs=[output_warfarin_dosage],
	)
	demo.launch(debug=True)