From 6670a8dfc419cf3d1f60427774de99e7010987e5 Mon Sep 17 00:00:00 2001 From: Biswakalyan Bhuyan Date: Wed, 27 Nov 2024 20:03:53 +0530 Subject: Added the Feature to predict price --- main.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'main.py') diff --git a/main.py b/main.py index 6fbb0c2..3355b34 100644 --- a/main.py +++ b/main.py @@ -27,7 +27,7 @@ def preprocess_data(df): df['Seats'] = df['Seats'].fillna(df['Seats'].mode()[0]) # Remove rows with missing target variable - df = df.dropna(subset=['Mileage Km/L']) + df = df.dropna(subset=['Mileage Km/L', 'Price']) # Remove outliers in 'Kilometers Driven' q1, q3 = df['Kilometers_Driven'].quantile([0.25, 0.75]) @@ -40,10 +40,10 @@ def preprocess_data(df): return df # Train Model -def train_model(df): +def train_model(df, target, model_name): # Features and target - X = df.drop(columns=['Mileage Km/L', 'Name', 'Price']) - y = df['Mileage Km/L'] + X = df.drop(columns=['Mileage Km/L', 'Price', 'Name']) + y = df[target] # Split data X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) @@ -81,14 +81,15 @@ def train_model(df): y_pred = best_model.predict(X_test) # Evaluation - print("Model Performance:") + print(f"Model Performance for {target}:") print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}") print(f"RMSE: {mean_squared_error(y_test, y_pred, squared=False):.2f}") print(f"R^2: {r2_score(y_test, y_pred):.2f}") # Save the model - joblib.dump(best_model, 'model.pkl') - print("Model saved as 'model.pkl'") + model_file = f'{model_name}.pkl' + joblib.dump(best_model, model_file) + print("Model saved as '{model_file}'") # Main Function def main(): @@ -98,10 +99,12 @@ def main(): df = preprocess_data(df) print("Data preprocessing complete.") - print("Model is training...") - train_model(df) - print("Model is trained.") + print("Training mileage prediction model...") + train_model(df, target='Mileage Km/L', model_name='mileage_predictor') + + print("Training price prediction model...") + train_model(df, target='Price', model_name='price_predictor') if __name__ == "__main__": main() -- cgit v1.2.3-59-g8ed1b