diff options
Diffstat (limited to 'main.py')
-rw-r--r-- | main.py | 23 |
1 files changed, 13 insertions, 10 deletions
@@ -27,7 +27,7 @@ def preprocess_data(df):
     df['Seats'] = df['Seats'].fillna(df['Seats'].mode()[0])
 
     # Remove rows with missing target variable
-    df = df.dropna(subset=['Mileage Km/L'])
+    df = df.dropna(subset=['Mileage Km/L', 'Price'])
 
     # Remove outliers in 'Kilometers Driven'
     q1, q3 = df['Kilometers_Driven'].quantile([0.25, 0.75])
@@ -40,10 +40,10 @@ def preprocess_data(df):
     return df
 
 # Train Model
-def train_model(df):
+def train_model(df, target, model_name):
     # Features and target
-    X = df.drop(columns=['Mileage Km/L', 'Name', 'Price'])
-    y = df['Mileage Km/L']
+    X = df.drop(columns=['Mileage Km/L', 'Price', 'Name'])
+    y = df[target]
 
     # Split data
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
@@ -81,14 +81,15 @@ def train_model(df):
     y_pred = best_model.predict(X_test)
 
     # Evaluation
-    print("Model Performance:")
+    print(f"Model Performance for {target}:")
     print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}")
     print(f"RMSE: {mean_squared_error(y_test, y_pred, squared=False):.2f}")
     print(f"R^2: {r2_score(y_test, y_pred):.2f}")
 
     # Save the model
-    joblib.dump(best_model, 'model.pkl')
-    print("Model saved as 'model.pkl'")
+    model_file = f'{model_name}.pkl'
+    joblib.dump(best_model, model_file)
+    print("Model saved as '{model_file}'")
 
 # Main Function
 def main():
@@ -98,10 +99,12 @@ def main():
     df = preprocess_data(df)
     print("Data preprocessing complete.")
 
-    print("Model is training...")
-    train_model(df)
-    print("Model is trained.")
+    print("Training mileage prediction model...")
+    train_model(df, target='Mileage Km/L', model_name='mileage_predictor')
+
+    print("Training price prediction model...")
+    train_model(df, target='Price', model_name='price_predictor')
 
 
 if __name__ == "__main__":
     main()