diff options
author | Biswakalyan Bhuyan <biswa@surgot.in> | 2024-11-27 20:03:53 +0530 |
---|---|---|
committer | Biswakalyan Bhuyan <biswa@surgot.in> | 2024-11-27 20:03:53 +0530 |
commit | 6670a8dfc419cf3d1f60427774de99e7010987e5 (patch) | |
tree | f554360fe4cb7dfcc091643979e8538eb4e17a03 /main.py | |
parent | 7947a931d66697bca3af1003703296ee0edcdfd0 (diff) | |
download | autopredict-6670a8dfc419cf3d1f60427774de99e7010987e5.tar.gz autopredict-6670a8dfc419cf3d1f60427774de99e7010987e5.tar.bz2 autopredict-6670a8dfc419cf3d1f60427774de99e7010987e5.zip |
Added the feature to predict price
Diffstat (limited to 'main.py')
-rw-r--r-- | main.py | 23 |
1 file changed, 13 insertions, 10 deletions
@@ -27,7 +27,7 @@ def preprocess_data(df): df['Seats'] = df['Seats'].fillna(df['Seats'].mode()[0]) # Remove rows with missing target variable - df = df.dropna(subset=['Mileage Km/L']) + df = df.dropna(subset=['Mileage Km/L', 'Price']) # Remove outliers in 'Kilometers Driven' q1, q3 = df['Kilometers_Driven'].quantile([0.25, 0.75]) @@ -40,10 +40,10 @@ def preprocess_data(df): return df # Train Model -def train_model(df): +def train_model(df, target, model_name): # Features and target - X = df.drop(columns=['Mileage Km/L', 'Name', 'Price']) - y = df['Mileage Km/L'] + X = df.drop(columns=['Mileage Km/L', 'Price', 'Name']) + y = df[target] # Split data X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) @@ -81,14 +81,15 @@ def train_model(df): y_pred = best_model.predict(X_test) # Evaluation - print("Model Performance:") + print(f"Model Performance for {target}:") print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}") print(f"RMSE: {mean_squared_error(y_test, y_pred, squared=False):.2f}") print(f"R^2: {r2_score(y_test, y_pred):.2f}") # Save the model - joblib.dump(best_model, 'model.pkl') - print("Model saved as 'model.pkl'") + model_file = f'{model_name}.pkl' + joblib.dump(best_model, model_file) + print("Model saved as '{model_file}'") # Main Function def main(): @@ -98,10 +99,12 @@ def main(): df = preprocess_data(df) print("Data preprocessing complete.") - print("Model is training...") - train_model(df) - print("Model is trained.") + print("Training mileage prediction model...") + train_model(df, target='Mileage Km/L', model_name='mileage_predictor') + + print("Training price prediction model...") + train_model(df, target='Price', model_name='price_predictor') if __name__ == "__main__": main() |