From 6670a8dfc419cf3d1f60427774de99e7010987e5 Mon Sep 17 00:00:00 2001 From: Biswakalyan Bhuyan Date: Wed, 27 Nov 2024 20:03:53 +0530 Subject: Added the Feature to predict price --- main.py | 23 +++++++++++---------- predict.py | 48 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/main.py b/main.py index 6fbb0c2..3355b34 100644 --- a/main.py +++ b/main.py @@ -27,7 +27,7 @@ def preprocess_data(df): df['Seats'] = df['Seats'].fillna(df['Seats'].mode()[0]) # Remove rows with missing target variable - df = df.dropna(subset=['Mileage Km/L']) + df = df.dropna(subset=['Mileage Km/L', 'Price']) # Remove outliers in 'Kilometers Driven' q1, q3 = df['Kilometers_Driven'].quantile([0.25, 0.75]) @@ -40,10 +40,10 @@ def preprocess_data(df): return df # Train Model -def train_model(df): +def train_model(df, target, model_name): # Features and target - X = df.drop(columns=['Mileage Km/L', 'Name', 'Price']) - y = df['Mileage Km/L'] + X = df.drop(columns=['Mileage Km/L', 'Price', 'Name']) + y = df[target] # Split data X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) @@ -81,14 +81,15 @@ def train_model(df): y_pred = best_model.predict(X_test) # Evaluation - print("Model Performance:") + print(f"Model Performance for {target}:") print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}") print(f"RMSE: {mean_squared_error(y_test, y_pred, squared=False):.2f}") print(f"R^2: {r2_score(y_test, y_pred):.2f}") # Save the model - joblib.dump(best_model, 'model.pkl') - print("Model saved as 'model.pkl'") + model_file = f'{model_name}.pkl' + joblib.dump(best_model, model_file) + print(f"Model saved as '{model_file}'") # Main Function def main(): @@ -98,10 +99,12 @@ def main(): df = preprocess_data(df) print("Data preprocessing complete.") - print("Model is training...") - train_model(df) - print("Model is trained.") + print("Training mileage prediction model...") + train_model(df, 
target='Mileage Km/L', model_name='mileage_predictor') + + print("Training price prediction model...") + train_model(df, target='Price', model_name='price_predictor') if __name__ == "__main__": main() diff --git a/predict.py b/predict.py index 707ef92..d7f3a9b 100644 --- a/predict.py +++ b/predict.py @@ -2,25 +2,43 @@ import joblib import pandas as pd # Load the model -model = joblib.load('model.pkl') +mileage_model = joblib.load('mileage_predictor.pkl') +price_model = joblib.load('price_predictor.pkl') + +# Prepare input data for prediction +def prepare_input(data_dict): + # Prepare a DataFrame from a dictionary of input data + input_df = pd.DataFrame([data_dict]) + input_df['Car_Age'] = 2024 - input_df['Year'] + input_df.drop(columns=['Year'], inplace=True) + + return input_df + +# Make prediction +def predict(input_data): + # Predict mileage and price for a given input. + prepared_data = prepare_input(input_data) + mileage = mileage_model.predict(prepared_data)[0] + price = price_model.predict(prepared_data)[0] + + return mileage, price # Sample data for prediction data = { - 'Kilometers_Driven': [50000], - 'Fuel_Type': ['Petrol'], - 'Transmission': ['Manual'], - 'Owner_Type': ['First'], - 'Engine CC': [1197], - 'Power': [82], - 'Seats': [5], - 'Car_Age': [6], - 'Location': ['Mumbai'] + 'Year': 2018, + 'Kilometers_Driven': 30000, + 'Fuel_Type': 'Petrol', + 'Transmission': 'Manual', + 'Owner_Type': 'First', + 'Location': 'Mumbai', + 'Engine CC': 1200, + 'Power': 85, + 'Seats': 5 } -# Convert to DataFrame -input_data = pd.DataFrame(data) +# Make prediction -# Predict -predicted_mileage = model.predict(input_data) +predicted_mileage, predicted_price = predict(data) -print(f"Predicted Mileage: {predicted_mileage[0]:.2f} Km/L") +print(f"Predicted Mileage (Km/L): {predicted_mileage:.2f}") +print(f"Predicted Price: ₹{predicted_price:,.2f} Lakhs") -- cgit v1.2.3-59-g8ed1b