diff options
author | Biswakalyan Bhuyan <biswa@surgot.in> | 2024-11-27 20:03:53 +0530 |
---|---|---|
committer | Biswakalyan Bhuyan <biswa@surgot.in> | 2024-11-27 20:03:53 +0530 |
commit | 6670a8dfc419cf3d1f60427774de99e7010987e5 (patch) | |
tree | f554360fe4cb7dfcc091643979e8538eb4e17a03 | |
parent | 7947a931d66697bca3af1003703296ee0edcdfd0 (diff) | |
download | autopredict-6670a8dfc419cf3d1f60427774de99e7010987e5.tar.gz autopredict-6670a8dfc419cf3d1f60427774de99e7010987e5.tar.bz2 autopredict-6670a8dfc419cf3d1f60427774de99e7010987e5.zip |
Added the feature to predict price
-rw-r--r-- | main.py | 23 | ||||
-rw-r--r-- | predict.py | 48 |
2 files changed, 46 insertions, 25 deletions
@@ -27,7 +27,7 @@ def preprocess_data(df): df['Seats'] = df['Seats'].fillna(df['Seats'].mode()[0]) # Remove rows with missing target variable - df = df.dropna(subset=['Mileage Km/L']) + df = df.dropna(subset=['Mileage Km/L', 'Price']) # Remove outliers in 'Kilometers Driven' q1, q3 = df['Kilometers_Driven'].quantile([0.25, 0.75]) @@ -40,10 +40,10 @@ def preprocess_data(df): return df # Train Model -def train_model(df): +def train_model(df, target, model_name): # Features and target - X = df.drop(columns=['Mileage Km/L', 'Name', 'Price']) - y = df['Mileage Km/L'] + X = df.drop(columns=['Mileage Km/L', 'Price', 'Name']) + y = df[target] # Split data X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) @@ -81,14 +81,15 @@ def train_model(df): y_pred = best_model.predict(X_test) # Evaluation - print("Model Performance:") + print(f"Model Performance for {target}:") print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}") print(f"RMSE: {mean_squared_error(y_test, y_pred, squared=False):.2f}") print(f"R^2: {r2_score(y_test, y_pred):.2f}") # Save the model - joblib.dump(best_model, 'model.pkl') - print("Model saved as 'model.pkl'") + model_file = f'{model_name}.pkl' + joblib.dump(best_model, model_file) + print("Model saved as '{model_file}'") # Main Function def main(): @@ -98,10 +99,12 @@ def main(): df = preprocess_data(df) print("Data preprocessing complete.") - print("Model is training...") - train_model(df) - print("Model is trained.") + print("Training mileage prediction model...") + train_model(df, target='Mileage Km/L', model_name='mileage_predictor') + + print("Training price prediction model...") + train_model(df, target='Price', model_name='price_predictor') if __name__ == "__main__": main() @@ -2,25 +2,43 @@ import joblib import pandas as pd # Load the model -model = joblib.load('model.pkl') +mileage_model = joblib.load('mileage_predictor.pkl') +price_model = joblib.load('price_predictor.pkl') + +# Prepare input data for 
prediction +def prepare_input(data_dict): + # Prepare a DataFrame from a dictionary of input data + input_df = pd.DataFrame([data_dict]) + input_df['Car_Age'] = 2024 - input_df['Year'] + input_df.drop(columns=['Year'], inplace=True) + + return input_df + +# Make prediction +def predict(input_data): + # Predict mileage and price for a given input. + prepared_data = prepare_input(input_data) + mileage = mileage_model.predict(prepared_data)[0] + price = price_model.predict(prepared_data)[0] + + return mileage, price # Sample data for prediction data = { - 'Kilometers_Driven': [50000], - 'Fuel_Type': ['Petrol'], - 'Transmission': ['Manual'], - 'Owner_Type': ['First'], - 'Engine CC': [1197], - 'Power': [82], - 'Seats': [5], - 'Car_Age': [6], - 'Location': ['Mumbai'] + 'Year': 2018, + 'Kilometers_Driven': 30000, + 'Fuel_Type': 'Petrol', + 'Transmission': 'Manual', + 'Owner_Type': 'First', + 'Location': 'Mumbai', + 'Engine CC': 1200, + 'Power': 85, + 'Seats': 5 } -# Convert to DataFrame -input_data = pd.DataFrame(data) +# Make prediction -# Predict -predicted_mileage = model.predict(input_data) +predicted_mileage, predicted_price = predict(data) -print(f"Predicted Mileage: {predicted_mileage[0]:.2f} Km/L") +print(f"Predicted Mileage (Km/L): {predicted_mileage:.2f}") +print(f"Predicted Price: ₹{predicted_price:,.2f} Lakhs") |