summary refs log tree commit diff stats
path: root/main.py
diff options
context:
space:
mode:
Diffstat (limited to 'main.py')
-rw-r--r-- main.py 23
1 files changed, 13 insertions, 10 deletions
diff --git a/main.py b/main.py
index 6fbb0c2..3355b34 100644
--- a/main.py
+++ b/main.py
@@ -27,7 +27,7 @@ def preprocess_data(df):
df['Seats'] = df['Seats'].fillna(df['Seats'].mode()[0])
# Remove rows with missing target variable
- df = df.dropna(subset=['Mileage Km/L'])
+ df = df.dropna(subset=['Mileage Km/L', 'Price'])
# Remove outliers in 'Kilometers Driven'
q1, q3 = df['Kilometers_Driven'].quantile([0.25, 0.75])
@@ -40,10 +40,10 @@ def preprocess_data(df):
return df
# Train Model
-def train_model(df):
+def train_model(df, target, model_name):
# Features and target
- X = df.drop(columns=['Mileage Km/L', 'Name', 'Price'])
- y = df['Mileage Km/L']
+ X = df.drop(columns=['Mileage Km/L', 'Price', 'Name'])
+ y = df[target]
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
@@ -81,14 +81,15 @@ def train_model(df):
y_pred = best_model.predict(X_test)
# Evaluation
- print("Model Performance:")
+ print(f"Model Performance for {target}:")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}")
print(f"RMSE: {mean_squared_error(y_test, y_pred, squared=False):.2f}")
print(f"R^2: {r2_score(y_test, y_pred):.2f}")
# Save the model
- joblib.dump(best_model, 'model.pkl')
- print("Model saved as 'model.pkl'")
+ model_file = f'{model_name}.pkl'
+ joblib.dump(best_model, model_file)
+ print(f"Model saved as '{model_file}'")
# Main Function
def main():
@@ -98,10 +99,12 @@ def main():
df = preprocess_data(df)
print("Data preprocessing complete.")
- print("Model is training...")
- train_model(df)
- print("Model is trained.")
+ print("Training mileage prediction model...")
+ train_model(df, target='Mileage Km/L', model_name='mileage_predictor')
+
+ print("Training price prediction model...")
+ train_model(df, target='Price', model_name='price_predictor')
if __name__ == "__main__":
main()