summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- main.py 23
-rw-r--r-- predict.py 48
2 files changed, 46 insertions, 25 deletions
diff --git a/main.py b/main.py
index 6fbb0c2..3355b34 100644
--- a/main.py
+++ b/main.py
@@ -27,7 +27,7 @@ def preprocess_data(df):
df['Seats'] = df['Seats'].fillna(df['Seats'].mode()[0])
# Remove rows with missing target variable
- df = df.dropna(subset=['Mileage Km/L'])
+ df = df.dropna(subset=['Mileage Km/L', 'Price'])
# Remove outliers in 'Kilometers Driven'
q1, q3 = df['Kilometers_Driven'].quantile([0.25, 0.75])
@@ -40,10 +40,10 @@ def preprocess_data(df):
return df
# Train Model
-def train_model(df):
+def train_model(df, target, model_name):
# Features and target
- X = df.drop(columns=['Mileage Km/L', 'Name', 'Price'])
- y = df['Mileage Km/L']
+ X = df.drop(columns=['Mileage Km/L', 'Price', 'Name'])
+ y = df[target]
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
@@ -81,14 +81,15 @@ def train_model(df):
y_pred = best_model.predict(X_test)
# Evaluation
- print("Model Performance:")
+ print(f"Model Performance for {target}:")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}")
print(f"RMSE: {mean_squared_error(y_test, y_pred, squared=False):.2f}")
print(f"R^2: {r2_score(y_test, y_pred):.2f}")
# Save the model
- joblib.dump(best_model, 'model.pkl')
- print("Model saved as 'model.pkl'")
+ model_file = f'{model_name}.pkl'
+ joblib.dump(best_model, model_file)
+ print(f"Model saved as '{model_file}'")
# Main Function
def main():
@@ -98,10 +99,12 @@ def main():
df = preprocess_data(df)
print("Data preprocessing complete.")
- print("Model is training...")
- train_model(df)
- print("Model is trained.")
+ print("Training mileage prediction model...")
+ train_model(df, target='Mileage Km/L', model_name='mileage_predictor')
+
+ print("Training price prediction model...")
+ train_model(df, target='Price', model_name='price_predictor')
if __name__ == "__main__":
main()
diff --git a/predict.py b/predict.py
index 707ef92..d7f3a9b 100644
--- a/predict.py
+++ b/predict.py
@@ -2,25 +2,43 @@ import joblib
import pandas as pd
# Load the model
-model = joblib.load('model.pkl')
+mileage_model = joblib.load('mileage_predictor.pkl')
+price_model = joblib.load('price_predictor.pkl')
+
+# Prepare input data for prediction
+def prepare_input(data_dict):
+ # Prepare a DataFrame from a dictionary of input data
+ input_df = pd.DataFrame([data_dict])
+ input_df['Car_Age'] = 2024 - input_df['Year']
+ input_df.drop(columns=['Year'], inplace=True)
+
+ return input_df
+
+# Make prediction
+def predict(input_data):
+ # Predict mileage and price for a given input.
+ prepared_data = prepare_input(input_data)
+ mileage = mileage_model.predict(prepared_data)[0]
+ price = price_model.predict(prepared_data)[0]
+
+ return mileage, price
# Sample data for prediction
data = {
- 'Kilometers_Driven': [50000],
- 'Fuel_Type': ['Petrol'],
- 'Transmission': ['Manual'],
- 'Owner_Type': ['First'],
- 'Engine CC': [1197],
- 'Power': [82],
- 'Seats': [5],
- 'Car_Age': [6],
- 'Location': ['Mumbai']
+ 'Year': 2018,
+ 'Kilometers_Driven': 30000,
+ 'Fuel_Type': 'Petrol',
+ 'Transmission': 'Manual',
+ 'Owner_Type': 'First',
+ 'Location': 'Mumbai',
+ 'Engine CC': 1200,
+ 'Power': 85,
+ 'Seats': 5
}
-# Convert to DataFrame
-input_data = pd.DataFrame(data)
+# Make prediction
-# Predict
-predicted_mileage = model.predict(input_data)
+predicted_mileage, predicted_price = predict(data)
-print(f"Predicted Mileage: {predicted_mileage[0]:.2f} Km/L")
+print(f"Predicted Mileage (Km/L): {predicted_mileage:.2f}")
+print(f"Predicted Price: ₹{predicted_price:,.2f} Lakhs")