blob: d2bf57af5af04221b2a122bf6d68b61c4c3d1e7c (
plain) (
tree)
|
|
import joblib
import pandas as pd
# Load the three pre-trained predictors, in the order they are unpacked below.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only point these paths at model files you trust.
mileage_model, price_model, year_model = (
    joblib.load(model_path)
    for model_path in (
        'mileage_predictor.pkl',
        'price_predictor.pkl',
        'year_predictor.pkl',
    )
)
# Required columns
REQUIRED_COLUMNS = [
    'Name', 'Manufacturer', 'Location', 'Year', 'Kilometers_Driven',
    'Fuel_Type', 'Transmission', 'Owner_Type', 'Engine CC', 'Power', 'Seats'
]


# Prepare input data for prediction
def prepare_input(df, reference_year=2024):
    """Build the feature frame passed to the models from a raw listing frame.

    Keeps only ``REQUIRED_COLUMNS``, adds ``Car_Age`` computed as
    ``reference_year - Year`` and removes ``Year``. The caller's frame is
    never modified; a new DataFrame is returned.

    Args:
        df: DataFrame containing at least every column in
            ``REQUIRED_COLUMNS``; extra columns are ignored.
        reference_year: Year that car age is measured from. Defaults to
            2024, the value that used to be hard-coded.
            NOTE(review): presumably this must match the year used when
            the models were trained -- confirm before changing it.

    Returns:
        A new DataFrame with the required columns (minus ``Year``) in their
        ``REQUIRED_COLUMNS`` order, followed by ``Car_Age``.

    Raises:
        KeyError: If a required column is missing (raised by pandas).
    """
    # Take an explicit copy: assigning into a bare column selection is the
    # chained-assignment pattern that triggers SettingWithCopyWarning.
    features = df[REQUIRED_COLUMNS].copy()

    # Add 'Car_Age' and drop 'Year'
    features['Car_Age'] = reference_year - features['Year']
    return features.drop(columns=['Year'])
# Make predictions
def predict_from_csv(input_csv, output_csv):
    """Read cars from ``input_csv``, add model predictions, write ``output_csv``.

    Every input row and column is kept, and three columns are appended:
    ``Predicted_Mileage`` and ``Predicted_Price`` (rounded to two decimals)
    and ``Predicted_Year`` (rounded to a whole year and stored as an int).

    Args:
        input_csv: Location of the CSV to score, passed to
            ``pandas.read_csv``. It must contain every column listed in
            ``REQUIRED_COLUMNS``.
        output_csv: Destination passed to ``DataFrame.to_csv``; written
            without the index column.

    Raises:
        ValueError: If any required column is absent from the input file.
    """
    # Load the input CSV file
    data = pd.read_csv(input_csv)

    # Ensure the required columns exist, and name the absent ones so the
    # user does not have to diff the CSV header by hand.
    missing = [col for col in REQUIRED_COLUMNS if col not in data.columns]
    if missing:
        raise ValueError(
            f"The input CSV must contain these columns: {REQUIRED_COLUMNS} "
            f"(missing: {missing})"
        )

    # Prepare the input data. prepare_input selects the feature columns into
    # a new frame, so `data` keeps its original columns for the output file.
    prepared_data = prepare_input(data)

    # Perform predictions
    data['Predicted_Mileage'] = mileage_model.predict(prepared_data)
    data['Predicted_Price'] = price_model.predict(prepared_data)
    # Round before casting: a bare astype(int) truncates, turning a
    # predicted 2015.9 into 2015.
    data['Predicted_Year'] = (
        year_model.predict(prepared_data).round().astype(int)
    )

    # Format numeric predictions to two decimal places (vectorized)
    data['Predicted_Mileage'] = data['Predicted_Mileage'].round(2)
    data['Predicted_Price'] = data['Predicted_Price'].round(2)

    # Save results to a new CSV file
    data.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")
# Input and output CSV file paths
input_csv = 'data.csv'  # Change to your input CSV file name
output_csv = 'predicted_data.csv'  # Change to your desired output file name

# Run the prediction only when this file is executed as a script, so that
# importing the module (e.g. to reuse predict_from_csv) does not read the
# input CSV or write the predictions file as a side effect.
if __name__ == "__main__":
    predict_from_csv(input_csv, output_csv)
|