# path: root/edit.py
# blob: d2bf57af5af04221b2a122bf6d68b61c4c3d1e7c
import joblib
import pandas as pd

# Load the three pre-trained predictors, in the order mileage, price, year.
# NOTE(review): joblib.load unpickles its input, so these .pkl files must come
# from a trusted source.
mileage_model, price_model, year_model = (
    joblib.load(pkl_path)
    for pkl_path in (
        'mileage_predictor.pkl',
        'price_predictor.pkl',
        'year_predictor.pkl',
    )
)

# Required columns
REQUIRED_COLUMNS = [
    'Name', 'Manufacturer', 'Location', 'Year', 'Kilometers_Driven',
    'Fuel_Type', 'Transmission', 'Owner_Type', 'Engine CC', 'Power', 'Seats'
]

# Prepare input data for prediction
def prepare_input(df, reference_year=2024):
    """Build the feature frame that is passed to the models.

    Keeps only the columns listed in REQUIRED_COLUMNS, adds a 'Car_Age'
    column computed as ``reference_year - Year`` and removes 'Year'.

    Args:
        df: DataFrame containing at least every column in REQUIRED_COLUMNS.
            Any additional columns are ignored.
        reference_year: Year that the car age is measured from. Defaults to
            2024, the value that used to be hard-coded here.
            NOTE(review): presumably this has to match the year used when the
            models were trained -- confirm before changing it.

    Returns:
        A new DataFrame with the required columns (minus 'Year') followed by
        'Car_Age'. The frame passed in is left untouched.

    Raises:
        KeyError: Raised by pandas if a required column is missing from df.
    """
    # Take an explicit copy: assigning a new column to the result of a column
    # selection is what triggers pandas' SettingWithCopyWarning.
    features = df[REQUIRED_COLUMNS].copy()
    features['Car_Age'] = reference_year - features['Year']
    return features.drop(columns=['Year'])

# Make predictions
def predict_from_csv(input_csv, output_csv):
    """Read a CSV of cars, append model predictions and write the result.

    The rows read from ``input_csv`` are written to ``output_csv`` with three
    extra columns: 'Predicted_Mileage' and 'Predicted_Price' (rounded to two
    decimals) and 'Predicted_Year' (cast to int).

    Args:
        input_csv: Path of the CSV file to read (passed to ``pd.read_csv``).
        output_csv: Path of the CSV file to write (passed to
            ``DataFrame.to_csv``); the index is not written.

    Raises:
        ValueError: If any column listed in REQUIRED_COLUMNS is absent from
            the input file.
    """
    # Load the input CSV file
    data = pd.read_csv(input_csv)

    # Ensure the required columns exist, and report exactly which are absent
    missing = [col for col in REQUIRED_COLUMNS if col not in data.columns]
    if missing:
        raise ValueError(
            f"The input CSV must contain these columns: {REQUIRED_COLUMNS}"
            f" (missing: {missing})"
        )

    # Prepare the model features from a copy so `data` keeps its original
    # columns for the output file.
    prepared_data = prepare_input(data.copy())

    # Perform predictions
    data['Predicted_Mileage'] = mileage_model.predict(prepared_data)
    data['Predicted_Price'] = price_model.predict(prepared_data)
    # NOTE(review): astype(int) truncates toward zero rather than rounding to
    # the nearest year -- confirm this is the intended behaviour.
    data['Predicted_Year'] = year_model.predict(prepared_data).astype(int)

    # Round the numeric predictions to two decimal places in one vectorized
    # call instead of a per-element lambda.
    rounded_cols = ['Predicted_Mileage', 'Predicted_Price']
    data[rounded_cols] = data[rounded_cols].round(2)

    # Save results to a new CSV file
    data.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

# Input and output CSV file paths
input_csv = 'data.csv'  # Change to your input CSV file name
output_csv = 'predicted_data.csv'  # Change to your desired output file name

# Run the prediction only when this file is executed as a script, so that
# importing the module does not read and write CSV files as a side effect.
if __name__ == "__main__":
    predict_from_csv(input_csv, output_csv)