Update project structure and enhance model persistence
- Added new model and scaler files to .gitignore and output directory.
- Updated Dockerfile to create output/models directory.
- Revised README to include instructions for using a .env file for configuration.
- Enhanced config.py to load database credentials from environment variables.
- Implemented model saving functionality in salary_predictor.py for consistent and inconsistent earners.
This commit is contained in:
@@ -3,25 +3,31 @@ Configuration settings for the salary analytics package.
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
# Base directories
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
|
||||
PLOTS_DIR = os.path.join(OUTPUT_DIR, "plots")
|
||||
CSV_DIR = os.path.join(OUTPUT_DIR, "csv")
|
||||
MODEL_DIR = os.path.join(OUTPUT_DIR, "models")
|
||||
|
||||
# Create directories if they don't exist
|
||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
os.makedirs(PLOTS_DIR, exist_ok=True)
|
||||
os.makedirs(CSV_DIR, exist_ok=True)
|
||||
os.makedirs(MODEL_DIR, exist_ok=True)
|
||||
|
||||
# Database Configuration
|
||||
DB_CONFIG = {
|
||||
"user": "salaryloan",
|
||||
"password": "salaryloan",
|
||||
"name": "salaryloan",
|
||||
"port": "10532",
|
||||
"host": "dev-data.simbrellang.net"
|
||||
"user": os.getenv("DB_USER", "salaryloan"), # Default value as fallback
|
||||
"password": os.getenv("DB_PASSWORD", "salaryloan"),
|
||||
"name": os.getenv("DB_NAME", "salaryloan"),
|
||||
"port": os.getenv("DB_PORT", "10532"),
|
||||
"host": os.getenv("DB_HOST", "dev-data.simbrellang.net")
|
||||
}
|
||||
|
||||
# Table Configuration
|
||||
@@ -57,5 +63,9 @@ OUTPUT_PATHS = {
|
||||
"final_table": os.path.join(CSV_DIR, "final_table.csv"),
|
||||
"consistent_earners_plot": os.path.join(PLOTS_DIR, "consistent_earners_predictions.png"),
|
||||
"inconsistent_earners_plot": os.path.join(PLOTS_DIR, "inconsistent_earners_predictions.png"),
|
||||
"hypothesis_overlap_plot": os.path.join(PLOTS_DIR, "hypothesis_overlap.png")
|
||||
"hypothesis_overlap_plot": os.path.join(PLOTS_DIR, "hypothesis_overlap.png"),
|
||||
"consistent_model": os.path.join(MODEL_DIR, "consistent_model.joblib"),
|
||||
"inconsistent_model": os.path.join(MODEL_DIR, "inconsistent_model.joblib"),
|
||||
"consistent_scaler": os.path.join(MODEL_DIR, "consistent_scaler.joblib"),
|
||||
"inconsistent_scaler": os.path.join(MODEL_DIR, "inconsistent_scaler.joblib")
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import matplotlib.pyplot as plt
|
||||
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
|
||||
from joblib import dump
|
||||
from .config import OUTPUT_PATHS
|
||||
|
||||
class SalaryPredictor:
|
||||
@@ -129,6 +130,11 @@ class SalaryPredictor:
|
||||
self.model_cons, self.scaler_cons = self.train_model(X_train_cons, y_train_cons, X_test_cons, y_test_cons)
|
||||
print("Model trained for consistent salary earners.")
|
||||
|
||||
# Save model and scaler
|
||||
dump(self.model_cons, OUTPUT_PATHS['consistent_model'])
|
||||
dump(self.scaler_cons, OUTPUT_PATHS['consistent_scaler'])
|
||||
print("Saved consistent salary earner model and scaler.")
|
||||
|
||||
# Plot predictions
|
||||
X_test_cons_scaled = self.scaler_cons.transform(X_test_cons)
|
||||
y_pred = self.model_cons.predict(X_test_cons_scaled)
|
||||
@@ -147,6 +153,11 @@ class SalaryPredictor:
|
||||
print("\nTraining model for inconsistent salary earners...")
|
||||
self.model_incons, self.scaler_incons = self.train_model(X_train_incons, y_train_incons, X_test_incons, y_test_incons)
|
||||
|
||||
# Save model and scaler
|
||||
dump(self.model_incons, OUTPUT_PATHS['inconsistent_model'])
|
||||
dump(self.scaler_incons, OUTPUT_PATHS['inconsistent_scaler'])
|
||||
print("Saved inconsistent salary earner model and scaler.")
|
||||
|
||||
# Plot predictions
|
||||
X_test_incons_scaled = self.scaler_incons.transform(X_test_incons)
|
||||
y_pred = self.model_incons.predict(X_test_incons_scaled)
|
||||
|
||||
Reference in New Issue
Block a user