Files
AnalysisTesting/salary_analytics/config.py
T
salakojoshua1234_gmail.com 5767f55686 Update project structure and enhance model persistence
- Added new model and scaler files to .gitignore and output directory.
- Updated Dockerfile to create output/models directory.
- Revised README to include instructions for using a .env file for configuration.
- Enhanced config.py to load database credentials from environment variables.
- Implemented model saving functionality in salary_predictor.py for consistent and inconsistent earners.
2025-05-02 00:16:46 +01:00

71 lines
2.7 KiB
Python

"""
Configuration settings for the salary analytics package.
"""
import os
from dotenv import load_dotenv
# Load environment variables through python-dotenv (typically from a .env
# file). This has to run before DB_CONFIG is built further down, because the
# os.getenv() lookups there are evaluated once, at import time.
load_dotenv()
# Base directories
# BASE_DIR is the directory two levels above this file's absolute path,
# i.e. the parent of the directory that contains this module.
_THIS_FILE = os.path.abspath(__file__)
BASE_DIR = os.path.dirname(os.path.dirname(_THIS_FILE))

# Everything the package writes lives under <BASE_DIR>/output.
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
PLOTS_DIR, CSV_DIR, MODEL_DIR = (
    os.path.join(OUTPUT_DIR, _sub) for _sub in ("plots", "csv", "models")
)

# Create directories if they don't exist (import-time side effect).
for _directory in (OUTPUT_DIR, PLOTS_DIR, CSV_DIR, MODEL_DIR):
    os.makedirs(_directory, exist_ok=True)
# Database Configuration
# Each row is (DB_CONFIG key, environment variable, fallback value). The
# fallback is used only when the environment variable is not set.
# NOTE(review): the fallbacks embed a username, a password and a host name in
# source control -- confirm whether these defaults should exist at all.
_DB_SETTINGS = (
    ("user", "DB_USER", "salaryloan"),
    ("password", "DB_PASSWORD", "salaryloan"),
    ("name", "DB_NAME", "salaryloan"),
    ("port", "DB_PORT", "10532"),  # stays a string, same as a value read from the environment
    ("host", "DB_HOST", "dev-data.simbrellang.net"),
)
DB_CONFIG = {
    key: os.getenv(env_var, fallback) for key, env_var, fallback in _DB_SETTINGS
}

# Table Configuration
TABLE_NAME = "customer_account_transaction_hx"
# Salary Keywords
# Lower-case words and phrases, including spaced and unspaced spellings of the
# same term (e.g. "net pay" / "netpay"). Order is preserved from the original.
SALARY_KEYWORDS = [
    "salary",
    "payroll",
    "income",
    "wage",
    "wages",
    "earnings",
    "earning",
    "monthly pay",
    "net pay",
    "gross pay",
    "compensation",
    "monthlypay",
    "netpay",
    "grosspay",
    "remuneration",
    "stipend",
    "allowance",
    "bonus",
    "commission",
    "pension",
    "retirement",
    "dividend",
    "benefits",
    "reimbursement",
    "overtime",
    "incentive",
    "paycheck",
    "paycheque",
    "salary advance",
    "monthly income",
    "income tax refund",
    "employer deposit",
    "payroll deposit",
    "salary credit",
    "income credit",
    "salary transfer",
    "income transfer",
    "salary received",
    "income received",
    "hr deposit",
    "company deposit",
    "employer payment",
    "employee payment",
    "sal",
]
# Model Configuration
# Numeric tuning values read by the rest of the package. How each one is
# applied is not visible from this module; the notes below are assumptions.
MODEL_CONFIG = {
    "cv_threshold": 0.1,  # presumably a coefficient-of-variation cut-off -- TODO confirm
    "min_transactions": 3,  # presumably a minimum transaction count -- TODO confirm
    "threshold": 0.7,  # purpose not evident from this file -- verify against callers
    "high_earner_threshold": 10_000,  # units/currency not stated here -- TODO confirm
}
# File Paths
# Each row is (target directory, OUTPUT_PATHS key, file name). The mapping
# below joins directory and file name; key order follows the rows.
_OUTPUT_FILES = (
    # CSV exports
    (CSV_DIR, "high_earner_details", "high_earner_details.csv"),
    (CSV_DIR, "likely_salary_earner", "likely_salary_earner.csv"),
    (CSV_DIR, "final_table", "final_table.csv"),
    # Plots
    (PLOTS_DIR, "consistent_earners_plot", "consistent_earners_predictions.png"),
    (PLOTS_DIR, "inconsistent_earners_plot", "inconsistent_earners_predictions.png"),
    (PLOTS_DIR, "hypothesis_overlap_plot", "hypothesis_overlap.png"),
    # Persisted models and scalers
    (MODEL_DIR, "consistent_model", "consistent_model.joblib"),
    (MODEL_DIR, "inconsistent_model", "inconsistent_model.joblib"),
    (MODEL_DIR, "consistent_scaler", "consistent_scaler.joblib"),
    (MODEL_DIR, "inconsistent_scaler", "inconsistent_scaler.joblib"),
)
OUTPUT_PATHS = {
    key: os.path.join(directory, filename)
    for directory, key, filename in _OUTPUT_FILES
}