""" Configuration settings for the salary analytics package. """ import os from dotenv import load_dotenv import random # Load environment variables load_dotenv() # Base directories BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) OUTPUT_DIR = os.path.join(BASE_DIR, "output") PLOTS_DIR = os.path.join(OUTPUT_DIR, "plots") CSV_DIR = os.path.join(OUTPUT_DIR, "csv") MODEL_DIR = os.path.join(OUTPUT_DIR, "models") # Create directories if they don't exist os.makedirs(OUTPUT_DIR, exist_ok=True) os.makedirs(PLOTS_DIR, exist_ok=True) os.makedirs(CSV_DIR, exist_ok=True) os.makedirs(MODEL_DIR, exist_ok=True) # Database Configuration DB_CONFIG = { "user": os.getenv("DB_USER"), # Default value as fallback "password": os.getenv("DB_PASSWORD"), "name": os.getenv("DB_NAME"), "port": os.getenv("DB_PORT"), "host": os.getenv("DB_HOST") } # SQLAlchemy Configuration SQLALCHEMY_DATABASE_URI = ( f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@" f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}" ) SQLALCHEMY_TRACK_MODIFICATIONS = False # Table Configuration TABLE_NAME = "customer_account_transaction_hx" BATCH_RESULTS_TABLE = "salary_analytics_batch_results" # Salary Keywords SALARY_KEYWORDS = [ "salary", "payroll", "income", "wage", "wages", "earnings", "earning", "monthly pay", "net pay", "gross pay", "compensation", "monthlypay", "netpay", "grosspay", "remuneration", "stipend", "allowance", "bonus", "commission", "pension", "retirement", "dividend", "benefits", "reimbursement", "overtime", "incentive", "paycheck", "paycheque", "salary advance", "monthly income", "income tax refund", "employer deposit", "payroll deposit", "salary credit", "income credit", "salary transfer", "income transfer", "salary received", "income received", "hr deposit", "company deposit", "employer payment", "employee payment", "sal", ] # Model Configuration MODEL_CONFIG = { "cv_threshold": 0.10, "min_transactions": 3, "threshold": 0.7, "high_earner_threshold": 10000 } # 
# File Paths
# Every artefact is written below CSV_DIR, PLOTS_DIR or MODEL_DIR, which are
# defined earlier in this module.
OUTPUT_PATHS = {
    "high_earner_details": os.path.join(CSV_DIR, "high_earner_details.csv"),
    "likely_salary_earner": os.path.join(CSV_DIR, "likely_salary_earner.csv"),
    "final_table": os.path.join(CSV_DIR, "final_table.csv"),
    "consistent_earners_plot": os.path.join(PLOTS_DIR, "consistent_earners_predictions.png"),
    "inconsistent_earners_plot": os.path.join(PLOTS_DIR, "inconsistent_earners_predictions.png"),
    "hypothesis_overlap_plot": os.path.join(PLOTS_DIR, "hypothesis_overlap.png"),
    "consistent_model": os.path.join(MODEL_DIR, "consistent_model.joblib"),
    "inconsistent_model": os.path.join(MODEL_DIR, "inconsistent_model.joblib"),
    "consistent_scaler": os.path.join(MODEL_DIR, "consistent_scaler.joblib"),
    "inconsistent_scaler": os.path.join(MODEL_DIR, "inconsistent_scaler.joblib"),
}

# Simbrella service location; both values can be overridden through the
# environment and fall back to the defaults given here.
SIMBRELLA_BASE_URL = os.getenv("SIMBRELLA_BASE_URL", "http://127.0.0.1:6337")
SIMBRELLA_ENDPOINT_RAC_CHECKS = os.getenv("SIMBRELLA_ENDPOINT_RAC_CHECKS", "api/rac-check")

# Salary Detect Endpoint Config
SALARY_DETECT_URL = "http://www.simbrellang.net:5000/autocall/analytic-salary-detect"
SALARY_DETECT_HEADERS = {
    # "*/*" is the wildcard media range ("accept anything"). The earlier
    # value "/" was not a syntactically valid Accept media range.
    "accept": "*/*",
    "Content-Type": "application/json",
}
# NOTE(review): these look like fixed sample requests for the salary-detect
# endpoint; confirm the identifiers are test data before sharing this file.
SALARY_DETECT_PAYLOADS = [
    {"salaryDate": "2022-01-01", "customerId": "CN621868", "accountId": "2017821799", "salaryAmount": 200000},
    {"customerId": "CUC2268333011", "accountId": "ACC8116931898", "salaryDate": "2025-07-08", "salaryAmount": 200000},
    {"customerId": "CUC2163677018", "accountId": "ACC8118539484", "salaryDate": "2025-07-08", "salaryAmount": 200000},
    {"customerId": "CUC1968062010", "accountId": "ACC8115473093", "salaryDate": "2025-07-08", "salaryAmount": 200000},
    {"customerId": "CUC1302360013", "accountId": "ACC8117628489", "salaryDate": "2025-07-08", "salaryAmount": 200000},
]


def get_random_salary_payload() -> list:
    """Return a one-element list holding a randomly chosen salary payload.

    The payload is picked from ``SALARY_DETECT_PAYLOADS`` with
    ``random.choice``.

    Returns:
        A list containing a single dict. The dict is a copy, so callers may
        modify it without altering ``SALARY_DETECT_PAYLOADS``.
    """
    # A shallow copy is enough: every payload value is a str or an int.
    return [dict(random.choice(SALARY_DETECT_PAYLOADS))]