From 6de9583aafd69827e4370089fea0b5924d70ef26 Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Sun, 7 Sep 2025 23:07:40 +0100 Subject: [PATCH 01/19] [add]: code refractoring and cleanup --- Dockerfile | 13 ++++++---- {salary_analytics/app => app}/__init__.py | 11 ++++++++- .../analytics/integrations}/rac_check.py | 6 ++--- .../analytics/integrations}/salary_detect.py | 7 ++---- app/analytics/services/__init__.py | 24 +++++++++++++++++++ .../services}/consistent_amount_analyzer.py | 0 .../analytics/services}/data_loader.py | 3 +-- .../analytics/services}/keyword_analyzer.py | 0 .../analytics/services}/main.py | 3 +-- .../services}/salary_earner_analyzer.py | 9 +------ .../analytics/services}/salary_predictor.py | 0 .../services}/transaction_type_analyzer.py | 0 {salary_analytics => app}/api.py | 11 +++------ .../app => app/commands}/commands.py | 4 ++-- {salary_analytics => app}/config.py | 0 {salary_analytics => app}/db_operations.py | 4 +--- {salary_analytics/app => app}/extensions.py | 0 .../models/raw_transaction.py | 2 +- app/utils/logger.py | 13 ++++++++++ docker-compose.yml | 13 +++++----- migrations/env.py | 2 +- requirements.txt | 4 +++- run.py | 4 ---- salary_analytics/__init__.py | 6 ----- wsgi.py | 7 ++++++ 25 files changed, 86 insertions(+), 60 deletions(-) rename {salary_analytics/app => app}/__init__.py (65%) rename {salary_analytics => app/analytics/integrations}/rac_check.py (90%) rename {salary_analytics => app/analytics/integrations}/salary_detect.py (85%) create mode 100644 app/analytics/services/__init__.py rename {salary_analytics => app/analytics/services}/consistent_amount_analyzer.py (100%) rename {salary_analytics => app/analytics/services}/data_loader.py (99%) rename {salary_analytics => app/analytics/services}/keyword_analyzer.py (100%) rename {salary_analytics => app/analytics/services}/main.py (99%) rename {salary_analytics => app/analytics/services}/salary_earner_analyzer.py (97%) rename 
{salary_analytics => app/analytics/services}/salary_predictor.py (100%) rename {salary_analytics => app/analytics/services}/transaction_type_analyzer.py (100%) rename {salary_analytics => app}/api.py (98%) rename {salary_analytics/app => app/commands}/commands.py (94%) rename {salary_analytics => app}/config.py (100%) rename {salary_analytics => app}/db_operations.py (98%) rename {salary_analytics/app => app}/extensions.py (100%) rename salary_analytics/app/models.py => app/models/raw_transaction.py (91%) create mode 100644 app/utils/logger.py delete mode 100644 run.py delete mode 100644 salary_analytics/__init__.py create mode 100644 wsgi.py diff --git a/Dockerfile b/Dockerfile index 2109548..5b599cb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,20 +1,23 @@ FROM python:3.11-slim +# Set the working directory in the container WORKDIR /app +# Copy the current directory contents into the container at /app +COPY . /app + RUN apt-get update && apt-get install -y libpq-dev && rm -rf /var/lib/apt/lists/* COPY requirements.txt . RUN pip install -r requirements.txt -COPY salary_analytics/ ./salary_analytics/ RUN mkdir -p output/csv output/plots output/models -ENV PYTHONPATH=/app -ENV HOST=0.0.0.0 -ENV PORT=8000 + +ENV FLASK_APP=wsgi.py +ENV FLASK_RUN_HOST=0.0.0.0 EXPOSE 8000 -CMD ["uvicorn", "salary_analytics.api:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] \ No newline at end of file +CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:8000", "wsgi:wsgi_app"] \ No newline at end of file diff --git a/salary_analytics/app/__init__.py b/app/__init__.py similarity index 65% rename from salary_analytics/app/__init__.py rename to app/__init__.py index b286361..90bbc1a 100644 --- a/salary_analytics/app/__init__.py +++ b/app/__init__.py @@ -2,6 +2,15 @@ from flask import Flask import os from .extensions import db, migrate + +""" +Salary Analytics Package +A package for analyzing and predicting salary patterns from transaction data. 
+""" + +__version__ = "0.1.0" + + def create_app(): app = Flask(__name__) app.config.from_object('salary_analytics.config') @@ -11,7 +20,7 @@ def create_app(): migrate.init_app(app, db) # Register blueprints or CLI commands here if needed - from . import commands + from .commands import commands app.cli.add_command(commands.upload_xls_cli) return app \ No newline at end of file diff --git a/salary_analytics/rac_check.py b/app/analytics/integrations/rac_check.py similarity index 90% rename from salary_analytics/rac_check.py rename to app/analytics/integrations/rac_check.py index d8f16d3..86c21ff 100644 --- a/salary_analytics/rac_check.py +++ b/app/analytics/integrations/rac_check.py @@ -1,10 +1,8 @@ from django.conf import settings import httpx import json -from salary_analytics.config import SIMBRELLA_BASE_URL, SIMBRELLA_ENDPOINT_RAC_CHECKS -import logging - -logger = logging.getLogger(__name__) +from app.config import SIMBRELLA_BASE_URL, SIMBRELLA_ENDPOINT_RAC_CHECKS +from app.utils.logger import logger class SimbrellaIntegration: BASE_URL = SIMBRELLA_BASE_URL diff --git a/salary_analytics/salary_detect.py b/app/analytics/integrations/salary_detect.py similarity index 85% rename from salary_analytics/salary_detect.py rename to app/analytics/integrations/salary_detect.py index 0824ae2..684abcb 100644 --- a/salary_analytics/salary_detect.py +++ b/app/analytics/integrations/salary_detect.py @@ -1,11 +1,8 @@ import time -import logging import threading import requests -from .config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) +from ...config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload +from app.utils.logger import logger class SalaryDetect: def __init__(self): diff --git a/app/analytics/services/__init__.py b/app/analytics/services/__init__.py new file mode 100644 index 0000000..0f844f8 --- /dev/null +++ 
b/app/analytics/services/__init__.py @@ -0,0 +1,24 @@ +from .main import SalaryAnalyticsPipeline +from .data_loader import DataLoader +from .keyword_analyzer import KeywordAnalyzer +from .consistent_amount_analyzer import ConsistentAmountAnalyzer +from .transaction_type_analyzer import TransactionTypeAnalyzer +from .salary_earner_analyzer import SalaryEarnerAnalyzer +from .salary_predictor import SalaryPredictor + + +""" +Salary Analytics Package +A package for analyzing and predicting salary patterns from transaction data. +""" + +__version__ = "0.1.0" +__all__ = [ + "SalaryAnalyticsPipeline", + "DataLoader", + "KeywordAnalyzer", + "ConsistentAmountAnalyzer", + "TransactionTypeAnalyzer", + "SalaryEarnerAnalyzer", + "SalaryPredictor" +] diff --git a/salary_analytics/consistent_amount_analyzer.py b/app/analytics/services/consistent_amount_analyzer.py similarity index 100% rename from salary_analytics/consistent_amount_analyzer.py rename to app/analytics/services/consistent_amount_analyzer.py diff --git a/salary_analytics/data_loader.py b/app/analytics/services/data_loader.py similarity index 99% rename from salary_analytics/data_loader.py rename to app/analytics/services/data_loader.py index ec2da46..e11b0e4 100644 --- a/salary_analytics/data_loader.py +++ b/app/analytics/services/data_loader.py @@ -8,8 +8,7 @@ from datetime import datetime import logging import os from .config import DB_CONFIG, TABLE_NAME - -logger = logging.getLogger(__name__) +from app.utils.logger import logger class DataLoader: def __init__(self): diff --git a/salary_analytics/keyword_analyzer.py b/app/analytics/services/keyword_analyzer.py similarity index 100% rename from salary_analytics/keyword_analyzer.py rename to app/analytics/services/keyword_analyzer.py diff --git a/salary_analytics/main.py b/app/analytics/services/main.py similarity index 99% rename from salary_analytics/main.py rename to app/analytics/services/main.py index e2781e8..87482c7 100644 --- a/salary_analytics/main.py +++ 
b/app/analytics/services/main.py @@ -9,8 +9,7 @@ from .consistent_amount_analyzer import ConsistentAmountAnalyzer from .transaction_type_analyzer import TransactionTypeAnalyzer from .salary_earner_analyzer import SalaryEarnerAnalyzer from .salary_predictor import SalaryPredictor - -logger = logging.getLogger(__name__) +from app.utils.logger import logger class SalaryAnalyticsPipeline: def __init__(self): diff --git a/salary_analytics/salary_earner_analyzer.py b/app/analytics/services/salary_earner_analyzer.py similarity index 97% rename from salary_analytics/salary_earner_analyzer.py rename to app/analytics/services/salary_earner_analyzer.py index b32c995..f17d4ce 100644 --- a/salary_analytics/salary_earner_analyzer.py +++ b/app/analytics/services/salary_earner_analyzer.py @@ -6,15 +6,8 @@ import pandas as pd import matplotlib.pyplot as plt from matplotlib_venn import venn3 from datetime import datetime, timedelta -import logging from .config import MODEL_CONFIG, OUTPUT_PATHS - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) +from app.utils.logger import logger class SalaryEarnerAnalyzer: def __init__(self, df): diff --git a/salary_analytics/salary_predictor.py b/app/analytics/services/salary_predictor.py similarity index 100% rename from salary_analytics/salary_predictor.py rename to app/analytics/services/salary_predictor.py diff --git a/salary_analytics/transaction_type_analyzer.py b/app/analytics/services/transaction_type_analyzer.py similarity index 100% rename from salary_analytics/transaction_type_analyzer.py rename to app/analytics/services/transaction_type_analyzer.py diff --git a/salary_analytics/api.py b/app/api.py similarity index 98% rename from salary_analytics/api.py rename to app/api.py index cb41a30..feb1fde 100644 --- a/salary_analytics/api.py +++ b/app/api.py @@ -17,20 +17,15 @@ from sqlalchemy import text, Table, Column, 
Integer, String, Float, DateTime, Me import numpy as np import warnings import time -from .main import SalaryAnalyticsPipeline +from .analytics.services.main import SalaryAnalyticsPipeline from .config import OUTPUT_PATHS, TABLE_NAME, BATCH_RESULTS_TABLE from .data_loader import DataLoader from .salary_predictor import SalaryPredictor from .salary_earner_analyzer import SalaryEarnerAnalyzer from .db_operations import DatabaseOperations -from .salary_detect import SalaryDetect +from .analytics.integrations.salary_detect import SalaryDetect +from app.utils.logger import logger -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) # Suppress warnings warnings.filterwarnings('ignore', category=RuntimeWarning, module='numpy') diff --git a/salary_analytics/app/commands.py b/app/commands/commands.py similarity index 94% rename from salary_analytics/app/commands.py rename to app/commands/commands.py index b90b73e..d055a5b 100644 --- a/salary_analytics/app/commands.py +++ b/app/commands/commands.py @@ -2,8 +2,8 @@ import click import pandas as pd from datetime import datetime from flask.cli import with_appcontext -from salary_analytics.app.extensions import db -from salary_analytics.app.models import RawTransaction +from app.extensions import db +from app.models import RawTransaction @click.group() def commands(): diff --git a/salary_analytics/config.py b/app/config.py similarity index 100% rename from salary_analytics/config.py rename to app/config.py diff --git a/salary_analytics/db_operations.py b/app/db_operations.py similarity index 98% rename from salary_analytics/db_operations.py rename to app/db_operations.py index 9cb317e..f3abf4d 100644 --- a/salary_analytics/db_operations.py +++ b/app/db_operations.py @@ -2,12 +2,10 @@ Database operations module for salary analytics. 
""" -import logging from sqlalchemy import text from .config import BATCH_RESULTS_TABLE from datetime import datetime - -logger = logging.getLogger(__name__) +from app.utils.logger import logger class DatabaseOperations: def __init__(self, engine): diff --git a/salary_analytics/app/extensions.py b/app/extensions.py similarity index 100% rename from salary_analytics/app/extensions.py rename to app/extensions.py diff --git a/salary_analytics/app/models.py b/app/models/raw_transaction.py similarity index 91% rename from salary_analytics/app/models.py rename to app/models/raw_transaction.py index 72615f9..a34b7d8 100644 --- a/salary_analytics/app/models.py +++ b/app/models/raw_transaction.py @@ -1,4 +1,4 @@ -from .extensions import db +from app.extensions import db class RawTransaction(db.Model): __tablename__ = 'analytics_raw_transactions' diff --git a/app/utils/logger.py b/app/utils/logger.py new file mode 100644 index 0000000..4ee0e98 --- /dev/null +++ b/app/utils/logger.py @@ -0,0 +1,13 @@ +import logging + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + handlers=[ + # logging.StreamHandler(), + logging.FileHandler("app.log", mode='a') # Log to file + ] +) + +logger = logging.getLogger("DetectionService") diff --git a/docker-compose.yml b/docker-compose.yml index b5a05e3..2f701b2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,14 +3,13 @@ services: build: . 
ports: - "${APP_PORT:-4800}:8000" - volumes: - - ./output:/app/output environment: - - DB_USER=salaryloan - - DB_PASSWORD=salaryloan - - DB_NAME=salaryloan - - DB_PORT=10532 - - DB_HOST=dev-data.simbrellang.net + - FLASK_APP=${FLASK_APP} + - FLASK_ENV=${FLASK_ENV} + - DATABASE_URL=postgresql+psycopg2://${DATABASE_USER}:${DATABASE_PASSWORD}@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_NAME} + volumes: + - .:/app + - ./output:/app/output restart: unless-stopped networks: - salary_network diff --git a/migrations/env.py b/migrations/env.py index 79a3e8d..fa86225 100644 --- a/migrations/env.py +++ b/migrations/env.py @@ -19,7 +19,7 @@ if config.config_file_name is not None: # from myapp import Base # target_metadata = Base.metadata from flask import current_app -from salary_analytics.app.extensions import db +from app.extensions import db config.set_main_option('sqlalchemy.url', current_app.config.get('SQLALCHEMY_DATABASE_URI')) diff --git a/requirements.txt b/requirements.txt index 68fa9fc..4937b16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,4 +17,6 @@ openpyxl>=3.0.10 Flask>=2.0.0 Flask-SQLAlchemy>=3.0.0 Flask-Migrate>=4.0.0 -alembic>=1.8.0 \ No newline at end of file +alembic>=1.8.0 +requests>=2.26.0 +gunicorn \ No newline at end of file diff --git a/run.py b/run.py deleted file mode 100644 index 77e5bde..0000000 --- a/run.py +++ /dev/null @@ -1,4 +0,0 @@ -import os -from salary_analytics.app import create_app - -app = create_app() \ No newline at end of file diff --git a/salary_analytics/__init__.py b/salary_analytics/__init__.py deleted file mode 100644 index 1825412..0000000 --- a/salary_analytics/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -""" -Salary Analytics Package -A package for analyzing and predicting salary patterns from transaction data. 
-""" - -__version__ = "0.1.0" \ No newline at end of file diff --git a/wsgi.py b/wsgi.py new file mode 100644 index 0000000..49f70e1 --- /dev/null +++ b/wsgi.py @@ -0,0 +1,7 @@ +from app import create_app + +app = create_app() + +if __name__ != "__main__": + # Expose WSGI app instance for Gunicorn + wsgi_app = app -- 2.34.1 From d9b6a7e92ebeaedaf976150d631bda8b751d7d39 Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Sun, 7 Sep 2025 23:44:25 +0100 Subject: [PATCH 02/19] [add]: refactoring and cleanup --- app/__init__.py | 2 +- app/{ => analytics}/api.py | 64 +++++++-------- app/{ => analytics}/commands/commands.py | 0 app/analytics/helpers/response_helpers.py | 17 ++++ app/analytics/middlewares/middleware.py | 11 +++ app/config.py | 3 +- app/models/batch_results.py | 97 +++++++++++++++++++++++ app/{ => models}/db_operations.py | 2 +- 8 files changed, 161 insertions(+), 35 deletions(-) rename app/{ => analytics}/api.py (95%) rename app/{ => analytics}/commands/commands.py (100%) create mode 100644 app/analytics/helpers/response_helpers.py create mode 100644 app/analytics/middlewares/middleware.py create mode 100644 app/models/batch_results.py rename app/{ => models}/db_operations.py (98%) diff --git a/app/__init__.py b/app/__init__.py index 90bbc1a..32f1b84 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -20,7 +20,7 @@ def create_app(): migrate.init_app(app, db) # Register blueprints or CLI commands here if needed - from .commands import commands + from app.analytics.commands import commands app.cli.add_command(commands.upload_xls_cli) return app \ No newline at end of file diff --git a/app/api.py b/app/analytics/api.py similarity index 95% rename from app/api.py rename to app/analytics/api.py index feb1fde..ab069e3 100644 --- a/app/api.py +++ b/app/analytics/api.py @@ -4,27 +4,23 @@ FastAPI application for salary analytics. 
from fastapi import FastAPI, HTTPException, BackgroundTasks, UploadFile, File, Depends from fastapi.responses import FileResponse -from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel -from typing import Optional, Dict, List, Union import os import socket -import logging +from typing import Optional, List, Union import pandas as pd import tempfile from datetime import datetime -from sqlalchemy import text, Table, Column, Integer, String, Float, DateTime, MetaData -import numpy as np +from sqlalchemy import text import warnings import time -from .analytics.services.main import SalaryAnalyticsPipeline -from .config import OUTPUT_PATHS, TABLE_NAME, BATCH_RESULTS_TABLE -from .data_loader import DataLoader -from .salary_predictor import SalaryPredictor -from .salary_earner_analyzer import SalaryEarnerAnalyzer -from .db_operations import DatabaseOperations -from .analytics.integrations.salary_detect import SalaryDetect +from app.analytics.services.main import SalaryAnalyticsPipeline +from app.config import OUTPUT_PATHS, TABLE_NAME +from app.analytics.services.data_loader import DataLoader +from app.analytics.middlewares.middleware import add_middlewares +from app.models.db_operations import DatabaseOperations +from app.analytics.integrations.salary_detect import SalaryDetect from app.utils.logger import logger +from app.analytics.helpers.response_helpers import AnalysisResponse, BatchResponse # Suppress warnings @@ -38,13 +34,7 @@ app = FastAPI( ) # Add CORS middleware -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], # Allows all origins - allow_credentials=True, - allow_methods=["*"], # Allows all methods - allow_headers=["*"], # Allows all headers -) +add_middlewares(app) # Global pipeline instance pipeline = SalaryAnalyticsPipeline() @@ -57,19 +47,7 @@ salary_earner_analyzer = None salary_detect = SalaryDetect() -class AnalysisResponse(BaseModel): - """Response model for analysis endpoints.""" - message: str - data: 
Optional[Dict] = None - file_path: Optional[str] = None -class BatchResponse(BaseModel): - """Response model for batch processing.""" - batch_number: int - total_batches: int - processed_rows: int - results_path: str - message: str def check_data_loaded(): """Check if data is loaded before running analytics.""" @@ -103,6 +81,8 @@ async def startup_event(): logger.error(f"Error during startup: {str(e)}") raise + + @app.get("/") async def root(): """Root endpoint.""" @@ -112,6 +92,8 @@ async def root(): logger.info(f"Root endpoint completed in {time.time() - start_time:.2f} seconds") return response + + @app.get("/health") async def health_check(): """Health check endpoint.""" @@ -121,6 +103,8 @@ async def health_check(): logger.info(f"Health check completed in {time.time() - start_time:.2f} seconds") return response + + @app.post("/analyze/keyword", response_model=AnalysisResponse) async def analyze_keyword(): """Run keyword-based salary transaction analysis.""" @@ -141,6 +125,8 @@ async def analyze_keyword(): logger.info(f"Keyword analysis endpoint failed after {time.time() - start_time:.2f} seconds") raise HTTPException(status_code=500, detail=str(e)) + + @app.post("/analyze/consistent-amount", response_model=AnalysisResponse) async def analyze_consistent_amount(): """Run consistent amount transaction analysis.""" @@ -161,6 +147,8 @@ async def analyze_consistent_amount(): logger.info(f"Consistent amount analysis endpoint failed after {time.time() - start_time:.2f} seconds") raise HTTPException(status_code=500, detail=str(e)) + + @app.post("/analyze/transaction-type", response_model=AnalysisResponse) async def analyze_transaction_type(): """Run transaction type analysis.""" @@ -181,6 +169,8 @@ async def analyze_transaction_type(): logger.info(f"Transaction type analysis endpoint failed after {time.time() - start_time:.2f} seconds") raise HTTPException(status_code=500, detail=str(e)) + + @app.post("/generate/reports", response_model=AnalysisResponse) async def 
generate_reports(background_tasks: BackgroundTasks): """Generate salary earner reports.""" @@ -205,6 +195,8 @@ async def generate_reports(background_tasks: BackgroundTasks): logger.info(f"Report generation endpoint failed after {time.time() - start_time:.2f} seconds") raise HTTPException(status_code=500, detail=str(e)) + + @app.post("/train/models", response_model=AnalysisResponse) async def train_models(): """Train salary prediction models.""" @@ -224,6 +216,8 @@ async def train_models(): logger.info(f"Model training endpoint failed after {time.time() - start_time:.2f} seconds") raise HTTPException(status_code=500, detail=str(e)) + + @app.get("/download/{report_type}") async def download_report(report_type: str): """Download generated reports.""" @@ -264,6 +258,8 @@ async def download_report(report_type: str): logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") raise HTTPException(status_code=500, detail=str(e)) + + @app.post("/run/pipeline", response_model=AnalysisResponse) async def run_full_pipeline(): """Run the complete salary analytics pipeline.""" @@ -288,6 +284,8 @@ async def run_full_pipeline(): logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds") raise HTTPException(status_code=500, detail=str(e)) + + @app.post("/load-data") async def load_data(source: str = "db", file: Optional[UploadFile] = File(None)): """ @@ -351,6 +349,8 @@ async def get_file_if_csv(source: str, file: Optional[UploadFile] = File(None)): raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") return file + + @app.post("/run/streaming-pipeline", response_model=List[BatchResponse]) async def run_streaming_pipeline( source: str = "db", diff --git a/app/commands/commands.py b/app/analytics/commands/commands.py similarity index 100% rename from app/commands/commands.py rename to app/analytics/commands/commands.py diff --git a/app/analytics/helpers/response_helpers.py 
b/app/analytics/helpers/response_helpers.py new file mode 100644 index 0000000..8485611 --- /dev/null +++ b/app/analytics/helpers/response_helpers.py @@ -0,0 +1,17 @@ +from typing import Optional, Dict, List, Union +from pydantic import BaseModel + + +class AnalysisResponse(BaseModel): + """Response model for analysis endpoints.""" + message: str + data: Optional[Dict] = None + file_path: Optional[str] = None + +class BatchResponse(BaseModel): + """Response model for batch processing.""" + batch_number: int + total_batches: int + processed_rows: int + results_path: str + message: str \ No newline at end of file diff --git a/app/analytics/middlewares/middleware.py b/app/analytics/middlewares/middleware.py new file mode 100644 index 0000000..afd95a3 --- /dev/null +++ b/app/analytics/middlewares/middleware.py @@ -0,0 +1,11 @@ +from fastapi.middleware.cors import CORSMiddleware +from fastapi import FastAPI + +def add_middlewares(app: FastAPI): + app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) diff --git a/app/config.py b/app/config.py index fccdb37..b2f671a 100644 --- a/app/config.py +++ b/app/config.py @@ -22,9 +22,10 @@ os.makedirs(PLOTS_DIR, exist_ok=True) os.makedirs(CSV_DIR, exist_ok=True) os.makedirs(MODEL_DIR, exist_ok=True) + # Database Configuration DB_CONFIG = { - "user": os.getenv("DB_USER"), # Default value as fallback + "user": os.getenv("DB_USER"), "password": os.getenv("DB_PASSWORD"), "name": os.getenv("DB_NAME"), "port": os.getenv("DB_PORT"), diff --git a/app/models/batch_results.py b/app/models/batch_results.py new file mode 100644 index 0000000..e65cce0 --- /dev/null +++ b/app/models/batch_results.py @@ -0,0 +1,97 @@ +from sqlalchemy import Column, Integer, String, DateTime, Numeric, Boolean, func +from sqlalchemy.orm import declarative_base, Session +from datetime import datetime +from app.utils.logger import logger +from app.extensions import db + + +class 
BatchResult(db.Model): + __tablename__ = "salary_analytics_batch_results" + + id = Column(Integer, primary_key=True, autoincrement=True) + batch_number = Column(Integer, nullable=False) + total_batches = Column(Integer, nullable=False) + processed_at = Column(DateTime, default=datetime.utcnow) + accountid = Column(String, nullable=False) + num_months = Column(Integer) + least_inflow_6m = Column(Numeric) + avg_monthly_salary = Column(Numeric) + estimated_next_amount = Column(Numeric) + estimated_next_date = Column(DateTime) + is_45day_salary = Column(Boolean, default=False) + is_2months_salary = Column(Boolean, default=False) + status = Column(String, default="success") + + + @classmethod + def save_batch(cls, session: Session, batch_number, total_batches, results_df, status="success"): + """Save batch results into DB using ORM bulk insert.""" + try: + results_df["batch_number"] = batch_number + results_df["total_batches"] = total_batches + results_df["processed_at"] = datetime.utcnow() + results_df["status"] = status + + # Normalize boolean columns + results_df["is_45day_salary"] = results_df.get("45daysalary", False) + results_df["is_2months_salary"] = results_df.get("2monthssalary", False) + + # Convert to list of ORM objects + records = [ + cls(**row) + for row in results_df.to_dict("records") + ] + + session.bulk_save_objects(records) + session.commit() + logger.info(f"Saved batch {batch_number} successfully.") + return True + except Exception as e: + session.rollback() + logger.error(f"Error saving batch {batch_number}: {str(e)}") + return False + + @classmethod + def get_batch_status(cls, session: Session, batch_number: int): + """Return summary info about one batch.""" + try: + result = ( + session.query( + cls.batch_number, + cls.total_batches, + cls.processed_at, + func.count().label("total_records"), + func.sum(func.case((cls.status == "success", 1), else_=0)).label("successful_records"), + func.sum(func.case((cls.status == "error", 1), 
else_=0)).label("failed_records"), + ) + .filter(cls.batch_number == batch_number) + .group_by(cls.batch_number, cls.total_batches, cls.processed_at) + .order_by(cls.processed_at.desc()) + .first() + ) + return dict(result._mapping) if result else None + except Exception as e: + logger.error(f"Error fetching batch {batch_number} status: {str(e)}") + return None + + @classmethod + def get_all_batches(cls, session: Session): + """Return summaries for all batches.""" + try: + results = ( + session.query( + cls.batch_number, + cls.total_batches, + cls.processed_at, + func.count().label("total_records"), + func.sum(func.case((cls.status == "success", 1), else_=0)).label("successful_records"), + func.sum(func.case((cls.status == "error", 1), else_=0)).label("failed_records"), + ) + .group_by(cls.batch_number, cls.total_batches, cls.processed_at) + .order_by(cls.batch_number) + .all() + ) + return [dict(r._mapping) for r in results] + except Exception as e: + logger.error(f"Error fetching all batches: {str(e)}") + return [] diff --git a/app/db_operations.py b/app/models/db_operations.py similarity index 98% rename from app/db_operations.py rename to app/models/db_operations.py index f3abf4d..0517dee 100644 --- a/app/db_operations.py +++ b/app/models/db_operations.py @@ -3,7 +3,7 @@ Database operations module for salary analytics. 
""" from sqlalchemy import text -from .config import BATCH_RESULTS_TABLE +from ..config import BATCH_RESULTS_TABLE from datetime import datetime from app.utils.logger import logger -- 2.34.1 From 4e221610882d7f3a41972e723a5ad9d80655f399 Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Tue, 9 Sep 2025 10:57:47 +0100 Subject: [PATCH 03/19] [add]: refactor/clean up --- app/__init__.py | 26 --------- app/api/__init__.py | 21 +++++++ app/models/__init__.py | 4 ++ app/salary_analytics/__init__.py | 40 +++++++++++++ app/salary_analytics/core/state.py | 64 +++++++++++++++++++++ app/salary_analytics/events/lifecycle.py | 36 ++++++++++++ app/salary_analytics/helpers/data_checks.py | 12 ++++ app/salary_analytics/routes/base.py | 28 +++++++++ app/salary_analytics/routes/train.py | 33 +++++++++++ docker-compose.yml | 2 + 10 files changed, 240 insertions(+), 26 deletions(-) delete mode 100644 app/__init__.py create mode 100644 app/api/__init__.py create mode 100644 app/models/__init__.py create mode 100644 app/salary_analytics/__init__.py create mode 100644 app/salary_analytics/core/state.py create mode 100644 app/salary_analytics/events/lifecycle.py create mode 100644 app/salary_analytics/helpers/data_checks.py create mode 100644 app/salary_analytics/routes/base.py create mode 100644 app/salary_analytics/routes/train.py diff --git a/app/__init__.py b/app/__init__.py deleted file mode 100644 index 32f1b84..0000000 --- a/app/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -from flask import Flask -import os -from .extensions import db, migrate - - -""" -Salary Analytics Package -A package for analyzing and predicting salary patterns from transaction data. 
-""" - -__version__ = "0.1.0" - - -def create_app(): - app = Flask(__name__) - app.config.from_object('salary_analytics.config') - - # Initialize extensions - db.init_app(app) - migrate.init_app(app, db) - - # Register blueprints or CLI commands here if needed - from app.analytics.commands import commands - app.cli.add_command(commands.upload_xls_cli) - - return app \ No newline at end of file diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000..dde0fd0 --- /dev/null +++ b/app/api/__init__.py @@ -0,0 +1,21 @@ +from flask import Flask +import os +from app.extensions import db, migrate + + + +def create_app(): + app = Flask(__name__) + + # Load configuration from config.py + app.config.from_object('app.config') + + # Initialize extensions + db.init_app(app) + migrate.init_app(app, db) + + # Register blueprints or CLI commands here if needed + from app.api.commands import commands + app.cli.add_command(commands.upload_xls_cli) + + return app \ No newline at end of file diff --git a/app/models/__init__.py b/app/models/__init__.py new file mode 100644 index 0000000..62baa90 --- /dev/null +++ b/app/models/__init__.py @@ -0,0 +1,4 @@ +from .raw_transaction import RawTransaction + + +__all__ = ['RawTransaction'] \ No newline at end of file diff --git a/app/salary_analytics/__init__.py b/app/salary_analytics/__init__.py new file mode 100644 index 0000000..babbabb --- /dev/null +++ b/app/salary_analytics/__init__.py @@ -0,0 +1,40 @@ +from fastapi import FastAPI +from app.salary_analytics.routes import analysis, reports, pipeline, load, base, train +from app.salary_analytics.middlewares.middleware import add_middlewares +from app.salary_analytics.events.lifecycle import register_events +from app.utils.logger import logger +import socket + +""" +Salary Analytics Package +A package for analyzing and predicting salary patterns from transaction data. 
+""" + +__version__ = "0.1.0" + + +def create_app() -> FastAPI: + app = FastAPI( + title="Salary Analytics API", + description="API for analyzing and predicting salary patterns from transaction data", + version="1.0.0" + ) + + # Middlewares + add_middlewares(app) + + # Events + register_events(app) + + # Routers + app.include_router(base.router, tags=["Base"]) + app.include_router(analysis.router, prefix="/analyze", tags=["Analysis"]) + app.include_router(reports.router, tags=["Reports"]) + app.include_router(pipeline.router, tags=["Pipeline"]) + app.include_router(load.router, tags=["Data"]) + app.include_router(train.router, tags=["Model Training"]) + + + return app + +app = create_app() diff --git a/app/salary_analytics/core/state.py b/app/salary_analytics/core/state.py new file mode 100644 index 0000000..4e59c2f --- /dev/null +++ b/app/salary_analytics/core/state.py @@ -0,0 +1,64 @@ +from app.salary_analytics.services.main import SalaryAnalyticsPipeline +from app.salary_analytics.services.data_loader import DataLoader + + +class GlobalState: + def __init__(self): + self._pipeline = None + self._data_loader = None + self.df = None + self.salary_predictor = None + self.salary_earner_analyzer = None + + # ---- Pipeline ---- + @property + def pipeline(self): + if self._pipeline is None: + self._pipeline = SalaryAnalyticsPipeline() + return self._pipeline + + @pipeline.setter + def pipeline(self, value): + self._pipeline = value + + # ---- Data Loader ---- + @property + def data_loader(self): + if self._data_loader is None: + self._data_loader = DataLoader() + return self._data_loader + + @data_loader.setter + def data_loader(self, value): + self._data_loader = value + + # ---- DataFrame ---- + @property + def df(self): + return self._df + + @df.setter + def df(self, value): + self._df = value + + # ---- Salary Predictor ---- + @property + def salary_predictor(self): + return self._salary_predictor + + @salary_predictor.setter + def salary_predictor(self, value): + 
self._salary_predictor = value + + # ---- Salary Earner Analyzer ---- + @property + def salary_earner_analyzer(self): + return self._salary_earner_analyzer + + @salary_earner_analyzer.setter + def salary_earner_analyzer(self, value): + self._salary_earner_analyzer = value + + +state = GlobalState() + diff --git a/app/salary_analytics/events/lifecycle.py b/app/salary_analytics/events/lifecycle.py new file mode 100644 index 0000000..635ab73 --- /dev/null +++ b/app/salary_analytics/events/lifecycle.py @@ -0,0 +1,36 @@ +import socket +from fastapi import FastAPI +from app.salary_analytics.integrations.salary_detect import SalaryDetect +from app.utils.logger import logger + +salary_detect = SalaryDetect() + + +def register_events(app: FastAPI): + @app.on_event("startup") + async def startup_event(): + """Initialize the pipeline on startup.""" + try: + logger.info("Initializing pipeline...") + + # Start autonomous salary detection loop + salary_detect.start() + logger.info("Started autonomous salary detection loop.") + + # Print network information + hostname = socket.gethostname() + ip_address = socket.gethostbyname(hostname) + logger.info(f"Server running on hostname: {hostname}") + logger.info(f"Server IP address: {ip_address}") + logger.info(f"Server is accessible at:") + logger.info(f"- http://localhost:8000") + logger.info(f"- http://127.0.0.1:8000") + logger.info(f"- http://{ip_address}:8000") + logger.info("Pipeline initialized successfully") + except Exception as e: + logger.error(f"Error during startup: {str(e)}") + raise + + @app.on_event("shutdown") + async def shutdown_event(): + logger.info("Shutting down Salary Analytics API...") diff --git a/app/salary_analytics/helpers/data_checks.py b/app/salary_analytics/helpers/data_checks.py new file mode 100644 index 0000000..47caa4a --- /dev/null +++ b/app/salary_analytics/helpers/data_checks.py @@ -0,0 +1,12 @@ +from fastapi import HTTPException +from app.salary_analytics.core.state import state + + +def 
check_data_loaded(): + """Raise HTTP 400 if no data is loaded into the pipeline.""" + if state.pipeline.df is None: + raise HTTPException( + status_code=400, + detail="No data loaded. Please load data first using the /load-data endpoint." + ) + return True diff --git a/app/salary_analytics/routes/base.py b/app/salary_analytics/routes/base.py new file mode 100644 index 0000000..c1319fb --- /dev/null +++ b/app/salary_analytics/routes/base.py @@ -0,0 +1,28 @@ +from fastapi import APIRouter +from app.utils.logger import logger +import time + + +router = APIRouter() + +@router.get("/") +async def root(): + """Root endpoint.""" + start_time = time.time() + logger.info("Root endpoint accessed") + response = {"message": "Welcome to Salary Analytics API"} + logger.info(f"Root endpoint completed in {time.time() - start_time:.2f} seconds") + return response + + +@router.get("/health") +async def health_check(): + """Health check endpoint.""" + start_time = time.time() + logger.info("Health check endpoint accessed") + response = {"status": "healthy"} + logger.info(f"Health check completed in {time.time() - start_time:.2f} seconds") + return response + + + diff --git a/app/salary_analytics/routes/train.py b/app/salary_analytics/routes/train.py new file mode 100644 index 0000000..94320cb --- /dev/null +++ b/app/salary_analytics/routes/train.py @@ -0,0 +1,33 @@ +import time +import logging +from fastapi import APIRouter, HTTPException +from app.salary_analytics.services.main import SalaryAnalyticsPipeline +from app.salary_analytics.helpers.data_checks import check_data_loaded +from app.salary_analytics.helpers.response_helpers import AnalysisResponse +from app.salary_analytics.core.state import state +from app.utils.logger import logger + + + +router = APIRouter() + + +@router.post("/train/models", response_model=AnalysisResponse) +async def train_models(): + """Train salary prediction models.""" + start_time = time.time() + try: + check_data_loaded() + logger.info("Starting 
model training...") + state.pipeline.train_salary_prediction_models() + logger.info("Models trained successfully") + response = AnalysisResponse( + message="Models trained successfully" + ) + logger.info(f"Model training endpoint completed in {time.time() - start_time:.2f} seconds") + return response + except Exception as e: + logger.error(f"Error in model training: {str(e)}") + logger.info(f"Model training endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=500, detail=str(e)) + diff --git a/docker-compose.yml b/docker-compose.yml index 2f701b2..9205cb3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,8 @@ services: digifi-analytics: build: . + env_file: + - .env ports: - "${APP_PORT:-4800}:8000" environment: -- 2.34.1 From 80cc543cdd9101ddd102a0440a52e6ce4424ce35 Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Tue, 9 Sep 2025 10:58:08 +0100 Subject: [PATCH 04/19] [add]: refactor/clean up --- app/analytics/api.py | 600 ----------------- app/api.py | 601 ++++++++++++++++++ app/{analytics => api}/commands/commands.py | 0 .../helpers/response_helpers.py | 0 .../integrations/rac_check.py | 0 .../integrations/salary_detect.py | 2 +- .../middlewares/middleware.py | 0 app/salary_analytics/routes/analysis.py | 75 +++ app/salary_analytics/routes/load.py | 74 +++ app/salary_analytics/routes/pipeline.py | 292 +++++++++ app/salary_analytics/routes/reports.py | 78 +++ .../services/__init__.py | 0 .../services/consistent_amount_analyzer.py | 2 +- .../services/data_loader.py | 2 +- .../services/keyword_analyzer.py | 2 +- .../services/main.py | 0 .../services/salary_earner_analyzer.py | 2 +- .../services/salary_predictor.py | 2 +- .../services/transaction_type_analyzer.py | 2 +- 19 files changed, 1127 insertions(+), 607 deletions(-) delete mode 100644 app/analytics/api.py create mode 100644 app/api.py rename app/{analytics => api}/commands/commands.py (100%) rename 
app/{analytics => salary_analytics}/helpers/response_helpers.py (100%) rename app/{analytics => salary_analytics}/integrations/rac_check.py (100%) rename app/{analytics => salary_analytics}/integrations/salary_detect.py (92%) rename app/{analytics => salary_analytics}/middlewares/middleware.py (100%) create mode 100644 app/salary_analytics/routes/analysis.py create mode 100644 app/salary_analytics/routes/load.py create mode 100644 app/salary_analytics/routes/pipeline.py create mode 100644 app/salary_analytics/routes/reports.py rename app/{analytics => salary_analytics}/services/__init__.py (100%) rename app/{analytics => salary_analytics}/services/consistent_amount_analyzer.py (97%) rename app/{analytics => salary_analytics}/services/data_loader.py (98%) rename app/{analytics => salary_analytics}/services/keyword_analyzer.py (96%) rename app/{analytics => salary_analytics}/services/main.py (100%) rename app/{analytics => salary_analytics}/services/salary_earner_analyzer.py (99%) rename app/{analytics => salary_analytics}/services/salary_predictor.py (99%) rename app/{analytics => salary_analytics}/services/transaction_type_analyzer.py (93%) diff --git a/app/analytics/api.py b/app/analytics/api.py deleted file mode 100644 index ab069e3..0000000 --- a/app/analytics/api.py +++ /dev/null @@ -1,600 +0,0 @@ -""" -FastAPI application for salary analytics. 
-""" - -from fastapi import FastAPI, HTTPException, BackgroundTasks, UploadFile, File, Depends -from fastapi.responses import FileResponse -import os -import socket -from typing import Optional, List, Union -import pandas as pd -import tempfile -from datetime import datetime -from sqlalchemy import text -import warnings -import time -from app.analytics.services.main import SalaryAnalyticsPipeline -from app.config import OUTPUT_PATHS, TABLE_NAME -from app.analytics.services.data_loader import DataLoader -from app.analytics.middlewares.middleware import add_middlewares -from app.models.db_operations import DatabaseOperations -from app.analytics.integrations.salary_detect import SalaryDetect -from app.utils.logger import logger -from app.analytics.helpers.response_helpers import AnalysisResponse, BatchResponse - - -# Suppress warnings -warnings.filterwarnings('ignore', category=RuntimeWarning, module='numpy') -pd.options.mode.chained_assignment = None - -app = FastAPI( - title="Salary Analytics API", - description="API for analyzing and predicting salary patterns from transaction data", - version="1.0.0" -) - -# Add CORS middleware -add_middlewares(app) - -# Global pipeline instance -pipeline = SalaryAnalyticsPipeline() - -# Global variables to store loaded data and models -data_loader = None -df = None -salary_predictor = None -salary_earner_analyzer = None - -salary_detect = SalaryDetect() - - - -def check_data_loaded(): - """Check if data is loaded before running analytics.""" - if pipeline.df is None: - raise HTTPException( - status_code=400, - detail="No data loaded. Please load data first using the /load-data endpoint." 
- ) - -@app.on_event("startup") -async def startup_event(): - """Initialize the pipeline on startup.""" - try: - logger.info("Initializing pipeline...") - - # Start autonomous salary detection loop - salary_detect.start() - logger.info("Started autonomous salary detection loop.") - - # Print network information - hostname = socket.gethostname() - ip_address = socket.gethostbyname(hostname) - logger.info(f"Server running on hostname: {hostname}") - logger.info(f"Server IP address: {ip_address}") - logger.info(f"Server is accessible at:") - logger.info(f"- http://localhost:8000") - logger.info(f"- http://127.0.0.1:8000") - logger.info(f"- http://{ip_address}:8000") - logger.info("Pipeline initialized successfully") - except Exception as e: - logger.error(f"Error during startup: {str(e)}") - raise - - - -@app.get("/") -async def root(): - """Root endpoint.""" - start_time = time.time() - logger.info("Root endpoint accessed") - response = {"message": "Welcome to Salary Analytics API"} - logger.info(f"Root endpoint completed in {time.time() - start_time:.2f} seconds") - return response - - - -@app.get("/health") -async def health_check(): - """Health check endpoint.""" - start_time = time.time() - logger.info("Health check endpoint accessed") - response = {"status": "healthy"} - logger.info(f"Health check completed in {time.time() - start_time:.2f} seconds") - return response - - - -@app.post("/analyze/keyword", response_model=AnalysisResponse) -async def analyze_keyword(): - """Run keyword-based salary transaction analysis.""" - start_time = time.time() - try: - check_data_loaded() - logger.info("Starting keyword analysis...") - data = pipeline.run_keyword_analysis() - logger.info(f"Keyword analysis completed. 
Found {len(data)} matches") - response = AnalysisResponse( - message="Keyword analysis completed successfully", - data={"count": len(data)} - ) - logger.info(f"Keyword analysis endpoint completed in {time.time() - start_time:.2f} seconds") - return response - except Exception as e: - logger.error(f"Error in keyword analysis: {str(e)}") - logger.info(f"Keyword analysis endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail=str(e)) - - - -@app.post("/analyze/consistent-amount", response_model=AnalysisResponse) -async def analyze_consistent_amount(): - """Run consistent amount transaction analysis.""" - start_time = time.time() - try: - check_data_loaded() - logger.info("Starting consistent amount analysis...") - data = pipeline.run_consistent_amount_analysis() - logger.info(f"Consistent amount analysis completed. Found {len(data)} matches") - response = AnalysisResponse( - message="Consistent amount analysis completed successfully", - data={"count": len(data)} - ) - logger.info(f"Consistent amount analysis endpoint completed in {time.time() - start_time:.2f} seconds") - return response - except Exception as e: - logger.error(f"Error in consistent amount analysis: {str(e)}") - logger.info(f"Consistent amount analysis endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail=str(e)) - - - -@app.post("/analyze/transaction-type", response_model=AnalysisResponse) -async def analyze_transaction_type(): - """Run transaction type analysis.""" - start_time = time.time() - try: - check_data_loaded() - logger.info("Starting transaction type analysis...") - data = pipeline.run_transaction_type_analysis() - logger.info(f"Transaction type analysis completed. 
Found {len(data)} matches") - response = AnalysisResponse( - message="Transaction type analysis completed successfully", - data={"count": len(data)} - ) - logger.info(f"Transaction type analysis endpoint completed in {time.time() - start_time:.2f} seconds") - return response - except Exception as e: - logger.error(f"Error in transaction type analysis: {str(e)}") - logger.info(f"Transaction type analysis endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail=str(e)) - - - -@app.post("/generate/reports", response_model=AnalysisResponse) -async def generate_reports(background_tasks: BackgroundTasks): - """Generate salary earner reports.""" - start_time = time.time() - try: - check_data_loaded() - logger.info("Starting report generation...") - reports = pipeline.generate_salary_earner_reports() - logger.info("Reports generated successfully") - response = AnalysisResponse( - message="Reports generated successfully", - data={ - "verified_salary_earners": len(reports['final_table']), - "likely_salary_earners": len(reports['likely_salary_earner']), - "high_earners": reports['total_high_earners'] - } - ) - logger.info(f"Report generation endpoint completed in {time.time() - start_time:.2f} seconds") - return response - except Exception as e: - logger.error(f"Error in report generation: {str(e)}") - logger.info(f"Report generation endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail=str(e)) - - - -@app.post("/train/models", response_model=AnalysisResponse) -async def train_models(): - """Train salary prediction models.""" - start_time = time.time() - try: - check_data_loaded() - logger.info("Starting model training...") - pipeline.train_salary_prediction_models() - logger.info("Models trained successfully") - response = AnalysisResponse( - message="Models trained successfully" - ) - logger.info(f"Model training endpoint completed in {time.time() - start_time:.2f} 
seconds") - return response - except Exception as e: - logger.error(f"Error in model training: {str(e)}") - logger.info(f"Model training endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail=str(e)) - - - -@app.get("/download/{report_type}") -async def download_report(report_type: str): - """Download generated reports.""" - start_time = time.time() - try: - check_data_loaded() - logger.info(f"Attempting to download report: {report_type}") - file_paths = { - "high_earners": OUTPUT_PATHS["high_earner_details"], - "likely_earners": OUTPUT_PATHS["likely_salary_earner"], - "final_table": OUTPUT_PATHS["final_table"], - "consistent_plot": OUTPUT_PATHS["consistent_earners_plot"], - "inconsistent_plot": OUTPUT_PATHS["inconsistent_earners_plot"], - "hypothesis_plot": OUTPUT_PATHS["hypothesis_overlap_plot"] - } - - if report_type not in file_paths: - logger.error(f"Report type not found: {report_type}") - logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=404, detail="Report type not found") - - file_path = file_paths[report_type] - if not os.path.exists(file_path): - logger.error(f"Report file not found: {file_path}") - logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=404, detail="Report file not found") - - logger.info(f"Successfully found report file: {file_path}") - response = FileResponse( - path=file_path, - filename=os.path.basename(file_path), - media_type="application/octet-stream" - ) - logger.info(f"Download endpoint completed in {time.time() - start_time:.2f} seconds") - return response - except Exception as e: - logger.error(f"Error downloading report: {str(e)}") - logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail=str(e)) - - - -@app.post("/run/pipeline", response_model=AnalysisResponse) 
-async def run_full_pipeline(): - """Run the complete salary analytics pipeline.""" - start_time = time.time() - try: - check_data_loaded() - logger.info("Starting full pipeline...") - success = pipeline.run_full_pipeline() - if not success: - logger.error("Pipeline failed") - logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail="Pipeline failed") - - logger.info("Pipeline completed successfully") - response = AnalysisResponse( - message="Pipeline completed successfully" - ) - logger.info(f"Full pipeline endpoint completed in {time.time() - start_time:.2f} seconds") - return response - except Exception as e: - logger.error(f"Error in pipeline: {str(e)}") - logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail=str(e)) - - - -@app.post("/load-data") -async def load_data(source: str = "db", file: Optional[UploadFile] = File(None)): - """ - Load data from either database or CSV file. 
- - Args: - source (str): Source of data ('db' or 'csv') - file (UploadFile, optional): CSV file to load (required if source is 'csv') - - Returns: - dict: Status of data loading - """ - start_time = time.time() - try: - if source not in ['db', 'csv']: - logger.error(f"Invalid source: {source}") - logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'") - - if source == 'csv' and not file: - logger.error("No file provided for CSV source") - logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") - - if source == 'csv': - # Save uploaded file temporarily - with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file: - content = await file.read() - temp_file.write(content) - temp_file_path = temp_file.name - - try: - success = pipeline.load_data(source='csv', file_path=temp_file_path) - finally: - # Clean up temporary file - os.unlink(temp_file_path) - else: - success = pipeline.load_data(source='db') - - if not success: - logger.error("Failed to load data") - logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail="Failed to load data") - - response = { - "status": "success", - "message": f"Successfully loaded {len(pipeline.df)} rows of data", - "columns": pipeline.df.columns.tolist(), - "row_count": len(pipeline.df) - } - logger.info(f"Load data endpoint completed in {time.time() - start_time:.2f} seconds") - return response - except Exception as e: - logger.error(f"Error loading data: {str(e)}") - logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail=str(e)) - -async def get_file_if_csv(source: str, file: Optional[UploadFile] = File(None)): - """Dependency to 
handle file upload only when source is csv.""" - if source == 'csv' and not file: - raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") - return file - - - -@app.post("/run/streaming-pipeline", response_model=List[BatchResponse]) -async def run_streaming_pipeline( - source: str = "db", - batch_size: int = 10000, - file: Optional[Union[UploadFile, str]] = File(None) -): - """ - Run the complete salary analytics pipeline in batches. - - Args: - source (str): Source of data ('db' or 'csv') - batch_size (int): Number of rows to process in each batch - file (UploadFile, optional): CSV file to load (required if source is 'csv') - - Returns: - List[BatchResponse]: List of responses for each batch processed - """ - start_time = time.time() - try: - if source not in ['db', 'csv']: - logger.error(f"Invalid source: {source}") - logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'") - - if source == 'csv' and not file: - logger.error("No file provided for CSV source") - logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") - - # Initialize data loader - data_loader = DataLoader() - data_loader.chunk_size = batch_size - - # Create output directory for batch results - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - batch_output_dir = os.path.join(os.path.dirname(OUTPUT_PATHS['final_table']), f"batch_results_{timestamp}") - os.makedirs(batch_output_dir, exist_ok=True) - - # Initialize database operations - if not data_loader.connect(): - logger.error("Failed to connect to database") - logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail="Failed to connect to database") - - db_ops = 
DatabaseOperations(data_loader.engine) - if not db_ops.create_batch_results_table(): - logger.error("Failed to create batch results table") - logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") - raise HTTPException(status_code=500, detail="Failed to create batch results table") - - responses = [] - batch_number = 0 - batch_start_time = time.time() - - def preprocess_chunk(chunk): - """Preprocess a chunk of data with the same logic as DataLoader.""" - # Convert dates - chunk['trx_start_date'] = pd.to_datetime(chunk['trx_start_date']) - chunk['trx_end_date'] = pd.to_datetime(chunk['trx_end_date']) - - # Rename columns - chunk = chunk.rename(columns={ - 'd1': 'trx_type', - 'd2': 'trx_subtype', - 'd3': 'initiated_by', - 'd4': 'customer_id' - }) - - chunk = chunk.dropna() - - return chunk - - if source == 'csv': - # Save uploaded file temporarily - with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file: - content = await file.read() - temp_file.write(content) - temp_file_path = temp_file.name - - try: - # Process CSV in chunks - for chunk in pd.read_csv(temp_file_path, chunksize=batch_size): - batch_number += 1 - logger.info(f"Processing batch {batch_number}") - - # Preprocess chunk - chunk = preprocess_chunk(chunk) - - # Run pipeline on chunk - pipeline = SalaryAnalyticsPipeline() - pipeline.df = chunk - - try: - batch_start_time = time.time() - # Run analyses - pipeline.run_keyword_analysis() - pipeline.run_consistent_amount_analysis() - pipeline.run_transaction_type_analysis() - - # Generate reports - reports = pipeline.generate_salary_earner_reports() - - # Add batch metadata to results - results_df = reports['final_table'].copy() - results_df['batch_number'] = batch_number - results_df['total_batches'] = -1 # Unknown for CSV - results_df['processed_at'] = datetime.now() - - # Save batch results to CSV - batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv") - 
results_df.to_csv(batch_results_path, index=False) - - # Save to database - db_ops.save_batch_to_db( - batch_number=batch_number, - total_batches=-1, # Unknown for CSV - results_df=results_df, - status="success" - ) - - logger.info(f"Batch {batch_number} processed in {time.time() - batch_start_time:.2f} seconds") - - responses.append(BatchResponse( - batch_number=batch_number, - total_batches=-1, # Unknown for CSV - processed_rows=len(chunk), - results_path=batch_results_path, - message=f"Successfully processed batch {batch_number}" - )) - except Exception as e: - error_message = str(e) - logger.error(f"Error processing batch {batch_number}: {error_message}") - - # Save error to database - db_ops.save_batch_to_db( - batch_number=batch_number, - total_batches=-1, - results_df=pd.DataFrame(), # Empty DataFrame for error case - status="error" - ) - - responses.append(BatchResponse( - batch_number=batch_number, - total_batches=-1, - processed_rows=len(chunk), - results_path="", - message=f"Error processing batch {batch_number}: {error_message}" - )) - finally: - # Clean up temporary file - os.unlink(temp_file_path) - else: - # Process database in chunks - if not data_loader.connect(): - raise HTTPException(status_code=500, detail="Failed to connect to database") - - # Get total row count - with data_loader.engine.connect() as conn: - count_query = text(f"SELECT COUNT(*) FROM {TABLE_NAME}") - total_rows = conn.execute(count_query).scalar() - - total_batches = (total_rows + batch_size - 1) // batch_size - offset = 0 - - while offset < total_rows: - batch_number += 1 - logger.info(f"Processing batch {batch_number} of {total_batches}") - - # Load chunk from database - query = f"SELECT * FROM {TABLE_NAME} LIMIT {batch_size} OFFSET {offset}" - chunk = pd.read_sql(query, data_loader.engine) - - if chunk.empty: - break - - # Preprocess chunk - chunk = preprocess_chunk(chunk) - - # Run pipeline on chunk - pipeline = SalaryAnalyticsPipeline() - pipeline.df = chunk - - try: - 
batch_start_time = time.time() - # Run analyses - pipeline.run_keyword_analysis() - pipeline.run_consistent_amount_analysis() - pipeline.run_transaction_type_analysis() - - # Generate reports - reports = pipeline.generate_salary_earner_reports() - - # Add batch metadata to results - results_df = reports['final_table'].copy() - results_df['batch_number'] = batch_number - results_df['total_batches'] = total_batches - results_df['processed_at'] = datetime.now() - - # Save batch results to CSV - batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv") - results_df.to_csv(batch_results_path, index=False) - - # Save to database - db_ops.save_batch_to_db( - batch_number=batch_number, - total_batches=total_batches, - results_df=results_df, - status="success" - ) - - logger.info(f"Batch {batch_number} of {total_batches} processed in {time.time() - batch_start_time:.2f} seconds") - - responses.append(BatchResponse( - batch_number=batch_number, - total_batches=total_batches, - processed_rows=len(chunk), - results_path=batch_results_path, - message=f"Successfully processed batch {batch_number} of {total_batches}" - )) - except Exception as e: - error_message = str(e) - logger.error(f"Error processing batch {batch_number}: {error_message}") - - # Save error to database - db_ops.save_batch_to_db( - batch_number=batch_number, - total_batches=total_batches, - results_df=pd.DataFrame(), # Empty DataFrame for error case - status="error" - ) - - responses.append(BatchResponse( - batch_number=batch_number, - total_batches=total_batches, - processed_rows=len(chunk), - results_path="", - message=f"Error processing batch {batch_number}: {error_message}" - )) - - offset += batch_size - - logger.info(f"Streaming pipeline endpoint completed in {time.time() - start_time:.2f} seconds") - return responses - except Exception as e: - logger.error(f"Error in streaming pipeline: {str(e)}") - logger.info(f"Streaming pipeline endpoint failed after {time.time() - 
start_time:.2f} seconds") - raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/app/api.py b/app/api.py new file mode 100644 index 0000000..3a65527 --- /dev/null +++ b/app/api.py @@ -0,0 +1,601 @@ +""" +FastAPI application for salary analytics. +""" + +from fastapi import FastAPI, HTTPException, BackgroundTasks, UploadFile, File, Depends +from fastapi.responses import FileResponse +import os +import socket +from typing import Optional, List, Union +import pandas as pd +import tempfile +from datetime import datetime +from sqlalchemy import text +import warnings +import time +from app.salary_analytics.services.main import SalaryAnalyticsPipeline +from app.config import OUTPUT_PATHS, TABLE_NAME +from app.salary_analytics.services.data_loader import DataLoader +from app.salary_analytics.middlewares.middleware import add_middlewares +from app.models.db_operations import DatabaseOperations +from app.salary_analytics.integrations.salary_detect import SalaryDetect +from app.utils.logger import logger +from app.salary_analytics.helpers.response_helpers import AnalysisResponse, BatchResponse + + +# Suppress warnings +warnings.filterwarnings('ignore', category=RuntimeWarning, module='numpy') +pd.options.mode.chained_assignment = None + +app = FastAPI( + title="Salary Analytics API", + description="API for analyzing and predicting salary patterns from transaction data", + version="1.0.0" +) + +# Add CORS middleware +add_middlewares(app) + +# Global pipeline instance +pipeline = SalaryAnalyticsPipeline() + +# Global variables to store loaded data and models +data_loader = None +df = None +salary_predictor = None +salary_earner_analyzer = None + +# salary_detect = SalaryDetect() + + + +# def check_data_loaded(): +# """Check if data is loaded before running analytics.""" +# if pipeline.df is None: +# raise HTTPException( +# status_code=400, +# detail="No data loaded. Please load data first using the /load-data endpoint." 
+# ) + +# @app.on_event("startup") +# async def startup_event(): +# """Initialize the pipeline on startup.""" +# try: +# logger.info("Initializing pipeline...") + +# # Start autonomous salary detection loop +# salary_detect.start() +# logger.info("Started autonomous salary detection loop.") + +# # Print network information +# hostname = socket.gethostname() +# ip_address = socket.gethostbyname(hostname) +# logger.info(f"Server running on hostname: {hostname}") +# logger.info(f"Server IP address: {ip_address}") +# logger.info(f"Server is accessible at:") +# logger.info(f"- http://localhost:8000") +# logger.info(f"- http://127.0.0.1:8000") +# logger.info(f"- http://{ip_address}:8000") +# logger.info("Pipeline initialized successfully") +# except Exception as e: +# logger.error(f"Error during startup: {str(e)}") +# raise + + + +# @app.get("/") +# async def root(): +# """Root endpoint.""" +# start_time = time.time() +# logger.info("Root endpoint accessed") +# response = {"message": "Welcome to Salary Analytics API"} +# logger.info(f"Root endpoint completed in {time.time() - start_time:.2f} seconds") +# return response + + + +# @app.get("/health") +# async def health_check(): +# """Health check endpoint.""" +# start_time = time.time() +# logger.info("Health check endpoint accessed") +# response = {"status": "healthy"} +# logger.info(f"Health check completed in {time.time() - start_time:.2f} seconds") +# return response + + + +# @app.post("/analyze/keyword", response_model=AnalysisResponse) +# async def analyze_keyword(): +# """Run keyword-based salary transaction analysis.""" +# start_time = time.time() +# try: +# check_data_loaded() +# logger.info("Starting keyword analysis...") +# data = pipeline.run_keyword_analysis() +# logger.info(f"Keyword analysis completed. 
Found {len(data)} matches") +# response = AnalysisResponse( +# message="Keyword analysis completed successfully", +# data={"count": len(data)} +# ) +# logger.info(f"Keyword analysis endpoint completed in {time.time() - start_time:.2f} seconds") +# return response +# except Exception as e: +# logger.error(f"Error in keyword analysis: {str(e)}") +# logger.info(f"Keyword analysis endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=500, detail=str(e)) + + + +# @app.post("/analyze/consistent-amount", response_model=AnalysisResponse) +# async def analyze_consistent_amount(): +# """Run consistent amount transaction analysis.""" +# start_time = time.time() +# try: +# check_data_loaded() +# logger.info("Starting consistent amount analysis...") +# data = pipeline.run_consistent_amount_analysis() +# logger.info(f"Consistent amount analysis completed. Found {len(data)} matches") +# response = AnalysisResponse( +# message="Consistent amount analysis completed successfully", +# data={"count": len(data)} +# ) +# logger.info(f"Consistent amount analysis endpoint completed in {time.time() - start_time:.2f} seconds") +# return response +# except Exception as e: +# logger.error(f"Error in consistent amount analysis: {str(e)}") +# logger.info(f"Consistent amount analysis endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=500, detail=str(e)) + + + +# @app.post("/analyze/transaction-type", response_model=AnalysisResponse) +# async def analyze_transaction_type(): +# """Run transaction type analysis.""" +# start_time = time.time() +# try: +# check_data_loaded() +# logger.info("Starting transaction type analysis...") +# data = pipeline.run_transaction_type_analysis() +# logger.info(f"Transaction type analysis completed. 
Found {len(data)} matches") +# response = AnalysisResponse( +# message="Transaction type analysis completed successfully", +# data={"count": len(data)} +# ) +# logger.info(f"Transaction type analysis endpoint completed in {time.time() - start_time:.2f} seconds") +# return response +# except Exception as e: +# logger.error(f"Error in transaction type analysis: {str(e)}") +# logger.info(f"Transaction type analysis endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=500, detail=str(e)) + + + +# @app.post("/generate/reports", response_model=AnalysisResponse) +# async def generate_reports(background_tasks: BackgroundTasks): +# """Generate salary earner reports.""" +# start_time = time.time() +# try: +# check_data_loaded() +# logger.info("Starting report generation...") +# reports = pipeline.generate_salary_earner_reports() +# logger.info("Reports generated successfully") +# response = AnalysisResponse( +# message="Reports generated successfully", +# data={ +# "verified_salary_earners": len(reports['final_table']), +# "likely_salary_earners": len(reports['likely_salary_earner']), +# "high_earners": reports['total_high_earners'] +# } +# ) +# logger.info(f"Report generation endpoint completed in {time.time() - start_time:.2f} seconds") +# return response +# except Exception as e: +# logger.error(f"Error in report generation: {str(e)}") +# logger.info(f"Report generation endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=500, detail=str(e)) + + + +# @app.post("/train/models", response_model=AnalysisResponse) +# async def train_models(): +# """Train salary prediction models.""" +# start_time = time.time() +# try: +# check_data_loaded() +# logger.info("Starting model training...") +# pipeline.train_salary_prediction_models() +# logger.info("Models trained successfully") +# response = AnalysisResponse( +# message="Models trained successfully" +# ) +# logger.info(f"Model training 
endpoint completed in {time.time() - start_time:.2f} seconds") +# return response +# except Exception as e: +# logger.error(f"Error in model training: {str(e)}") +# logger.info(f"Model training endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=500, detail=str(e)) + + + +# @app.get("/download/{report_type}") +# async def download_report(report_type: str): +# """Download generated reports.""" +# start_time = time.time() +# try: +# check_data_loaded() +# logger.info(f"Attempting to download report: {report_type}") +# file_paths = { +# "high_earners": OUTPUT_PATHS["high_earner_details"], +# "likely_earners": OUTPUT_PATHS["likely_salary_earner"], +# "final_table": OUTPUT_PATHS["final_table"], +# "consistent_plot": OUTPUT_PATHS["consistent_earners_plot"], +# "inconsistent_plot": OUTPUT_PATHS["inconsistent_earners_plot"], +# "hypothesis_plot": OUTPUT_PATHS["hypothesis_overlap_plot"] +# } + +# if report_type not in file_paths: +# logger.error(f"Report type not found: {report_type}") +# logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=404, detail="Report type not found") + +# file_path = file_paths[report_type] +# if not os.path.exists(file_path): +# logger.error(f"Report file not found: {file_path}") +# logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=404, detail="Report file not found") + +# logger.info(f"Successfully found report file: {file_path}") +# response = FileResponse( +# path=file_path, +# filename=os.path.basename(file_path), +# media_type="application/octet-stream" +# ) +# logger.info(f"Download endpoint completed in {time.time() - start_time:.2f} seconds") +# return response +# except Exception as e: +# logger.error(f"Error downloading report: {str(e)}") +# logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") +# raise 
HTTPException(status_code=500, detail=str(e)) + + + +# @app.post("/run/pipeline", response_model=AnalysisResponse) +# async def run_full_pipeline(): +# """Run the complete salary analytics pipeline.""" +# start_time = time.time() +# try: +# check_data_loaded() +# logger.info("Starting full pipeline...") +# success = pipeline.run_full_pipeline() +# if not success: +# logger.error("Pipeline failed") +# logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=500, detail="Pipeline failed") + +# logger.info("Pipeline completed successfully") +# response = AnalysisResponse( +# message="Pipeline completed successfully" +# ) +# logger.info(f"Full pipeline endpoint completed in {time.time() - start_time:.2f} seconds") +# return response +# except Exception as e: +# logger.error(f"Error in pipeline: {str(e)}") +# logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=500, detail=str(e)) + + + +# @app.post("/load-data") +# async def load_data(source: str = "db", file: Optional[UploadFile] = File(None)): +# """ +# Load data from either database or CSV file. 
+ +# Args: +# source (str): Source of data ('db' or 'csv') +# file (UploadFile, optional): CSV file to load (required if source is 'csv') + +# Returns: +# dict: Status of data loading +# """ +# start_time = time.time() +# try: +# if source not in ['db', 'csv']: +# logger.error(f"Invalid source: {source}") +# logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'") + +# if source == 'csv' and not file: +# logger.error("No file provided for CSV source") +# logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") + +# if source == 'csv': +# # Save uploaded file temporarily +# with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file: +# content = await file.read() +# temp_file.write(content) +# temp_file_path = temp_file.name + +# try: +# success = pipeline.load_data(source='csv', file_path=temp_file_path) +# finally: +# # Clean up temporary file +# os.unlink(temp_file_path) +# else: +# success = pipeline.load_data(source='db') + +# if not success: +# logger.error("Failed to load data") +# logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=500, detail="Failed to load data") + +# response = { +# "status": "success", +# "message": f"Successfully loaded {len(pipeline.df)} rows of data", +# "columns": pipeline.df.columns.tolist(), +# "row_count": len(pipeline.df) +# } +# logger.info(f"Load data endpoint completed in {time.time() - start_time:.2f} seconds") +# return response +# except Exception as e: +# logger.error(f"Error loading data: {str(e)}") +# logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=500, detail=str(e)) + +# async def get_file_if_csv(source: str, file: 
Optional[UploadFile] = File(None)): +# """Dependency to handle file upload only when source is csv.""" +# if source == 'csv' and not file: +# raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") +# return file + + + +# @app.post("/run/streaming-pipeline", response_model=List[BatchResponse]) +# async def run_streaming_pipeline( +# source: str = "db", +# batch_size: int = 10000, +# file: Optional[Union[UploadFile, str]] = File(None) +# ): +# """ +# Run the complete salary analytics pipeline in batches. + +# Args: +# source (str): Source of data ('db' or 'csv') +# batch_size (int): Number of rows to process in each batch +# file (UploadFile, optional): CSV file to load (required if source is 'csv') + +# Returns: +# List[BatchResponse]: List of responses for each batch processed +# """ +# start_time = time.time() +# try: +# if source not in ['db', 'csv']: +# logger.error(f"Invalid source: {source}") +# logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'") + +# if source == 'csv' and not file: +# logger.error("No file provided for CSV source") +# logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") + +# # Initialize data loader +# data_loader = DataLoader() +# data_loader.chunk_size = batch_size + +# # Create output directory for batch results +# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") +# batch_output_dir = os.path.join(os.path.dirname(OUTPUT_PATHS['final_table']), f"batch_results_{timestamp}") +# os.makedirs(batch_output_dir, exist_ok=True) + +# # Initialize database operations +# if not data_loader.connect(): +# logger.error("Failed to connect to database") +# logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") +# 
raise HTTPException(status_code=500, detail="Failed to connect to database") + +# db_ops = DatabaseOperations(data_loader.engine) +# if not db_ops.create_batch_results_table(): +# logger.error("Failed to create batch results table") +# logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=500, detail="Failed to create batch results table") + +# responses = [] +# batch_number = 0 +# batch_start_time = time.time() + +# def preprocess_chunk(chunk): +# """Preprocess a chunk of data with the same logic as DataLoader.""" +# # Convert dates +# chunk['trx_start_date'] = pd.to_datetime(chunk['trx_start_date']) +# chunk['trx_end_date'] = pd.to_datetime(chunk['trx_end_date']) + +# # Rename columns +# chunk = chunk.rename(columns={ +# 'd1': 'trx_type', +# 'd2': 'trx_subtype', +# 'd3': 'initiated_by', +# 'd4': 'customer_id' +# }) + +# chunk = chunk.dropna() + +# return chunk + +# if source == 'csv': +# # Save uploaded file temporarily +# with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file: +# content = await file.read() +# temp_file.write(content) +# temp_file_path = temp_file.name + +# try: +# # Process CSV in chunks +# for chunk in pd.read_csv(temp_file_path, chunksize=batch_size): +# batch_number += 1 +# logger.info(f"Processing batch {batch_number}") + +# # Preprocess chunk +# chunk = preprocess_chunk(chunk) + +# # Run pipeline on chunk +# pipeline = SalaryAnalyticsPipeline() +# pipeline.df = chunk + +# try: +# batch_start_time = time.time() +# # Run analyses +# pipeline.run_keyword_analysis() +# pipeline.run_consistent_amount_analysis() +# pipeline.run_transaction_type_analysis() + +# # Generate reports +# reports = pipeline.generate_salary_earner_reports() + +# # Add batch metadata to results +# results_df = reports['final_table'].copy() +# results_df['batch_number'] = batch_number +# results_df['total_batches'] = -1 # Unknown for CSV +# results_df['processed_at'] = 
datetime.now() + +# # Save batch results to CSV +# batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv") +# results_df.to_csv(batch_results_path, index=False) + +# # Save to database +# db_ops.save_batch_to_db( +# batch_number=batch_number, +# total_batches=-1, # Unknown for CSV +# results_df=results_df, +# status="success" +# ) + +# logger.info(f"Batch {batch_number} processed in {time.time() - batch_start_time:.2f} seconds") + +# responses.append(BatchResponse( +# batch_number=batch_number, +# total_batches=-1, # Unknown for CSV +# processed_rows=len(chunk), +# results_path=batch_results_path, +# message=f"Successfully processed batch {batch_number}" +# )) +# except Exception as e: +# error_message = str(e) +# logger.error(f"Error processing batch {batch_number}: {error_message}") + +# # Save error to database +# db_ops.save_batch_to_db( +# batch_number=batch_number, +# total_batches=-1, +# results_df=pd.DataFrame(), # Empty DataFrame for error case +# status="error" +# ) + +# responses.append(BatchResponse( +# batch_number=batch_number, +# total_batches=-1, +# processed_rows=len(chunk), +# results_path="", +# message=f"Error processing batch {batch_number}: {error_message}" +# )) +# finally: +# # Clean up temporary file +# os.unlink(temp_file_path) +# else: +# # Process database in chunks +# if not data_loader.connect(): +# raise HTTPException(status_code=500, detail="Failed to connect to database") + +# # Get total row count +# with data_loader.engine.connect() as conn: +# count_query = text(f"SELECT COUNT(*) FROM {TABLE_NAME}") +# total_rows = conn.execute(count_query).scalar() + +# total_batches = (total_rows + batch_size - 1) // batch_size +# offset = 0 + +# while offset < total_rows: +# batch_number += 1 +# logger.info(f"Processing batch {batch_number} of {total_batches}") + +# # Load chunk from database +# query = f"SELECT * FROM {TABLE_NAME} LIMIT {batch_size} OFFSET {offset}" +# chunk = pd.read_sql(query, 
data_loader.engine) + +# if chunk.empty: +# break + +# # Preprocess chunk +# chunk = preprocess_chunk(chunk) + +# # Run pipeline on chunk +# pipeline = SalaryAnalyticsPipeline() +# pipeline.df = chunk + +# try: +# batch_start_time = time.time() +# # Run analyses +# pipeline.run_keyword_analysis() +# pipeline.run_consistent_amount_analysis() +# pipeline.run_transaction_type_analysis() + +# # Generate reports +# reports = pipeline.generate_salary_earner_reports() + +# # Add batch metadata to results +# results_df = reports['final_table'].copy() +# results_df['batch_number'] = batch_number +# results_df['total_batches'] = total_batches +# results_df['processed_at'] = datetime.now() + +# # Save batch results to CSV +# batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv") +# results_df.to_csv(batch_results_path, index=False) + +# # Save to database +# db_ops.save_batch_to_db( +# batch_number=batch_number, +# total_batches=total_batches, +# results_df=results_df, +# status="success" +# ) + +# logger.info(f"Batch {batch_number} of {total_batches} processed in {time.time() - batch_start_time:.2f} seconds") + +# responses.append(BatchResponse( +# batch_number=batch_number, +# total_batches=total_batches, +# processed_rows=len(chunk), +# results_path=batch_results_path, +# message=f"Successfully processed batch {batch_number} of {total_batches}" +# )) +# except Exception as e: +# error_message = str(e) +# logger.error(f"Error processing batch {batch_number}: {error_message}") + +# # Save error to database +# db_ops.save_batch_to_db( +# batch_number=batch_number, +# total_batches=total_batches, +# results_df=pd.DataFrame(), # Empty DataFrame for error case +# status="error" +# ) + +# responses.append(BatchResponse( +# batch_number=batch_number, +# total_batches=total_batches, +# processed_rows=len(chunk), +# results_path="", +# message=f"Error processing batch {batch_number}: {error_message}" +# )) + +# offset += batch_size + +# 
logger.info(f"Streaming pipeline endpoint completed in {time.time() - start_time:.2f} seconds") +# return responses +# except Exception as e: +# logger.error(f"Error in streaming pipeline: {str(e)}") +# logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") +# raise HTTPException(status_code=500, detail=str(e)) + diff --git a/app/analytics/commands/commands.py b/app/api/commands/commands.py similarity index 100% rename from app/analytics/commands/commands.py rename to app/api/commands/commands.py diff --git a/app/analytics/helpers/response_helpers.py b/app/salary_analytics/helpers/response_helpers.py similarity index 100% rename from app/analytics/helpers/response_helpers.py rename to app/salary_analytics/helpers/response_helpers.py diff --git a/app/analytics/integrations/rac_check.py b/app/salary_analytics/integrations/rac_check.py similarity index 100% rename from app/analytics/integrations/rac_check.py rename to app/salary_analytics/integrations/rac_check.py diff --git a/app/analytics/integrations/salary_detect.py b/app/salary_analytics/integrations/salary_detect.py similarity index 92% rename from app/analytics/integrations/salary_detect.py rename to app/salary_analytics/integrations/salary_detect.py index 684abcb..441dd02 100644 --- a/app/analytics/integrations/salary_detect.py +++ b/app/salary_analytics/integrations/salary_detect.py @@ -1,7 +1,7 @@ import time import threading import requests -from ...config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload +from app.config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload from app.utils.logger import logger class SalaryDetect: diff --git a/app/analytics/middlewares/middleware.py b/app/salary_analytics/middlewares/middleware.py similarity index 100% rename from app/analytics/middlewares/middleware.py rename to app/salary_analytics/middlewares/middleware.py diff --git a/app/salary_analytics/routes/analysis.py 
b/app/salary_analytics/routes/analysis.py new file mode 100644 index 0000000..e6c6688 --- /dev/null +++ b/app/salary_analytics/routes/analysis.py @@ -0,0 +1,75 @@ +from fastapi import APIRouter, HTTPException +from app.salary_analytics.helpers.response_helpers import AnalysisResponse +from app.salary_analytics.helpers.data_checks import check_data_loaded +from app.utils.logger import logger +import time +from app.salary_analytics.core.state import state + +router = APIRouter() + + +@router.post("/keyword", response_model=AnalysisResponse) +async def analyze_keyword(): + """Run keyword-based salary transaction analysis.""" + start_time = time.time() + try: + check_data_loaded() + logger.info("Starting keyword analysis...") + data = state.pipeline.run_keyword_analysis() + logger.info(f"Keyword analysis completed. Found {len(data)} matches") + response = AnalysisResponse( + message="Keyword analysis completed successfully", + data={"count": len(data)} + ) + logger.info(f"Keyword analysis endpoint completed in {time.time() - start_time:.2f} seconds") + return response + except Exception as e: + logger.error(f"Error in keyword analysis: {str(e)}") + logger.info(f"Keyword analysis endpoint failed after {time.time() - start_time:.2f} seconds") + raise e if isinstance(e, HTTPException) else HTTPException(status_code=500, detail=str(e)) # pass an HTTPException through unchanged; only unexpected errors become a 500 + + + +@router.post("/consistent-amount", response_model=AnalysisResponse) +async def analyze_consistent_amount(): + """Run consistent amount transaction analysis.""" + start_time = time.time() + try: + check_data_loaded() + logger.info("Starting consistent amount analysis...") + data = state.pipeline.run_consistent_amount_analysis() + logger.info(f"Consistent amount analysis completed.
Found {len(data)} matches") + response = AnalysisResponse( + message="Consistent amount analysis completed successfully", + data={"count": len(data)} + ) + logger.info(f"Consistent amount analysis endpoint completed in {time.time() - start_time:.2f} seconds") + return response + except Exception as e: + logger.error(f"Error in consistent amount analysis: {str(e)}") + logger.info(f"Consistent amount analysis endpoint failed after {time.time() - start_time:.2f} seconds") + raise e if isinstance(e, HTTPException) else HTTPException(status_code=500, detail=str(e)) # pass an HTTPException through unchanged; only unexpected errors become a 500 + + + + +@router.post("/transaction-type", response_model=AnalysisResponse) +async def analyze_transaction_type(): + """Run transaction type analysis.""" + start_time = time.time() + try: + check_data_loaded() + logger.info("Starting transaction type analysis...") + data = state.pipeline.run_transaction_type_analysis() + logger.info(f"Transaction type analysis completed. Found {len(data)} matches") + response = AnalysisResponse( + message="Transaction type analysis completed successfully", + data={"count": len(data)} + ) + logger.info(f"Transaction type analysis endpoint completed in {time.time() - start_time:.2f} seconds") + return response + except Exception as e: + logger.error(f"Error in transaction type analysis: {str(e)}") + logger.info(f"Transaction type analysis endpoint failed after {time.time() - start_time:.2f} seconds") + raise e if isinstance(e, HTTPException) else HTTPException(status_code=500, detail=str(e)) # pass an HTTPException through unchanged; only unexpected errors become a 500 + diff --git a/app/salary_analytics/routes/load.py b/app/salary_analytics/routes/load.py new file mode 100644 index 0000000..ac6802c --- /dev/null +++ b/app/salary_analytics/routes/load.py @@ -0,0 +1,74 @@ +from fastapi import APIRouter, HTTPException, UploadFile, File +from app.salary_analytics.core.state import state +from app.utils.logger import logger +import tempfile, os, time +from typing import Optional + +router = APIRouter() + + + +@router.post("/load-data") +async def load_data(source: str = "db", file: Optional[UploadFile] = File(None)): + """ + Load data
from either database or CSV file. + + Args: + source (str): Source of data ('db' or 'csv') + file (UploadFile, optional): CSV file to load (required if source is 'csv') + + Returns: + dict: Status of data loading + """ + start_time = time.time() + try: + if source not in ['db', 'csv']: + logger.error(f"Invalid source: {source}") + logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'") + + if source == 'csv' and not file: + logger.error("No file provided for CSV source") + logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") + + if source == 'csv': + # Save uploaded file temporarily + with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file: + content = await file.read() + temp_file.write(content) + temp_file_path = temp_file.name + + try: + success = state.pipeline.load_data(source='csv', file_path=temp_file_path) + finally: + # Clean up temporary file + os.unlink(temp_file_path) + else: + success = state.pipeline.load_data(source='db') + + if not success: + logger.error("Failed to load data") + logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=500, detail="Failed to load data") + + response = { + "status": "success", + "message": f"Successfully loaded {len(state.pipeline.df)} rows of data", + "columns": state.pipeline.df.columns.tolist(), + "row_count": len(state.pipeline.df) + } + logger.info(f"Load data endpoint completed in {time.time() - start_time:.2f} seconds") + return response + except Exception as e: + logger.error(f"Error loading data: {str(e)}") + logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") + raise e if isinstance(e, HTTPException) else HTTPException(status_code=500, detail=str(e)) # the 400s raised above must reach the client as 400, not be re-wrapped as 500 + + +@router.post("/load-data-with-file")
+async def get_file_if_csv(source: str, file: Optional[UploadFile] = File(None)): # NOTE(review): the decorator just before this def registers it as POST /load-data-with-file, but the docstring calls it a dependency and it returns the upload object itself - confirm it is meant to be a route + """Dependency to handle file upload only when source is csv.""" + if source == 'csv' and not file: + raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") + return file diff --git a/app/salary_analytics/routes/pipeline.py b/app/salary_analytics/routes/pipeline.py new file mode 100644 index 0000000..db08518 --- /dev/null +++ b/app/salary_analytics/routes/pipeline.py @@ -0,0 +1,292 @@ +from fastapi import APIRouter, HTTPException +from app.salary_analytics.services.main import SalaryAnalyticsPipeline +from app.salary_analytics.helpers.response_helpers import AnalysisResponse, BatchResponse +from app.salary_analytics.helpers.data_checks import check_data_loaded +from app.salary_analytics.services.data_loader import DataLoader +from app.salary_analytics.core.state import state +from app.models.db_operations import DatabaseOperations +from app.config import OUTPUT_PATHS, TABLE_NAME +from app.utils.logger import logger +from typing import Optional, List, Union +from sqlalchemy import text +from datetime import datetime +import pandas as pd, os, tempfile, time +from typing import Optional, Union +from fastapi import UploadFile, File + +router = APIRouter() + + +@router.post("/run/pipeline", response_model=AnalysisResponse) +async def run_full_pipeline(): + """Run the complete salary analytics pipeline.""" + start_time = time.time() + try: + check_data_loaded() + logger.info("Starting full pipeline...") + success = state.pipeline.run_full_pipeline() + if not success: + logger.error("Pipeline failed") + logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=500, detail="Pipeline failed") + + logger.info("Pipeline completed successfully") + response = AnalysisResponse( + message="Pipeline completed successfully" + ) + logger.info(f"Full pipeline endpoint completed in {time.time() -
start_time:.2f} seconds") + return response + except Exception as e: + logger.error(f"Error in pipeline: {str(e)}") + logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds") + raise e if isinstance(e, HTTPException) else HTTPException(status_code=500, detail=str(e)) # the "Pipeline failed" HTTPException raised above keeps its own detail instead of being re-wrapped + + +@router.post("/run/streaming-pipeline", response_model=List[BatchResponse]) +async def run_streaming_pipeline( + source: str = "db", + batch_size: int = 10000, + file: Optional[Union[UploadFile, str]] = File(None) +): + """ + Run the complete salary analytics pipeline in batches. + + Args: + source (str): Source of data ('db' or 'csv') + batch_size (int): Number of rows to process in each batch + file (UploadFile, optional): CSV file to load (required if source is 'csv') + + Returns: + List[BatchResponse]: List of responses for each batch processed + """ + start_time = time.time() + try: + if source not in ['db', 'csv']: + logger.error(f"Invalid source: {source}") + logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'") + + if source == 'csv' and not file: + logger.error("No file provided for CSV source") + logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") + + # Initialize data loader + state.data_loader = DataLoader() + state.data_loader.chunk_size = batch_size + + # Create output directory for batch results + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + batch_output_dir = os.path.join(os.path.dirname(OUTPUT_PATHS['final_table']), f"batch_results_{timestamp}") + os.makedirs(batch_output_dir, exist_ok=True) + + # Initialize database operations + if not state.data_loader.connect(): + logger.error("Failed to connect to database") + logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") + raise
HTTPException(status_code=500, detail="Failed to connect to database") + + db_ops = DatabaseOperations(state.data_loader.engine) + if not db_ops.create_batch_results_table(): + logger.error("Failed to create batch results table") + logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=500, detail="Failed to create batch results table") + + responses = [] + batch_number = 0 + batch_start_time = time.time() + + def preprocess_chunk(chunk): + """Preprocess a chunk of data with the same logic as DataLoader.""" + # Convert dates + chunk['trx_start_date'] = pd.to_datetime(chunk['trx_start_date']) + chunk['trx_end_date'] = pd.to_datetime(chunk['trx_end_date']) + + # Rename columns + chunk = chunk.rename(columns={ + 'd1': 'trx_type', + 'd2': 'trx_subtype', + 'd3': 'initiated_by', + 'd4': 'customer_id' + }) + + chunk = chunk.dropna() + + return chunk + + if source == 'csv': + # Save uploaded file temporarily + with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file: + content = await file.read() + temp_file.write(content) + temp_file_path = temp_file.name + + try: + # Process CSV in chunks + for chunk in pd.read_csv(temp_file_path, chunksize=batch_size): + batch_number += 1 + logger.info(f"Processing batch {batch_number}") + + # Preprocess chunk + chunk = preprocess_chunk(chunk) + + # Run pipeline on chunk + state.pipeline = SalaryAnalyticsPipeline() + state.pipeline.df = chunk + + try: + batch_start_time = time.time() + # Run analyses + state.pipeline.run_keyword_analysis() + state.pipeline.run_consistent_amount_analysis() + state.pipeline.run_transaction_type_analysis() + + # Generate reports + reports = state.pipeline.generate_salary_earner_reports() + + # Add batch metadata to results + results_df = reports['final_table'].copy() + results_df['batch_number'] = batch_number + results_df['total_batches'] = -1 # Unknown for CSV + results_df['processed_at'] = datetime.now() + + # 
Save batch results to CSV + batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv") + results_df.to_csv(batch_results_path, index=False) + + # Save to database + db_ops.save_batch_to_db( + batch_number=batch_number, + total_batches=-1, # Unknown for CSV + results_df=results_df, + status="success" + ) + + logger.info(f"Batch {batch_number} processed in {time.time() - batch_start_time:.2f} seconds") + + responses.append(BatchResponse( + batch_number=batch_number, + total_batches=-1, # Unknown for CSV + processed_rows=len(chunk), + results_path=batch_results_path, + message=f"Successfully processed batch {batch_number}" + )) + except Exception as e: + error_message = str(e) + logger.error(f"Error processing batch {batch_number}: {error_message}") + + # Save error to database + db_ops.save_batch_to_db( + batch_number=batch_number, + total_batches=-1, + results_df=pd.DataFrame(), # Empty DataFrame for error case + status="error" + ) + + responses.append(BatchResponse( + batch_number=batch_number, + total_batches=-1, + processed_rows=len(chunk), + results_path="", + message=f"Error processing batch {batch_number}: {error_message}" + )) + finally: + # Clean up temporary file + os.unlink(temp_file_path) + else: + # Process database in chunks + if not state.data_loader.connect(): + raise HTTPException(status_code=500, detail="Failed to connect to database") + + # Get total row count + with state.data_loader.engine.connect() as conn: + count_query = text(f"SELECT COUNT(*) FROM {TABLE_NAME}") + total_rows = conn.execute(count_query).scalar() + + total_batches = (total_rows + batch_size - 1) // batch_size + offset = 0 + + while offset < total_rows: + batch_number += 1 + logger.info(f"Processing batch {batch_number} of {total_batches}") + + # Load chunk from database + query = f"SELECT * FROM {TABLE_NAME} LIMIT {batch_size} OFFSET {offset}" + chunk = pd.read_sql(query, state.data_loader.engine) + + if chunk.empty: + break + + # Preprocess chunk 
+ chunk = preprocess_chunk(chunk) + + # Run pipeline on chunk + state.pipeline = SalaryAnalyticsPipeline() # fresh pipeline per batch, stored on state as in the CSV branch (was bound to an unused local, so state.pipeline was never replaced) + state.pipeline.df = chunk + + try: + batch_start_time = time.time() + # Run analyses + state.pipeline.run_keyword_analysis() + state.pipeline.run_consistent_amount_analysis() + state.pipeline.run_transaction_type_analysis() + + # Generate reports + reports = state.pipeline.generate_salary_earner_reports() + + # Add batch metadata to results + results_df = reports['final_table'].copy() + results_df['batch_number'] = batch_number + results_df['total_batches'] = total_batches + results_df['processed_at'] = datetime.now() + + # Save batch results to CSV + batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv") + results_df.to_csv(batch_results_path, index=False) + + # Save to database + db_ops.save_batch_to_db( + batch_number=batch_number, + total_batches=total_batches, + results_df=results_df, + status="success" + ) + + logger.info(f"Batch {batch_number} of {total_batches} processed in {time.time() - batch_start_time:.2f} seconds") + + responses.append(BatchResponse( + batch_number=batch_number, + total_batches=total_batches, + processed_rows=len(chunk), + results_path=batch_results_path, + message=f"Successfully processed batch {batch_number} of {total_batches}" + )) + except Exception as e: + error_message = str(e) + logger.error(f"Error processing batch {batch_number}: {error_message}") + + # Save error to database + db_ops.save_batch_to_db( + batch_number=batch_number, + total_batches=total_batches, + results_df=pd.DataFrame(), # Empty DataFrame for error case + status="error" + ) + + responses.append(BatchResponse( + batch_number=batch_number, + total_batches=total_batches, + processed_rows=len(chunk), + results_path="", + message=f"Error processing batch {batch_number}: {error_message}" + )) + + offset += batch_size + + logger.info(f"Streaming pipeline endpoint completed in {time.time() - start_time:.2f} seconds") + return
responses + except Exception as e: + logger.error(f"Error in streaming pipeline: {str(e)}") + logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=500, detail=str(e)) + \ No newline at end of file diff --git a/app/salary_analytics/routes/reports.py b/app/salary_analytics/routes/reports.py new file mode 100644 index 0000000..24f9477 --- /dev/null +++ b/app/salary_analytics/routes/reports.py @@ -0,0 +1,78 @@ +from fastapi import APIRouter, HTTPException, BackgroundTasks +from fastapi.responses import FileResponse +from app.salary_analytics.helpers.response_helpers import AnalysisResponse +from app.salary_analytics.helpers.data_checks import check_data_loaded +from app.salary_analytics.core.state import state +from app.config import OUTPUT_PATHS +from app.utils.logger import logger +import os, time + +router = APIRouter() + + +@router.post("/generate/reports", response_model=AnalysisResponse) +async def generate_reports(background_tasks: BackgroundTasks): + """Generate salary earner reports.""" + start_time = time.time() + try: + check_data_loaded() + logger.info("Starting report generation...") + reports = state.pipeline.generate_salary_earner_reports() + logger.info("Reports generated successfully") + response = AnalysisResponse( + message="Reports generated successfully", + data={ + "verified_salary_earners": len(reports['final_table']), + "likely_salary_earners": len(reports['likely_salary_earner']), + "high_earners": reports['total_high_earners'] + } + ) + logger.info(f"Report generation endpoint completed in {time.time() - start_time:.2f} seconds") + return response + except Exception as e: + logger.error(f"Error in report generation: {str(e)}") + logger.info(f"Report generation endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=500, detail=str(e)) + + + +@router.get("/download/{report_type}") +async def download_report(report_type: str): + 
"""Download generated reports.""" + start_time = time.time() + try: + check_data_loaded() + logger.info(f"Attempting to download report: {report_type}") + file_paths = { + "high_earners": OUTPUT_PATHS["high_earner_details"], + "likely_earners": OUTPUT_PATHS["likely_salary_earner"], + "final_table": OUTPUT_PATHS["final_table"], + "consistent_plot": OUTPUT_PATHS["consistent_earners_plot"], + "inconsistent_plot": OUTPUT_PATHS["inconsistent_earners_plot"], + "hypothesis_plot": OUTPUT_PATHS["hypothesis_overlap_plot"] + } + + if report_type not in file_paths: + logger.error(f"Report type not found: {report_type}") + logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=404, detail="Report type not found") + + file_path = file_paths[report_type] + if not os.path.exists(file_path): + logger.error(f"Report file not found: {file_path}") + logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=404, detail="Report file not found") + + logger.info(f"Successfully found report file: {file_path}") + response = FileResponse( + path=file_path, + filename=os.path.basename(file_path), + media_type="application/octet-stream" + ) + logger.info(f"Download endpoint completed in {time.time() - start_time:.2f} seconds") + return response + except Exception as e: + logger.error(f"Error downloading report: {str(e)}") + logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") + raise HTTPException(status_code=500, detail=str(e)) + diff --git a/app/analytics/services/__init__.py b/app/salary_analytics/services/__init__.py similarity index 100% rename from app/analytics/services/__init__.py rename to app/salary_analytics/services/__init__.py diff --git a/app/analytics/services/consistent_amount_analyzer.py b/app/salary_analytics/services/consistent_amount_analyzer.py similarity index 97% rename from 
app/analytics/services/consistent_amount_analyzer.py rename to app/salary_analytics/services/consistent_amount_analyzer.py index 3b74afc..119adae 100644 --- a/app/analytics/services/consistent_amount_analyzer.py +++ b/app/salary_analytics/services/consistent_amount_analyzer.py @@ -3,7 +3,7 @@ Consistent amount transaction analysis module. """ import pandas as pd -from .config import MODEL_CONFIG +from app.config import MODEL_CONFIG class ConsistentAmountAnalyzer: def __init__(self, df): diff --git a/app/analytics/services/data_loader.py b/app/salary_analytics/services/data_loader.py similarity index 98% rename from app/analytics/services/data_loader.py rename to app/salary_analytics/services/data_loader.py index e11b0e4..3aadd99 100644 --- a/app/analytics/services/data_loader.py +++ b/app/salary_analytics/services/data_loader.py @@ -7,7 +7,7 @@ import pandas as pd from datetime import datetime import logging import os -from .config import DB_CONFIG, TABLE_NAME +from app.config import DB_CONFIG, TABLE_NAME from app.utils.logger import logger class DataLoader: diff --git a/app/analytics/services/keyword_analyzer.py b/app/salary_analytics/services/keyword_analyzer.py similarity index 96% rename from app/analytics/services/keyword_analyzer.py rename to app/salary_analytics/services/keyword_analyzer.py index 5c45b0f..b9db479 100644 --- a/app/analytics/services/keyword_analyzer.py +++ b/app/salary_analytics/services/keyword_analyzer.py @@ -4,7 +4,7 @@ Keyword-based salary transaction analysis module. 
import re import pandas as pd -from .config import SALARY_KEYWORDS +from app.config import SALARY_KEYWORDS class KeywordAnalyzer: def __init__(self, df): diff --git a/app/analytics/services/main.py b/app/salary_analytics/services/main.py similarity index 100% rename from app/analytics/services/main.py rename to app/salary_analytics/services/main.py diff --git a/app/analytics/services/salary_earner_analyzer.py b/app/salary_analytics/services/salary_earner_analyzer.py similarity index 99% rename from app/analytics/services/salary_earner_analyzer.py rename to app/salary_analytics/services/salary_earner_analyzer.py index f17d4ce..7302cbc 100644 --- a/app/analytics/services/salary_earner_analyzer.py +++ b/app/salary_analytics/services/salary_earner_analyzer.py @@ -6,7 +6,7 @@ import pandas as pd import matplotlib.pyplot as plt from matplotlib_venn import venn3 from datetime import datetime, timedelta -from .config import MODEL_CONFIG, OUTPUT_PATHS +from app.config import MODEL_CONFIG, OUTPUT_PATHS from app.utils.logger import logger class SalaryEarnerAnalyzer: diff --git a/app/analytics/services/salary_predictor.py b/app/salary_analytics/services/salary_predictor.py similarity index 99% rename from app/analytics/services/salary_predictor.py rename to app/salary_analytics/services/salary_predictor.py index b74dfd5..19fc531 100644 --- a/app/analytics/services/salary_predictor.py +++ b/app/salary_analytics/services/salary_predictor.py @@ -9,7 +9,7 @@ from sklearn.preprocessing import StandardScaler, OneHotEncoder from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score from joblib import dump -from .config import OUTPUT_PATHS +from app.config import OUTPUT_PATHS class SalaryPredictor: def __init__(self, df): diff --git a/app/analytics/services/transaction_type_analyzer.py b/app/salary_analytics/services/transaction_type_analyzer.py similarity index 93% rename from 
app/analytics/services/transaction_type_analyzer.py rename to app/salary_analytics/services/transaction_type_analyzer.py index 2d1249c..0597aea 100644 --- a/app/analytics/services/transaction_type_analyzer.py +++ b/app/salary_analytics/services/transaction_type_analyzer.py @@ -3,7 +3,7 @@ Transaction type analysis module. """ import pandas as pd -from .config import MODEL_CONFIG +from app.config import MODEL_CONFIG class TransactionTypeAnalyzer: def __init__(self, df): -- 2.34.1 From 00d89e460f0f55a33b6aaa6c1a8f9fa6a3e517ab Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Tue, 9 Sep 2025 11:28:27 +0100 Subject: [PATCH 05/19] [add]: docker config --- Dockerfile | 7 +++++-- app.log | 31 +++++++++++++++++++++++++++++++ app/salary_analytics/__init__.py | 2 -- run.py | 4 ++++ 4 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 app.log create mode 100644 run.py diff --git a/Dockerfile b/Dockerfile index 5b599cb..d07e212 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,9 +15,12 @@ RUN pip install -r requirements.txt RUN mkdir -p output/csv output/plots output/models -ENV FLASK_APP=wsgi.py +# ENV FLASK_APP=wsgi.py + +ENV FLASK_APP=run.py ENV FLASK_RUN_HOST=0.0.0.0 EXPOSE 8000 -CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:8000", "wsgi:wsgi_app"] \ No newline at end of file +# CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:8000", "wsgi:wsgi_app"] +CMD ["uvicorn", "run:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] \ No newline at end of file diff --git a/app.log b/app.log new file mode 100644 index 0000000..5c264fd --- /dev/null +++ b/app.log @@ -0,0 +1,31 @@ +2025-09-09 10:23:31,651 - INFO - generated new fontManager +2025-09-09 10:23:38,494 - INFO - Initializing pipeline... +2025-09-09 10:23:38,496 - INFO - [2025-09-09 10:23:38] Detecting salary... +2025-09-09 10:23:38,497 - INFO - Started autonomous salary detection loop. 
+2025-09-09 10:23:38,509 - INFO - Server running on hostname: 1c3f3ceb2429 +2025-09-09 10:23:38,511 - INFO - Server IP address: 172.25.0.2 +2025-09-09 10:23:38,523 - INFO - Server is accessible at: +2025-09-09 10:23:38,525 - INFO - - http://localhost:8000 +2025-09-09 10:23:38,527 - INFO - - http://127.0.0.1:8000 +2025-09-09 10:23:38,528 - INFO - - http://172.25.0.2:8000 +2025-09-09 10:23:38,529 - INFO - Pipeline initialized successfully +2025-09-09 10:23:41,368 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 10:23:41,369 - INFO - [2025-09-09 10:23:41] Salary detection complete +2025-09-09 10:25:41,371 - INFO - [2025-09-09 10:25:41] Detecting salary... +2025-09-09 10:25:42,098 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 10:25:42,100 - INFO - [2025-09-09 10:25:42] Salary detection complete +2025-09-09 10:27:03,741 - INFO - Shutting down Salary Analytics API... 
diff --git a/app/salary_analytics/__init__.py b/app/salary_analytics/__init__.py index babbabb..62f8ac0 100644 --- a/app/salary_analytics/__init__.py +++ b/app/salary_analytics/__init__.py @@ -36,5 +36,3 @@ def create_app() -> FastAPI: return app - -app = create_app() diff --git a/run.py b/run.py new file mode 100644 index 0000000..c4932c4 --- /dev/null +++ b/run.py @@ -0,0 +1,4 @@ +import os +from app.salary_analytics import create_app + +app = create_app() \ No newline at end of file -- 2.34.1 From ebe40cda198aa39c97c5052bab2fcf5d103914ff Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Tue, 9 Sep 2025 12:06:37 +0100 Subject: [PATCH 06/19] [add]: database configuration fix --- Dockerfile | 2 +- app.log | 182 +++++++++++++++ app/api.py | 601 -------------------------------------------------- app/config.py | 31 ++- 4 files changed, 204 insertions(+), 612 deletions(-) delete mode 100644 app/api.py diff --git a/Dockerfile b/Dockerfile index d07e212..c74ecdb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,7 +17,7 @@ RUN mkdir -p output/csv output/plots output/models # ENV FLASK_APP=wsgi.py -ENV FLASK_APP=run.py +ENV FLASK_APP=run.py. ENV FLASK_RUN_HOST=0.0.0.0 EXPOSE 8000 diff --git a/app.log b/app.log index 5c264fd..4282a8a 100644 --- a/app.log +++ b/app.log @@ -29,3 +29,185 @@ 2025-09-09 10:25:42,100 - INFO - [2025-09-09 10:25:42] Salary detection complete 2025-09-09 10:27:03,741 - INFO - Shutting down Salary Analytics API... +2025-09-09 10:29:59,503 - INFO - Initializing pipeline... +2025-09-09 10:29:59,506 - INFO - [2025-09-09 10:29:59] Detecting salary... +2025-09-09 10:29:59,506 - INFO - Started autonomous salary detection loop. 
+2025-09-09 10:29:59,534 - INFO - Server running on hostname: 1c3f3ceb2429 +2025-09-09 10:29:59,535 - INFO - Server IP address: 172.25.0.2 +2025-09-09 10:29:59,535 - INFO - Server is accessible at: +2025-09-09 10:29:59,536 - INFO - - http://localhost:8000 +2025-09-09 10:29:59,537 - INFO - - http://127.0.0.1:8000 +2025-09-09 10:29:59,539 - INFO - - http://172.25.0.2:8000 +2025-09-09 10:29:59,541 - INFO - Pipeline initialized successfully +2025-09-09 10:30:04,484 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 10:30:04,485 - INFO - [2025-09-09 10:30:04] Salary detection complete +2025-09-09 10:30:47,978 - INFO - Shutting down Salary Analytics API... +2025-09-09 10:41:41,451 - INFO - Initializing pipeline... +2025-09-09 10:41:41,456 - INFO - [2025-09-09 10:41:41] Detecting salary... +2025-09-09 10:41:41,457 - INFO - Started autonomous salary detection loop. +2025-09-09 10:41:41,481 - INFO - Server running on hostname: 1c3f3ceb2429 +2025-09-09 10:41:41,485 - INFO - Server IP address: 172.25.0.2 +2025-09-09 10:41:41,486 - INFO - Server is accessible at: +2025-09-09 10:41:41,486 - INFO - - http://localhost:8000 +2025-09-09 10:41:41,488 - INFO - - http://127.0.0.1:8000 +2025-09-09 10:41:41,490 - INFO - - http://172.25.0.2:8000 +2025-09-09 10:41:41,491 - INFO - Pipeline initialized successfully +2025-09-09 10:41:42,431 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 10:41:42,432 - INFO - [2025-09-09 10:41:42] Salary detection complete +2025-09-09 10:43:42,431 - INFO - [2025-09-09 10:43:42] Detecting salary... 
+2025-09-09 10:43:43,092 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 10:43:43,093 - INFO - [2025-09-09 10:43:43] Salary detection complete +2025-09-09 10:45:43,093 - INFO - [2025-09-09 10:45:43] Detecting salary... +2025-09-09 10:45:43,818 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 10:45:43,819 - INFO - [2025-09-09 10:45:43] Salary detection complete +2025-09-09 10:47:16,454 - INFO - Shutting down Salary Analytics API... +2025-09-09 10:47:30,172 - INFO - Initializing pipeline... +2025-09-09 10:47:30,174 - INFO - [2025-09-09 10:47:30] Detecting salary... +2025-09-09 10:47:30,175 - INFO - Started autonomous salary detection loop. +2025-09-09 10:47:30,185 - INFO - Server running on hostname: 1c3f3ceb2429 +2025-09-09 10:47:30,188 - INFO - Server IP address: 172.25.0.2 +2025-09-09 10:47:30,188 - INFO - Server is accessible at: +2025-09-09 10:47:30,189 - INFO - - http://localhost:8000 +2025-09-09 10:47:30,190 - INFO - - http://127.0.0.1:8000 +2025-09-09 10:47:30,191 - INFO - - http://172.25.0.2:8000 +2025-09-09 10:47:30,191 - INFO - Pipeline initialized successfully +2025-09-09 10:47:31,032 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 10:47:31,033 - INFO - [2025-09-09 10:47:31] Salary detection complete +2025-09-09 10:47:38,286 - INFO - Shutting down Salary Analytics API... 
+2025-09-09 10:47:47,645 - INFO - generated new fontManager +2025-09-09 10:48:19,231 - INFO - generated new fontManager +2025-09-09 10:48:24,426 - INFO - Initializing pipeline... +2025-09-09 10:48:24,429 - INFO - [2025-09-09 10:48:24] Detecting salary... +2025-09-09 10:48:24,429 - INFO - Started autonomous salary detection loop. +2025-09-09 10:48:24,441 - INFO - Server running on hostname: 349f9fd0c78b +2025-09-09 10:48:24,442 - INFO - Server IP address: 172.25.0.2 +2025-09-09 10:48:24,444 - INFO - Server is accessible at: +2025-09-09 10:48:24,445 - INFO - - http://localhost:8000 +2025-09-09 10:48:24,448 - INFO - - http://127.0.0.1:8000 +2025-09-09 10:48:24,450 - INFO - - http://172.25.0.2:8000 +2025-09-09 10:48:24,451 - INFO - Pipeline initialized successfully +2025-09-09 10:48:25,094 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 10:48:25,095 - INFO - [2025-09-09 10:48:25] Salary detection complete +2025-09-09 10:49:03,380 - INFO - Shutting down Salary Analytics API... +2025-09-09 10:49:18,345 - INFO - Initializing pipeline... +2025-09-09 10:49:18,346 - INFO - [2025-09-09 10:49:18] Detecting salary... +2025-09-09 10:49:18,347 - INFO - Started autonomous salary detection loop. +2025-09-09 10:49:18,352 - INFO - Server running on hostname: 349f9fd0c78b +2025-09-09 10:49:18,353 - INFO - Server IP address: 172.25.0.2 +2025-09-09 10:49:18,353 - INFO - Server is accessible at: +2025-09-09 10:49:18,354 - INFO - - http://localhost:8000 +2025-09-09 10:49:18,355 - INFO - - http://127.0.0.1:8000 +2025-09-09 10:49:18,365 - INFO - - http://172.25.0.2:8000 +2025-09-09 10:49:18,366 - INFO - Pipeline initialized successfully +2025-09-09 10:50:37,994 - INFO - generated new fontManager +2025-09-09 10:50:45,235 - INFO - Initializing pipeline... 
+2025-09-09 10:50:45,238 - INFO - [2025-09-09 10:50:45] Detecting salary... +2025-09-09 10:50:45,238 - INFO - Started autonomous salary detection loop. +2025-09-09 10:50:45,244 - INFO - Server running on hostname: 087fb63cb9f0 +2025-09-09 10:50:45,244 - INFO - Server IP address: 172.25.0.2 +2025-09-09 10:50:45,245 - INFO - Server is accessible at: +2025-09-09 10:50:45,245 - INFO - - http://localhost:8000 +2025-09-09 10:50:45,246 - INFO - - http://127.0.0.1:8000 +2025-09-09 10:50:45,247 - INFO - - http://172.25.0.2:8000 +2025-09-09 10:50:45,248 - INFO - Pipeline initialized successfully +2025-09-09 10:50:46,400 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 10:50:46,401 - INFO - [2025-09-09 10:50:46] Salary detection complete +2025-09-09 10:51:51,570 - INFO - Shutting down Salary Analytics API... +2025-09-09 11:01:38,522 - INFO - generated new fontManager +2025-09-09 11:01:45,459 - INFO - Initializing pipeline... +2025-09-09 11:01:45,463 - INFO - [2025-09-09 11:01:45] Detecting salary... +2025-09-09 11:01:45,464 - INFO - Started autonomous salary detection loop. +2025-09-09 11:01:45,483 - INFO - Server running on hostname: 5d4fdd4232a7 +2025-09-09 11:01:45,484 - INFO - Server IP address: 172.25.0.2 +2025-09-09 11:01:45,485 - INFO - Server is accessible at: +2025-09-09 11:01:45,491 - INFO - - http://localhost:8000 +2025-09-09 11:01:45,493 - INFO - - http://127.0.0.1:8000 +2025-09-09 11:01:45,495 - INFO - - http://172.25.0.2:8000 +2025-09-09 11:01:45,496 - INFO - Pipeline initialized successfully +2025-09-09 11:02:00,358 - INFO - Shutting down Salary Analytics API... +2025-09-09 11:02:15,204 - INFO - Initializing pipeline... +2025-09-09 11:02:15,208 - INFO - [2025-09-09 11:02:15] Detecting salary... +2025-09-09 11:02:15,208 - INFO - Started autonomous salary detection loop. 
+2025-09-09 11:02:15,395 - INFO - Server running on hostname: 5d4fdd4232a7 +2025-09-09 11:02:15,397 - INFO - Server IP address: 172.25.0.2 +2025-09-09 11:02:15,415 - INFO - Server is accessible at: +2025-09-09 11:02:15,417 - INFO - - http://localhost:8000 +2025-09-09 11:02:15,417 - INFO - - http://127.0.0.1:8000 +2025-09-09 11:02:15,418 - INFO - - http://172.25.0.2:8000 +2025-09-09 11:02:15,419 - INFO - Pipeline initialized successfully +2025-09-09 11:04:18,780 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 500, response: +
+Internal Server Error
+ + + + +2025-09-09 11:04:18,781 - INFO - [2025-09-09 11:04:18] Salary detection complete +2025-09-09 11:04:41,264 - INFO - Initializing SalaryAnalyticsPipeline +2025-09-09 11:04:41,265 - INFO - Starting data loading process +2025-09-09 11:04:41,265 - INFO - No database connection. Attempting to connect... +2025-09-09 11:04:41,266 - INFO - Attempting to connect to database... +2025-09-09 11:05:42,201 - ERROR - Error connecting to database: (psycopg2.OperationalError) connection to server at "dev-data.simbrellang.net" (209.195.2.27), port 1521 failed: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. + +(Background on this error at: https://sqlalche.me/e/20/e3q8) +2025-09-09 11:05:42,202 - ERROR - Failed to establish database connection +2025-09-09 11:05:42,202 - ERROR - Failed to load data +2025-09-09 11:05:42,203 - ERROR - Failed to load data +2025-09-09 11:05:42,203 - INFO - Load data endpoint failed after 60.94 seconds +2025-09-09 11:05:42,204 - ERROR - Error loading data: 500: Failed to load data +2025-09-09 11:05:42,206 - INFO - Load data endpoint failed after 60.94 seconds +2025-09-09 11:06:18,783 - INFO - [2025-09-09 11:06:18] Detecting salary... diff --git a/app/api.py b/app/api.py deleted file mode 100644 index 3a65527..0000000 --- a/app/api.py +++ /dev/null @@ -1,601 +0,0 @@ -""" -FastAPI application for salary analytics. 
-""" - -from fastapi import FastAPI, HTTPException, BackgroundTasks, UploadFile, File, Depends -from fastapi.responses import FileResponse -import os -import socket -from typing import Optional, List, Union -import pandas as pd -import tempfile -from datetime import datetime -from sqlalchemy import text -import warnings -import time -from app.salary_analytics.services.main import SalaryAnalyticsPipeline -from app.config import OUTPUT_PATHS, TABLE_NAME -from app.salary_analytics.services.data_loader import DataLoader -from app.salary_analytics.middlewares.middleware import add_middlewares -from app.models.db_operations import DatabaseOperations -from app.salary_analytics.integrations.salary_detect import SalaryDetect -from app.utils.logger import logger -from app.salary_analytics.helpers.response_helpers import AnalysisResponse, BatchResponse - - -# Suppress warnings -warnings.filterwarnings('ignore', category=RuntimeWarning, module='numpy') -pd.options.mode.chained_assignment = None - -app = FastAPI( - title="Salary Analytics API", - description="API for analyzing and predicting salary patterns from transaction data", - version="1.0.0" -) - -# Add CORS middleware -add_middlewares(app) - -# Global pipeline instance -pipeline = SalaryAnalyticsPipeline() - -# Global variables to store loaded data and models -data_loader = None -df = None -salary_predictor = None -salary_earner_analyzer = None - -# salary_detect = SalaryDetect() - - - -# def check_data_loaded(): -# """Check if data is loaded before running analytics.""" -# if pipeline.df is None: -# raise HTTPException( -# status_code=400, -# detail="No data loaded. Please load data first using the /load-data endpoint." 
-# ) - -# @app.on_event("startup") -# async def startup_event(): -# """Initialize the pipeline on startup.""" -# try: -# logger.info("Initializing pipeline...") - -# # Start autonomous salary detection loop -# salary_detect.start() -# logger.info("Started autonomous salary detection loop.") - -# # Print network information -# hostname = socket.gethostname() -# ip_address = socket.gethostbyname(hostname) -# logger.info(f"Server running on hostname: {hostname}") -# logger.info(f"Server IP address: {ip_address}") -# logger.info(f"Server is accessible at:") -# logger.info(f"- http://localhost:8000") -# logger.info(f"- http://127.0.0.1:8000") -# logger.info(f"- http://{ip_address}:8000") -# logger.info("Pipeline initialized successfully") -# except Exception as e: -# logger.error(f"Error during startup: {str(e)}") -# raise - - - -# @app.get("/") -# async def root(): -# """Root endpoint.""" -# start_time = time.time() -# logger.info("Root endpoint accessed") -# response = {"message": "Welcome to Salary Analytics API"} -# logger.info(f"Root endpoint completed in {time.time() - start_time:.2f} seconds") -# return response - - - -# @app.get("/health") -# async def health_check(): -# """Health check endpoint.""" -# start_time = time.time() -# logger.info("Health check endpoint accessed") -# response = {"status": "healthy"} -# logger.info(f"Health check completed in {time.time() - start_time:.2f} seconds") -# return response - - - -# @app.post("/analyze/keyword", response_model=AnalysisResponse) -# async def analyze_keyword(): -# """Run keyword-based salary transaction analysis.""" -# start_time = time.time() -# try: -# check_data_loaded() -# logger.info("Starting keyword analysis...") -# data = pipeline.run_keyword_analysis() -# logger.info(f"Keyword analysis completed. 
Found {len(data)} matches") -# response = AnalysisResponse( -# message="Keyword analysis completed successfully", -# data={"count": len(data)} -# ) -# logger.info(f"Keyword analysis endpoint completed in {time.time() - start_time:.2f} seconds") -# return response -# except Exception as e: -# logger.error(f"Error in keyword analysis: {str(e)}") -# logger.info(f"Keyword analysis endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=500, detail=str(e)) - - - -# @app.post("/analyze/consistent-amount", response_model=AnalysisResponse) -# async def analyze_consistent_amount(): -# """Run consistent amount transaction analysis.""" -# start_time = time.time() -# try: -# check_data_loaded() -# logger.info("Starting consistent amount analysis...") -# data = pipeline.run_consistent_amount_analysis() -# logger.info(f"Consistent amount analysis completed. Found {len(data)} matches") -# response = AnalysisResponse( -# message="Consistent amount analysis completed successfully", -# data={"count": len(data)} -# ) -# logger.info(f"Consistent amount analysis endpoint completed in {time.time() - start_time:.2f} seconds") -# return response -# except Exception as e: -# logger.error(f"Error in consistent amount analysis: {str(e)}") -# logger.info(f"Consistent amount analysis endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=500, detail=str(e)) - - - -# @app.post("/analyze/transaction-type", response_model=AnalysisResponse) -# async def analyze_transaction_type(): -# """Run transaction type analysis.""" -# start_time = time.time() -# try: -# check_data_loaded() -# logger.info("Starting transaction type analysis...") -# data = pipeline.run_transaction_type_analysis() -# logger.info(f"Transaction type analysis completed. 
Found {len(data)} matches") -# response = AnalysisResponse( -# message="Transaction type analysis completed successfully", -# data={"count": len(data)} -# ) -# logger.info(f"Transaction type analysis endpoint completed in {time.time() - start_time:.2f} seconds") -# return response -# except Exception as e: -# logger.error(f"Error in transaction type analysis: {str(e)}") -# logger.info(f"Transaction type analysis endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=500, detail=str(e)) - - - -# @app.post("/generate/reports", response_model=AnalysisResponse) -# async def generate_reports(background_tasks: BackgroundTasks): -# """Generate salary earner reports.""" -# start_time = time.time() -# try: -# check_data_loaded() -# logger.info("Starting report generation...") -# reports = pipeline.generate_salary_earner_reports() -# logger.info("Reports generated successfully") -# response = AnalysisResponse( -# message="Reports generated successfully", -# data={ -# "verified_salary_earners": len(reports['final_table']), -# "likely_salary_earners": len(reports['likely_salary_earner']), -# "high_earners": reports['total_high_earners'] -# } -# ) -# logger.info(f"Report generation endpoint completed in {time.time() - start_time:.2f} seconds") -# return response -# except Exception as e: -# logger.error(f"Error in report generation: {str(e)}") -# logger.info(f"Report generation endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=500, detail=str(e)) - - - -# @app.post("/train/models", response_model=AnalysisResponse) -# async def train_models(): -# """Train salary prediction models.""" -# start_time = time.time() -# try: -# check_data_loaded() -# logger.info("Starting model training...") -# pipeline.train_salary_prediction_models() -# logger.info("Models trained successfully") -# response = AnalysisResponse( -# message="Models trained successfully" -# ) -# logger.info(f"Model training 
endpoint completed in {time.time() - start_time:.2f} seconds") -# return response -# except Exception as e: -# logger.error(f"Error in model training: {str(e)}") -# logger.info(f"Model training endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=500, detail=str(e)) - - - -# @app.get("/download/{report_type}") -# async def download_report(report_type: str): -# """Download generated reports.""" -# start_time = time.time() -# try: -# check_data_loaded() -# logger.info(f"Attempting to download report: {report_type}") -# file_paths = { -# "high_earners": OUTPUT_PATHS["high_earner_details"], -# "likely_earners": OUTPUT_PATHS["likely_salary_earner"], -# "final_table": OUTPUT_PATHS["final_table"], -# "consistent_plot": OUTPUT_PATHS["consistent_earners_plot"], -# "inconsistent_plot": OUTPUT_PATHS["inconsistent_earners_plot"], -# "hypothesis_plot": OUTPUT_PATHS["hypothesis_overlap_plot"] -# } - -# if report_type not in file_paths: -# logger.error(f"Report type not found: {report_type}") -# logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=404, detail="Report type not found") - -# file_path = file_paths[report_type] -# if not os.path.exists(file_path): -# logger.error(f"Report file not found: {file_path}") -# logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=404, detail="Report file not found") - -# logger.info(f"Successfully found report file: {file_path}") -# response = FileResponse( -# path=file_path, -# filename=os.path.basename(file_path), -# media_type="application/octet-stream" -# ) -# logger.info(f"Download endpoint completed in {time.time() - start_time:.2f} seconds") -# return response -# except Exception as e: -# logger.error(f"Error downloading report: {str(e)}") -# logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds") -# raise 
HTTPException(status_code=500, detail=str(e)) - - - -# @app.post("/run/pipeline", response_model=AnalysisResponse) -# async def run_full_pipeline(): -# """Run the complete salary analytics pipeline.""" -# start_time = time.time() -# try: -# check_data_loaded() -# logger.info("Starting full pipeline...") -# success = pipeline.run_full_pipeline() -# if not success: -# logger.error("Pipeline failed") -# logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=500, detail="Pipeline failed") - -# logger.info("Pipeline completed successfully") -# response = AnalysisResponse( -# message="Pipeline completed successfully" -# ) -# logger.info(f"Full pipeline endpoint completed in {time.time() - start_time:.2f} seconds") -# return response -# except Exception as e: -# logger.error(f"Error in pipeline: {str(e)}") -# logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=500, detail=str(e)) - - - -# @app.post("/load-data") -# async def load_data(source: str = "db", file: Optional[UploadFile] = File(None)): -# """ -# Load data from either database or CSV file. 
- -# Args: -# source (str): Source of data ('db' or 'csv') -# file (UploadFile, optional): CSV file to load (required if source is 'csv') - -# Returns: -# dict: Status of data loading -# """ -# start_time = time.time() -# try: -# if source not in ['db', 'csv']: -# logger.error(f"Invalid source: {source}") -# logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'") - -# if source == 'csv' and not file: -# logger.error("No file provided for CSV source") -# logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") - -# if source == 'csv': -# # Save uploaded file temporarily -# with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file: -# content = await file.read() -# temp_file.write(content) -# temp_file_path = temp_file.name - -# try: -# success = pipeline.load_data(source='csv', file_path=temp_file_path) -# finally: -# # Clean up temporary file -# os.unlink(temp_file_path) -# else: -# success = pipeline.load_data(source='db') - -# if not success: -# logger.error("Failed to load data") -# logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=500, detail="Failed to load data") - -# response = { -# "status": "success", -# "message": f"Successfully loaded {len(pipeline.df)} rows of data", -# "columns": pipeline.df.columns.tolist(), -# "row_count": len(pipeline.df) -# } -# logger.info(f"Load data endpoint completed in {time.time() - start_time:.2f} seconds") -# return response -# except Exception as e: -# logger.error(f"Error loading data: {str(e)}") -# logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=500, detail=str(e)) - -# async def get_file_if_csv(source: str, file: 
Optional[UploadFile] = File(None)): -# """Dependency to handle file upload only when source is csv.""" -# if source == 'csv' and not file: -# raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") -# return file - - - -# @app.post("/run/streaming-pipeline", response_model=List[BatchResponse]) -# async def run_streaming_pipeline( -# source: str = "db", -# batch_size: int = 10000, -# file: Optional[Union[UploadFile, str]] = File(None) -# ): -# """ -# Run the complete salary analytics pipeline in batches. - -# Args: -# source (str): Source of data ('db' or 'csv') -# batch_size (int): Number of rows to process in each batch -# file (UploadFile, optional): CSV file to load (required if source is 'csv') - -# Returns: -# List[BatchResponse]: List of responses for each batch processed -# """ -# start_time = time.time() -# try: -# if source not in ['db', 'csv']: -# logger.error(f"Invalid source: {source}") -# logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'") - -# if source == 'csv' and not file: -# logger.error("No file provided for CSV source") -# logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=400, detail="File must be provided when loading from CSV") - -# # Initialize data loader -# data_loader = DataLoader() -# data_loader.chunk_size = batch_size - -# # Create output directory for batch results -# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") -# batch_output_dir = os.path.join(os.path.dirname(OUTPUT_PATHS['final_table']), f"batch_results_{timestamp}") -# os.makedirs(batch_output_dir, exist_ok=True) - -# # Initialize database operations -# if not data_loader.connect(): -# logger.error("Failed to connect to database") -# logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") -# 
raise HTTPException(status_code=500, detail="Failed to connect to database") - -# db_ops = DatabaseOperations(data_loader.engine) -# if not db_ops.create_batch_results_table(): -# logger.error("Failed to create batch results table") -# logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=500, detail="Failed to create batch results table") - -# responses = [] -# batch_number = 0 -# batch_start_time = time.time() - -# def preprocess_chunk(chunk): -# """Preprocess a chunk of data with the same logic as DataLoader.""" -# # Convert dates -# chunk['trx_start_date'] = pd.to_datetime(chunk['trx_start_date']) -# chunk['trx_end_date'] = pd.to_datetime(chunk['trx_end_date']) - -# # Rename columns -# chunk = chunk.rename(columns={ -# 'd1': 'trx_type', -# 'd2': 'trx_subtype', -# 'd3': 'initiated_by', -# 'd4': 'customer_id' -# }) - -# chunk = chunk.dropna() - -# return chunk - -# if source == 'csv': -# # Save uploaded file temporarily -# with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file: -# content = await file.read() -# temp_file.write(content) -# temp_file_path = temp_file.name - -# try: -# # Process CSV in chunks -# for chunk in pd.read_csv(temp_file_path, chunksize=batch_size): -# batch_number += 1 -# logger.info(f"Processing batch {batch_number}") - -# # Preprocess chunk -# chunk = preprocess_chunk(chunk) - -# # Run pipeline on chunk -# pipeline = SalaryAnalyticsPipeline() -# pipeline.df = chunk - -# try: -# batch_start_time = time.time() -# # Run analyses -# pipeline.run_keyword_analysis() -# pipeline.run_consistent_amount_analysis() -# pipeline.run_transaction_type_analysis() - -# # Generate reports -# reports = pipeline.generate_salary_earner_reports() - -# # Add batch metadata to results -# results_df = reports['final_table'].copy() -# results_df['batch_number'] = batch_number -# results_df['total_batches'] = -1 # Unknown for CSV -# results_df['processed_at'] = 
datetime.now() - -# # Save batch results to CSV -# batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv") -# results_df.to_csv(batch_results_path, index=False) - -# # Save to database -# db_ops.save_batch_to_db( -# batch_number=batch_number, -# total_batches=-1, # Unknown for CSV -# results_df=results_df, -# status="success" -# ) - -# logger.info(f"Batch {batch_number} processed in {time.time() - batch_start_time:.2f} seconds") - -# responses.append(BatchResponse( -# batch_number=batch_number, -# total_batches=-1, # Unknown for CSV -# processed_rows=len(chunk), -# results_path=batch_results_path, -# message=f"Successfully processed batch {batch_number}" -# )) -# except Exception as e: -# error_message = str(e) -# logger.error(f"Error processing batch {batch_number}: {error_message}") - -# # Save error to database -# db_ops.save_batch_to_db( -# batch_number=batch_number, -# total_batches=-1, -# results_df=pd.DataFrame(), # Empty DataFrame for error case -# status="error" -# ) - -# responses.append(BatchResponse( -# batch_number=batch_number, -# total_batches=-1, -# processed_rows=len(chunk), -# results_path="", -# message=f"Error processing batch {batch_number}: {error_message}" -# )) -# finally: -# # Clean up temporary file -# os.unlink(temp_file_path) -# else: -# # Process database in chunks -# if not data_loader.connect(): -# raise HTTPException(status_code=500, detail="Failed to connect to database") - -# # Get total row count -# with data_loader.engine.connect() as conn: -# count_query = text(f"SELECT COUNT(*) FROM {TABLE_NAME}") -# total_rows = conn.execute(count_query).scalar() - -# total_batches = (total_rows + batch_size - 1) // batch_size -# offset = 0 - -# while offset < total_rows: -# batch_number += 1 -# logger.info(f"Processing batch {batch_number} of {total_batches}") - -# # Load chunk from database -# query = f"SELECT * FROM {TABLE_NAME} LIMIT {batch_size} OFFSET {offset}" -# chunk = pd.read_sql(query, 
data_loader.engine) - -# if chunk.empty: -# break - -# # Preprocess chunk -# chunk = preprocess_chunk(chunk) - -# # Run pipeline on chunk -# pipeline = SalaryAnalyticsPipeline() -# pipeline.df = chunk - -# try: -# batch_start_time = time.time() -# # Run analyses -# pipeline.run_keyword_analysis() -# pipeline.run_consistent_amount_analysis() -# pipeline.run_transaction_type_analysis() - -# # Generate reports -# reports = pipeline.generate_salary_earner_reports() - -# # Add batch metadata to results -# results_df = reports['final_table'].copy() -# results_df['batch_number'] = batch_number -# results_df['total_batches'] = total_batches -# results_df['processed_at'] = datetime.now() - -# # Save batch results to CSV -# batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv") -# results_df.to_csv(batch_results_path, index=False) - -# # Save to database -# db_ops.save_batch_to_db( -# batch_number=batch_number, -# total_batches=total_batches, -# results_df=results_df, -# status="success" -# ) - -# logger.info(f"Batch {batch_number} of {total_batches} processed in {time.time() - batch_start_time:.2f} seconds") - -# responses.append(BatchResponse( -# batch_number=batch_number, -# total_batches=total_batches, -# processed_rows=len(chunk), -# results_path=batch_results_path, -# message=f"Successfully processed batch {batch_number} of {total_batches}" -# )) -# except Exception as e: -# error_message = str(e) -# logger.error(f"Error processing batch {batch_number}: {error_message}") - -# # Save error to database -# db_ops.save_batch_to_db( -# batch_number=batch_number, -# total_batches=total_batches, -# results_df=pd.DataFrame(), # Empty DataFrame for error case -# status="error" -# ) - -# responses.append(BatchResponse( -# batch_number=batch_number, -# total_batches=total_batches, -# processed_rows=len(chunk), -# results_path="", -# message=f"Error processing batch {batch_number}: {error_message}" -# )) - -# offset += batch_size - -# 
logger.info(f"Streaming pipeline endpoint completed in {time.time() - start_time:.2f} seconds") -# return responses -# except Exception as e: -# logger.error(f"Error in streaming pipeline: {str(e)}") -# logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds") -# raise HTTPException(status_code=500, detail=str(e)) - diff --git a/app/config.py b/app/config.py index b2f671a..73e206e 100644 --- a/app/config.py +++ b/app/config.py @@ -25,18 +25,29 @@ os.makedirs(MODEL_DIR, exist_ok=True) # Database Configuration DB_CONFIG = { - "user": os.getenv("DB_USER"), - "password": os.getenv("DB_PASSWORD"), - "name": os.getenv("DB_NAME"), - "port": os.getenv("DB_PORT"), - "host": os.getenv("DB_HOST") + "user": os.getenv("DATABASE_USER"), + "password": os.getenv("DATABASE_PASSWORD"), + "name": os.getenv("DATABASE_NAME"), + "port": os.getenv("DATABASE_PORT", 10532), + "host": os.getenv("DATABASE_HOST", "firstadvancedev"), + "sid": os.getenv("DATABASE_SID", "FREE") } + +DNS = f"(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_CONFIG['host']})(PORT={DB_CONFIG['port']}))(CONNECT_DATA=(SID={DB_CONFIG['sid']})))" + +# Database Connection +SQLALCHEMY_DATABASE_URI_INTERNAL = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}") +SQLALCHEMY_DATABASE_URI = os.getenv("SQLALCHEMY_DATABASE_URI_FULL", SQLALCHEMY_DATABASE_URI_INTERNAL) + +#SQLALCHEMY_DATABASE_URI_FULL = 'oracle+oracledb://FIRSTADVSTG:Pchanged_56789@10.2.110.30:1521/?service_name=firstadv' + # SQLAlchemy Configuration -SQLALCHEMY_DATABASE_URI = ( - f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@" - f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}" -) +# SQLALCHEMY_DATABASE_URI = ( +# f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@" +# f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}" +# ) + SQLALCHEMY_TRACK_MODIFICATIONS = False # Table Configuration @@ -81,7 +92,7 @@ OUTPUT_PATHS = { } SIMBRELLA_BASE_URL = 
os.getenv("SIMBRELLA_BASE_URL", "http://127.0.0.1:6337") -SIMBRELLA_ENDPOINT_RAC_CHECKS = os.getenv("SIMBRELLA_ENDPOINT_RAC_CHECKS","api/rac-check") +SIMBRELLA_ENDPOINT_RAC_CHECKS = os.getenv("SIMBRELLA_ENDPOINT_RAC_CHECKS", "api/rac-check") # Salary Detect Endpoint Config SALARY_DETECT_URL = "http://www.simbrellang.net:5000/autocall/analytic-salary-detect" -- 2.34.1 From 239d19fbac7019f9187bceeca56d9768ddbc7af4 Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Tue, 9 Sep 2025 12:22:05 +0100 Subject: [PATCH 07/19] [add]: Database config --- app.log | 132 +++++++++++++++++++ app/salary_analytics/services/data_loader.py | 5 +- requirements.txt | 4 + 3 files changed, 138 insertions(+), 3 deletions(-) diff --git a/app.log b/app.log index 4282a8a..8ef3108 100644 --- a/app.log +++ b/app.log @@ -211,3 +211,135 @@ 2025-09-09 11:05:42,204 - ERROR - Error loading data: 500: Failed to load data 2025-09-09 11:05:42,206 - INFO - Load data endpoint failed after 60.94 seconds 2025-09-09 11:06:18,783 - INFO - [2025-09-09 11:06:18] Detecting salary... +2025-09-09 11:13:13,121 - INFO - Initializing pipeline... +2025-09-09 11:13:13,125 - INFO - [2025-09-09 11:13:13] Detecting salary... +2025-09-09 11:13:13,126 - INFO - Started autonomous salary detection loop. 
+2025-09-09 11:13:13,131 - INFO - Server running on hostname: 5d4fdd4232a7 +2025-09-09 11:13:13,138 - INFO - Server IP address: 172.25.0.2 +2025-09-09 11:13:13,139 - INFO - Server is accessible at: +2025-09-09 11:13:13,150 - INFO - - http://localhost:8000 +2025-09-09 11:13:13,151 - INFO - - http://127.0.0.1:8000 +2025-09-09 11:13:13,155 - INFO - - http://172.25.0.2:8000 +2025-09-09 11:13:13,155 - INFO - Pipeline initialized successfully +2025-09-09 11:13:13,762 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 11:13:13,763 - INFO - [2025-09-09 11:13:13] Salary detection complete +2025-09-09 11:15:13,767 - INFO - [2025-09-09 11:15:13] Detecting salary... +2025-09-09 11:15:14,439 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 11:15:14,440 - INFO - [2025-09-09 11:15:14] Salary detection complete +2025-09-09 11:15:18,049 - INFO - Shutting down Salary Analytics API... +2025-09-09 11:15:31,532 - INFO - Initializing pipeline... +2025-09-09 11:15:31,535 - INFO - [2025-09-09 11:15:31] Detecting salary... +2025-09-09 11:15:31,535 - INFO - Started autonomous salary detection loop. +2025-09-09 11:15:31,544 - INFO - Server running on hostname: 5d4fdd4232a7 +2025-09-09 11:15:31,545 - INFO - Server IP address: 172.25.0.2 +2025-09-09 11:15:31,546 - INFO - Server is accessible at: +2025-09-09 11:15:31,548 - INFO - - http://localhost:8000 +2025-09-09 11:15:31,548 - INFO - - http://127.0.0.1:8000 +2025-09-09 11:15:31,550 - INFO - - http://172.25.0.2:8000 +2025-09-09 11:15:31,551 - INFO - Pipeline initialized successfully +2025-09-09 11:15:44,793 - INFO - Initializing pipeline... 
+2025-09-09 11:15:44,795 - INFO - [2025-09-09 11:15:44] Detecting salary... +2025-09-09 11:15:44,795 - INFO - Started autonomous salary detection loop. +2025-09-09 11:15:44,804 - INFO - Server running on hostname: 5d4fdd4232a7 +2025-09-09 11:15:44,805 - INFO - Server IP address: 172.25.0.2 +2025-09-09 11:15:44,805 - INFO - Server is accessible at: +2025-09-09 11:15:44,806 - INFO - - http://localhost:8000 +2025-09-09 11:15:44,807 - INFO - - http://127.0.0.1:8000 +2025-09-09 11:15:44,809 - INFO - - http://172.25.0.2:8000 +2025-09-09 11:15:44,810 - INFO - Pipeline initialized successfully +2025-09-09 11:16:01,481 - INFO - Initializing pipeline... +2025-09-09 11:16:01,485 - INFO - [2025-09-09 11:16:01] Detecting salary... +2025-09-09 11:16:01,486 - INFO - Started autonomous salary detection loop. +2025-09-09 11:16:01,497 - INFO - Server running on hostname: 5d4fdd4232a7 +2025-09-09 11:16:01,499 - INFO - Server IP address: 172.25.0.2 +2025-09-09 11:16:01,500 - INFO - Server is accessible at: +2025-09-09 11:16:01,501 - INFO - - http://localhost:8000 +2025-09-09 11:16:01,504 - INFO - - http://127.0.0.1:8000 +2025-09-09 11:16:01,505 - INFO - - http://172.25.0.2:8000 +2025-09-09 11:16:01,520 - INFO - Pipeline initialized successfully +2025-09-09 11:16:02,407 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 11:16:02,416 - INFO - [2025-09-09 11:16:02] Salary detection complete +2025-09-09 11:18:02,428 - INFO - [2025-09-09 11:18:02] Detecting salary... 
+2025-09-09 11:18:03,770 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 11:18:03,770 - INFO - [2025-09-09 11:18:03] Salary detection complete +2025-09-09 11:18:43,964 - INFO - Shutting down Salary Analytics API... +2025-09-09 11:19:08,819 - INFO - generated new fontManager +2025-09-09 11:19:13,397 - INFO - Initializing pipeline... +2025-09-09 11:19:13,399 - INFO - [2025-09-09 11:19:13] Detecting salary... +2025-09-09 11:19:13,399 - INFO - Started autonomous salary detection loop. +2025-09-09 11:19:13,409 - INFO - Server running on hostname: 7c67294712af +2025-09-09 11:19:13,409 - INFO - Server IP address: 172.25.0.2 +2025-09-09 11:19:13,417 - INFO - Server is accessible at: +2025-09-09 11:19:13,421 - INFO - - http://localhost:8000 +2025-09-09 11:19:13,422 - INFO - - http://127.0.0.1:8000 +2025-09-09 11:19:13,423 - INFO - - http://172.25.0.2:8000 +2025-09-09 11:19:13,425 - INFO - Pipeline initialized successfully +2025-09-09 11:19:14,059 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 11:19:14,060 - INFO - [2025-09-09 11:19:14] Salary detection complete +2025-09-09 11:19:58,717 - INFO - Initializing SalaryAnalyticsPipeline +2025-09-09 11:19:58,718 - INFO - Starting data loading process +2025-09-09 11:19:58,718 - INFO - No database connection. Attempting to connect... +2025-09-09 11:19:58,719 - INFO - Attempting to connect to database... 
+2025-09-09 11:19:58,793 - ERROR - Error connecting to database: No module named 'oracledb' +2025-09-09 11:19:58,794 - ERROR - Failed to establish database connection +2025-09-09 11:19:58,794 - ERROR - Failed to load data +2025-09-09 11:19:58,795 - ERROR - Failed to load data +2025-09-09 11:19:58,795 - INFO - Load data endpoint failed after 0.08 seconds +2025-09-09 11:19:58,796 - ERROR - Error loading data: 500: Failed to load data +2025-09-09 11:19:58,797 - INFO - Load data endpoint failed after 0.08 seconds +2025-09-09 11:20:01,415 - INFO - Starting data loading process +2025-09-09 11:20:01,437 - INFO - No database connection. Attempting to connect... +2025-09-09 11:20:01,443 - INFO - Attempting to connect to database... +2025-09-09 11:20:01,445 - ERROR - Error connecting to database: No module named 'oracledb' +2025-09-09 11:20:01,445 - ERROR - Failed to establish database connection +2025-09-09 11:20:01,447 - ERROR - Failed to load data +2025-09-09 11:20:01,448 - ERROR - Failed to load data +2025-09-09 11:20:01,449 - INFO - Load data endpoint failed after 0.03 seconds +2025-09-09 11:20:01,449 - ERROR - Error loading data: 500: Failed to load data +2025-09-09 11:20:01,450 - INFO - Load data endpoint failed after 0.03 seconds +2025-09-09 11:21:14,063 - INFO - [2025-09-09 11:21:14] Detecting salary... +2025-09-09 11:21:14,834 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-09 11:21:14,835 - INFO - [2025-09-09 11:21:14] Salary detection complete +2025-09-09 11:21:19,646 - INFO - Shutting down Salary Analytics API... 
diff --git a/app/salary_analytics/services/data_loader.py b/app/salary_analytics/services/data_loader.py index 3aadd99..406608c 100644 --- a/app/salary_analytics/services/data_loader.py +++ b/app/salary_analytics/services/data_loader.py @@ -7,7 +7,7 @@ import pandas as pd from datetime import datetime import logging import os -from app.config import DB_CONFIG, TABLE_NAME +from app.config import SQLALCHEMY_DATABASE_URI, TABLE_NAME from app.utils.logger import logger class DataLoader: @@ -20,8 +20,7 @@ class DataLoader: """Establish database connection.""" try: logger.info("Attempting to connect to database...") - DATABASE_URL = f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}" - self.engine = create_engine(DATABASE_URL) + self.engine = create_engine(SQLALCHEMY_DATABASE_URI) with self.engine.connect() as conn: # First check if table exists check_table = text(f"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = '{TABLE_NAME}')") diff --git a/requirements.txt b/requirements.txt index 4937b16..461ceb2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,8 @@ +# Database and ORM sqlalchemy>=2.0.0 +oracledb>=1.0.0 + + pandas>=1.5.0 numpy>=1.21.0 matplotlib>=3.5.0 -- 2.34.1 From 95732a470dc6a11356d92f46abf1c9347404619e Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Wed, 10 Sep 2025 11:36:32 +0100 Subject: [PATCH 08/19] [add]: Final commit before model fix --- app.log | 423 ++++++++++++++++---------------------------------- app/config.py | 14 +- 2 files changed, 141 insertions(+), 296 deletions(-) diff --git a/app.log b/app.log index 8ef3108..d1fa785 100644 --- a/app.log +++ b/app.log @@ -1,15 +1,14 @@ -2025-09-09 10:23:31,651 - INFO - generated new fontManager -2025-09-09 10:23:38,494 - INFO - Initializing pipeline... -2025-09-09 10:23:38,496 - INFO - [2025-09-09 10:23:38] Detecting salary... 
-2025-09-09 10:23:38,497 - INFO - Started autonomous salary detection loop. -2025-09-09 10:23:38,509 - INFO - Server running on hostname: 1c3f3ceb2429 -2025-09-09 10:23:38,511 - INFO - Server IP address: 172.25.0.2 -2025-09-09 10:23:38,523 - INFO - Server is accessible at: -2025-09-09 10:23:38,525 - INFO - - http://localhost:8000 -2025-09-09 10:23:38,527 - INFO - - http://127.0.0.1:8000 -2025-09-09 10:23:38,528 - INFO - - http://172.25.0.2:8000 -2025-09-09 10:23:38,529 - INFO - Pipeline initialized successfully -2025-09-09 10:23:41,368 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { +2025-09-10 10:21:11,016 - INFO - Initializing pipeline... +2025-09-10 10:21:11,019 - INFO - [2025-09-10 10:21:11] Detecting salary... +2025-09-10 10:21:11,019 - INFO - Started autonomous salary detection loop. +2025-09-10 10:21:11,030 - INFO - Server running on hostname: 22bad35c69c3 +2025-09-10 10:21:11,035 - INFO - Server IP address: 172.25.0.2 +2025-09-10 10:21:11,036 - INFO - Server is accessible at: +2025-09-10 10:21:11,037 - INFO - - http://localhost:8000 +2025-09-10 10:21:11,039 - INFO - - http://127.0.0.1:8000 +2025-09-10 10:21:11,040 - INFO - - http://172.25.0.2:8000 +2025-09-10 10:21:11,041 - INFO - Pipeline initialized successfully +2025-09-10 10:21:11,532 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { "data": [], "error": {}, "message": "AutoCall Add Salary Successful", @@ -17,9 +16,30 @@ "statusCode": 200 } -2025-09-09 10:23:41,369 - INFO - [2025-09-09 10:23:41] Salary detection complete -2025-09-09 10:25:41,371 - INFO - [2025-09-09 10:25:41] Detecting salary... 
-2025-09-09 10:25:42,098 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { +2025-09-10 10:21:11,533 - INFO - [2025-09-10 10:21:11] Salary detection complete +2025-09-10 10:21:19,451 - INFO - Initializing SalaryAnalyticsPipeline +2025-09-10 10:21:19,452 - INFO - Starting data loading process +2025-09-10 10:21:19,453 - INFO - No database connection. Attempting to connect... +2025-09-10 10:21:19,455 - INFO - Attempting to connect to database... +2025-09-10 10:21:27,271 - INFO - Table customer_account_transaction_hx exists with 5354307 rows +2025-09-10 10:21:27,488 - INFO - Connected successfully to database! +2025-09-10 10:21:27,715 - INFO - Loading data from table: customer_account_transaction_hx +2025-09-10 10:21:28,614 - INFO - Total rows to process: 5354307 +2025-09-10 10:21:28,852 - INFO - Loading chunk starting at offset 0 +2025-09-10 10:21:40,251 - INFO - Loading chunk starting at offset 10000 +2025-09-10 10:21:46,981 - INFO - Loading chunk starting at offset 20000 +2025-09-10 10:21:57,010 - INFO - Loading chunk starting at offset 30000 +2025-09-10 10:22:04,792 - INFO - Loading chunk starting at offset 40000 +2025-09-10 10:22:09,599 - INFO - Loading chunk starting at offset 50000 +2025-09-10 10:22:15,016 - INFO - Loading chunk starting at offset 60000 +2025-09-10 10:22:18,613 - INFO - Loading chunk starting at offset 70000 +2025-09-10 10:22:22,007 - INFO - Loading chunk starting at offset 80000 +2025-09-10 10:22:28,397 - INFO - Loading chunk starting at offset 90000 +2025-09-10 10:22:36,081 - INFO - Loading chunk starting at offset 100000 +2025-09-10 10:22:48,738 - INFO - Loading chunk starting at offset 110000 +2025-09-10 10:23:01,521 - INFO - Loading chunk starting at offset 120000 +2025-09-10 10:23:11,532 - INFO - [2025-09-10 10:23:11] Detecting salary... 
+2025-09-10 10:23:13,504 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { "data": [], "error": {}, "message": "AutoCall Add Salary Successful", @@ -27,19 +47,19 @@ "statusCode": 200 } -2025-09-09 10:25:42,100 - INFO - [2025-09-09 10:25:42] Salary detection complete -2025-09-09 10:27:03,741 - INFO - Shutting down Salary Analytics API... -2025-09-09 10:29:59,503 - INFO - Initializing pipeline... -2025-09-09 10:29:59,506 - INFO - [2025-09-09 10:29:59] Detecting salary... -2025-09-09 10:29:59,506 - INFO - Started autonomous salary detection loop. -2025-09-09 10:29:59,534 - INFO - Server running on hostname: 1c3f3ceb2429 -2025-09-09 10:29:59,535 - INFO - Server IP address: 172.25.0.2 -2025-09-09 10:29:59,535 - INFO - Server is accessible at: -2025-09-09 10:29:59,536 - INFO - - http://localhost:8000 -2025-09-09 10:29:59,537 - INFO - - http://127.0.0.1:8000 -2025-09-09 10:29:59,539 - INFO - - http://172.25.0.2:8000 -2025-09-09 10:29:59,541 - INFO - Pipeline initialized successfully -2025-09-09 10:30:04,484 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { +2025-09-10 10:23:13,504 - INFO - [2025-09-10 10:23:13] Salary detection complete +2025-09-10 10:23:16,348 - INFO - Loading chunk starting at offset 130000 +2025-09-10 10:23:23,259 - INFO - Loading chunk starting at offset 140000 +2025-09-10 10:23:29,284 - INFO - Loading chunk starting at offset 150000 +2025-09-10 10:23:41,579 - INFO - Loading chunk starting at offset 160000 +2025-09-10 10:23:54,788 - INFO - Loading chunk starting at offset 170000 +2025-09-10 10:24:19,519 - INFO - Loading chunk starting at offset 180000 +2025-09-10 10:24:31,657 - INFO - Loading chunk starting at offset 190000 +2025-09-10 10:24:46,130 - INFO - Loading chunk starting at offset 200000 +2025-09-10 10:24:57,289 - INFO - Loading chunk starting at offset 210000 +2025-09-10 10:25:07,964 - INFO - Loading chunk starting at offset 220000 
+2025-09-10 10:25:13,503 - INFO - [2025-09-10 10:25:13] Detecting salary... +2025-09-10 10:25:15,477 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { "data": [], "error": {}, "message": "AutoCall Add Salary Successful", @@ -47,19 +67,21 @@ "statusCode": 200 } -2025-09-09 10:30:04,485 - INFO - [2025-09-09 10:30:04] Salary detection complete -2025-09-09 10:30:47,978 - INFO - Shutting down Salary Analytics API... -2025-09-09 10:41:41,451 - INFO - Initializing pipeline... -2025-09-09 10:41:41,456 - INFO - [2025-09-09 10:41:41] Detecting salary... -2025-09-09 10:41:41,457 - INFO - Started autonomous salary detection loop. -2025-09-09 10:41:41,481 - INFO - Server running on hostname: 1c3f3ceb2429 -2025-09-09 10:41:41,485 - INFO - Server IP address: 172.25.0.2 -2025-09-09 10:41:41,486 - INFO - Server is accessible at: -2025-09-09 10:41:41,486 - INFO - - http://localhost:8000 -2025-09-09 10:41:41,488 - INFO - - http://127.0.0.1:8000 -2025-09-09 10:41:41,490 - INFO - - http://172.25.0.2:8000 -2025-09-09 10:41:41,491 - INFO - Pipeline initialized successfully -2025-09-09 10:41:42,431 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { +2025-09-10 10:25:15,478 - INFO - [2025-09-10 10:25:15] Salary detection complete +2025-09-10 10:25:17,793 - INFO - Loading chunk starting at offset 230000 +2025-09-10 10:25:25,026 - INFO - Loading chunk starting at offset 240000 +2025-09-10 10:25:32,079 - INFO - Loading chunk starting at offset 250000 +2025-09-10 10:25:39,990 - INFO - Loading chunk starting at offset 260000 +2025-09-10 10:25:50,492 - INFO - Loading chunk starting at offset 270000 +2025-09-10 10:26:00,181 - INFO - Loading chunk starting at offset 280000 +2025-09-10 10:26:10,138 - INFO - Loading chunk starting at offset 290000 +2025-09-10 10:26:20,437 - INFO - Loading chunk starting at offset 300000 +2025-09-10 10:26:34,962 - INFO - Loading chunk starting at offset 310000 
+2025-09-10 10:26:46,248 - INFO - Loading chunk starting at offset 320000 +2025-09-10 10:26:55,275 - INFO - Loading chunk starting at offset 330000 +2025-09-10 10:27:09,733 - INFO - Loading chunk starting at offset 340000 +2025-09-10 10:27:15,480 - INFO - [2025-09-10 10:27:15] Detecting salary... +2025-09-10 10:27:16,553 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { "data": [], "error": {}, "message": "AutoCall Add Salary Successful", @@ -67,9 +89,22 @@ "statusCode": 200 } -2025-09-09 10:41:42,432 - INFO - [2025-09-09 10:41:42] Salary detection complete -2025-09-09 10:43:42,431 - INFO - [2025-09-09 10:43:42] Detecting salary... -2025-09-09 10:43:43,092 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { +2025-09-10 10:27:16,553 - INFO - [2025-09-10 10:27:16] Salary detection complete +2025-09-10 10:27:26,568 - INFO - Loading chunk starting at offset 350000 +2025-09-10 10:27:36,472 - INFO - Loading chunk starting at offset 360000 +2025-09-10 10:27:44,909 - INFO - Loading chunk starting at offset 370000 +2025-09-10 10:27:54,557 - INFO - Loading chunk starting at offset 380000 +2025-09-10 10:28:00,588 - INFO - Loading chunk starting at offset 390000 +2025-09-10 10:28:05,957 - INFO - Loading chunk starting at offset 400000 +2025-09-10 10:28:12,058 - INFO - Loading chunk starting at offset 410000 +2025-09-10 10:28:19,248 - INFO - Loading chunk starting at offset 420000 +2025-09-10 10:28:29,938 - INFO - Loading chunk starting at offset 430000 +2025-09-10 10:28:51,274 - INFO - Loading chunk starting at offset 440000 +2025-09-10 10:28:57,720 - INFO - Loading chunk starting at offset 450000 +2025-09-10 10:29:02,117 - INFO - Loading chunk starting at offset 460000 +2025-09-10 10:29:11,555 - INFO - Loading chunk starting at offset 470000 +2025-09-10 10:29:16,565 - INFO - [2025-09-10 10:29:16] Detecting salary... 
+2025-09-10 10:29:17,452 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { "data": [], "error": {}, "message": "AutoCall Add Salary Successful", @@ -77,9 +112,24 @@ "statusCode": 200 } -2025-09-09 10:43:43,093 - INFO - [2025-09-09 10:43:43] Salary detection complete -2025-09-09 10:45:43,093 - INFO - [2025-09-09 10:45:43] Detecting salary... -2025-09-09 10:45:43,818 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { +2025-09-10 10:29:17,455 - INFO - [2025-09-10 10:29:17] Salary detection complete +2025-09-10 10:29:30,145 - INFO - Loading chunk starting at offset 480000 +2025-09-10 10:29:39,048 - INFO - Loading chunk starting at offset 490000 +2025-09-10 10:29:43,412 - INFO - Loading chunk starting at offset 500000 +2025-09-10 10:29:47,415 - INFO - Loading chunk starting at offset 510000 +2025-09-10 10:29:52,149 - INFO - Loading chunk starting at offset 520000 +2025-09-10 10:29:58,331 - INFO - Loading chunk starting at offset 530000 +2025-09-10 10:30:04,933 - INFO - Loading chunk starting at offset 540000 +2025-09-10 10:30:11,934 - INFO - Loading chunk starting at offset 550000 +2025-09-10 10:30:19,315 - INFO - Loading chunk starting at offset 560000 +2025-09-10 10:30:25,683 - INFO - Loading chunk starting at offset 570000 +2025-09-10 10:30:33,577 - INFO - Loading chunk starting at offset 580000 +2025-09-10 10:30:40,363 - INFO - Loading chunk starting at offset 590000 +2025-09-10 10:30:46,205 - INFO - Loading chunk starting at offset 600000 +2025-09-10 10:30:53,957 - INFO - Loading chunk starting at offset 610000 +2025-09-10 10:31:08,083 - INFO - Loading chunk starting at offset 620000 +2025-09-10 10:31:17,454 - INFO - [2025-09-10 10:31:17] Detecting salary... 
+2025-09-10 10:31:17,989 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { "data": [], "error": {}, "message": "AutoCall Add Salary Successful", @@ -87,19 +137,22 @@ "statusCode": 200 } -2025-09-09 10:45:43,819 - INFO - [2025-09-09 10:45:43] Salary detection complete -2025-09-09 10:47:16,454 - INFO - Shutting down Salary Analytics API... -2025-09-09 10:47:30,172 - INFO - Initializing pipeline... -2025-09-09 10:47:30,174 - INFO - [2025-09-09 10:47:30] Detecting salary... -2025-09-09 10:47:30,175 - INFO - Started autonomous salary detection loop. -2025-09-09 10:47:30,185 - INFO - Server running on hostname: 1c3f3ceb2429 -2025-09-09 10:47:30,188 - INFO - Server IP address: 172.25.0.2 -2025-09-09 10:47:30,188 - INFO - Server is accessible at: -2025-09-09 10:47:30,189 - INFO - - http://localhost:8000 -2025-09-09 10:47:30,190 - INFO - - http://127.0.0.1:8000 -2025-09-09 10:47:30,191 - INFO - - http://172.25.0.2:8000 -2025-09-09 10:47:30,191 - INFO - Pipeline initialized successfully -2025-09-09 10:47:31,032 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { +2025-09-10 10:31:17,989 - INFO - [2025-09-10 10:31:17] Salary detection complete +2025-09-10 10:31:18,901 - INFO - Loading chunk starting at offset 630000 +2025-09-10 10:31:27,551 - INFO - Loading chunk starting at offset 640000 +2025-09-10 10:31:33,662 - INFO - Loading chunk starting at offset 650000 +2025-09-10 10:31:40,107 - INFO - Loading chunk starting at offset 660000 +2025-09-10 10:31:49,938 - INFO - Loading chunk starting at offset 670000 +2025-09-10 10:31:57,777 - INFO - Loading chunk starting at offset 680000 +2025-09-10 10:32:06,629 - INFO - Loading chunk starting at offset 690000 +2025-09-10 10:32:15,281 - INFO - Loading chunk starting at offset 700000 +2025-09-10 10:32:25,328 - INFO - Loading chunk starting at offset 710000 +2025-09-10 10:32:40,639 - INFO - Loading chunk starting at offset 720000 
+2025-09-10 10:32:51,078 - INFO - Loading chunk starting at offset 730000 +2025-09-10 10:33:03,924 - INFO - Loading chunk starting at offset 740000 +2025-09-10 10:33:13,007 - INFO - Loading chunk starting at offset 750000 +2025-09-10 10:33:17,989 - INFO - [2025-09-10 10:33:17] Detecting salary... +2025-09-10 10:33:19,698 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { "data": [], "error": {}, "message": "AutoCall Add Salary Successful", @@ -107,21 +160,22 @@ "statusCode": 200 } -2025-09-09 10:47:31,033 - INFO - [2025-09-09 10:47:31] Salary detection complete -2025-09-09 10:47:38,286 - INFO - Shutting down Salary Analytics API... -2025-09-09 10:47:47,645 - INFO - generated new fontManager -2025-09-09 10:48:19,231 - INFO - generated new fontManager -2025-09-09 10:48:24,426 - INFO - Initializing pipeline... -2025-09-09 10:48:24,429 - INFO - [2025-09-09 10:48:24] Detecting salary... -2025-09-09 10:48:24,429 - INFO - Started autonomous salary detection loop. 
-2025-09-09 10:48:24,441 - INFO - Server running on hostname: 349f9fd0c78b -2025-09-09 10:48:24,442 - INFO - Server IP address: 172.25.0.2 -2025-09-09 10:48:24,444 - INFO - Server is accessible at: -2025-09-09 10:48:24,445 - INFO - - http://localhost:8000 -2025-09-09 10:48:24,448 - INFO - - http://127.0.0.1:8000 -2025-09-09 10:48:24,450 - INFO - - http://172.25.0.2:8000 -2025-09-09 10:48:24,451 - INFO - Pipeline initialized successfully -2025-09-09 10:48:25,094 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { +2025-09-10 10:33:20,039 - INFO - [2025-09-10 10:33:20] Salary detection complete +2025-09-10 10:33:20,857 - INFO - Loading chunk starting at offset 760000 +2025-09-10 10:33:30,803 - INFO - Loading chunk starting at offset 770000 +2025-09-10 10:33:38,162 - INFO - Loading chunk starting at offset 780000 +2025-09-10 10:33:44,504 - INFO - Loading chunk starting at offset 790000 +2025-09-10 10:33:50,063 - INFO - Loading chunk starting at offset 800000 +2025-09-10 10:33:57,957 - INFO - Loading chunk starting at offset 810000 +2025-09-10 10:34:05,256 - INFO - Loading chunk starting at offset 820000 +2025-09-10 10:34:12,212 - INFO - Loading chunk starting at offset 830000 +2025-09-10 10:34:20,478 - INFO - Loading chunk starting at offset 840000 +2025-09-10 10:34:28,018 - INFO - Loading chunk starting at offset 850000 +2025-09-10 10:34:36,413 - INFO - Loading chunk starting at offset 860000 +2025-09-10 10:34:50,168 - INFO - Loading chunk starting at offset 870000 +2025-09-10 10:35:13,867 - INFO - Loading chunk starting at offset 880000 +2025-09-10 10:35:20,391 - INFO - [2025-09-10 10:35:20] Detecting salary... 
+2025-09-10 10:35:21,791 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { "data": [], "error": {}, "message": "AutoCall Add Salary Successful", @@ -129,217 +183,8 @@ "statusCode": 200 } -2025-09-09 10:48:25,095 - INFO - [2025-09-09 10:48:25] Salary detection complete -2025-09-09 10:49:03,380 - INFO - Shutting down Salary Analytics API... -2025-09-09 10:49:18,345 - INFO - Initializing pipeline... -2025-09-09 10:49:18,346 - INFO - [2025-09-09 10:49:18] Detecting salary... -2025-09-09 10:49:18,347 - INFO - Started autonomous salary detection loop. -2025-09-09 10:49:18,352 - INFO - Server running on hostname: 349f9fd0c78b -2025-09-09 10:49:18,353 - INFO - Server IP address: 172.25.0.2 -2025-09-09 10:49:18,353 - INFO - Server is accessible at: -2025-09-09 10:49:18,354 - INFO - - http://localhost:8000 -2025-09-09 10:49:18,355 - INFO - - http://127.0.0.1:8000 -2025-09-09 10:49:18,365 - INFO - - http://172.25.0.2:8000 -2025-09-09 10:49:18,366 - INFO - Pipeline initialized successfully -2025-09-09 10:50:37,994 - INFO - generated new fontManager -2025-09-09 10:50:45,235 - INFO - Initializing pipeline... -2025-09-09 10:50:45,238 - INFO - [2025-09-09 10:50:45] Detecting salary... -2025-09-09 10:50:45,238 - INFO - Started autonomous salary detection loop. 
-2025-09-09 10:50:45,244 - INFO - Server running on hostname: 087fb63cb9f0 -2025-09-09 10:50:45,244 - INFO - Server IP address: 172.25.0.2 -2025-09-09 10:50:45,245 - INFO - Server is accessible at: -2025-09-09 10:50:45,245 - INFO - - http://localhost:8000 -2025-09-09 10:50:45,246 - INFO - - http://127.0.0.1:8000 -2025-09-09 10:50:45,247 - INFO - - http://172.25.0.2:8000 -2025-09-09 10:50:45,248 - INFO - Pipeline initialized successfully -2025-09-09 10:50:46,400 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { - "data": [], - "error": {}, - "message": "AutoCall Add Salary Successful", - "status": true, - "statusCode": 200 -} - -2025-09-09 10:50:46,401 - INFO - [2025-09-09 10:50:46] Salary detection complete -2025-09-09 10:51:51,570 - INFO - Shutting down Salary Analytics API... -2025-09-09 11:01:38,522 - INFO - generated new fontManager -2025-09-09 11:01:45,459 - INFO - Initializing pipeline... -2025-09-09 11:01:45,463 - INFO - [2025-09-09 11:01:45] Detecting salary... -2025-09-09 11:01:45,464 - INFO - Started autonomous salary detection loop. -2025-09-09 11:01:45,483 - INFO - Server running on hostname: 5d4fdd4232a7 -2025-09-09 11:01:45,484 - INFO - Server IP address: 172.25.0.2 -2025-09-09 11:01:45,485 - INFO - Server is accessible at: -2025-09-09 11:01:45,491 - INFO - - http://localhost:8000 -2025-09-09 11:01:45,493 - INFO - - http://127.0.0.1:8000 -2025-09-09 11:01:45,495 - INFO - - http://172.25.0.2:8000 -2025-09-09 11:01:45,496 - INFO - Pipeline initialized successfully -2025-09-09 11:02:00,358 - INFO - Shutting down Salary Analytics API... -2025-09-09 11:02:15,204 - INFO - Initializing pipeline... -2025-09-09 11:02:15,208 - INFO - [2025-09-09 11:02:15] Detecting salary... -2025-09-09 11:02:15,208 - INFO - Started autonomous salary detection loop. 
-2025-09-09 11:02:15,395 - INFO - Server running on hostname: 5d4fdd4232a7 -2025-09-09 11:02:15,397 - INFO - Server IP address: 172.25.0.2 -2025-09-09 11:02:15,415 - INFO - Server is accessible at: -2025-09-09 11:02:15,417 - INFO - - http://localhost:8000 -2025-09-09 11:02:15,417 - INFO - - http://127.0.0.1:8000 -2025-09-09 11:02:15,418 - INFO - - http://172.25.0.2:8000 -2025-09-09 11:02:15,419 - INFO - Pipeline initialized successfully -2025-09-09 11:04:18,780 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 500, response: - -Internal Server Error
- - - - -2025-09-09 11:04:18,781 - INFO - [2025-09-09 11:04:18] Salary detection complete -2025-09-09 11:04:41,264 - INFO - Initializing SalaryAnalyticsPipeline -2025-09-09 11:04:41,265 - INFO - Starting data loading process -2025-09-09 11:04:41,265 - INFO - No database connection. Attempting to connect... -2025-09-09 11:04:41,266 - INFO - Attempting to connect to database... -2025-09-09 11:05:42,201 - ERROR - Error connecting to database: (psycopg2.OperationalError) connection to server at "dev-data.simbrellang.net" (209.195.2.27), port 1521 failed: server closed the connection unexpectedly - This probably means the server terminated abnormally - before or while processing the request. - -(Background on this error at: https://sqlalche.me/e/20/e3q8) -2025-09-09 11:05:42,202 - ERROR - Failed to establish database connection -2025-09-09 11:05:42,202 - ERROR - Failed to load data -2025-09-09 11:05:42,203 - ERROR - Failed to load data -2025-09-09 11:05:42,203 - INFO - Load data endpoint failed after 60.94 seconds -2025-09-09 11:05:42,204 - ERROR - Error loading data: 500: Failed to load data -2025-09-09 11:05:42,206 - INFO - Load data endpoint failed after 60.94 seconds -2025-09-09 11:06:18,783 - INFO - [2025-09-09 11:06:18] Detecting salary... -2025-09-09 11:13:13,121 - INFO - Initializing pipeline... -2025-09-09 11:13:13,125 - INFO - [2025-09-09 11:13:13] Detecting salary... -2025-09-09 11:13:13,126 - INFO - Started autonomous salary detection loop. 
-2025-09-09 11:13:13,131 - INFO - Server running on hostname: 5d4fdd4232a7 -2025-09-09 11:13:13,138 - INFO - Server IP address: 172.25.0.2 -2025-09-09 11:13:13,139 - INFO - Server is accessible at: -2025-09-09 11:13:13,150 - INFO - - http://localhost:8000 -2025-09-09 11:13:13,151 - INFO - - http://127.0.0.1:8000 -2025-09-09 11:13:13,155 - INFO - - http://172.25.0.2:8000 -2025-09-09 11:13:13,155 - INFO - Pipeline initialized successfully -2025-09-09 11:13:13,762 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { - "data": [], - "error": {}, - "message": "AutoCall Add Salary Successful", - "status": true, - "statusCode": 200 -} - -2025-09-09 11:13:13,763 - INFO - [2025-09-09 11:13:13] Salary detection complete -2025-09-09 11:15:13,767 - INFO - [2025-09-09 11:15:13] Detecting salary... -2025-09-09 11:15:14,439 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { - "data": [], - "error": {}, - "message": "AutoCall Add Salary Successful", - "status": true, - "statusCode": 200 -} - -2025-09-09 11:15:14,440 - INFO - [2025-09-09 11:15:14] Salary detection complete -2025-09-09 11:15:18,049 - INFO - Shutting down Salary Analytics API... -2025-09-09 11:15:31,532 - INFO - Initializing pipeline... -2025-09-09 11:15:31,535 - INFO - [2025-09-09 11:15:31] Detecting salary... -2025-09-09 11:15:31,535 - INFO - Started autonomous salary detection loop. -2025-09-09 11:15:31,544 - INFO - Server running on hostname: 5d4fdd4232a7 -2025-09-09 11:15:31,545 - INFO - Server IP address: 172.25.0.2 -2025-09-09 11:15:31,546 - INFO - Server is accessible at: -2025-09-09 11:15:31,548 - INFO - - http://localhost:8000 -2025-09-09 11:15:31,548 - INFO - - http://127.0.0.1:8000 -2025-09-09 11:15:31,550 - INFO - - http://172.25.0.2:8000 -2025-09-09 11:15:31,551 - INFO - Pipeline initialized successfully -2025-09-09 11:15:44,793 - INFO - Initializing pipeline... 
-2025-09-09 11:15:44,795 - INFO - [2025-09-09 11:15:44] Detecting salary... -2025-09-09 11:15:44,795 - INFO - Started autonomous salary detection loop. -2025-09-09 11:15:44,804 - INFO - Server running on hostname: 5d4fdd4232a7 -2025-09-09 11:15:44,805 - INFO - Server IP address: 172.25.0.2 -2025-09-09 11:15:44,805 - INFO - Server is accessible at: -2025-09-09 11:15:44,806 - INFO - - http://localhost:8000 -2025-09-09 11:15:44,807 - INFO - - http://127.0.0.1:8000 -2025-09-09 11:15:44,809 - INFO - - http://172.25.0.2:8000 -2025-09-09 11:15:44,810 - INFO - Pipeline initialized successfully -2025-09-09 11:16:01,481 - INFO - Initializing pipeline... -2025-09-09 11:16:01,485 - INFO - [2025-09-09 11:16:01] Detecting salary... -2025-09-09 11:16:01,486 - INFO - Started autonomous salary detection loop. -2025-09-09 11:16:01,497 - INFO - Server running on hostname: 5d4fdd4232a7 -2025-09-09 11:16:01,499 - INFO - Server IP address: 172.25.0.2 -2025-09-09 11:16:01,500 - INFO - Server is accessible at: -2025-09-09 11:16:01,501 - INFO - - http://localhost:8000 -2025-09-09 11:16:01,504 - INFO - - http://127.0.0.1:8000 -2025-09-09 11:16:01,505 - INFO - - http://172.25.0.2:8000 -2025-09-09 11:16:01,520 - INFO - Pipeline initialized successfully -2025-09-09 11:16:02,407 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { - "data": [], - "error": {}, - "message": "AutoCall Add Salary Successful", - "status": true, - "statusCode": 200 -} - -2025-09-09 11:16:02,416 - INFO - [2025-09-09 11:16:02] Salary detection complete -2025-09-09 11:18:02,428 - INFO - [2025-09-09 11:18:02] Detecting salary... 
-2025-09-09 11:18:03,770 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { - "data": [], - "error": {}, - "message": "AutoCall Add Salary Successful", - "status": true, - "statusCode": 200 -} - -2025-09-09 11:18:03,770 - INFO - [2025-09-09 11:18:03] Salary detection complete -2025-09-09 11:18:43,964 - INFO - Shutting down Salary Analytics API... -2025-09-09 11:19:08,819 - INFO - generated new fontManager -2025-09-09 11:19:13,397 - INFO - Initializing pipeline... -2025-09-09 11:19:13,399 - INFO - [2025-09-09 11:19:13] Detecting salary... -2025-09-09 11:19:13,399 - INFO - Started autonomous salary detection loop. -2025-09-09 11:19:13,409 - INFO - Server running on hostname: 7c67294712af -2025-09-09 11:19:13,409 - INFO - Server IP address: 172.25.0.2 -2025-09-09 11:19:13,417 - INFO - Server is accessible at: -2025-09-09 11:19:13,421 - INFO - - http://localhost:8000 -2025-09-09 11:19:13,422 - INFO - - http://127.0.0.1:8000 -2025-09-09 11:19:13,423 - INFO - - http://172.25.0.2:8000 -2025-09-09 11:19:13,425 - INFO - Pipeline initialized successfully -2025-09-09 11:19:14,059 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { - "data": [], - "error": {}, - "message": "AutoCall Add Salary Successful", - "status": true, - "statusCode": 200 -} - -2025-09-09 11:19:14,060 - INFO - [2025-09-09 11:19:14] Salary detection complete -2025-09-09 11:19:58,717 - INFO - Initializing SalaryAnalyticsPipeline -2025-09-09 11:19:58,718 - INFO - Starting data loading process -2025-09-09 11:19:58,718 - INFO - No database connection. Attempting to connect... -2025-09-09 11:19:58,719 - INFO - Attempting to connect to database... 
-2025-09-09 11:19:58,793 - ERROR - Error connecting to database: No module named 'oracledb' -2025-09-09 11:19:58,794 - ERROR - Failed to establish database connection -2025-09-09 11:19:58,794 - ERROR - Failed to load data -2025-09-09 11:19:58,795 - ERROR - Failed to load data -2025-09-09 11:19:58,795 - INFO - Load data endpoint failed after 0.08 seconds -2025-09-09 11:19:58,796 - ERROR - Error loading data: 500: Failed to load data -2025-09-09 11:19:58,797 - INFO - Load data endpoint failed after 0.08 seconds -2025-09-09 11:20:01,415 - INFO - Starting data loading process -2025-09-09 11:20:01,437 - INFO - No database connection. Attempting to connect... -2025-09-09 11:20:01,443 - INFO - Attempting to connect to database... -2025-09-09 11:20:01,445 - ERROR - Error connecting to database: No module named 'oracledb' -2025-09-09 11:20:01,445 - ERROR - Failed to establish database connection -2025-09-09 11:20:01,447 - ERROR - Failed to load data -2025-09-09 11:20:01,448 - ERROR - Failed to load data -2025-09-09 11:20:01,449 - INFO - Load data endpoint failed after 0.03 seconds -2025-09-09 11:20:01,449 - ERROR - Error loading data: 500: Failed to load data -2025-09-09 11:20:01,450 - INFO - Load data endpoint failed after 0.03 seconds -2025-09-09 11:21:14,063 - INFO - [2025-09-09 11:21:14] Detecting salary... -2025-09-09 11:21:14,834 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { - "data": [], - "error": {}, - "message": "AutoCall Add Salary Successful", - "status": true, - "statusCode": 200 -} - -2025-09-09 11:21:14,835 - INFO - [2025-09-09 11:21:14] Salary detection complete -2025-09-09 11:21:19,646 - INFO - Shutting down Salary Analytics API... 
+2025-09-10 10:35:21,794 - INFO - [2025-09-10 10:35:21] Salary detection complete +2025-09-10 10:35:28,918 - INFO - Loading chunk starting at offset 890000 +2025-09-10 10:35:40,300 - INFO - Loading chunk starting at offset 900000 +2025-09-10 10:36:05,763 - INFO - Loading chunk starting at offset 910000 +2025-09-10 10:36:15,199 - INFO - Loading chunk starting at offset 920000 diff --git a/app/config.py b/app/config.py index 73e206e..2d28031 100644 --- a/app/config.py +++ b/app/config.py @@ -34,19 +34,19 @@ DB_CONFIG = { } -DNS = f"(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_CONFIG['host']})(PORT={DB_CONFIG['port']}))(CONNECT_DATA=(SID={DB_CONFIG['sid']})))" +# DNS = f"(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_CONFIG['host']})(PORT={DB_CONFIG['port']}))(CONNECT_DATA=(SID={DB_CONFIG['sid']})))" # Database Connection -SQLALCHEMY_DATABASE_URI_INTERNAL = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}") -SQLALCHEMY_DATABASE_URI = os.getenv("SQLALCHEMY_DATABASE_URI_FULL", SQLALCHEMY_DATABASE_URI_INTERNAL) +# SQLALCHEMY_DATABASE_URI_INTERNAL = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}") +# SQLALCHEMY_DATABASE_URI = os.getenv("SQLALCHEMY_DATABASE_URI_FULL", SQLALCHEMY_DATABASE_URI_INTERNAL) #SQLALCHEMY_DATABASE_URI_FULL = 'oracle+oracledb://FIRSTADVSTG:Pchanged_56789@10.2.110.30:1521/?service_name=firstadv' # SQLAlchemy Configuration -# SQLALCHEMY_DATABASE_URI = ( -# f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@" -# f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}" -# ) +SQLALCHEMY_DATABASE_URI = ( + f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@" + f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}" +) SQLALCHEMY_TRACK_MODIFICATIONS = False -- 2.34.1 From 8236462b83c00dba8d67ee3d9a9cfeaa1f2418e9 Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Wed, 10 Sep 2025 12:02:36 +0100 Subject: [PATCH 09/19] [add]: database 
connection --- app.log | 219 ++++++++++++++++++ app/config.py | 16 +- app/models/batch_results.py | 16 +- app/models/customer_account_transaction_hx.py | 100 ++++++++ app/models/db_operations.py | 2 +- 5 files changed, 336 insertions(+), 17 deletions(-) create mode 100644 app/models/customer_account_transaction_hx.py diff --git a/app.log b/app.log index d1fa785..a55a9c9 100644 --- a/app.log +++ b/app.log @@ -188,3 +188,222 @@ 2025-09-10 10:35:40,300 - INFO - Loading chunk starting at offset 900000 2025-09-10 10:36:05,763 - INFO - Loading chunk starting at offset 910000 2025-09-10 10:36:15,199 - INFO - Loading chunk starting at offset 920000 +2025-09-10 10:36:46,036 - INFO - Loading chunk starting at offset 930000 +2025-09-10 10:36:53,221 - INFO - Loading chunk starting at offset 940000 +2025-09-10 10:37:06,754 - INFO - Loading chunk starting at offset 950000 +2025-09-10 10:37:15,558 - INFO - Loading chunk starting at offset 960000 +2025-09-10 10:37:21,798 - INFO - [2025-09-10 10:37:21] Detecting salary... +2025-09-10 10:37:22,954 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 10:37:22,955 - INFO - [2025-09-10 10:37:22] Salary detection complete +2025-09-10 10:37:29,088 - INFO - Loading chunk starting at offset 970000 +2025-09-10 10:37:54,730 - INFO - Loading chunk starting at offset 980000 +2025-09-10 10:38:13,667 - INFO - Loading chunk starting at offset 990000 +2025-09-10 10:38:27,880 - INFO - Loading chunk starting at offset 1000000 +2025-09-10 10:39:07,546 - INFO - Loading chunk starting at offset 1010000 +2025-09-10 10:39:17,369 - INFO - Loading chunk starting at offset 1020000 +2025-09-10 10:39:23,176 - INFO - [2025-09-10 10:39:23] Detecting salary... 
+2025-09-10 10:39:24,444 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 10:39:24,445 - INFO - [2025-09-10 10:39:24] Salary detection complete +2025-09-10 10:39:26,048 - INFO - Loading chunk starting at offset 1030000 +2025-09-10 10:39:45,494 - INFO - Loading chunk starting at offset 1040000 +2025-09-10 10:39:50,480 - INFO - Loading chunk starting at offset 1050000 +2025-09-10 10:39:54,793 - INFO - Loading chunk starting at offset 1060000 +2025-09-10 10:39:59,415 - INFO - Loading chunk starting at offset 1070000 +2025-09-10 10:40:04,306 - INFO - Loading chunk starting at offset 1080000 +2025-09-10 10:40:10,517 - INFO - Loading chunk starting at offset 1090000 +2025-09-10 10:40:15,671 - INFO - Loading chunk starting at offset 1100000 +2025-09-10 10:40:21,662 - INFO - Loading chunk starting at offset 1110000 +2025-09-10 10:40:30,551 - INFO - Loading chunk starting at offset 1120000 +2025-09-10 10:40:47,391 - INFO - Loading chunk starting at offset 1130000 +2025-09-10 10:40:55,609 - INFO - Loading chunk starting at offset 1140000 +2025-09-10 10:41:04,496 - INFO - Loading chunk starting at offset 1150000 +2025-09-10 10:41:14,608 - INFO - Loading chunk starting at offset 1160000 +2025-09-10 10:41:24,447 - INFO - [2025-09-10 10:41:24] Detecting salary... 
+2025-09-10 10:41:28,537 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 10:41:28,548 - INFO - [2025-09-10 10:41:28] Salary detection complete +2025-09-10 10:42:00,867 - INFO - Loading chunk starting at offset 1170000 +2025-09-10 10:42:13,069 - INFO - Loading chunk starting at offset 1180000 +2025-09-10 10:42:21,593 - INFO - Loading chunk starting at offset 1190000 +2025-09-10 10:42:32,011 - INFO - Loading chunk starting at offset 1200000 +2025-09-10 10:42:37,982 - INFO - Loading chunk starting at offset 1210000 +2025-09-10 10:42:45,458 - INFO - Loading chunk starting at offset 1220000 +2025-09-10 10:42:54,545 - INFO - Loading chunk starting at offset 1230000 +2025-09-10 10:43:15,705 - INFO - Loading chunk starting at offset 1240000 +2025-09-10 10:43:28,549 - INFO - [2025-09-10 10:43:28] Detecting salary... 
+2025-09-10 10:43:31,640 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 10:43:31,641 - INFO - [2025-09-10 10:43:31] Salary detection complete +2025-09-10 10:43:43,344 - INFO - Loading chunk starting at offset 1250000 +2025-09-10 10:43:50,004 - INFO - Loading chunk starting at offset 1260000 +2025-09-10 10:43:59,430 - INFO - Loading chunk starting at offset 1270000 +2025-09-10 10:44:07,478 - INFO - Loading chunk starting at offset 1280000 +2025-09-10 10:44:18,927 - INFO - Loading chunk starting at offset 1290000 +2025-09-10 10:44:28,523 - INFO - Loading chunk starting at offset 1300000 +2025-09-10 10:44:38,629 - INFO - Loading chunk starting at offset 1310000 +2025-09-10 10:44:56,977 - INFO - Loading chunk starting at offset 1320000 +2025-09-10 10:45:09,203 - INFO - Loading chunk starting at offset 1330000 +2025-09-10 10:45:18,657 - INFO - Loading chunk starting at offset 1340000 +2025-09-10 10:45:27,201 - INFO - Loading chunk starting at offset 1350000 +2025-09-10 10:45:31,641 - INFO - [2025-09-10 10:45:31] Detecting salary... 
+2025-09-10 10:45:32,261 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 10:45:32,262 - INFO - [2025-09-10 10:45:32] Salary detection complete +2025-09-10 10:45:35,308 - INFO - Loading chunk starting at offset 1360000 +2025-09-10 10:45:44,783 - INFO - Loading chunk starting at offset 1370000 +2025-09-10 10:45:52,048 - INFO - Loading chunk starting at offset 1380000 +2025-09-10 10:45:58,491 - INFO - Loading chunk starting at offset 1390000 +2025-09-10 10:46:13,270 - INFO - Loading chunk starting at offset 1400000 +2025-09-10 10:46:25,221 - INFO - Loading chunk starting at offset 1410000 +2025-09-10 10:46:34,358 - INFO - Loading chunk starting at offset 1420000 +2025-09-10 10:46:38,777 - INFO - Loading chunk starting at offset 1430000 +2025-09-10 10:46:42,534 - INFO - Loading chunk starting at offset 1440000 +2025-09-10 10:46:46,147 - INFO - Loading chunk starting at offset 1450000 +2025-09-10 10:46:50,727 - INFO - Loading chunk starting at offset 1460000 +2025-09-10 10:46:55,939 - INFO - Loading chunk starting at offset 1470000 +2025-09-10 10:47:01,696 - INFO - Loading chunk starting at offset 1480000 +2025-09-10 10:47:07,411 - INFO - Loading chunk starting at offset 1490000 +2025-09-10 10:47:13,449 - INFO - Loading chunk starting at offset 1500000 +2025-09-10 10:47:23,058 - INFO - Loading chunk starting at offset 1510000 +2025-09-10 10:47:30,097 - INFO - Loading chunk starting at offset 1520000 +2025-09-10 10:47:32,259 - INFO - [2025-09-10 10:47:32] Detecting salary... 
+2025-09-10 10:47:32,923 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 10:47:32,924 - INFO - [2025-09-10 10:47:32] Salary detection complete +2025-09-10 10:47:36,423 - INFO - Loading chunk starting at offset 1530000 +2025-09-10 10:47:45,086 - INFO - Loading chunk starting at offset 1540000 +2025-09-10 10:47:57,079 - INFO - Loading chunk starting at offset 1550000 +2025-09-10 10:48:19,741 - INFO - Loading chunk starting at offset 1560000 +2025-09-10 10:48:41,300 - INFO - Loading chunk starting at offset 1570000 +2025-09-10 10:48:51,349 - INFO - Loading chunk starting at offset 1580000 +2025-09-10 10:48:58,892 - INFO - Loading chunk starting at offset 1590000 +2025-09-10 10:49:04,857 - INFO - Loading chunk starting at offset 1600000 +2025-09-10 10:49:10,299 - INFO - Loading chunk starting at offset 1610000 +2025-09-10 10:49:16,650 - INFO - Loading chunk starting at offset 1620000 +2025-09-10 10:49:23,107 - INFO - Loading chunk starting at offset 1630000 +2025-09-10 10:49:32,320 - INFO - Loading chunk starting at offset 1640000 +2025-09-10 10:49:32,927 - INFO - [2025-09-10 10:49:32] Detecting salary... 
+2025-09-10 10:49:33,484 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 10:49:33,485 - INFO - [2025-09-10 10:49:33] Salary detection complete +2025-09-10 10:49:40,707 - INFO - Loading chunk starting at offset 1650000 +2025-09-10 10:49:50,677 - INFO - Loading chunk starting at offset 1660000 +2025-09-10 10:49:59,664 - INFO - Loading chunk starting at offset 1670000 +2025-09-10 10:50:07,452 - INFO - Loading chunk starting at offset 1680000 +2025-09-10 10:50:13,583 - INFO - Loading chunk starting at offset 1690000 +2025-09-10 10:50:20,858 - INFO - Loading chunk starting at offset 1700000 +2025-09-10 10:50:27,519 - INFO - Loading chunk starting at offset 1710000 +2025-09-10 10:50:34,012 - INFO - Loading chunk starting at offset 1720000 +2025-09-10 10:50:39,628 - INFO - Loading chunk starting at offset 1730000 +2025-09-10 10:50:50,012 - INFO - Loading chunk starting at offset 1740000 +2025-09-10 10:51:06,647 - INFO - Loading chunk starting at offset 1750000 +2025-09-10 10:51:17,401 - INFO - Loading chunk starting at offset 1760000 +2025-09-10 10:51:26,360 - INFO - Loading chunk starting at offset 1770000 +2025-09-10 10:51:33,485 - INFO - [2025-09-10 10:51:33] Detecting salary... +2025-09-10 10:51:55,392 - INFO - Initializing pipeline... +2025-09-10 10:51:55,395 - INFO - [2025-09-10 10:51:55] Detecting salary... +2025-09-10 10:51:55,395 - INFO - Started autonomous salary detection loop. 
+2025-09-10 10:51:55,409 - INFO - Server running on hostname: 22bad35c69c3 +2025-09-10 10:51:55,416 - INFO - Server IP address: 172.25.0.2 +2025-09-10 10:51:55,424 - INFO - Server is accessible at: +2025-09-10 10:51:55,426 - INFO - - http://localhost:8000 +2025-09-10 10:51:55,444 - INFO - - http://127.0.0.1:8000 +2025-09-10 10:51:55,455 - INFO - - http://172.25.0.2:8000 +2025-09-10 10:51:55,456 - INFO - Pipeline initialized successfully +2025-09-10 10:51:56,024 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 10:51:56,024 - INFO - [2025-09-10 10:51:56] Salary detection complete +2025-09-10 10:52:36,450 - INFO - Initializing SalaryAnalyticsPipeline +2025-09-10 10:52:36,451 - INFO - Starting data loading process +2025-09-10 10:52:36,452 - INFO - No database connection. Attempting to connect... +2025-09-10 10:52:36,452 - INFO - Attempting to connect to database... +2025-09-10 10:52:36,538 - ERROR - Error connecting to database: No module named 'oracledb' +2025-09-10 10:52:36,539 - ERROR - Failed to establish database connection +2025-09-10 10:52:36,539 - ERROR - Failed to load data +2025-09-10 10:52:36,540 - ERROR - Failed to load data +2025-09-10 10:52:36,541 - INFO - Load data endpoint failed after 0.09 seconds +2025-09-10 10:52:36,541 - ERROR - Error loading data: 500: Failed to load data +2025-09-10 10:52:36,542 - INFO - Load data endpoint failed after 0.09 seconds +2025-09-10 10:52:58,149 - INFO - Shutting down Salary Analytics API... +2025-09-10 10:53:10,697 - INFO - generated new fontManager +2025-09-10 10:53:16,039 - INFO - Initializing pipeline... +2025-09-10 10:53:16,041 - INFO - [2025-09-10 10:53:16] Detecting salary... +2025-09-10 10:53:16,042 - INFO - Started autonomous salary detection loop. 
+2025-09-10 10:53:16,055 - INFO - Server running on hostname: 1c5d2376fb2a +2025-09-10 10:53:16,056 - INFO - Server IP address: 172.25.0.2 +2025-09-10 10:53:16,057 - INFO - Server is accessible at: +2025-09-10 10:53:16,058 - INFO - - http://localhost:8000 +2025-09-10 10:53:16,059 - INFO - - http://127.0.0.1:8000 +2025-09-10 10:53:16,060 - INFO - - http://172.25.0.2:8000 +2025-09-10 10:53:16,062 - INFO - Pipeline initialized successfully +2025-09-10 10:53:16,812 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 10:53:16,813 - INFO - [2025-09-10 10:53:16] Salary detection complete +2025-09-10 10:53:25,477 - INFO - Initializing SalaryAnalyticsPipeline +2025-09-10 10:53:25,482 - INFO - Starting data loading process +2025-09-10 10:53:25,483 - INFO - No database connection. Attempting to connect... +2025-09-10 10:53:25,484 - INFO - Attempting to connect to database... +2025-09-10 10:53:25,679 - ERROR - Error connecting to database: No module named 'oracledb' +2025-09-10 10:53:25,679 - ERROR - Failed to establish database connection +2025-09-10 10:53:25,680 - ERROR - Failed to load data +2025-09-10 10:53:25,680 - ERROR - Failed to load data +2025-09-10 10:53:25,681 - INFO - Load data endpoint failed after 0.20 seconds +2025-09-10 10:53:25,682 - ERROR - Error loading data: 500: Failed to load data +2025-09-10 10:53:25,682 - INFO - Load data endpoint failed after 0.20 seconds +2025-09-10 10:53:46,324 - INFO - Shutting down Salary Analytics API... 
diff --git a/app/config.py b/app/config.py index 2d28031..1fbc04f 100644 --- a/app/config.py +++ b/app/config.py @@ -34,19 +34,19 @@ DB_CONFIG = { } -# DNS = f"(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_CONFIG['host']})(PORT={DB_CONFIG['port']}))(CONNECT_DATA=(SID={DB_CONFIG['sid']})))" +DNS = f"(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_CONFIG['host']})(PORT={DB_CONFIG['port']}))(CONNECT_DATA=(SID={DB_CONFIG['sid']})))" # Database Connection -# SQLALCHEMY_DATABASE_URI_INTERNAL = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}") -# SQLALCHEMY_DATABASE_URI = os.getenv("SQLALCHEMY_DATABASE_URI_FULL", SQLALCHEMY_DATABASE_URI_INTERNAL) +SQLALCHEMY_DATABASE_URI_INTERNAL = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}") +SQLALCHEMY_DATABASE_URI = os.getenv("SQLALCHEMY_DATABASE_URI_FULL", SQLALCHEMY_DATABASE_URI_INTERNAL) -#SQLALCHEMY_DATABASE_URI_FULL = 'oracle+oracledb://FIRSTADVSTG:Pchanged_56789@10.2.110.30:1521/?service_name=firstadv' +# SQLALCHEMY_DATABASE_URI_FULL = 'oracle+oracledb://FIRSTADVSTG:Pchanged_56789@10.2.110.30:1521/?service_name=firstadv' # SQLAlchemy Configuration -SQLALCHEMY_DATABASE_URI = ( - f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@" - f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}" -) +# SQLALCHEMY_DATABASE_URI = ( +# f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@" +# f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}" +# ) SQLALCHEMY_TRACK_MODIFICATIONS = False diff --git a/app/models/batch_results.py b/app/models/batch_results.py index e65cce0..fcfdd91 100644 --- a/app/models/batch_results.py +++ b/app/models/batch_results.py @@ -24,7 +24,7 @@ class BatchResult(db.Model): @classmethod - def save_batch(cls, session: Session, batch_number, total_batches, results_df, status="success"): + def save_batch(cls, batch_number, total_batches, results_df, status="success"): """Save batch results into DB using ORM bulk insert.""" try: 
results_df["batch_number"] = batch_number @@ -42,21 +42,21 @@ class BatchResult(db.Model): for row in results_df.to_dict("records") ] - session.bulk_save_objects(records) - session.commit() + db.session.bulk_save_objects(records) + db.session.commit() logger.info(f"Saved batch {batch_number} successfully.") return True except Exception as e: - session.rollback() + db.session.rollback() logger.error(f"Error saving batch {batch_number}: {str(e)}") return False @classmethod - def get_batch_status(cls, session: Session, batch_number: int): + def get_batch_status(cls, batch_number: int): """Return summary info about one batch.""" try: result = ( - session.query( + db.session.query( cls.batch_number, cls.total_batches, cls.processed_at, @@ -75,11 +75,11 @@ class BatchResult(db.Model): return None @classmethod - def get_all_batches(cls, session: Session): + def get_all_batches(cls): """Return summaries for all batches.""" try: results = ( - session.query( + db.session.query( cls.batch_number, cls.total_batches, cls.processed_at, diff --git a/app/models/customer_account_transaction_hx.py b/app/models/customer_account_transaction_hx.py new file mode 100644 index 0000000..ac3e006 --- /dev/null +++ b/app/models/customer_account_transaction_hx.py @@ -0,0 +1,100 @@ +from venv import logger +from sqlalchemy import Column, Integer, String, Float, DateTime, ForeignKey +from sqlalchemy.orm import relationship +from app.extensions import db +import pandas as pd + +class CustomerAccountTransactionHx(db.Model): + __tablename__ = "customer_account_transaction_hx" + + id = Column(Integer, primary_key=True, autoincrement=True) + accountid = Column(String(64), nullable=False, index=True) + trx_type = Column(String(50), nullable=False) + amount = Column(Float, nullable=False) + description = Column(String(255)) + customer_id = Column(String(64)) + trx_start_date = Column(DateTime, nullable=False) + trx_end_date = Column(DateTime) + is_salary_related = Column(Integer, default=0) + 
is_consistent_amount = Column(Integer, default=0) + is_salary_type = Column(Integer, default=0) + + + + @classmethod + def get_all(cls): + """Fetch all transactions.""" + return db.session.query(cls).all() + + @classmethod + def get_rows_count(cls): + """Return total number of transaction rows.""" + try: + count = db.session.query(db.func.count(cls.id)).scalar() + return count + except Exception as e: + logger.error(f"Error getting row count: {str(e)}") + return None + + @classmethod + def get_by_account(cls, accountid: str): + """Fetch transactions for a given account.""" + return db.session.query(cls).filter_by(accountid=accountid).all() + + @classmethod + def get_accounts(cls, limit=None): + """Fetch distinct account IDs.""" + query = db.session.query(cls.accountid).distinct() + if limit: + query = query.limit(limit) + return [row.accountid for row in query.all()] + + @classmethod + def insert_transaction(cls, **kwargs): + """Insert a new transaction.""" + trx = cls(**kwargs) + try: + db.session.add(trx) + db.session.commit() + except Exception as e: + logger.error(f"Error inserting transaction: {str(e)}") + return None + return trx + + @classmethod + def bulk_insert(cls, transactions: list[dict]): + """Insert multiple transactions at once.""" + objs = [cls(**trx) for trx in transactions] + + try: + db.session.bulk_save_objects(objs) + db.session.commit() + except Exception as e: + logger.error(f"Error in bulk insert: {str(e)}") + return None + return objs + + @classmethod + def get_transactions_df(cls, accountids: list[str] = None): + """Return a Pandas DataFrame for ML model preparation.""" + query = db.session.query(cls) + if accountids: + query = query.filter(cls.accountid.in_(accountids)) + rows = query.all() + + + df = pd.DataFrame([{ + "id": trx.id, + "accountid": trx.accountid, + "trx_type": trx.trx_type, + "amount": trx.amount, + "description": trx.description, + "customer_id": trx.customer_id, + "trx_start_date": trx.trx_start_date, + "trx_end_date": 
trx.trx_end_date, + "is_salary_related": trx.is_salary_related, + "is_consistent_amount": trx.is_consistent_amount, + "is_salary_type": trx.is_salary_type, + } for trx in rows]) + + return df diff --git a/app/models/db_operations.py b/app/models/db_operations.py index 0517dee..6a20389 100644 --- a/app/models/db_operations.py +++ b/app/models/db_operations.py @@ -3,7 +3,7 @@ Database operations module for salary analytics. """ from sqlalchemy import text -from ..config import BATCH_RESULTS_TABLE +from app.config import BATCH_RESULTS_TABLE from datetime import datetime from app.utils.logger import logger -- 2.34.1 From 93073012e5f14c820d906a097030c3c58fe1616a Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Wed, 10 Sep 2025 12:25:16 +0100 Subject: [PATCH 10/19] Update app.log --- app.log | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/app.log b/app.log index a55a9c9..7ce73a7 100644 --- a/app.log +++ b/app.log @@ -407,3 +407,118 @@ 2025-09-10 10:53:25,682 - ERROR - Error loading data: 500: Failed to load data 2025-09-10 10:53:25,682 - INFO - Load data endpoint failed after 0.20 seconds 2025-09-10 10:53:46,324 - INFO - Shutting down Salary Analytics API... +2025-09-10 11:07:49,841 - INFO - generated new fontManager +2025-09-10 11:07:59,771 - INFO - Initializing pipeline... +2025-09-10 11:07:59,774 - INFO - [2025-09-10 11:07:59] Detecting salary... +2025-09-10 11:07:59,774 - INFO - Started autonomous salary detection loop. 
+2025-09-10 11:07:59,893 - INFO - Server running on hostname: 0b21809edf52 +2025-09-10 11:07:59,894 - INFO - Server IP address: 172.25.0.2 +2025-09-10 11:07:59,895 - INFO - Server is accessible at: +2025-09-10 11:07:59,915 - INFO - - http://localhost:8000 +2025-09-10 11:07:59,917 - INFO - - http://127.0.0.1:8000 +2025-09-10 11:07:59,918 - INFO - - http://172.25.0.2:8000 +2025-09-10 11:07:59,919 - INFO - Pipeline initialized successfully +2025-09-10 11:08:01,268 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:08:01,269 - INFO - [2025-09-10 11:08:01] Salary detection complete +2025-09-10 11:08:15,668 - INFO - Initializing SalaryAnalyticsPipeline +2025-09-10 11:08:15,669 - INFO - Starting data loading process +2025-09-10 11:08:15,671 - INFO - No database connection. Attempting to connect... +2025-09-10 11:08:15,672 - INFO - Attempting to connect to database... +2025-09-10 11:08:20,076 - ERROR - Error connecting to database: (oracledb.exceptions.DatabaseError) ORA-00936: missing expression +Help: https://docs.oracle.com/error-help/db/ora-00936/ +[SQL: SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = 'customer_account_transaction_hx')] +(Background on this error at: https://sqlalche.me/e/20/4xp6) +2025-09-10 11:08:20,086 - ERROR - Failed to establish database connection +2025-09-10 11:08:20,087 - ERROR - Failed to load data +2025-09-10 11:08:20,092 - ERROR - Failed to load data +2025-09-10 11:08:20,119 - INFO - Load data endpoint failed after 4.45 seconds +2025-09-10 11:08:20,123 - ERROR - Error loading data: 500: Failed to load data +2025-09-10 11:08:20,124 - INFO - Load data endpoint failed after 4.46 seconds +2025-09-10 11:10:01,280 - INFO - [2025-09-10 11:10:01] Detecting salary... 
+2025-09-10 11:10:02,367 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:10:02,368 - INFO - [2025-09-10 11:10:02] Salary detection complete +2025-09-10 11:12:02,395 - INFO - [2025-09-10 11:12:02] Detecting salary... +2025-09-10 11:12:03,234 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:12:03,256 - INFO - [2025-09-10 11:12:03] Salary detection complete +2025-09-10 11:14:03,279 - INFO - [2025-09-10 11:14:03] Detecting salary... +2025-09-10 11:14:04,266 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:14:04,267 - INFO - [2025-09-10 11:14:04] Salary detection complete +2025-09-10 11:16:04,268 - INFO - [2025-09-10 11:16:04] Detecting salary... +2025-09-10 11:16:05,133 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:16:05,134 - INFO - [2025-09-10 11:16:05] Salary detection complete +2025-09-10 11:18:05,134 - INFO - [2025-09-10 11:18:05] Detecting salary... 
+2025-09-10 11:18:05,955 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:18:05,956 - INFO - [2025-09-10 11:18:05] Salary detection complete +2025-09-10 11:20:05,855 - INFO - [2025-09-10 11:20:05] Detecting salary... +2025-09-10 11:20:06,453 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:20:06,463 - INFO - [2025-09-10 11:20:06] Salary detection complete +2025-09-10 11:22:06,512 - INFO - [2025-09-10 11:22:06] Detecting salary... +2025-09-10 11:22:07,087 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:22:07,088 - INFO - [2025-09-10 11:22:07] Salary detection complete +2025-09-10 11:24:07,092 - INFO - [2025-09-10 11:24:07] Detecting salary... +2025-09-10 11:24:08,235 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:24:08,236 - INFO - [2025-09-10 11:24:08] Salary detection complete +2025-09-10 11:25:13,551 - INFO - Shutting down Salary Analytics API... 
-- 2.34.1 From ec5f15882397c07096912442a831d515a4c88c78 Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Wed, 10 Sep 2025 12:29:55 +0100 Subject: [PATCH 11/19] Update config.py --- app/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/config.py b/app/config.py index 1fbc04f..525031a 100644 --- a/app/config.py +++ b/app/config.py @@ -40,7 +40,6 @@ DNS = f"(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_CONFIG['host']})(PORT={DB SQLALCHEMY_DATABASE_URI_INTERNAL = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}") SQLALCHEMY_DATABASE_URI = os.getenv("SQLALCHEMY_DATABASE_URI_FULL", SQLALCHEMY_DATABASE_URI_INTERNAL) -# SQLALCHEMY_DATABASE_URI_FULL = 'oracle+oracledb://FIRSTADVSTG:Pchanged_56789@10.2.110.30:1521/?service_name=firstadv' # SQLAlchemy Configuration # SQLALCHEMY_DATABASE_URI = ( -- 2.34.1 From bc071360cc8c47d0ee4c7155a245f866b43f84bc Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Wed, 10 Sep 2025 12:30:36 +0100 Subject: [PATCH 12/19] Update app.log --- app.log | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/app.log b/app.log index 7ce73a7..3928bfe 100644 --- a/app.log +++ b/app.log @@ -522,3 +522,43 @@ Help: https://docs.oracle.com/error-help/db/ora-00936/ 2025-09-10 11:24:08,236 - INFO - [2025-09-10 11:24:08] Salary detection complete 2025-09-10 11:25:13,551 - INFO - Shutting down Salary Analytics API... +2025-09-10 11:25:52,852 - INFO - Initializing pipeline... +2025-09-10 11:25:52,857 - INFO - [2025-09-10 11:25:52] Detecting salary... +2025-09-10 11:25:52,857 - INFO - Started autonomous salary detection loop. 
+2025-09-10 11:25:52,866 - INFO - Server running on hostname: 0b21809edf52 +2025-09-10 11:25:52,876 - INFO - Server IP address: 172.25.0.2 +2025-09-10 11:25:52,881 - INFO - Server is accessible at: +2025-09-10 11:25:52,882 - INFO - - http://localhost:8000 +2025-09-10 11:25:52,883 - INFO - - http://127.0.0.1:8000 +2025-09-10 11:25:52,884 - INFO - - http://172.25.0.2:8000 +2025-09-10 11:25:52,884 - INFO - Pipeline initialized successfully +2025-09-10 11:25:53,580 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:25:53,581 - INFO - [2025-09-10 11:25:53] Salary detection complete +2025-09-10 11:27:53,578 - INFO - [2025-09-10 11:27:53] Detecting salary... +2025-09-10 11:27:54,235 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:27:54,235 - INFO - [2025-09-10 11:27:54] Salary detection complete +2025-09-10 11:29:54,235 - INFO - [2025-09-10 11:29:54] Detecting salary... +2025-09-10 11:29:54,827 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: { + "data": [], + "error": {}, + "message": "AutoCall Add Salary Successful", + "status": true, + "statusCode": 200 +} + +2025-09-10 11:29:54,827 - INFO - [2025-09-10 11:29:54] Salary detection complete +2025-09-10 11:30:20,979 - INFO - Shutting down Salary Analytics API... 
-- 2.34.1 From 02094df42cba5b1604af8461022f62f6584cd142 Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Wed, 10 Sep 2025 12:30:46 +0100 Subject: [PATCH 13/19] Update config.py --- app/config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/config.py b/app/config.py index 525031a..35c509b 100644 --- a/app/config.py +++ b/app/config.py @@ -37,8 +37,7 @@ DB_CONFIG = { DNS = f"(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_CONFIG['host']})(PORT={DB_CONFIG['port']}))(CONNECT_DATA=(SID={DB_CONFIG['sid']})))" # Database Connection -SQLALCHEMY_DATABASE_URI_INTERNAL = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}") -SQLALCHEMY_DATABASE_URI = os.getenv("SQLALCHEMY_DATABASE_URI_FULL", SQLALCHEMY_DATABASE_URI_INTERNAL) +SQLALCHEMY_DATABASE_URI = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}") # SQLAlchemy Configuration -- 2.34.1 From fdd7959370c049e956c38d67cbb1ca8ad204948e Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Wed, 10 Sep 2025 12:31:31 +0100 Subject: [PATCH 14/19] Update config.py --- app/config.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/config.py b/app/config.py index 35c509b..c83ba68 100644 --- a/app/config.py +++ b/app/config.py @@ -37,14 +37,14 @@ DB_CONFIG = { DNS = f"(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_CONFIG['host']})(PORT={DB_CONFIG['port']}))(CONNECT_DATA=(SID={DB_CONFIG['sid']})))" # Database Connection -SQLALCHEMY_DATABASE_URI = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}") +# SQLALCHEMY_DATABASE_URI = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}") # SQLAlchemy Configuration -# SQLALCHEMY_DATABASE_URI = ( -# f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@" -# f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}" -# ) +SQLALCHEMY_DATABASE_URI = ( + 
f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@" + f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}" +) SQLALCHEMY_TRACK_MODIFICATIONS = False -- 2.34.1 From 495ba93a4ba6ab1124cad4599cea1a2deef4ef4e Mon Sep 17 00:00:00 2001 From: VivianDee <115420678+VivianDee@users.noreply.github.com> Date: Fri, 19 Sep 2025 12:25:42 +0100 Subject: [PATCH 15/19] [add]: First Analytics models --- app.log | 10 +++++++ app/models/account.py | 19 +++++++++++++ app/models/simbrella_customer.py | 37 +++++++++++++++++++++++++ app/models/transaction.py | 46 ++++++++++++++++++++++++++++++++ app/models/transaction_stg.py | 46 ++++++++++++++++++++++++++++++++ 5 files changed, 158 insertions(+) create mode 100644 app/models/account.py create mode 100644 app/models/simbrella_customer.py create mode 100644 app/models/transaction.py create mode 100644 app/models/transaction_stg.py diff --git a/app.log b/app.log index 3928bfe..7d86856 100644 --- a/app.log +++ b/app.log @@ -562,3 +562,13 @@ Help: https://docs.oracle.com/error-help/db/ora-00936/ 2025-09-10 11:29:54,827 - INFO - [2025-09-10 11:29:54] Salary detection complete 2025-09-10 11:30:20,979 - INFO - Shutting down Salary Analytics API... +2025-09-10 11:31:18,351 - INFO - Initializing pipeline... +2025-09-10 11:31:18,375 - INFO - [2025-09-10 11:31:18] Detecting salary... +2025-09-10 11:31:18,376 - INFO - Started autonomous salary detection loop. 
+2025-09-10 11:31:18,405 - INFO - Server running on hostname: 0b21809edf52 +2025-09-10 11:31:18,447 - INFO - Server IP address: 172.25.0.2 +2025-09-10 11:31:18,528 - INFO - Server is accessible at: +2025-09-10 11:31:18,552 - INFO - - http://localhost:8000 +2025-09-10 11:31:18,552 - INFO - - http://127.0.0.1:8000 +2025-09-10 11:31:18,553 - INFO - - http://172.25.0.2:8000 +2025-09-10 11:31:18,554 - INFO - Pipeline initialized successfully diff --git a/app/models/account.py b/app/models/account.py new file mode 100644 index 0000000..2198dd3 --- /dev/null +++ b/app/models/account.py @@ -0,0 +1,19 @@ +from sqlalchemy import Column, Integer, String, Date, Numeric, CHAR +from app.extensions import db + + +class Account(db.Model): + __tablename__ = "accounts" + + customerid = db.Column(db.String(50), primary_key=True) + accountid = db.Column(db.String(11), nullable=False) + registrationdate = db.Column(db.DateTime) + accountcurrencycode = db.Column(db.String(3)) + schemecode = db.Column(db.String(5)) + lastinflowtransactiondate = db.Column(db.DateTime) + accountstatus = db.Column(db.String(50)) + accountstatusdate = db.Column(db.DateTime) + + + def __repr__(self): + return f"