diff --git a/Dockerfile b/Dockerfile index 2109548..5b599cb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,20 +1,23 @@ FROM python:3.11-slim +# Set the working directory in the container WORKDIR /app +# Copy the current directory contents into the container at /app +COPY . /app + RUN apt-get update && apt-get install -y libpq-dev && rm -rf /var/lib/apt/lists/* COPY requirements.txt . RUN pip install -r requirements.txt -COPY salary_analytics/ ./salary_analytics/ RUN mkdir -p output/csv output/plots output/models -ENV PYTHONPATH=/app -ENV HOST=0.0.0.0 -ENV PORT=8000 + +ENV FLASK_APP=wsgi.py +ENV FLASK_RUN_HOST=0.0.0.0 EXPOSE 8000 -CMD ["uvicorn", "salary_analytics.api:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] \ No newline at end of file +CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:8000", "wsgi:wsgi_app"] \ No newline at end of file diff --git a/salary_analytics/app/__init__.py b/app/__init__.py similarity index 65% rename from salary_analytics/app/__init__.py rename to app/__init__.py index b286361..90bbc1a 100644 --- a/salary_analytics/app/__init__.py +++ b/app/__init__.py @@ -2,6 +2,15 @@ from flask import Flask import os from .extensions import db, migrate + +""" +Salary Analytics Package +A package for analyzing and predicting salary patterns from transaction data. +""" + +__version__ = "0.1.0" + + def create_app(): app = Flask(__name__) app.config.from_object('salary_analytics.config') @@ -11,7 +20,7 @@ def create_app(): migrate.init_app(app, db) # Register blueprints or CLI commands here if needed - from . import commands + from .commands import commands app.cli.add_command(commands.upload_xls_cli) return app \ No newline at end of file diff --git a/salary_analytics/rac_check.py b/app/analytics/integrations/rac_check.py similarity index 90% rename from salary_analytics/rac_check.py rename to app/analytics/integrations/rac_check.py index d8f16d3..86c21ff 100644 --- a/salary_analytics/rac_check.py +++ b/app/analytics/integrations/rac_check.py @@ -1,10 +1,8 @@ from django.conf import settings import httpx import json -from salary_analytics.config import SIMBRELLA_BASE_URL, SIMBRELLA_ENDPOINT_RAC_CHECKS -import logging - -logger = logging.getLogger(__name__) +from app.config import SIMBRELLA_BASE_URL, SIMBRELLA_ENDPOINT_RAC_CHECKS +from app.utils.logger import logger class SimbrellaIntegration: BASE_URL = SIMBRELLA_BASE_URL diff --git a/salary_analytics/salary_detect.py b/app/analytics/integrations/salary_detect.py similarity index 85% rename from salary_analytics/salary_detect.py rename to app/analytics/integrations/salary_detect.py index 0824ae2..684abcb 100644 --- a/salary_analytics/salary_detect.py +++ b/app/analytics/integrations/salary_detect.py @@ -1,11 +1,8 @@ import time -import logging import threading import requests -from .config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) +from ...config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload +from app.utils.logger import logger class SalaryDetect: def __init__(self): diff --git a/app/analytics/services/__init__.py b/app/analytics/services/__init__.py new file mode 100644 index 0000000..0f844f8 --- /dev/null +++ b/app/analytics/services/__init__.py @@ -0,0 +1,24 @@ +from .main import SalaryAnalyticsPipeline +from .data_loader import DataLoader +from .keyword_analyzer import KeywordAnalyzer +from .consistent_amount_analyzer import ConsistentAmountAnalyzer +from .transaction_type_analyzer import TransactionTypeAnalyzer +from .salary_earner_analyzer import SalaryEarnerAnalyzer +from .salary_predictor import SalaryPredictor + + +""" +Salary Analytics Package +A package for analyzing and predicting salary patterns from transaction data. +""" + +__version__ = "0.1.0" +__all__ = [ + "SalaryAnalyticsPipeline", + "DataLoader", + "KeywordAnalyzer", + "ConsistentAmountAnalyzer", + "TransactionTypeAnalyzer", + "SalaryEarnerAnalyzer", + "SalaryPredictor" +] diff --git a/salary_analytics/consistent_amount_analyzer.py b/app/analytics/services/consistent_amount_analyzer.py similarity index 100% rename from salary_analytics/consistent_amount_analyzer.py rename to app/analytics/services/consistent_amount_analyzer.py diff --git a/salary_analytics/data_loader.py b/app/analytics/services/data_loader.py similarity index 99% rename from salary_analytics/data_loader.py rename to app/analytics/services/data_loader.py index ec2da46..e11b0e4 100644 --- a/salary_analytics/data_loader.py +++ b/app/analytics/services/data_loader.py @@ -8,8 +8,7 @@ from datetime import datetime import logging import os from .config import DB_CONFIG, TABLE_NAME - -logger = logging.getLogger(__name__) +from app.utils.logger import logger class DataLoader: def __init__(self): diff --git a/salary_analytics/keyword_analyzer.py b/app/analytics/services/keyword_analyzer.py similarity index 100% rename from salary_analytics/keyword_analyzer.py rename to app/analytics/services/keyword_analyzer.py diff --git a/salary_analytics/main.py b/app/analytics/services/main.py similarity index 99% rename from salary_analytics/main.py rename to app/analytics/services/main.py index e2781e8..87482c7 100644 --- a/salary_analytics/main.py +++ b/app/analytics/services/main.py @@ -9,8 +9,7 @@ from .consistent_amount_analyzer import ConsistentAmountAnalyzer from .transaction_type_analyzer import TransactionTypeAnalyzer from .salary_earner_analyzer import SalaryEarnerAnalyzer from .salary_predictor import SalaryPredictor - -logger = logging.getLogger(__name__) +from app.utils.logger import logger class SalaryAnalyticsPipeline: def __init__(self): diff --git a/salary_analytics/salary_earner_analyzer.py b/app/analytics/services/salary_earner_analyzer.py similarity index 97% rename from salary_analytics/salary_earner_analyzer.py rename to app/analytics/services/salary_earner_analyzer.py index b32c995..f17d4ce 100644 --- a/salary_analytics/salary_earner_analyzer.py +++ b/app/analytics/services/salary_earner_analyzer.py @@ -6,15 +6,8 @@ import pandas as pd import matplotlib.pyplot as plt from matplotlib_venn import venn3 from datetime import datetime, timedelta -import logging from .config import MODEL_CONFIG, OUTPUT_PATHS - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) +from app.utils.logger import logger class SalaryEarnerAnalyzer: def __init__(self, df): diff --git a/salary_analytics/salary_predictor.py b/app/analytics/services/salary_predictor.py similarity index 100% rename from salary_analytics/salary_predictor.py rename to app/analytics/services/salary_predictor.py diff --git a/salary_analytics/transaction_type_analyzer.py b/app/analytics/services/transaction_type_analyzer.py similarity index 100% rename from salary_analytics/transaction_type_analyzer.py rename to app/analytics/services/transaction_type_analyzer.py diff --git a/salary_analytics/api.py b/app/api.py similarity index 98% rename from salary_analytics/api.py rename to app/api.py index cb41a30..feb1fde 100644 --- a/salary_analytics/api.py +++ b/app/api.py @@ -17,20 +17,15 @@ from sqlalchemy import text, Table, Column, Integer, String, Float, DateTime, Me import numpy as np import warnings import time -from .main import SalaryAnalyticsPipeline +from .analytics.services.main import SalaryAnalyticsPipeline from .config import OUTPUT_PATHS, TABLE_NAME, BATCH_RESULTS_TABLE from .data_loader import DataLoader from .salary_predictor import SalaryPredictor from .salary_earner_analyzer import SalaryEarnerAnalyzer from .db_operations import DatabaseOperations -from .salary_detect import SalaryDetect +from .analytics.integrations.salary_detect import SalaryDetect +from app.utils.logger import logger -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) # Suppress warnings warnings.filterwarnings('ignore', category=RuntimeWarning, module='numpy') diff --git a/salary_analytics/app/commands.py b/app/commands/commands.py similarity index 94% rename from salary_analytics/app/commands.py rename to app/commands/commands.py index b90b73e..d055a5b 100644 --- a/salary_analytics/app/commands.py +++ b/app/commands/commands.py @@ -2,8 +2,8 @@ import click import pandas as pd from datetime import datetime from flask.cli import with_appcontext -from salary_analytics.app.extensions import db -from salary_analytics.app.models import RawTransaction +from app.extensions import db +from app.models import RawTransaction @click.group() def commands(): diff --git a/salary_analytics/config.py b/app/config.py similarity index 100% rename from salary_analytics/config.py rename to app/config.py diff --git a/salary_analytics/db_operations.py b/app/db_operations.py similarity index 98% rename from salary_analytics/db_operations.py rename to app/db_operations.py index 9cb317e..f3abf4d 100644 --- a/salary_analytics/db_operations.py +++ b/app/db_operations.py @@ -2,12 +2,10 @@ Database operations module for salary analytics. """ -import logging from sqlalchemy import text from .config import BATCH_RESULTS_TABLE from datetime import datetime - -logger = logging.getLogger(__name__) +from app.utils.logger import logger class DatabaseOperations: def __init__(self, engine): diff --git a/salary_analytics/app/extensions.py b/app/extensions.py similarity index 100% rename from salary_analytics/app/extensions.py rename to app/extensions.py diff --git a/salary_analytics/app/models.py b/app/models/raw_transaction.py similarity index 91% rename from salary_analytics/app/models.py rename to app/models/raw_transaction.py index 72615f9..a34b7d8 100644 --- a/salary_analytics/app/models.py +++ b/app/models/raw_transaction.py @@ -1,4 +1,4 @@ -from .extensions import db +from app.extensions import db class RawTransaction(db.Model): __tablename__ = 'analytics_raw_transactions' diff --git a/app/utils/logger.py b/app/utils/logger.py new file mode 100644 index 0000000..4ee0e98 --- /dev/null +++ b/app/utils/logger.py @@ -0,0 +1,13 @@ +import logging + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + handlers=[ + # logging.StreamHandler(), + logging.FileHandler("app.log", mode='a') # Log to file + ] +) + +logger = logging.getLogger("DetectionService") diff --git a/docker-compose.yml b/docker-compose.yml index b5a05e3..2f701b2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,14 +3,13 @@ services: build: . ports: - "${APP_PORT:-4800}:8000" - volumes: - - ./output:/app/output environment: - - DB_USER=salaryloan - - DB_PASSWORD=salaryloan - - DB_NAME=salaryloan - - DB_PORT=10532 - - DB_HOST=dev-data.simbrellang.net + - FLASK_APP=${FLASK_APP} + - FLASK_ENV=${FLASK_ENV} + - DATABASE_URL=postgresql+psycopg2://${DATABASE_USER}:${DATABASE_PASSWORD}@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_NAME} + volumes: + - .:/app + - ./output:/app/output restart: unless-stopped networks: - salary_network diff --git a/migrations/env.py b/migrations/env.py index 79a3e8d..fa86225 100644 --- a/migrations/env.py +++ b/migrations/env.py @@ -19,7 +19,7 @@ if config.config_file_name is not None: # from myapp import Base # target_metadata = Base.metadata from flask import current_app -from salary_analytics.app.extensions import db +from app.extensions import db config.set_main_option('sqlalchemy.url', current_app.config.get('SQLALCHEMY_DATABASE_URI')) diff --git a/requirements.txt b/requirements.txt index 68fa9fc..4937b16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,4 +17,6 @@ openpyxl>=3.0.10 Flask>=2.0.0 Flask-SQLAlchemy>=3.0.0 Flask-Migrate>=4.0.0 -alembic>=1.8.0 \ No newline at end of file +alembic>=1.8.0 +requests>=2.26.0 +gunicorn \ No newline at end of file diff --git a/run.py b/run.py deleted file mode 100644 index 77e5bde..0000000 --- a/run.py +++ /dev/null @@ -1,4 +0,0 @@ -import os -from salary_analytics.app import create_app - -app = create_app() \ No newline at end of file diff --git a/salary_analytics/__init__.py b/salary_analytics/__init__.py deleted file mode 100644 index 1825412..0000000 --- a/salary_analytics/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -""" -Salary Analytics Package -A package for analyzing and predicting salary patterns from transaction data. -""" - -__version__ = "0.1.0" \ No newline at end of file diff --git a/wsgi.py b/wsgi.py new file mode 100644 index 0000000..49f70e1 --- /dev/null +++ b/wsgi.py @@ -0,0 +1,7 @@ +from app import create_app + +app = create_app() + +if __name__ != "__main__": + # Expose WSGI app instance for Gunicorn + wsgi_app = app