Compare commits
20 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 77165e2adf | |||
| 46d2754ad9 | |||
| 43f2ed5252 | |||
| f25297611b | |||
| 495ba93a4b | |||
| fdd7959370 | |||
| 02094df42c | |||
| bc071360cc | |||
| ec5f158823 | |||
| 93073012e5 | |||
| 8236462b83 | |||
| 95732a470d | |||
| 239d19fbac | |||
| ebe40cda19 | |||
| 00d89e460f | |||
| 80cc543cdd | |||
| 4e22161088 | |||
| d9b6a7e92e | |||
| 6de9583aaf | |||
| 2cc3d70f4f |
+11
-5
@@ -1,20 +1,26 @@
|
|||||||
FROM python:3.11-slim
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
# Set the working directory in the container
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy the current directory contents into the container at /app
|
||||||
|
COPY . /app
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y libpq-dev && rm -rf /var/lib/apt/lists/*
|
RUN apt-get update && apt-get install -y libpq-dev && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip install -r requirements.txt
|
RUN pip install -r requirements.txt
|
||||||
|
|
||||||
COPY salary_analytics/ ./salary_analytics/
|
|
||||||
|
|
||||||
RUN mkdir -p output/csv output/plots output/models
|
RUN mkdir -p output/csv output/plots output/models
|
||||||
|
|
||||||
ENV PYTHONPATH=/app
|
|
||||||
ENV HOST=0.0.0.0
|
# ENV FLASK_APP=wsgi.py
|
||||||
ENV PORT=8000
|
|
||||||
|
ENV FLASK_APP=run.py.
|
||||||
|
ENV FLASK_RUN_HOST=0.0.0.0
|
||||||
|
|
||||||
EXPOSE 8000
|
EXPOSE 8000
|
||||||
|
|
||||||
CMD ["uvicorn", "salary_analytics.api:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
# CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:8000", "wsgi:wsgi_app"]
|
||||||
|
CMD ["uvicorn", "run:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||||
@@ -0,0 +1,644 @@
|
|||||||
|
2025-09-10 10:21:11,016 - INFO - Initializing pipeline...
|
||||||
|
2025-09-10 10:21:11,019 - INFO - [2025-09-10 10:21:11] Detecting salary...
|
||||||
|
2025-09-10 10:21:11,019 - INFO - Started autonomous salary detection loop.
|
||||||
|
2025-09-10 10:21:11,030 - INFO - Server running on hostname: 22bad35c69c3
|
||||||
|
2025-09-10 10:21:11,035 - INFO - Server IP address: 172.25.0.2
|
||||||
|
2025-09-10 10:21:11,036 - INFO - Server is accessible at:
|
||||||
|
2025-09-10 10:21:11,037 - INFO - - http://localhost:8000
|
||||||
|
2025-09-10 10:21:11,039 - INFO - - http://127.0.0.1:8000
|
||||||
|
2025-09-10 10:21:11,040 - INFO - - http://172.25.0.2:8000
|
||||||
|
2025-09-10 10:21:11,041 - INFO - Pipeline initialized successfully
|
||||||
|
2025-09-10 10:21:11,532 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:21:11,533 - INFO - [2025-09-10 10:21:11] Salary detection complete
|
||||||
|
2025-09-10 10:21:19,451 - INFO - Initializing SalaryAnalyticsPipeline
|
||||||
|
2025-09-10 10:21:19,452 - INFO - Starting data loading process
|
||||||
|
2025-09-10 10:21:19,453 - INFO - No database connection. Attempting to connect...
|
||||||
|
2025-09-10 10:21:19,455 - INFO - Attempting to connect to database...
|
||||||
|
2025-09-10 10:21:27,271 - INFO - Table customer_account_transaction_hx exists with 5354307 rows
|
||||||
|
2025-09-10 10:21:27,488 - INFO - Connected successfully to database!
|
||||||
|
2025-09-10 10:21:27,715 - INFO - Loading data from table: customer_account_transaction_hx
|
||||||
|
2025-09-10 10:21:28,614 - INFO - Total rows to process: 5354307
|
||||||
|
2025-09-10 10:21:28,852 - INFO - Loading chunk starting at offset 0
|
||||||
|
2025-09-10 10:21:40,251 - INFO - Loading chunk starting at offset 10000
|
||||||
|
2025-09-10 10:21:46,981 - INFO - Loading chunk starting at offset 20000
|
||||||
|
2025-09-10 10:21:57,010 - INFO - Loading chunk starting at offset 30000
|
||||||
|
2025-09-10 10:22:04,792 - INFO - Loading chunk starting at offset 40000
|
||||||
|
2025-09-10 10:22:09,599 - INFO - Loading chunk starting at offset 50000
|
||||||
|
2025-09-10 10:22:15,016 - INFO - Loading chunk starting at offset 60000
|
||||||
|
2025-09-10 10:22:18,613 - INFO - Loading chunk starting at offset 70000
|
||||||
|
2025-09-10 10:22:22,007 - INFO - Loading chunk starting at offset 80000
|
||||||
|
2025-09-10 10:22:28,397 - INFO - Loading chunk starting at offset 90000
|
||||||
|
2025-09-10 10:22:36,081 - INFO - Loading chunk starting at offset 100000
|
||||||
|
2025-09-10 10:22:48,738 - INFO - Loading chunk starting at offset 110000
|
||||||
|
2025-09-10 10:23:01,521 - INFO - Loading chunk starting at offset 120000
|
||||||
|
2025-09-10 10:23:11,532 - INFO - [2025-09-10 10:23:11] Detecting salary...
|
||||||
|
2025-09-10 10:23:13,504 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:23:13,504 - INFO - [2025-09-10 10:23:13] Salary detection complete
|
||||||
|
2025-09-10 10:23:16,348 - INFO - Loading chunk starting at offset 130000
|
||||||
|
2025-09-10 10:23:23,259 - INFO - Loading chunk starting at offset 140000
|
||||||
|
2025-09-10 10:23:29,284 - INFO - Loading chunk starting at offset 150000
|
||||||
|
2025-09-10 10:23:41,579 - INFO - Loading chunk starting at offset 160000
|
||||||
|
2025-09-10 10:23:54,788 - INFO - Loading chunk starting at offset 170000
|
||||||
|
2025-09-10 10:24:19,519 - INFO - Loading chunk starting at offset 180000
|
||||||
|
2025-09-10 10:24:31,657 - INFO - Loading chunk starting at offset 190000
|
||||||
|
2025-09-10 10:24:46,130 - INFO - Loading chunk starting at offset 200000
|
||||||
|
2025-09-10 10:24:57,289 - INFO - Loading chunk starting at offset 210000
|
||||||
|
2025-09-10 10:25:07,964 - INFO - Loading chunk starting at offset 220000
|
||||||
|
2025-09-10 10:25:13,503 - INFO - [2025-09-10 10:25:13] Detecting salary...
|
||||||
|
2025-09-10 10:25:15,477 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:25:15,478 - INFO - [2025-09-10 10:25:15] Salary detection complete
|
||||||
|
2025-09-10 10:25:17,793 - INFO - Loading chunk starting at offset 230000
|
||||||
|
2025-09-10 10:25:25,026 - INFO - Loading chunk starting at offset 240000
|
||||||
|
2025-09-10 10:25:32,079 - INFO - Loading chunk starting at offset 250000
|
||||||
|
2025-09-10 10:25:39,990 - INFO - Loading chunk starting at offset 260000
|
||||||
|
2025-09-10 10:25:50,492 - INFO - Loading chunk starting at offset 270000
|
||||||
|
2025-09-10 10:26:00,181 - INFO - Loading chunk starting at offset 280000
|
||||||
|
2025-09-10 10:26:10,138 - INFO - Loading chunk starting at offset 290000
|
||||||
|
2025-09-10 10:26:20,437 - INFO - Loading chunk starting at offset 300000
|
||||||
|
2025-09-10 10:26:34,962 - INFO - Loading chunk starting at offset 310000
|
||||||
|
2025-09-10 10:26:46,248 - INFO - Loading chunk starting at offset 320000
|
||||||
|
2025-09-10 10:26:55,275 - INFO - Loading chunk starting at offset 330000
|
||||||
|
2025-09-10 10:27:09,733 - INFO - Loading chunk starting at offset 340000
|
||||||
|
2025-09-10 10:27:15,480 - INFO - [2025-09-10 10:27:15] Detecting salary...
|
||||||
|
2025-09-10 10:27:16,553 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:27:16,553 - INFO - [2025-09-10 10:27:16] Salary detection complete
|
||||||
|
2025-09-10 10:27:26,568 - INFO - Loading chunk starting at offset 350000
|
||||||
|
2025-09-10 10:27:36,472 - INFO - Loading chunk starting at offset 360000
|
||||||
|
2025-09-10 10:27:44,909 - INFO - Loading chunk starting at offset 370000
|
||||||
|
2025-09-10 10:27:54,557 - INFO - Loading chunk starting at offset 380000
|
||||||
|
2025-09-10 10:28:00,588 - INFO - Loading chunk starting at offset 390000
|
||||||
|
2025-09-10 10:28:05,957 - INFO - Loading chunk starting at offset 400000
|
||||||
|
2025-09-10 10:28:12,058 - INFO - Loading chunk starting at offset 410000
|
||||||
|
2025-09-10 10:28:19,248 - INFO - Loading chunk starting at offset 420000
|
||||||
|
2025-09-10 10:28:29,938 - INFO - Loading chunk starting at offset 430000
|
||||||
|
2025-09-10 10:28:51,274 - INFO - Loading chunk starting at offset 440000
|
||||||
|
2025-09-10 10:28:57,720 - INFO - Loading chunk starting at offset 450000
|
||||||
|
2025-09-10 10:29:02,117 - INFO - Loading chunk starting at offset 460000
|
||||||
|
2025-09-10 10:29:11,555 - INFO - Loading chunk starting at offset 470000
|
||||||
|
2025-09-10 10:29:16,565 - INFO - [2025-09-10 10:29:16] Detecting salary...
|
||||||
|
2025-09-10 10:29:17,452 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:29:17,455 - INFO - [2025-09-10 10:29:17] Salary detection complete
|
||||||
|
2025-09-10 10:29:30,145 - INFO - Loading chunk starting at offset 480000
|
||||||
|
2025-09-10 10:29:39,048 - INFO - Loading chunk starting at offset 490000
|
||||||
|
2025-09-10 10:29:43,412 - INFO - Loading chunk starting at offset 500000
|
||||||
|
2025-09-10 10:29:47,415 - INFO - Loading chunk starting at offset 510000
|
||||||
|
2025-09-10 10:29:52,149 - INFO - Loading chunk starting at offset 520000
|
||||||
|
2025-09-10 10:29:58,331 - INFO - Loading chunk starting at offset 530000
|
||||||
|
2025-09-10 10:30:04,933 - INFO - Loading chunk starting at offset 540000
|
||||||
|
2025-09-10 10:30:11,934 - INFO - Loading chunk starting at offset 550000
|
||||||
|
2025-09-10 10:30:19,315 - INFO - Loading chunk starting at offset 560000
|
||||||
|
2025-09-10 10:30:25,683 - INFO - Loading chunk starting at offset 570000
|
||||||
|
2025-09-10 10:30:33,577 - INFO - Loading chunk starting at offset 580000
|
||||||
|
2025-09-10 10:30:40,363 - INFO - Loading chunk starting at offset 590000
|
||||||
|
2025-09-10 10:30:46,205 - INFO - Loading chunk starting at offset 600000
|
||||||
|
2025-09-10 10:30:53,957 - INFO - Loading chunk starting at offset 610000
|
||||||
|
2025-09-10 10:31:08,083 - INFO - Loading chunk starting at offset 620000
|
||||||
|
2025-09-10 10:31:17,454 - INFO - [2025-09-10 10:31:17] Detecting salary...
|
||||||
|
2025-09-10 10:31:17,989 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:31:17,989 - INFO - [2025-09-10 10:31:17] Salary detection complete
|
||||||
|
2025-09-10 10:31:18,901 - INFO - Loading chunk starting at offset 630000
|
||||||
|
2025-09-10 10:31:27,551 - INFO - Loading chunk starting at offset 640000
|
||||||
|
2025-09-10 10:31:33,662 - INFO - Loading chunk starting at offset 650000
|
||||||
|
2025-09-10 10:31:40,107 - INFO - Loading chunk starting at offset 660000
|
||||||
|
2025-09-10 10:31:49,938 - INFO - Loading chunk starting at offset 670000
|
||||||
|
2025-09-10 10:31:57,777 - INFO - Loading chunk starting at offset 680000
|
||||||
|
2025-09-10 10:32:06,629 - INFO - Loading chunk starting at offset 690000
|
||||||
|
2025-09-10 10:32:15,281 - INFO - Loading chunk starting at offset 700000
|
||||||
|
2025-09-10 10:32:25,328 - INFO - Loading chunk starting at offset 710000
|
||||||
|
2025-09-10 10:32:40,639 - INFO - Loading chunk starting at offset 720000
|
||||||
|
2025-09-10 10:32:51,078 - INFO - Loading chunk starting at offset 730000
|
||||||
|
2025-09-10 10:33:03,924 - INFO - Loading chunk starting at offset 740000
|
||||||
|
2025-09-10 10:33:13,007 - INFO - Loading chunk starting at offset 750000
|
||||||
|
2025-09-10 10:33:17,989 - INFO - [2025-09-10 10:33:17] Detecting salary...
|
||||||
|
2025-09-10 10:33:19,698 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:33:20,039 - INFO - [2025-09-10 10:33:20] Salary detection complete
|
||||||
|
2025-09-10 10:33:20,857 - INFO - Loading chunk starting at offset 760000
|
||||||
|
2025-09-10 10:33:30,803 - INFO - Loading chunk starting at offset 770000
|
||||||
|
2025-09-10 10:33:38,162 - INFO - Loading chunk starting at offset 780000
|
||||||
|
2025-09-10 10:33:44,504 - INFO - Loading chunk starting at offset 790000
|
||||||
|
2025-09-10 10:33:50,063 - INFO - Loading chunk starting at offset 800000
|
||||||
|
2025-09-10 10:33:57,957 - INFO - Loading chunk starting at offset 810000
|
||||||
|
2025-09-10 10:34:05,256 - INFO - Loading chunk starting at offset 820000
|
||||||
|
2025-09-10 10:34:12,212 - INFO - Loading chunk starting at offset 830000
|
||||||
|
2025-09-10 10:34:20,478 - INFO - Loading chunk starting at offset 840000
|
||||||
|
2025-09-10 10:34:28,018 - INFO - Loading chunk starting at offset 850000
|
||||||
|
2025-09-10 10:34:36,413 - INFO - Loading chunk starting at offset 860000
|
||||||
|
2025-09-10 10:34:50,168 - INFO - Loading chunk starting at offset 870000
|
||||||
|
2025-09-10 10:35:13,867 - INFO - Loading chunk starting at offset 880000
|
||||||
|
2025-09-10 10:35:20,391 - INFO - [2025-09-10 10:35:20] Detecting salary...
|
||||||
|
2025-09-10 10:35:21,791 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:35:21,794 - INFO - [2025-09-10 10:35:21] Salary detection complete
|
||||||
|
2025-09-10 10:35:28,918 - INFO - Loading chunk starting at offset 890000
|
||||||
|
2025-09-10 10:35:40,300 - INFO - Loading chunk starting at offset 900000
|
||||||
|
2025-09-10 10:36:05,763 - INFO - Loading chunk starting at offset 910000
|
||||||
|
2025-09-10 10:36:15,199 - INFO - Loading chunk starting at offset 920000
|
||||||
|
2025-09-10 10:36:46,036 - INFO - Loading chunk starting at offset 930000
|
||||||
|
2025-09-10 10:36:53,221 - INFO - Loading chunk starting at offset 940000
|
||||||
|
2025-09-10 10:37:06,754 - INFO - Loading chunk starting at offset 950000
|
||||||
|
2025-09-10 10:37:15,558 - INFO - Loading chunk starting at offset 960000
|
||||||
|
2025-09-10 10:37:21,798 - INFO - [2025-09-10 10:37:21] Detecting salary...
|
||||||
|
2025-09-10 10:37:22,954 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:37:22,955 - INFO - [2025-09-10 10:37:22] Salary detection complete
|
||||||
|
2025-09-10 10:37:29,088 - INFO - Loading chunk starting at offset 970000
|
||||||
|
2025-09-10 10:37:54,730 - INFO - Loading chunk starting at offset 980000
|
||||||
|
2025-09-10 10:38:13,667 - INFO - Loading chunk starting at offset 990000
|
||||||
|
2025-09-10 10:38:27,880 - INFO - Loading chunk starting at offset 1000000
|
||||||
|
2025-09-10 10:39:07,546 - INFO - Loading chunk starting at offset 1010000
|
||||||
|
2025-09-10 10:39:17,369 - INFO - Loading chunk starting at offset 1020000
|
||||||
|
2025-09-10 10:39:23,176 - INFO - [2025-09-10 10:39:23] Detecting salary...
|
||||||
|
2025-09-10 10:39:24,444 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:39:24,445 - INFO - [2025-09-10 10:39:24] Salary detection complete
|
||||||
|
2025-09-10 10:39:26,048 - INFO - Loading chunk starting at offset 1030000
|
||||||
|
2025-09-10 10:39:45,494 - INFO - Loading chunk starting at offset 1040000
|
||||||
|
2025-09-10 10:39:50,480 - INFO - Loading chunk starting at offset 1050000
|
||||||
|
2025-09-10 10:39:54,793 - INFO - Loading chunk starting at offset 1060000
|
||||||
|
2025-09-10 10:39:59,415 - INFO - Loading chunk starting at offset 1070000
|
||||||
|
2025-09-10 10:40:04,306 - INFO - Loading chunk starting at offset 1080000
|
||||||
|
2025-09-10 10:40:10,517 - INFO - Loading chunk starting at offset 1090000
|
||||||
|
2025-09-10 10:40:15,671 - INFO - Loading chunk starting at offset 1100000
|
||||||
|
2025-09-10 10:40:21,662 - INFO - Loading chunk starting at offset 1110000
|
||||||
|
2025-09-10 10:40:30,551 - INFO - Loading chunk starting at offset 1120000
|
||||||
|
2025-09-10 10:40:47,391 - INFO - Loading chunk starting at offset 1130000
|
||||||
|
2025-09-10 10:40:55,609 - INFO - Loading chunk starting at offset 1140000
|
||||||
|
2025-09-10 10:41:04,496 - INFO - Loading chunk starting at offset 1150000
|
||||||
|
2025-09-10 10:41:14,608 - INFO - Loading chunk starting at offset 1160000
|
||||||
|
2025-09-10 10:41:24,447 - INFO - [2025-09-10 10:41:24] Detecting salary...
|
||||||
|
2025-09-10 10:41:28,537 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:41:28,548 - INFO - [2025-09-10 10:41:28] Salary detection complete
|
||||||
|
2025-09-10 10:42:00,867 - INFO - Loading chunk starting at offset 1170000
|
||||||
|
2025-09-10 10:42:13,069 - INFO - Loading chunk starting at offset 1180000
|
||||||
|
2025-09-10 10:42:21,593 - INFO - Loading chunk starting at offset 1190000
|
||||||
|
2025-09-10 10:42:32,011 - INFO - Loading chunk starting at offset 1200000
|
||||||
|
2025-09-10 10:42:37,982 - INFO - Loading chunk starting at offset 1210000
|
||||||
|
2025-09-10 10:42:45,458 - INFO - Loading chunk starting at offset 1220000
|
||||||
|
2025-09-10 10:42:54,545 - INFO - Loading chunk starting at offset 1230000
|
||||||
|
2025-09-10 10:43:15,705 - INFO - Loading chunk starting at offset 1240000
|
||||||
|
2025-09-10 10:43:28,549 - INFO - [2025-09-10 10:43:28] Detecting salary...
|
||||||
|
2025-09-10 10:43:31,640 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:43:31,641 - INFO - [2025-09-10 10:43:31] Salary detection complete
|
||||||
|
2025-09-10 10:43:43,344 - INFO - Loading chunk starting at offset 1250000
|
||||||
|
2025-09-10 10:43:50,004 - INFO - Loading chunk starting at offset 1260000
|
||||||
|
2025-09-10 10:43:59,430 - INFO - Loading chunk starting at offset 1270000
|
||||||
|
2025-09-10 10:44:07,478 - INFO - Loading chunk starting at offset 1280000
|
||||||
|
2025-09-10 10:44:18,927 - INFO - Loading chunk starting at offset 1290000
|
||||||
|
2025-09-10 10:44:28,523 - INFO - Loading chunk starting at offset 1300000
|
||||||
|
2025-09-10 10:44:38,629 - INFO - Loading chunk starting at offset 1310000
|
||||||
|
2025-09-10 10:44:56,977 - INFO - Loading chunk starting at offset 1320000
|
||||||
|
2025-09-10 10:45:09,203 - INFO - Loading chunk starting at offset 1330000
|
||||||
|
2025-09-10 10:45:18,657 - INFO - Loading chunk starting at offset 1340000
|
||||||
|
2025-09-10 10:45:27,201 - INFO - Loading chunk starting at offset 1350000
|
||||||
|
2025-09-10 10:45:31,641 - INFO - [2025-09-10 10:45:31] Detecting salary...
|
||||||
|
2025-09-10 10:45:32,261 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:45:32,262 - INFO - [2025-09-10 10:45:32] Salary detection complete
|
||||||
|
2025-09-10 10:45:35,308 - INFO - Loading chunk starting at offset 1360000
|
||||||
|
2025-09-10 10:45:44,783 - INFO - Loading chunk starting at offset 1370000
|
||||||
|
2025-09-10 10:45:52,048 - INFO - Loading chunk starting at offset 1380000
|
||||||
|
2025-09-10 10:45:58,491 - INFO - Loading chunk starting at offset 1390000
|
||||||
|
2025-09-10 10:46:13,270 - INFO - Loading chunk starting at offset 1400000
|
||||||
|
2025-09-10 10:46:25,221 - INFO - Loading chunk starting at offset 1410000
|
||||||
|
2025-09-10 10:46:34,358 - INFO - Loading chunk starting at offset 1420000
|
||||||
|
2025-09-10 10:46:38,777 - INFO - Loading chunk starting at offset 1430000
|
||||||
|
2025-09-10 10:46:42,534 - INFO - Loading chunk starting at offset 1440000
|
||||||
|
2025-09-10 10:46:46,147 - INFO - Loading chunk starting at offset 1450000
|
||||||
|
2025-09-10 10:46:50,727 - INFO - Loading chunk starting at offset 1460000
|
||||||
|
2025-09-10 10:46:55,939 - INFO - Loading chunk starting at offset 1470000
|
||||||
|
2025-09-10 10:47:01,696 - INFO - Loading chunk starting at offset 1480000
|
||||||
|
2025-09-10 10:47:07,411 - INFO - Loading chunk starting at offset 1490000
|
||||||
|
2025-09-10 10:47:13,449 - INFO - Loading chunk starting at offset 1500000
|
||||||
|
2025-09-10 10:47:23,058 - INFO - Loading chunk starting at offset 1510000
|
||||||
|
2025-09-10 10:47:30,097 - INFO - Loading chunk starting at offset 1520000
|
||||||
|
2025-09-10 10:47:32,259 - INFO - [2025-09-10 10:47:32] Detecting salary...
|
||||||
|
2025-09-10 10:47:32,923 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:47:32,924 - INFO - [2025-09-10 10:47:32] Salary detection complete
|
||||||
|
2025-09-10 10:47:36,423 - INFO - Loading chunk starting at offset 1530000
|
||||||
|
2025-09-10 10:47:45,086 - INFO - Loading chunk starting at offset 1540000
|
||||||
|
2025-09-10 10:47:57,079 - INFO - Loading chunk starting at offset 1550000
|
||||||
|
2025-09-10 10:48:19,741 - INFO - Loading chunk starting at offset 1560000
|
||||||
|
2025-09-10 10:48:41,300 - INFO - Loading chunk starting at offset 1570000
|
||||||
|
2025-09-10 10:48:51,349 - INFO - Loading chunk starting at offset 1580000
|
||||||
|
2025-09-10 10:48:58,892 - INFO - Loading chunk starting at offset 1590000
|
||||||
|
2025-09-10 10:49:04,857 - INFO - Loading chunk starting at offset 1600000
|
||||||
|
2025-09-10 10:49:10,299 - INFO - Loading chunk starting at offset 1610000
|
||||||
|
2025-09-10 10:49:16,650 - INFO - Loading chunk starting at offset 1620000
|
||||||
|
2025-09-10 10:49:23,107 - INFO - Loading chunk starting at offset 1630000
|
||||||
|
2025-09-10 10:49:32,320 - INFO - Loading chunk starting at offset 1640000
|
||||||
|
2025-09-10 10:49:32,927 - INFO - [2025-09-10 10:49:32] Detecting salary...
|
||||||
|
2025-09-10 10:49:33,484 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:49:33,485 - INFO - [2025-09-10 10:49:33] Salary detection complete
|
||||||
|
2025-09-10 10:49:40,707 - INFO - Loading chunk starting at offset 1650000
|
||||||
|
2025-09-10 10:49:50,677 - INFO - Loading chunk starting at offset 1660000
|
||||||
|
2025-09-10 10:49:59,664 - INFO - Loading chunk starting at offset 1670000
|
||||||
|
2025-09-10 10:50:07,452 - INFO - Loading chunk starting at offset 1680000
|
||||||
|
2025-09-10 10:50:13,583 - INFO - Loading chunk starting at offset 1690000
|
||||||
|
2025-09-10 10:50:20,858 - INFO - Loading chunk starting at offset 1700000
|
||||||
|
2025-09-10 10:50:27,519 - INFO - Loading chunk starting at offset 1710000
|
||||||
|
2025-09-10 10:50:34,012 - INFO - Loading chunk starting at offset 1720000
|
||||||
|
2025-09-10 10:50:39,628 - INFO - Loading chunk starting at offset 1730000
|
||||||
|
2025-09-10 10:50:50,012 - INFO - Loading chunk starting at offset 1740000
|
||||||
|
2025-09-10 10:51:06,647 - INFO - Loading chunk starting at offset 1750000
|
||||||
|
2025-09-10 10:51:17,401 - INFO - Loading chunk starting at offset 1760000
|
||||||
|
2025-09-10 10:51:26,360 - INFO - Loading chunk starting at offset 1770000
|
||||||
|
2025-09-10 10:51:33,485 - INFO - [2025-09-10 10:51:33] Detecting salary...
|
||||||
|
2025-09-10 10:51:55,392 - INFO - Initializing pipeline...
|
||||||
|
2025-09-10 10:51:55,395 - INFO - [2025-09-10 10:51:55] Detecting salary...
|
||||||
|
2025-09-10 10:51:55,395 - INFO - Started autonomous salary detection loop.
|
||||||
|
2025-09-10 10:51:55,409 - INFO - Server running on hostname: 22bad35c69c3
|
||||||
|
2025-09-10 10:51:55,416 - INFO - Server IP address: 172.25.0.2
|
||||||
|
2025-09-10 10:51:55,424 - INFO - Server is accessible at:
|
||||||
|
2025-09-10 10:51:55,426 - INFO - - http://localhost:8000
|
||||||
|
2025-09-10 10:51:55,444 - INFO - - http://127.0.0.1:8000
|
||||||
|
2025-09-10 10:51:55,455 - INFO - - http://172.25.0.2:8000
|
||||||
|
2025-09-10 10:51:55,456 - INFO - Pipeline initialized successfully
|
||||||
|
2025-09-10 10:51:56,024 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:51:56,024 - INFO - [2025-09-10 10:51:56] Salary detection complete
|
||||||
|
2025-09-10 10:52:36,450 - INFO - Initializing SalaryAnalyticsPipeline
|
||||||
|
2025-09-10 10:52:36,451 - INFO - Starting data loading process
|
||||||
|
2025-09-10 10:52:36,452 - INFO - No database connection. Attempting to connect...
|
||||||
|
2025-09-10 10:52:36,452 - INFO - Attempting to connect to database...
|
||||||
|
2025-09-10 10:52:36,538 - ERROR - Error connecting to database: No module named 'oracledb'
|
||||||
|
2025-09-10 10:52:36,539 - ERROR - Failed to establish database connection
|
||||||
|
2025-09-10 10:52:36,539 - ERROR - Failed to load data
|
||||||
|
2025-09-10 10:52:36,540 - ERROR - Failed to load data
|
||||||
|
2025-09-10 10:52:36,541 - INFO - Load data endpoint failed after 0.09 seconds
|
||||||
|
2025-09-10 10:52:36,541 - ERROR - Error loading data: 500: Failed to load data
|
||||||
|
2025-09-10 10:52:36,542 - INFO - Load data endpoint failed after 0.09 seconds
|
||||||
|
2025-09-10 10:52:58,149 - INFO - Shutting down Salary Analytics API...
|
||||||
|
2025-09-10 10:53:10,697 - INFO - generated new fontManager
|
||||||
|
2025-09-10 10:53:16,039 - INFO - Initializing pipeline...
|
||||||
|
2025-09-10 10:53:16,041 - INFO - [2025-09-10 10:53:16] Detecting salary...
|
||||||
|
2025-09-10 10:53:16,042 - INFO - Started autonomous salary detection loop.
|
||||||
|
2025-09-10 10:53:16,055 - INFO - Server running on hostname: 1c5d2376fb2a
|
||||||
|
2025-09-10 10:53:16,056 - INFO - Server IP address: 172.25.0.2
|
||||||
|
2025-09-10 10:53:16,057 - INFO - Server is accessible at:
|
||||||
|
2025-09-10 10:53:16,058 - INFO - - http://localhost:8000
|
||||||
|
2025-09-10 10:53:16,059 - INFO - - http://127.0.0.1:8000
|
||||||
|
2025-09-10 10:53:16,060 - INFO - - http://172.25.0.2:8000
|
||||||
|
2025-09-10 10:53:16,062 - INFO - Pipeline initialized successfully
|
||||||
|
2025-09-10 10:53:16,812 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 10:53:16,813 - INFO - [2025-09-10 10:53:16] Salary detection complete
|
||||||
|
2025-09-10 10:53:25,477 - INFO - Initializing SalaryAnalyticsPipeline
|
||||||
|
2025-09-10 10:53:25,482 - INFO - Starting data loading process
|
||||||
|
2025-09-10 10:53:25,483 - INFO - No database connection. Attempting to connect...
|
||||||
|
2025-09-10 10:53:25,484 - INFO - Attempting to connect to database...
|
||||||
|
2025-09-10 10:53:25,679 - ERROR - Error connecting to database: No module named 'oracledb'
|
||||||
|
2025-09-10 10:53:25,679 - ERROR - Failed to establish database connection
|
||||||
|
2025-09-10 10:53:25,680 - ERROR - Failed to load data
|
||||||
|
2025-09-10 10:53:25,680 - ERROR - Failed to load data
|
||||||
|
2025-09-10 10:53:25,681 - INFO - Load data endpoint failed after 0.20 seconds
|
||||||
|
2025-09-10 10:53:25,682 - ERROR - Error loading data: 500: Failed to load data
|
||||||
|
2025-09-10 10:53:25,682 - INFO - Load data endpoint failed after 0.20 seconds
|
||||||
|
2025-09-10 10:53:46,324 - INFO - Shutting down Salary Analytics API...
|
||||||
|
2025-09-10 11:07:49,841 - INFO - generated new fontManager
|
||||||
|
2025-09-10 11:07:59,771 - INFO - Initializing pipeline...
|
||||||
|
2025-09-10 11:07:59,774 - INFO - [2025-09-10 11:07:59] Detecting salary...
|
||||||
|
2025-09-10 11:07:59,774 - INFO - Started autonomous salary detection loop.
|
||||||
|
2025-09-10 11:07:59,893 - INFO - Server running on hostname: 0b21809edf52
|
||||||
|
2025-09-10 11:07:59,894 - INFO - Server IP address: 172.25.0.2
|
||||||
|
2025-09-10 11:07:59,895 - INFO - Server is accessible at:
|
||||||
|
2025-09-10 11:07:59,915 - INFO - - http://localhost:8000
|
||||||
|
2025-09-10 11:07:59,917 - INFO - - http://127.0.0.1:8000
|
||||||
|
2025-09-10 11:07:59,918 - INFO - - http://172.25.0.2:8000
|
||||||
|
2025-09-10 11:07:59,919 - INFO - Pipeline initialized successfully
|
||||||
|
2025-09-10 11:08:01,268 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:08:01,269 - INFO - [2025-09-10 11:08:01] Salary detection complete
|
||||||
|
2025-09-10 11:08:15,668 - INFO - Initializing SalaryAnalyticsPipeline
|
||||||
|
2025-09-10 11:08:15,669 - INFO - Starting data loading process
|
||||||
|
2025-09-10 11:08:15,671 - INFO - No database connection. Attempting to connect...
|
||||||
|
2025-09-10 11:08:15,672 - INFO - Attempting to connect to database...
|
||||||
|
2025-09-10 11:08:20,076 - ERROR - Error connecting to database: (oracledb.exceptions.DatabaseError) ORA-00936: missing expression
|
||||||
|
Help: https://docs.oracle.com/error-help/db/ora-00936/
|
||||||
|
[SQL: SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = 'customer_account_transaction_hx')]
|
||||||
|
(Background on this error at: https://sqlalche.me/e/20/4xp6)
|
||||||
|
2025-09-10 11:08:20,086 - ERROR - Failed to establish database connection
|
||||||
|
2025-09-10 11:08:20,087 - ERROR - Failed to load data
|
||||||
|
2025-09-10 11:08:20,092 - ERROR - Failed to load data
|
||||||
|
2025-09-10 11:08:20,119 - INFO - Load data endpoint failed after 4.45 seconds
|
||||||
|
2025-09-10 11:08:20,123 - ERROR - Error loading data: 500: Failed to load data
|
||||||
|
2025-09-10 11:08:20,124 - INFO - Load data endpoint failed after 4.46 seconds
|
||||||
|
2025-09-10 11:10:01,280 - INFO - [2025-09-10 11:10:01] Detecting salary...
|
||||||
|
2025-09-10 11:10:02,367 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:10:02,368 - INFO - [2025-09-10 11:10:02] Salary detection complete
|
||||||
|
2025-09-10 11:12:02,395 - INFO - [2025-09-10 11:12:02] Detecting salary...
|
||||||
|
2025-09-10 11:12:03,234 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:12:03,256 - INFO - [2025-09-10 11:12:03] Salary detection complete
|
||||||
|
2025-09-10 11:14:03,279 - INFO - [2025-09-10 11:14:03] Detecting salary...
|
||||||
|
2025-09-10 11:14:04,266 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:14:04,267 - INFO - [2025-09-10 11:14:04] Salary detection complete
|
||||||
|
2025-09-10 11:16:04,268 - INFO - [2025-09-10 11:16:04] Detecting salary...
|
||||||
|
2025-09-10 11:16:05,133 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:16:05,134 - INFO - [2025-09-10 11:16:05] Salary detection complete
|
||||||
|
2025-09-10 11:18:05,134 - INFO - [2025-09-10 11:18:05] Detecting salary...
|
||||||
|
2025-09-10 11:18:05,955 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:18:05,956 - INFO - [2025-09-10 11:18:05] Salary detection complete
|
||||||
|
2025-09-10 11:20:05,855 - INFO - [2025-09-10 11:20:05] Detecting salary...
|
||||||
|
2025-09-10 11:20:06,453 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:20:06,463 - INFO - [2025-09-10 11:20:06] Salary detection complete
|
||||||
|
2025-09-10 11:22:06,512 - INFO - [2025-09-10 11:22:06] Detecting salary...
|
||||||
|
2025-09-10 11:22:07,087 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:22:07,088 - INFO - [2025-09-10 11:22:07] Salary detection complete
|
||||||
|
2025-09-10 11:24:07,092 - INFO - [2025-09-10 11:24:07] Detecting salary...
|
||||||
|
2025-09-10 11:24:08,235 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:24:08,236 - INFO - [2025-09-10 11:24:08] Salary detection complete
|
||||||
|
2025-09-10 11:25:13,551 - INFO - Shutting down Salary Analytics API...
|
||||||
|
2025-09-10 11:25:52,852 - INFO - Initializing pipeline...
|
||||||
|
2025-09-10 11:25:52,857 - INFO - [2025-09-10 11:25:52] Detecting salary...
|
||||||
|
2025-09-10 11:25:52,857 - INFO - Started autonomous salary detection loop.
|
||||||
|
2025-09-10 11:25:52,866 - INFO - Server running on hostname: 0b21809edf52
|
||||||
|
2025-09-10 11:25:52,876 - INFO - Server IP address: 172.25.0.2
|
||||||
|
2025-09-10 11:25:52,881 - INFO - Server is accessible at:
|
||||||
|
2025-09-10 11:25:52,882 - INFO - - http://localhost:8000
|
||||||
|
2025-09-10 11:25:52,883 - INFO - - http://127.0.0.1:8000
|
||||||
|
2025-09-10 11:25:52,884 - INFO - - http://172.25.0.2:8000
|
||||||
|
2025-09-10 11:25:52,884 - INFO - Pipeline initialized successfully
|
||||||
|
2025-09-10 11:25:53,580 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:25:53,581 - INFO - [2025-09-10 11:25:53] Salary detection complete
|
||||||
|
2025-09-10 11:27:53,578 - INFO - [2025-09-10 11:27:53] Detecting salary...
|
||||||
|
2025-09-10 11:27:54,235 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:27:54,235 - INFO - [2025-09-10 11:27:54] Salary detection complete
|
||||||
|
2025-09-10 11:29:54,235 - INFO - [2025-09-10 11:29:54] Detecting salary...
|
||||||
|
2025-09-10 11:29:54,827 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-10 11:29:54,827 - INFO - [2025-09-10 11:29:54] Salary detection complete
|
||||||
|
2025-09-10 11:30:20,979 - INFO - Shutting down Salary Analytics API...
|
||||||
|
2025-09-10 11:31:18,351 - INFO - Initializing pipeline...
|
||||||
|
2025-09-10 11:31:18,375 - INFO - [2025-09-10 11:31:18] Detecting salary...
|
||||||
|
2025-09-10 11:31:18,376 - INFO - Started autonomous salary detection loop.
|
||||||
|
2025-09-10 11:31:18,405 - INFO - Server running on hostname: 0b21809edf52
|
||||||
|
2025-09-10 11:31:18,447 - INFO - Server IP address: 172.25.0.2
|
||||||
|
2025-09-10 11:31:18,528 - INFO - Server is accessible at:
|
||||||
|
2025-09-10 11:31:18,552 - INFO - - http://localhost:8000
|
||||||
|
2025-09-10 11:31:18,552 - INFO - - http://127.0.0.1:8000
|
||||||
|
2025-09-10 11:31:18,553 - INFO - - http://172.25.0.2:8000
|
||||||
|
2025-09-10 11:31:18,554 - INFO - Pipeline initialized successfully
|
||||||
|
2025-09-19 11:27:20,722 - INFO - Initializing pipeline...
|
||||||
|
2025-09-19 11:27:20,724 - INFO - [2025-09-19 11:27:20] Detecting salary...
|
||||||
|
2025-09-19 11:27:20,724 - INFO - Started autonomous salary detection loop.
|
||||||
|
2025-09-19 11:27:20,750 - INFO - Server running on hostname: 0b21809edf52
|
||||||
|
2025-09-19 11:27:20,752 - INFO - Server IP address: 172.25.0.2
|
||||||
|
2025-09-19 11:27:20,753 - INFO - Server is accessible at:
|
||||||
|
2025-09-19 11:27:20,759 - INFO - - http://localhost:8000
|
||||||
|
2025-09-19 11:27:20,761 - INFO - - http://127.0.0.1:8000
|
||||||
|
2025-09-19 11:27:20,766 - INFO - - http://172.25.0.2:8000
|
||||||
|
2025-09-19 11:27:20,771 - INFO - Pipeline initialized successfully
|
||||||
|
2025-09-19 11:27:24,622 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-19 11:27:24,628 - INFO - [2025-09-19 11:27:24] Salary detection complete
|
||||||
|
2025-09-19 11:29:13,095 - INFO - Shutting down Salary Analytics API...
|
||||||
|
2025-09-24 09:47:38,012 - INFO - Initializing pipeline...
|
||||||
|
2025-09-24 09:47:38,017 - INFO - [2025-09-24 09:47:38] Detecting salary...
|
||||||
|
2025-09-24 09:47:38,024 - INFO - Started autonomous salary detection loop.
|
||||||
|
2025-09-24 09:47:38,077 - INFO - Server running on hostname: 0b21809edf52
|
||||||
|
2025-09-24 09:47:38,079 - INFO - Server IP address: 172.25.0.2
|
||||||
|
2025-09-24 09:47:38,080 - INFO - Server is accessible at:
|
||||||
|
2025-09-24 09:47:38,081 - INFO - - http://localhost:8000
|
||||||
|
2025-09-24 09:47:38,082 - INFO - - http://127.0.0.1:8000
|
||||||
|
2025-09-24 09:47:38,083 - INFO - - http://172.25.0.2:8000
|
||||||
|
2025-09-24 09:47:38,084 - INFO - Pipeline initialized successfully
|
||||||
|
2025-09-24 09:47:41,272 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-24 09:47:41,273 - INFO - [2025-09-24 09:47:41] Salary detection complete
|
||||||
|
2025-09-24 09:49:41,290 - INFO - [2025-09-24 09:49:41] Detecting salary...
|
||||||
|
2025-09-24 09:49:43,791 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-24 09:49:43,791 - INFO - [2025-09-24 09:49:43] Salary detection complete
|
||||||
|
2025-09-24 09:50:21,300 - INFO - Shutting down Salary Analytics API...
|
||||||
|
2025-09-24 09:59:27,300 - INFO - generated new fontManager
|
||||||
|
2025-09-24 09:59:30,871 - INFO - Initializing pipeline...
|
||||||
|
2025-09-24 09:59:30,872 - INFO - [2025-09-24 09:59:30] Detecting salary...
|
||||||
|
2025-09-24 09:59:30,872 - INFO - Started autonomous salary detection loop.
|
||||||
|
2025-09-24 09:59:30,877 - INFO - Server running on hostname: bfe07c2f7da2
|
||||||
|
2025-09-24 09:59:30,878 - INFO - Server IP address: 172.25.0.2
|
||||||
|
2025-09-24 09:59:30,878 - INFO - Server is accessible at:
|
||||||
|
2025-09-24 09:59:30,883 - INFO - - http://localhost:8000
|
||||||
|
2025-09-24 09:59:30,884 - INFO - - http://127.0.0.1:8000
|
||||||
|
2025-09-24 09:59:30,887 - INFO - - http://172.25.0.2:8000
|
||||||
|
2025-09-24 09:59:30,889 - INFO - Pipeline initialized successfully
|
||||||
|
2025-09-24 09:59:32,676 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||||
|
"data": [],
|
||||||
|
"error": {},
|
||||||
|
"message": "AutoCall Add Salary Successful",
|
||||||
|
"status": true,
|
||||||
|
"statusCode": 200
|
||||||
|
}
|
||||||
|
|
||||||
|
2025-09-24 09:59:32,705 - INFO - [2025-09-24 09:59:32] Salary detection complete
|
||||||
@@ -1,17 +1,21 @@
|
|||||||
from flask import Flask
|
from flask import Flask
|
||||||
import os
|
import os
|
||||||
from .extensions import db, migrate
|
from app.extensions import db, migrate
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def create_app():
|
def create_app():
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
app.config.from_object('salary_analytics.config')
|
|
||||||
|
# Load configuration from config.py
|
||||||
|
app.config.from_object('app.config')
|
||||||
|
|
||||||
# Initialize extensions
|
# Initialize extensions
|
||||||
db.init_app(app)
|
db.init_app(app)
|
||||||
migrate.init_app(app, db)
|
migrate.init_app(app, db)
|
||||||
|
|
||||||
# Register blueprints or CLI commands here if needed
|
# Register blueprints or CLI commands here if needed
|
||||||
from . import commands
|
from app.api.commands import commands
|
||||||
app.cli.add_command(commands.upload_xls_cli)
|
app.cli.add_command(commands.upload_xls_cli)
|
||||||
|
|
||||||
return app
|
return app
|
||||||
@@ -2,8 +2,8 @@ import click
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from flask.cli import with_appcontext
|
from flask.cli import with_appcontext
|
||||||
from salary_analytics.app.extensions import db
|
from app.extensions import db
|
||||||
from salary_analytics.app.models import RawTransaction
|
from app.models import RawTransaction
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
def commands():
|
def commands():
|
||||||
@@ -22,20 +22,32 @@ os.makedirs(PLOTS_DIR, exist_ok=True)
|
|||||||
os.makedirs(CSV_DIR, exist_ok=True)
|
os.makedirs(CSV_DIR, exist_ok=True)
|
||||||
os.makedirs(MODEL_DIR, exist_ok=True)
|
os.makedirs(MODEL_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
# Database Configuration
|
# Database Configuration
|
||||||
DB_CONFIG = {
|
DB_CONFIG = {
|
||||||
"user": os.getenv("DB_USER"), # Default value as fallback
|
"user": os.getenv("DATABASE_USER"),
|
||||||
"password": os.getenv("DB_PASSWORD"),
|
"password": os.getenv("DATABASE_PASSWORD"),
|
||||||
"name": os.getenv("DB_NAME"),
|
"name": os.getenv("DATABASE_NAME"),
|
||||||
"port": os.getenv("DB_PORT"),
|
"port": os.getenv("DATABASE_PORT", 10532),
|
||||||
"host": os.getenv("DB_HOST")
|
"host": os.getenv("DATABASE_HOST", "firstadvancedev"),
|
||||||
|
"sid": os.getenv("DATABASE_SID", "FREE"),
|
||||||
|
"service_name": os.getenv("DATABASE_SERVICE_NAME", "firstadv")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# DNS = f"(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_CONFIG['host']})(PORT={DB_CONFIG['port']}))(CONNECT_DATA=(SID={DB_CONFIG['sid']})))"
|
||||||
|
|
||||||
|
# Database Connection
|
||||||
|
# SQLALCHEMY_DATABASE_URI = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}")
|
||||||
|
SQLALCHEMY_DATABASE_URI = ( f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DB_CONFIG['host']}:{DB_CONFIG['port']}/?service_name={DB_CONFIG['service_name']}")
|
||||||
|
|
||||||
|
|
||||||
# SQLAlchemy Configuration
|
# SQLAlchemy Configuration
|
||||||
SQLALCHEMY_DATABASE_URI = (
|
# SQLALCHEMY_DATABASE_URI = (
|
||||||
f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@"
|
# f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@"
|
||||||
f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}"
|
# f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}"
|
||||||
)
|
# )
|
||||||
|
|
||||||
SQLALCHEMY_TRACK_MODIFICATIONS = False
|
SQLALCHEMY_TRACK_MODIFICATIONS = False
|
||||||
|
|
||||||
# Table Configuration
|
# Table Configuration
|
||||||
@@ -80,7 +92,7 @@ OUTPUT_PATHS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
SIMBRELLA_BASE_URL = os.getenv("SIMBRELLA_BASE_URL", "http://127.0.0.1:6337")
|
SIMBRELLA_BASE_URL = os.getenv("SIMBRELLA_BASE_URL", "http://127.0.0.1:6337")
|
||||||
SIMBRELLA_ENDPOINT_RAC_CHECKS = os.getenv("SIMBRELLA_ENDPOINT_RAC_CHECKS","api/rac-check")
|
SIMBRELLA_ENDPOINT_RAC_CHECKS = os.getenv("SIMBRELLA_ENDPOINT_RAC_CHECKS", "api/rac-check")
|
||||||
|
|
||||||
# Salary Detect Endpoint Config
|
# Salary Detect Endpoint Config
|
||||||
SALARY_DETECT_URL = "http://www.simbrellang.net:5000/autocall/analytic-salary-detect"
|
SALARY_DETECT_URL = "http://www.simbrellang.net:5000/autocall/analytic-salary-detect"
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
from .raw_transaction import RawTransaction
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['RawTransaction']
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
from app.extensions import db
|
||||||
|
|
||||||
|
|
||||||
|
class Account(db.Model):
|
||||||
|
__tablename__ = "accounts"
|
||||||
|
|
||||||
|
customerid = db.Column(db.String(50), primary_key=True)
|
||||||
|
accountid = db.Column(db.String(11), nullable=False)
|
||||||
|
registrationdate = db.Column(db.DateTime)
|
||||||
|
accountcurrencycode = db.Column(db.String(3))
|
||||||
|
schemecode = db.Column(db.String(5))
|
||||||
|
lastinflowtransactiondate = db.Column(db.DateTime)
|
||||||
|
accountstatus = db.Column(db.String(50))
|
||||||
|
accountstatusdate = db.Column(db.DateTime)
|
||||||
|
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<Account {self.accountid}>"
|
||||||
@@ -0,0 +1,97 @@
|
|||||||
|
from sqlalchemy import Column, Integer, String, DateTime, Numeric, Boolean, func
|
||||||
|
from sqlalchemy.orm import declarative_base, Session
|
||||||
|
from datetime import datetime
|
||||||
|
from app.utils.logger import logger
|
||||||
|
from app.extensions import db
|
||||||
|
|
||||||
|
|
||||||
|
class BatchResult(db.Model):
|
||||||
|
__tablename__ = "salary_analytics_batch_results"
|
||||||
|
|
||||||
|
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||||
|
batch_number = Column(Integer, nullable=False)
|
||||||
|
total_batches = Column(Integer, nullable=False)
|
||||||
|
processed_at = Column(DateTime, default=datetime.utcnow)
|
||||||
|
accountid = Column(String, nullable=False)
|
||||||
|
num_months = Column(Integer)
|
||||||
|
least_inflow_6m = Column(Numeric)
|
||||||
|
avg_monthly_salary = Column(Numeric)
|
||||||
|
estimated_next_amount = Column(Numeric)
|
||||||
|
estimated_next_date = Column(DateTime)
|
||||||
|
is_45day_salary = Column(Boolean, default=False)
|
||||||
|
is_2months_salary = Column(Boolean, default=False)
|
||||||
|
status = Column(String, default="success")
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def save_batch(cls, batch_number, total_batches, results_df, status="success"):
|
||||||
|
"""Save batch results into DB using ORM bulk insert."""
|
||||||
|
try:
|
||||||
|
results_df["batch_number"] = batch_number
|
||||||
|
results_df["total_batches"] = total_batches
|
||||||
|
results_df["processed_at"] = datetime.utcnow()
|
||||||
|
results_df["status"] = status
|
||||||
|
|
||||||
|
# Normalize boolean columns
|
||||||
|
results_df["is_45day_salary"] = results_df.get("45daysalary", False)
|
||||||
|
results_df["is_2months_salary"] = results_df.get("2monthssalary", False)
|
||||||
|
|
||||||
|
# Convert to list of ORM objects
|
||||||
|
records = [
|
||||||
|
cls(**row)
|
||||||
|
for row in results_df.to_dict("records")
|
||||||
|
]
|
||||||
|
|
||||||
|
db.session.bulk_save_objects(records)
|
||||||
|
db.session.commit()
|
||||||
|
logger.info(f"Saved batch {batch_number} successfully.")
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
db.session.rollback()
|
||||||
|
logger.error(f"Error saving batch {batch_number}: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_batch_status(cls, batch_number: int):
|
||||||
|
"""Return summary info about one batch."""
|
||||||
|
try:
|
||||||
|
result = (
|
||||||
|
db.session.query(
|
||||||
|
cls.batch_number,
|
||||||
|
cls.total_batches,
|
||||||
|
cls.processed_at,
|
||||||
|
func.count().label("total_records"),
|
||||||
|
func.sum(func.case((cls.status == "success", 1), else_=0)).label("successful_records"),
|
||||||
|
func.sum(func.case((cls.status == "error", 1), else_=0)).label("failed_records"),
|
||||||
|
)
|
||||||
|
.filter(cls.batch_number == batch_number)
|
||||||
|
.group_by(cls.batch_number, cls.total_batches, cls.processed_at)
|
||||||
|
.order_by(cls.processed_at.desc())
|
||||||
|
.first()
|
||||||
|
)
|
||||||
|
return dict(result._mapping) if result else None
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error fetching batch {batch_number} status: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_all_batches(cls):
|
||||||
|
"""Return summaries for all batches."""
|
||||||
|
try:
|
||||||
|
results = (
|
||||||
|
db.session.query(
|
||||||
|
cls.batch_number,
|
||||||
|
cls.total_batches,
|
||||||
|
cls.processed_at,
|
||||||
|
func.count().label("total_records"),
|
||||||
|
func.sum(func.case((cls.status == "success", 1), else_=0)).label("successful_records"),
|
||||||
|
func.sum(func.case((cls.status == "error", 1), else_=0)).label("failed_records"),
|
||||||
|
)
|
||||||
|
.group_by(cls.batch_number, cls.total_batches, cls.processed_at)
|
||||||
|
.order_by(cls.batch_number)
|
||||||
|
.all()
|
||||||
|
)
|
||||||
|
return [dict(r._mapping) for r in results]
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error fetching all batches: {str(e)}")
|
||||||
|
return []
|
||||||
@@ -0,0 +1,100 @@
|
|||||||
|
from venv import logger
|
||||||
|
from sqlalchemy import Column, Integer, String, Float, DateTime, ForeignKey
|
||||||
|
from sqlalchemy.orm import relationship
|
||||||
|
from app.extensions import db
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
class CustomerAccountTransactionHx(db.Model):
|
||||||
|
__tablename__ = "customer_account_transaction_hx"
|
||||||
|
|
||||||
|
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||||
|
accountid = Column(String(64), nullable=False, index=True)
|
||||||
|
trx_type = Column(String(50), nullable=False)
|
||||||
|
amount = Column(Float, nullable=False)
|
||||||
|
description = Column(String(255))
|
||||||
|
customer_id = Column(String(64))
|
||||||
|
trx_start_date = Column(DateTime, nullable=False)
|
||||||
|
trx_end_date = Column(DateTime)
|
||||||
|
is_salary_related = Column(Integer, default=0)
|
||||||
|
is_consistent_amount = Column(Integer, default=0)
|
||||||
|
is_salary_type = Column(Integer, default=0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_all(cls):
|
||||||
|
"""Fetch all transactions."""
|
||||||
|
return db.session.query(cls).all()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_rows_count(cls):
|
||||||
|
"""Return total number of transaction rows."""
|
||||||
|
try:
|
||||||
|
count = db.session.query(db.func.count(cls.id)).scalar()
|
||||||
|
return count
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting row count: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_by_account(cls, accountid: str):
|
||||||
|
"""Fetch transactions for a given account."""
|
||||||
|
return db.session.query(cls).filter_by(accountid=accountid).all()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_accounts(cls, limit=None):
|
||||||
|
"""Fetch distinct account IDs."""
|
||||||
|
query = db.session.query(cls.accountid).distinct()
|
||||||
|
if limit:
|
||||||
|
query = query.limit(limit)
|
||||||
|
return [row.accountid for row in query.all()]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def insert_transaction(cls, **kwargs):
|
||||||
|
"""Insert a new transaction."""
|
||||||
|
trx = cls(**kwargs)
|
||||||
|
try:
|
||||||
|
db.session.add(trx)
|
||||||
|
db.session.commit()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error inserting transaction: {str(e)}")
|
||||||
|
return None
|
||||||
|
return trx
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def bulk_insert(cls, transactions: list[dict]):
|
||||||
|
"""Insert multiple transactions at once."""
|
||||||
|
objs = [cls(**trx) for trx in transactions]
|
||||||
|
|
||||||
|
try:
|
||||||
|
db.session.bulk_save_objects(objs)
|
||||||
|
db.session.commit()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in bulk insert: {str(e)}")
|
||||||
|
return None
|
||||||
|
return objs
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_transactions_df(cls, accountids: list[str] = None):
|
||||||
|
"""Return a Pandas DataFrame for ML model preparation."""
|
||||||
|
query = db.session.query(cls)
|
||||||
|
if accountids:
|
||||||
|
query = query.filter(cls.accountid.in_(accountids))
|
||||||
|
rows = query.all()
|
||||||
|
|
||||||
|
|
||||||
|
df = pd.DataFrame([{
|
||||||
|
"id": trx.id,
|
||||||
|
"accountid": trx.accountid,
|
||||||
|
"trx_type": trx.trx_type,
|
||||||
|
"amount": trx.amount,
|
||||||
|
"description": trx.description,
|
||||||
|
"customer_id": trx.customer_id,
|
||||||
|
"trx_start_date": trx.trx_start_date,
|
||||||
|
"trx_end_date": trx.trx_end_date,
|
||||||
|
"is_salary_related": trx.is_salary_related,
|
||||||
|
"is_consistent_amount": trx.is_consistent_amount,
|
||||||
|
"is_salary_type": trx.is_salary_type,
|
||||||
|
} for trx in rows])
|
||||||
|
|
||||||
|
return df
|
||||||
@@ -2,12 +2,10 @@
|
|||||||
Database operations module for salary analytics.
|
Database operations module for salary analytics.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
|
||||||
from sqlalchemy import text
|
from sqlalchemy import text
|
||||||
from .config import BATCH_RESULTS_TABLE
|
from app.config import BATCH_RESULTS_TABLE
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from app.utils.logger import logger
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class DatabaseOperations:
|
class DatabaseOperations:
|
||||||
def __init__(self, engine):
|
def __init__(self, engine):
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
from .extensions import db
|
from app.extensions import db
|
||||||
|
|
||||||
class RawTransaction(db.Model):
|
class RawTransaction(db.Model):
|
||||||
__tablename__ = 'analytics_raw_transactions'
|
__tablename__ = 'analytics_raw_transactions'
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
from app.extensions import db
|
||||||
|
|
||||||
|
class SimbrellaCustomer(db.Model):
|
||||||
|
__tablename__ = "simbrella_customers"
|
||||||
|
|
||||||
|
customerid = db.Column(db.String(500), primary_key=True)
|
||||||
|
acct_opn_date = db.Column(db.DateTime)
|
||||||
|
gender = db.Column(db.String(20))
|
||||||
|
birth_date = db.Column(db.DateTime)
|
||||||
|
msisdn = db.Column(db.String(100))
|
||||||
|
isbvnvalid = db.Column(db.String(100))
|
||||||
|
datebvnvalidated = db.Column(db.DateTime)
|
||||||
|
iscrmsvalid = db.Column(db.String(100))
|
||||||
|
datecrmsvalidated = db.Column(db.DateTime)
|
||||||
|
iscrcvalid = db.Column(db.String(100))
|
||||||
|
datecrcvalidated = db.Column(db.DateTime)
|
||||||
|
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<SimbrellaCustomer {self.customerid}>"
|
||||||
|
|
||||||
|
|
||||||
|
def to_dict(self):
|
||||||
|
return {
|
||||||
|
"customerid": self.customerid,
|
||||||
|
"acct_opn_date": self.acct_opn_date,
|
||||||
|
"gender": self.gender,
|
||||||
|
"birth_date": self.birth_date,
|
||||||
|
"msisdn": self.msisdn,
|
||||||
|
"isbvnvalid": self.isbvnvalid,
|
||||||
|
"datebvnvalidated": self.datebvnvalidated,
|
||||||
|
"iscrmsvalid": self.iscrmsvalid,
|
||||||
|
"datecrmsvalidated": self.datecrmsvalidated,
|
||||||
|
"iscrcvalid": self.iscrcvalid,
|
||||||
|
"datecrcvalidated": self.datecrcvalidated,
|
||||||
|
}
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
from app.extensions import db
|
||||||
|
|
||||||
|
|
||||||
|
class TempSimbrellaLien(db.Model):
|
||||||
|
__tablename__ = "tmp_simbrella_lien"
|
||||||
|
|
||||||
|
lienid = db.Column(db.String(33), primary_key=True)
|
||||||
|
customerid = db.Column(db.String(500))
|
||||||
|
accountid = db.Column(db.String(11))
|
||||||
|
lien_start_date = db.Column(db.DateTime)
|
||||||
|
action = db.Column(db.String(5))
|
||||||
|
restriction_nature = db.Column(db.String(150))
|
||||||
|
claim_amount = db.Column(db.Numeric(20, 4))
|
||||||
|
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<TempSimbrellaLien {self.lienid}>"
|
||||||
|
|
||||||
|
|
||||||
|
def to_dict(self):
|
||||||
|
return {
|
||||||
|
"lienid": self.lienid,
|
||||||
|
"customerid": self.customerid,
|
||||||
|
"accountid": self.accountid,
|
||||||
|
"lien_start_date": self.lien_start_date,
|
||||||
|
"action": self.action,
|
||||||
|
"restriction_nature": self.restriction_nature,
|
||||||
|
"claim_amount": float(self.claim_amount) if self.claim_amount is not None else None,
|
||||||
|
}
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
from app.extensions import db
|
||||||
|
|
||||||
|
|
||||||
|
class Transaction(db.Model):
|
||||||
|
__tablename__ = "transactions"
|
||||||
|
|
||||||
|
tran_id = db.Column(db.String(50), primary_key=True)
|
||||||
|
cif_id = db.Column(db.String(500))
|
||||||
|
foracid = db.Column(db.String(150))
|
||||||
|
acid = db.Column(db.String(150))
|
||||||
|
tran_date = db.Column(db.DateTime)
|
||||||
|
value_date = db.Column(db.DateTime)
|
||||||
|
pstd_date = db.Column(db.DateTime)
|
||||||
|
tran_sub_type = db.Column(db.String(50))
|
||||||
|
part_tran_type = db.Column(db.String(50))
|
||||||
|
tran_crncy_code = db.Column(db.String(50))
|
||||||
|
tran_amt = db.Column(db.Numeric(38, 0))
|
||||||
|
tran_particular = db.Column(db.String(250))
|
||||||
|
origination_channel = db.Column(db.String(150))
|
||||||
|
reversal_tran_id = db.Column(db.String(50))
|
||||||
|
isreversal = db.Column(db.String(50))
|
||||||
|
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<Transaction {self.tran_id}>"
|
||||||
|
|
||||||
|
|
||||||
|
def to_dict(self):
|
||||||
|
return {
|
||||||
|
"tran_id": self.tran_id,
|
||||||
|
"cif_id": self.cif_id,
|
||||||
|
"foracid": self.foracid,
|
||||||
|
"acid": self.acid,
|
||||||
|
"tran_date": self.tran_date,
|
||||||
|
"value_date": self.value_date,
|
||||||
|
"pstd_date": self.pstd_date,
|
||||||
|
"tran_sub_type": self.tran_sub_type,
|
||||||
|
"part_tran_type": self.part_tran_type,
|
||||||
|
"tran_crncy_code": self.tran_crncy_code,
|
||||||
|
"tran_amt": float(self.tran_amt) if self.tran_amt is not None else None,
|
||||||
|
"tran_particular": self.tran_particular,
|
||||||
|
"origination_channel": self.origination_channel,
|
||||||
|
"reversal_tran_id": self.reversal_tran_id,
|
||||||
|
"isreversal": self.isreversal,
|
||||||
|
}
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
from app.extensions import db
|
||||||
|
|
||||||
|
|
||||||
|
class TransactionStg(db.Model):
|
||||||
|
__tablename__ = "transaction_stg"
|
||||||
|
|
||||||
|
tran_id = db.Column(db.String(50), primary_key=True)
|
||||||
|
cif_id = db.Column(db.String(500))
|
||||||
|
foracid = db.Column(db.String(150))
|
||||||
|
acid = db.Column(db.String(150))
|
||||||
|
tran_date = db.Column(db.String(50)) # staging table keeps as string
|
||||||
|
value_date = db.Column(db.String(50))
|
||||||
|
pstd_date = db.Column(db.String(50))
|
||||||
|
tran_sub_type = db.Column(db.String(50))
|
||||||
|
part_tran_type = db.Column(db.String(50))
|
||||||
|
tran_crncy_code = db.Column(db.String(50))
|
||||||
|
tran_amt = db.Column(db.Numeric(38, 0))
|
||||||
|
tran_particular = db.Column(db.String(250))
|
||||||
|
origination_channel = db.Column(db.String(150))
|
||||||
|
reversal_tran_id = db.Column(db.String(50))
|
||||||
|
isreversal = db.Column(db.String(50))
|
||||||
|
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<TransactionStg {self.tran_id}>"
|
||||||
|
|
||||||
|
|
||||||
|
def to_dict(self):
|
||||||
|
return {
|
||||||
|
"tran_id": self.tran_id,
|
||||||
|
"cif_id": self.cif_id,
|
||||||
|
"foracid": self.foracid,
|
||||||
|
"acid": self.acid,
|
||||||
|
"tran_date": self.tran_date,
|
||||||
|
"value_date": self.value_date,
|
||||||
|
"pstd_date": self.pstd_date,
|
||||||
|
"tran_sub_type": self.tran_sub_type,
|
||||||
|
"part_tran_type": self.part_tran_type,
|
||||||
|
"tran_crncy_code": self.tran_crncy_code,
|
||||||
|
"tran_amt": float(self.tran_amt) if self.tran_amt is not None else None,
|
||||||
|
"tran_particular": self.tran_particular,
|
||||||
|
"origination_channel": self.origination_channel,
|
||||||
|
"reversal_tran_id": self.reversal_tran_id,
|
||||||
|
"isreversal": self.isreversal,
|
||||||
|
}
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
from fastapi import FastAPI
|
||||||
|
from app.salary_analytics.routes import analysis, reports, pipeline, load, base, train
|
||||||
|
from app.salary_analytics.middlewares.middleware import add_middlewares
|
||||||
|
from app.salary_analytics.events.lifecycle import register_events
|
||||||
|
from app.utils.logger import logger
|
||||||
|
import socket
|
||||||
|
|
||||||
|
"""
|
||||||
|
Salary Analytics Package
|
||||||
|
A package for analyzing and predicting salary patterns from transaction data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__version__ = "0.1.0"
|
||||||
|
|
||||||
|
|
||||||
|
def create_app() -> FastAPI:
|
||||||
|
app = FastAPI(
|
||||||
|
title="Salary Analytics API",
|
||||||
|
description="API for analyzing and predicting salary patterns from transaction data",
|
||||||
|
version="1.0.0"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Middlewares
|
||||||
|
add_middlewares(app)
|
||||||
|
|
||||||
|
# Events
|
||||||
|
register_events(app)
|
||||||
|
|
||||||
|
# Routers
|
||||||
|
app.include_router(base.router, tags=["Base"])
|
||||||
|
app.include_router(analysis.router, prefix="/analyze", tags=["Analysis"])
|
||||||
|
app.include_router(reports.router, tags=["Reports"])
|
||||||
|
app.include_router(pipeline.router, tags=["Pipeline"])
|
||||||
|
app.include_router(load.router, tags=["Data"])
|
||||||
|
app.include_router(train.router, tags=["Model Training"])
|
||||||
|
|
||||||
|
|
||||||
|
return app
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
from app.salary_analytics.services.main import SalaryAnalyticsPipeline
|
||||||
|
from app.salary_analytics.services.data_loader import DataLoader
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalState:
|
||||||
|
def __init__(self):
|
||||||
|
self._pipeline = None
|
||||||
|
self._data_loader = None
|
||||||
|
self.df = None
|
||||||
|
self.salary_predictor = None
|
||||||
|
self.salary_earner_analyzer = None
|
||||||
|
|
||||||
|
# ---- Pipeline ----
|
||||||
|
@property
|
||||||
|
def pipeline(self):
|
||||||
|
if self._pipeline is None:
|
||||||
|
self._pipeline = SalaryAnalyticsPipeline()
|
||||||
|
return self._pipeline
|
||||||
|
|
||||||
|
@pipeline.setter
|
||||||
|
def pipeline(self, value):
|
||||||
|
self._pipeline = value
|
||||||
|
|
||||||
|
# ---- Data Loader ----
|
||||||
|
@property
|
||||||
|
def data_loader(self):
|
||||||
|
if self._data_loader is None:
|
||||||
|
self._data_loader = DataLoader()
|
||||||
|
return self._data_loader
|
||||||
|
|
||||||
|
@data_loader.setter
|
||||||
|
def data_loader(self, value):
|
||||||
|
self._data_loader = value
|
||||||
|
|
||||||
|
# ---- DataFrame ----
|
||||||
|
@property
|
||||||
|
def df(self):
|
||||||
|
return self._df
|
||||||
|
|
||||||
|
@df.setter
|
||||||
|
def df(self, value):
|
||||||
|
self._df = value
|
||||||
|
|
||||||
|
# ---- Salary Predictor ----
|
||||||
|
@property
|
||||||
|
def salary_predictor(self):
|
||||||
|
return self._salary_predictor
|
||||||
|
|
||||||
|
@salary_predictor.setter
|
||||||
|
def salary_predictor(self, value):
|
||||||
|
self._salary_predictor = value
|
||||||
|
|
||||||
|
# ---- Salary Earner Analyzer ----
|
||||||
|
@property
|
||||||
|
def salary_earner_analyzer(self):
|
||||||
|
return self._salary_earner_analyzer
|
||||||
|
|
||||||
|
@salary_earner_analyzer.setter
|
||||||
|
def salary_earner_analyzer(self, value):
|
||||||
|
self._salary_earner_analyzer = value
|
||||||
|
|
||||||
|
|
||||||
|
state = GlobalState()
|
||||||
|
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
import socket
|
||||||
|
from fastapi import FastAPI
|
||||||
|
from app.salary_analytics.integrations.salary_detect import SalaryDetect
|
||||||
|
from app.utils.logger import logger
|
||||||
|
|
||||||
|
salary_detect = SalaryDetect()
|
||||||
|
|
||||||
|
|
||||||
|
def register_events(app: FastAPI):
|
||||||
|
@app.on_event("startup")
|
||||||
|
async def startup_event():
|
||||||
|
"""Initialize the pipeline on startup."""
|
||||||
|
try:
|
||||||
|
logger.info("Initializing pipeline...")
|
||||||
|
|
||||||
|
# Start autonomous salary detection loop
|
||||||
|
salary_detect.start()
|
||||||
|
logger.info("Started autonomous salary detection loop.")
|
||||||
|
|
||||||
|
# Print network information
|
||||||
|
hostname = socket.gethostname()
|
||||||
|
ip_address = socket.gethostbyname(hostname)
|
||||||
|
logger.info(f"Server running on hostname: {hostname}")
|
||||||
|
logger.info(f"Server IP address: {ip_address}")
|
||||||
|
logger.info(f"Server is accessible at:")
|
||||||
|
logger.info(f"- http://localhost:8000")
|
||||||
|
logger.info(f"- http://127.0.0.1:8000")
|
||||||
|
logger.info(f"- http://{ip_address}:8000")
|
||||||
|
logger.info("Pipeline initialized successfully")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error during startup: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
@app.on_event("shutdown")
|
||||||
|
async def shutdown_event():
|
||||||
|
logger.info("Shutting down Salary Analytics API...")
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
from fastapi import HTTPException
|
||||||
|
from app.salary_analytics.core.state import state
|
||||||
|
|
||||||
|
|
||||||
|
def check_data_loaded():
|
||||||
|
"""Raise HTTP 400 if no data is loaded into the pipeline."""
|
||||||
|
if state.pipeline.df is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail="No data loaded. Please load data first using the /load-data endpoint."
|
||||||
|
)
|
||||||
|
return True
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
from typing import Optional, Dict, List, Union
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class AnalysisResponse(BaseModel):
|
||||||
|
"""Response model for analysis endpoints."""
|
||||||
|
message: str
|
||||||
|
data: Optional[Dict] = None
|
||||||
|
file_path: Optional[str] = None
|
||||||
|
|
||||||
|
class BatchResponse(BaseModel):
|
||||||
|
"""Response model for batch processing."""
|
||||||
|
batch_number: int
|
||||||
|
total_batches: int
|
||||||
|
processed_rows: int
|
||||||
|
results_path: str
|
||||||
|
message: str
|
||||||
@@ -1,10 +1,8 @@
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
import httpx
|
import httpx
|
||||||
import json
|
import json
|
||||||
from salary_analytics.config import SIMBRELLA_BASE_URL, SIMBRELLA_ENDPOINT_RAC_CHECKS
|
from app.config import SIMBRELLA_BASE_URL, SIMBRELLA_ENDPOINT_RAC_CHECKS
|
||||||
import logging
|
from app.utils.logger import logger
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class SimbrellaIntegration:
|
class SimbrellaIntegration:
|
||||||
BASE_URL = SIMBRELLA_BASE_URL
|
BASE_URL = SIMBRELLA_BASE_URL
|
||||||
+2
-5
@@ -1,11 +1,8 @@
|
|||||||
import time
|
import time
|
||||||
import logging
|
|
||||||
import threading
|
import threading
|
||||||
import requests
|
import requests
|
||||||
from .config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload
|
from app.config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload
|
||||||
|
from app.utils.logger import logger
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class SalaryDetect:
|
class SalaryDetect:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from fastapi import FastAPI
|
||||||
|
|
||||||
|
def add_middlewares(app: FastAPI):
|
||||||
|
app.add_middleware(
|
||||||
|
CORSMiddleware,
|
||||||
|
allow_origins=["*"],
|
||||||
|
allow_credentials=True,
|
||||||
|
allow_methods=["*"],
|
||||||
|
allow_headers=["*"],
|
||||||
|
)
|
||||||
@@ -0,0 +1,75 @@
|
|||||||
|
from fastapi import APIRouter, HTTPException
|
||||||
|
from app.salary_analytics.helpers.response_helpers import AnalysisResponse
|
||||||
|
from app.salary_analytics.helpers.data_checks import check_data_loaded
|
||||||
|
from app.utils.logger import logger
|
||||||
|
import time
|
||||||
|
from app.salary_analytics.core.state import state
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/keyword", response_model=AnalysisResponse)
|
||||||
|
async def analyze_keyword():
|
||||||
|
"""Run keyword-based salary transaction analysis."""
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
check_data_loaded()
|
||||||
|
logger.info("Starting keyword analysis...")
|
||||||
|
data = state.pipeline.run_keyword_analysis()
|
||||||
|
logger.info(f"Keyword analysis completed. Found {len(data)} matches")
|
||||||
|
response = AnalysisResponse(
|
||||||
|
message="Keyword analysis completed successfully",
|
||||||
|
data={"count": len(data)}
|
||||||
|
)
|
||||||
|
logger.info(f"Keyword analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in keyword analysis: {str(e)}")
|
||||||
|
logger.info(f"Keyword analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/consistent-amount", response_model=AnalysisResponse)
|
||||||
|
async def analyze_consistent_amount():
|
||||||
|
"""Run consistent amount transaction analysis."""
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
check_data_loaded()
|
||||||
|
logger.info("Starting consistent amount analysis...")
|
||||||
|
data = state.pipeline.run_consistent_amount_analysis()
|
||||||
|
logger.info(f"Consistent amount analysis completed. Found {len(data)} matches")
|
||||||
|
response = AnalysisResponse(
|
||||||
|
message="Consistent amount analysis completed successfully",
|
||||||
|
data={"count": len(data)}
|
||||||
|
)
|
||||||
|
logger.info(f"Consistent amount analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in consistent amount analysis: {str(e)}")
|
||||||
|
logger.info(f"Consistent amount analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/transaction-type", response_model=AnalysisResponse)
|
||||||
|
async def analyze_transaction_type():
|
||||||
|
"""Run transaction type analysis."""
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
check_data_loaded()
|
||||||
|
logger.info("Starting transaction type analysis...")
|
||||||
|
data = state.pipeline.run_transaction_type_analysis()
|
||||||
|
logger.info(f"Transaction type analysis completed. Found {len(data)} matches")
|
||||||
|
response = AnalysisResponse(
|
||||||
|
message="Transaction type analysis completed successfully",
|
||||||
|
data={"count": len(data)}
|
||||||
|
)
|
||||||
|
logger.info(f"Transaction type analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in transaction type analysis: {str(e)}")
|
||||||
|
logger.info(f"Transaction type analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
from fastapi import APIRouter
|
||||||
|
from app.utils.logger import logger
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
@router.get("/")
|
||||||
|
async def root():
|
||||||
|
"""Root endpoint."""
|
||||||
|
start_time = time.time()
|
||||||
|
logger.info("Root endpoint accessed")
|
||||||
|
response = {"message": "Welcome to Salary Analytics API"}
|
||||||
|
logger.info(f"Root endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/health")
|
||||||
|
async def health_check():
|
||||||
|
"""Health check endpoint."""
|
||||||
|
start_time = time.time()
|
||||||
|
logger.info("Health check endpoint accessed")
|
||||||
|
response = {"status": "healthy"}
|
||||||
|
logger.info(f"Health check completed in {time.time() - start_time:.2f} seconds")
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
from fastapi import APIRouter, HTTPException, UploadFile, File
|
||||||
|
from app.salary_analytics.core.state import state
|
||||||
|
from app.utils.logger import logger
|
||||||
|
import tempfile, os, time
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/load-data")
|
||||||
|
async def load_data(source: str = "db", file: Optional[UploadFile] = File(None)):
|
||||||
|
"""
|
||||||
|
Load data from either database or CSV file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
source (str): Source of data ('db' or 'csv')
|
||||||
|
file (UploadFile, optional): CSV file to load (required if source is 'csv')
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Status of data loading
|
||||||
|
"""
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
if source not in ['db', 'csv']:
|
||||||
|
logger.error(f"Invalid source: {source}")
|
||||||
|
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
|
||||||
|
|
||||||
|
if source == 'csv' and not file:
|
||||||
|
logger.error("No file provided for CSV source")
|
||||||
|
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
||||||
|
|
||||||
|
if source == 'csv':
|
||||||
|
# Save uploaded file temporarily
|
||||||
|
with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
|
||||||
|
content = await file.read()
|
||||||
|
temp_file.write(content)
|
||||||
|
temp_file_path = temp_file.name
|
||||||
|
|
||||||
|
try:
|
||||||
|
success = state.pipeline.load_data(source='csv', file_path=temp_file_path)
|
||||||
|
finally:
|
||||||
|
# Clean up temporary file
|
||||||
|
os.unlink(temp_file_path)
|
||||||
|
else:
|
||||||
|
success = state.pipeline.load_data(source='db')
|
||||||
|
|
||||||
|
if not success:
|
||||||
|
logger.error("Failed to load data")
|
||||||
|
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail="Failed to load data")
|
||||||
|
|
||||||
|
response = {
|
||||||
|
"status": "success",
|
||||||
|
"message": f"Successfully loaded {len(state.pipeline.df)} rows of data",
|
||||||
|
"columns": state.pipeline.df.columns.tolist(),
|
||||||
|
"row_count": len(state.pipeline.df)
|
||||||
|
}
|
||||||
|
logger.info(f"Load data endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error loading data: {str(e)}")
|
||||||
|
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/load-data-with-file")
|
||||||
|
async def get_file_if_csv(source: str, file: Optional[UploadFile] = File(None)):
|
||||||
|
"""Dependency to handle file upload only when source is csv."""
|
||||||
|
if source == 'csv' and not file:
|
||||||
|
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
||||||
|
return file
|
||||||
@@ -0,0 +1,292 @@
|
|||||||
|
from fastapi import APIRouter, HTTPException
|
||||||
|
from app.salary_analytics.services.main import SalaryAnalyticsPipeline
|
||||||
|
from app.salary_analytics.helpers.response_helpers import AnalysisResponse, BatchResponse
|
||||||
|
from app.salary_analytics.helpers.data_checks import check_data_loaded
|
||||||
|
from app.salary_analytics.services.data_loader import DataLoader
|
||||||
|
from app.salary_analytics.core.state import state
|
||||||
|
from app.models.db_operations import DatabaseOperations
|
||||||
|
from app.config import OUTPUT_PATHS, TABLE_NAME
|
||||||
|
from app.utils.logger import logger
|
||||||
|
from typing import Optional, List, Union
|
||||||
|
from sqlalchemy import text
|
||||||
|
from datetime import datetime
|
||||||
|
import pandas as pd, os, tempfile, time
|
||||||
|
from typing import Optional, Union
|
||||||
|
from fastapi import UploadFile, File
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/run/pipeline", response_model=AnalysisResponse)
|
||||||
|
async def run_full_pipeline():
|
||||||
|
"""Run the complete salary analytics pipeline."""
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
check_data_loaded()
|
||||||
|
logger.info("Starting full pipeline...")
|
||||||
|
success = state.pipeline.run_full_pipeline()
|
||||||
|
if not success:
|
||||||
|
logger.error("Pipeline failed")
|
||||||
|
logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail="Pipeline failed")
|
||||||
|
|
||||||
|
logger.info("Pipeline completed successfully")
|
||||||
|
response = AnalysisResponse(
|
||||||
|
message="Pipeline completed successfully"
|
||||||
|
)
|
||||||
|
logger.info(f"Full pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in pipeline: {str(e)}")
|
||||||
|
logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/run/streaming-pipeline", response_model=List[BatchResponse])
|
||||||
|
async def run_streaming_pipeline(
|
||||||
|
source: str = "db",
|
||||||
|
batch_size: int = 10000,
|
||||||
|
file: Optional[Union[UploadFile, str]] = File(None)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Run the complete salary analytics pipeline in batches.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
source (str): Source of data ('db' or 'csv')
|
||||||
|
batch_size (int): Number of rows to process in each batch
|
||||||
|
file (UploadFile, optional): CSV file to load (required if source is 'csv')
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List[BatchResponse]: List of responses for each batch processed
|
||||||
|
"""
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
if source not in ['db', 'csv']:
|
||||||
|
logger.error(f"Invalid source: {source}")
|
||||||
|
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
|
||||||
|
|
||||||
|
if source == 'csv' and not file:
|
||||||
|
logger.error("No file provided for CSV source")
|
||||||
|
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
||||||
|
|
||||||
|
# Initialize data loader
|
||||||
|
state.data_loader = DataLoader()
|
||||||
|
state.data_loader.chunk_size = batch_size
|
||||||
|
|
||||||
|
# Create output directory for batch results
|
||||||
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
|
batch_output_dir = os.path.join(os.path.dirname(OUTPUT_PATHS['final_table']), f"batch_results_{timestamp}")
|
||||||
|
os.makedirs(batch_output_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Initialize database operations
|
||||||
|
if not state.data_loader.connect():
|
||||||
|
logger.error("Failed to connect to database")
|
||||||
|
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail="Failed to connect to database")
|
||||||
|
|
||||||
|
db_ops = DatabaseOperations(state.data_loader.engine)
|
||||||
|
if not db_ops.create_batch_results_table():
|
||||||
|
logger.error("Failed to create batch results table")
|
||||||
|
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail="Failed to create batch results table")
|
||||||
|
|
||||||
|
responses = []
|
||||||
|
batch_number = 0
|
||||||
|
batch_start_time = time.time()
|
||||||
|
|
||||||
|
def preprocess_chunk(chunk):
|
||||||
|
"""Preprocess a chunk of data with the same logic as DataLoader."""
|
||||||
|
# Convert dates
|
||||||
|
chunk['trx_start_date'] = pd.to_datetime(chunk['trx_start_date'])
|
||||||
|
chunk['trx_end_date'] = pd.to_datetime(chunk['trx_end_date'])
|
||||||
|
|
||||||
|
# Rename columns
|
||||||
|
chunk = chunk.rename(columns={
|
||||||
|
'd1': 'trx_type',
|
||||||
|
'd2': 'trx_subtype',
|
||||||
|
'd3': 'initiated_by',
|
||||||
|
'd4': 'customer_id'
|
||||||
|
})
|
||||||
|
|
||||||
|
chunk = chunk.dropna()
|
||||||
|
|
||||||
|
return chunk
|
||||||
|
|
||||||
|
if source == 'csv':
|
||||||
|
# Save uploaded file temporarily
|
||||||
|
with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
|
||||||
|
content = await file.read()
|
||||||
|
temp_file.write(content)
|
||||||
|
temp_file_path = temp_file.name
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Process CSV in chunks
|
||||||
|
for chunk in pd.read_csv(temp_file_path, chunksize=batch_size):
|
||||||
|
batch_number += 1
|
||||||
|
logger.info(f"Processing batch {batch_number}")
|
||||||
|
|
||||||
|
# Preprocess chunk
|
||||||
|
chunk = preprocess_chunk(chunk)
|
||||||
|
|
||||||
|
# Run pipeline on chunk
|
||||||
|
state.pipeline = SalaryAnalyticsPipeline()
|
||||||
|
state.pipeline.df = chunk
|
||||||
|
|
||||||
|
try:
|
||||||
|
batch_start_time = time.time()
|
||||||
|
# Run analyses
|
||||||
|
state.pipeline.run_keyword_analysis()
|
||||||
|
state.pipeline.run_consistent_amount_analysis()
|
||||||
|
state.pipeline.run_transaction_type_analysis()
|
||||||
|
|
||||||
|
# Generate reports
|
||||||
|
reports = state.pipeline.generate_salary_earner_reports()
|
||||||
|
|
||||||
|
# Add batch metadata to results
|
||||||
|
results_df = reports['final_table'].copy()
|
||||||
|
results_df['batch_number'] = batch_number
|
||||||
|
results_df['total_batches'] = -1 # Unknown for CSV
|
||||||
|
results_df['processed_at'] = datetime.now()
|
||||||
|
|
||||||
|
# Save batch results to CSV
|
||||||
|
batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
|
||||||
|
results_df.to_csv(batch_results_path, index=False)
|
||||||
|
|
||||||
|
# Save to database
|
||||||
|
db_ops.save_batch_to_db(
|
||||||
|
batch_number=batch_number,
|
||||||
|
total_batches=-1, # Unknown for CSV
|
||||||
|
results_df=results_df,
|
||||||
|
status="success"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Batch {batch_number} processed in {time.time() - batch_start_time:.2f} seconds")
|
||||||
|
|
||||||
|
responses.append(BatchResponse(
|
||||||
|
batch_number=batch_number,
|
||||||
|
total_batches=-1, # Unknown for CSV
|
||||||
|
processed_rows=len(chunk),
|
||||||
|
results_path=batch_results_path,
|
||||||
|
message=f"Successfully processed batch {batch_number}"
|
||||||
|
))
|
||||||
|
except Exception as e:
|
||||||
|
error_message = str(e)
|
||||||
|
logger.error(f"Error processing batch {batch_number}: {error_message}")
|
||||||
|
|
||||||
|
# Save error to database
|
||||||
|
db_ops.save_batch_to_db(
|
||||||
|
batch_number=batch_number,
|
||||||
|
total_batches=-1,
|
||||||
|
results_df=pd.DataFrame(), # Empty DataFrame for error case
|
||||||
|
status="error"
|
||||||
|
)
|
||||||
|
|
||||||
|
responses.append(BatchResponse(
|
||||||
|
batch_number=batch_number,
|
||||||
|
total_batches=-1,
|
||||||
|
processed_rows=len(chunk),
|
||||||
|
results_path="",
|
||||||
|
message=f"Error processing batch {batch_number}: {error_message}"
|
||||||
|
))
|
||||||
|
finally:
|
||||||
|
# Clean up temporary file
|
||||||
|
os.unlink(temp_file_path)
|
||||||
|
else:
|
||||||
|
# Process database in chunks
|
||||||
|
if not state.data_loader.connect():
|
||||||
|
raise HTTPException(status_code=500, detail="Failed to connect to database")
|
||||||
|
|
||||||
|
# Get total row count
|
||||||
|
with state.data_loader.engine.connect() as conn:
|
||||||
|
count_query = text(f"SELECT COUNT(*) FROM {TABLE_NAME}")
|
||||||
|
total_rows = conn.execute(count_query).scalar()
|
||||||
|
|
||||||
|
total_batches = (total_rows + batch_size - 1) // batch_size
|
||||||
|
offset = 0
|
||||||
|
|
||||||
|
while offset < total_rows:
|
||||||
|
batch_number += 1
|
||||||
|
logger.info(f"Processing batch {batch_number} of {total_batches}")
|
||||||
|
|
||||||
|
# Load chunk from database
|
||||||
|
query = f"SELECT * FROM {TABLE_NAME} LIMIT {batch_size} OFFSET {offset}"
|
||||||
|
chunk = pd.read_sql(query, state.data_loader.engine)
|
||||||
|
|
||||||
|
if chunk.empty:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Preprocess chunk
|
||||||
|
chunk = preprocess_chunk(chunk)
|
||||||
|
|
||||||
|
# Run pipeline on chunk
|
||||||
|
pipeline = SalaryAnalyticsPipeline()
|
||||||
|
state.pipeline.df = chunk
|
||||||
|
|
||||||
|
try:
|
||||||
|
batch_start_time = time.time()
|
||||||
|
# Run analyses
|
||||||
|
state.pipeline.run_keyword_analysis()
|
||||||
|
state.pipeline.run_consistent_amount_analysis()
|
||||||
|
state.pipeline.run_transaction_type_analysis()
|
||||||
|
|
||||||
|
# Generate reports
|
||||||
|
reports = state.pipeline.generate_salary_earner_reports()
|
||||||
|
|
||||||
|
# Add batch metadata to results
|
||||||
|
results_df = reports['final_table'].copy()
|
||||||
|
results_df['batch_number'] = batch_number
|
||||||
|
results_df['total_batches'] = total_batches
|
||||||
|
results_df['processed_at'] = datetime.now()
|
||||||
|
|
||||||
|
# Save batch results to CSV
|
||||||
|
batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
|
||||||
|
results_df.to_csv(batch_results_path, index=False)
|
||||||
|
|
||||||
|
# Save to database
|
||||||
|
db_ops.save_batch_to_db(
|
||||||
|
batch_number=batch_number,
|
||||||
|
total_batches=total_batches,
|
||||||
|
results_df=results_df,
|
||||||
|
status="success"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Batch {batch_number} of {total_batches} processed in {time.time() - batch_start_time:.2f} seconds")
|
||||||
|
|
||||||
|
responses.append(BatchResponse(
|
||||||
|
batch_number=batch_number,
|
||||||
|
total_batches=total_batches,
|
||||||
|
processed_rows=len(chunk),
|
||||||
|
results_path=batch_results_path,
|
||||||
|
message=f"Successfully processed batch {batch_number} of {total_batches}"
|
||||||
|
))
|
||||||
|
except Exception as e:
|
||||||
|
error_message = str(e)
|
||||||
|
logger.error(f"Error processing batch {batch_number}: {error_message}")
|
||||||
|
|
||||||
|
# Save error to database
|
||||||
|
db_ops.save_batch_to_db(
|
||||||
|
batch_number=batch_number,
|
||||||
|
total_batches=total_batches,
|
||||||
|
results_df=pd.DataFrame(), # Empty DataFrame for error case
|
||||||
|
status="error"
|
||||||
|
)
|
||||||
|
|
||||||
|
responses.append(BatchResponse(
|
||||||
|
batch_number=batch_number,
|
||||||
|
total_batches=total_batches,
|
||||||
|
processed_rows=len(chunk),
|
||||||
|
results_path="",
|
||||||
|
message=f"Error processing batch {batch_number}: {error_message}"
|
||||||
|
))
|
||||||
|
|
||||||
|
offset += batch_size
|
||||||
|
|
||||||
|
logger.info(f"Streaming pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||||
|
return responses
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in streaming pipeline: {str(e)}")
|
||||||
|
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
@@ -0,0 +1,78 @@
|
|||||||
|
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
from app.salary_analytics.helpers.response_helpers import AnalysisResponse
|
||||||
|
from app.salary_analytics.helpers.data_checks import check_data_loaded
|
||||||
|
from app.salary_analytics.core.state import state
|
||||||
|
from app.config import OUTPUT_PATHS
|
||||||
|
from app.utils.logger import logger
|
||||||
|
import os, time
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/generate/reports", response_model=AnalysisResponse)
|
||||||
|
async def generate_reports(background_tasks: BackgroundTasks):
|
||||||
|
"""Generate salary earner reports."""
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
check_data_loaded()
|
||||||
|
logger.info("Starting report generation...")
|
||||||
|
reports = state.pipeline.generate_salary_earner_reports()
|
||||||
|
logger.info("Reports generated successfully")
|
||||||
|
response = AnalysisResponse(
|
||||||
|
message="Reports generated successfully",
|
||||||
|
data={
|
||||||
|
"verified_salary_earners": len(reports['final_table']),
|
||||||
|
"likely_salary_earners": len(reports['likely_salary_earner']),
|
||||||
|
"high_earners": reports['total_high_earners']
|
||||||
|
}
|
||||||
|
)
|
||||||
|
logger.info(f"Report generation endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in report generation: {str(e)}")
|
||||||
|
logger.info(f"Report generation endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/download/{report_type}")
|
||||||
|
async def download_report(report_type: str):
|
||||||
|
"""Download generated reports."""
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
check_data_loaded()
|
||||||
|
logger.info(f"Attempting to download report: {report_type}")
|
||||||
|
file_paths = {
|
||||||
|
"high_earners": OUTPUT_PATHS["high_earner_details"],
|
||||||
|
"likely_earners": OUTPUT_PATHS["likely_salary_earner"],
|
||||||
|
"final_table": OUTPUT_PATHS["final_table"],
|
||||||
|
"consistent_plot": OUTPUT_PATHS["consistent_earners_plot"],
|
||||||
|
"inconsistent_plot": OUTPUT_PATHS["inconsistent_earners_plot"],
|
||||||
|
"hypothesis_plot": OUTPUT_PATHS["hypothesis_overlap_plot"]
|
||||||
|
}
|
||||||
|
|
||||||
|
if report_type not in file_paths:
|
||||||
|
logger.error(f"Report type not found: {report_type}")
|
||||||
|
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=404, detail="Report type not found")
|
||||||
|
|
||||||
|
file_path = file_paths[report_type]
|
||||||
|
if not os.path.exists(file_path):
|
||||||
|
logger.error(f"Report file not found: {file_path}")
|
||||||
|
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=404, detail="Report file not found")
|
||||||
|
|
||||||
|
logger.info(f"Successfully found report file: {file_path}")
|
||||||
|
response = FileResponse(
|
||||||
|
path=file_path,
|
||||||
|
filename=os.path.basename(file_path),
|
||||||
|
media_type="application/octet-stream"
|
||||||
|
)
|
||||||
|
logger.info(f"Download endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error downloading report: {str(e)}")
|
||||||
|
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from fastapi import APIRouter, HTTPException
|
||||||
|
from app.salary_analytics.services.main import SalaryAnalyticsPipeline
|
||||||
|
from app.salary_analytics.helpers.data_checks import check_data_loaded
|
||||||
|
from app.salary_analytics.helpers.response_helpers import AnalysisResponse
|
||||||
|
from app.salary_analytics.core.state import state
|
||||||
|
from app.utils.logger import logger
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/train/models", response_model=AnalysisResponse)
|
||||||
|
async def train_models():
|
||||||
|
"""Train salary prediction models."""
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
check_data_loaded()
|
||||||
|
logger.info("Starting model training...")
|
||||||
|
state.pipeline.train_salary_prediction_models()
|
||||||
|
logger.info("Models trained successfully")
|
||||||
|
response = AnalysisResponse(
|
||||||
|
message="Models trained successfully"
|
||||||
|
)
|
||||||
|
logger.info(f"Model training endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in model training: {str(e)}")
|
||||||
|
logger.info(f"Model training endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
from .main import SalaryAnalyticsPipeline
|
||||||
|
from .data_loader import DataLoader
|
||||||
|
from .keyword_analyzer import KeywordAnalyzer
|
||||||
|
from .consistent_amount_analyzer import ConsistentAmountAnalyzer
|
||||||
|
from .transaction_type_analyzer import TransactionTypeAnalyzer
|
||||||
|
from .salary_earner_analyzer import SalaryEarnerAnalyzer
|
||||||
|
from .salary_predictor import SalaryPredictor
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Salary Analytics Package
|
||||||
|
A package for analyzing and predicting salary patterns from transaction data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__version__ = "0.1.0"
|
||||||
|
__all__ = [
|
||||||
|
"SalaryAnalyticsPipeline",
|
||||||
|
"DataLoader",
|
||||||
|
"KeywordAnalyzer",
|
||||||
|
"ConsistentAmountAnalyzer",
|
||||||
|
"TransactionTypeAnalyzer",
|
||||||
|
"SalaryEarnerAnalyzer",
|
||||||
|
"SalaryPredictor"
|
||||||
|
]
|
||||||
+1
-1
@@ -3,7 +3,7 @@ Consistent amount transaction analysis module.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from .config import MODEL_CONFIG
|
from app.config import MODEL_CONFIG
|
||||||
|
|
||||||
class ConsistentAmountAnalyzer:
|
class ConsistentAmountAnalyzer:
|
||||||
def __init__(self, df):
|
def __init__(self, df):
|
||||||
@@ -7,9 +7,8 @@ import pandas as pd
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from .config import DB_CONFIG, TABLE_NAME
|
from app.config import SQLALCHEMY_DATABASE_URI, TABLE_NAME
|
||||||
|
from app.utils.logger import logger
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class DataLoader:
|
class DataLoader:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -21,8 +20,7 @@ class DataLoader:
|
|||||||
"""Establish database connection."""
|
"""Establish database connection."""
|
||||||
try:
|
try:
|
||||||
logger.info("Attempting to connect to database...")
|
logger.info("Attempting to connect to database...")
|
||||||
DATABASE_URL = f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}"
|
self.engine = create_engine(SQLALCHEMY_DATABASE_URI)
|
||||||
self.engine = create_engine(DATABASE_URL)
|
|
||||||
with self.engine.connect() as conn:
|
with self.engine.connect() as conn:
|
||||||
# First check if table exists
|
# First check if table exists
|
||||||
check_table = text(f"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = '{TABLE_NAME}')")
|
check_table = text(f"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = '{TABLE_NAME}')")
|
||||||
+1
-1
@@ -4,7 +4,7 @@ Keyword-based salary transaction analysis module.
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from .config import SALARY_KEYWORDS
|
from app.config import SALARY_KEYWORDS
|
||||||
|
|
||||||
class KeywordAnalyzer:
|
class KeywordAnalyzer:
|
||||||
def __init__(self, df):
|
def __init__(self, df):
|
||||||
@@ -9,8 +9,7 @@ from .consistent_amount_analyzer import ConsistentAmountAnalyzer
|
|||||||
from .transaction_type_analyzer import TransactionTypeAnalyzer
|
from .transaction_type_analyzer import TransactionTypeAnalyzer
|
||||||
from .salary_earner_analyzer import SalaryEarnerAnalyzer
|
from .salary_earner_analyzer import SalaryEarnerAnalyzer
|
||||||
from .salary_predictor import SalaryPredictor
|
from .salary_predictor import SalaryPredictor
|
||||||
|
from app.utils.logger import logger
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class SalaryAnalyticsPipeline:
|
class SalaryAnalyticsPipeline:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
+2
-9
@@ -6,15 +6,8 @@ import pandas as pd
|
|||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from matplotlib_venn import venn3
|
from matplotlib_venn import venn3
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
import logging
|
from app.config import MODEL_CONFIG, OUTPUT_PATHS
|
||||||
from .config import MODEL_CONFIG, OUTPUT_PATHS
|
from app.utils.logger import logger
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
||||||
)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class SalaryEarnerAnalyzer:
|
class SalaryEarnerAnalyzer:
|
||||||
def __init__(self, df):
|
def __init__(self, df):
|
||||||
+1
-1
@@ -9,7 +9,7 @@ from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
|||||||
from sklearn.ensemble import RandomForestRegressor
|
from sklearn.ensemble import RandomForestRegressor
|
||||||
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
|
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
|
||||||
from joblib import dump
|
from joblib import dump
|
||||||
from .config import OUTPUT_PATHS
|
from app.config import OUTPUT_PATHS
|
||||||
|
|
||||||
class SalaryPredictor:
|
class SalaryPredictor:
|
||||||
def __init__(self, df):
|
def __init__(self, df):
|
||||||
+1
-1
@@ -3,7 +3,7 @@ Transaction type analysis module.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from .config import MODEL_CONFIG
|
from app.config import MODEL_CONFIG
|
||||||
|
|
||||||
class TransactionTypeAnalyzer:
|
class TransactionTypeAnalyzer:
|
||||||
def __init__(self, df):
|
def __init__(self, df):
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
handlers=[
|
||||||
|
# logging.StreamHandler(),
|
||||||
|
logging.FileHandler("app.log", mode='a') # Log to file
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger("DetectionService")
|
||||||
+8
-7
@@ -1,16 +1,17 @@
|
|||||||
services:
|
services:
|
||||||
digifi-analytics:
|
digifi-analytics:
|
||||||
build: .
|
build: .
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
ports:
|
ports:
|
||||||
- "${APP_PORT:-4800}:8000"
|
- "${APP_PORT:-4800}:8000"
|
||||||
volumes:
|
|
||||||
- ./output:/app/output
|
|
||||||
environment:
|
environment:
|
||||||
- DB_USER=salaryloan
|
- FLASK_APP=${FLASK_APP}
|
||||||
- DB_PASSWORD=salaryloan
|
- FLASK_ENV=${FLASK_ENV}
|
||||||
- DB_NAME=salaryloan
|
- DATABASE_URL=postgresql+psycopg2://${DATABASE_USER}:${DATABASE_PASSWORD}@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_NAME}
|
||||||
- DB_PORT=10532
|
volumes:
|
||||||
- DB_HOST=dev-data.simbrellang.net
|
- .:/app
|
||||||
|
- ./output:/app/output
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
networks:
|
networks:
|
||||||
- salary_network
|
- salary_network
|
||||||
|
|||||||
+1
-1
@@ -19,7 +19,7 @@ if config.config_file_name is not None:
|
|||||||
# from myapp import Base
|
# from myapp import Base
|
||||||
# target_metadata = Base.metadata
|
# target_metadata = Base.metadata
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
from salary_analytics.app.extensions import db
|
from app.extensions import db
|
||||||
|
|
||||||
config.set_main_option('sqlalchemy.url',
|
config.set_main_option('sqlalchemy.url',
|
||||||
current_app.config.get('SQLALCHEMY_DATABASE_URI'))
|
current_app.config.get('SQLALCHEMY_DATABASE_URI'))
|
||||||
|
|||||||
+7
-1
@@ -1,4 +1,8 @@
|
|||||||
|
# Database and ORM
|
||||||
sqlalchemy>=2.0.0
|
sqlalchemy>=2.0.0
|
||||||
|
oracledb>=1.0.0
|
||||||
|
|
||||||
|
|
||||||
pandas>=1.5.0
|
pandas>=1.5.0
|
||||||
numpy>=1.21.0
|
numpy>=1.21.0
|
||||||
matplotlib>=3.5.0
|
matplotlib>=3.5.0
|
||||||
@@ -17,4 +21,6 @@ openpyxl>=3.0.10
|
|||||||
Flask>=2.0.0
|
Flask>=2.0.0
|
||||||
Flask-SQLAlchemy>=3.0.0
|
Flask-SQLAlchemy>=3.0.0
|
||||||
Flask-Migrate>=4.0.0
|
Flask-Migrate>=4.0.0
|
||||||
alembic>=1.8.0
|
alembic>=1.8.0
|
||||||
|
requests>=2.26.0
|
||||||
|
gunicorn
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
import os
|
import os
|
||||||
from salary_analytics.app import create_app
|
from app.salary_analytics import create_app
|
||||||
|
|
||||||
app = create_app()
|
app = create_app()
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
"""
|
|
||||||
Salary Analytics Package
|
|
||||||
A package for analyzing and predicting salary patterns from transaction data.
|
|
||||||
"""
|
|
||||||
|
|
||||||
__version__ = "0.1.0"
|
|
||||||
@@ -1,605 +0,0 @@
|
|||||||
"""
|
|
||||||
FastAPI application for salary analytics.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException, BackgroundTasks, UploadFile, File, Depends
|
|
||||||
from fastapi.responses import FileResponse
|
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from typing import Optional, Dict, List, Union
|
|
||||||
import os
|
|
||||||
import socket
|
|
||||||
import logging
|
|
||||||
import pandas as pd
|
|
||||||
import tempfile
|
|
||||||
from datetime import datetime
|
|
||||||
from sqlalchemy import text, Table, Column, Integer, String, Float, DateTime, MetaData
|
|
||||||
import numpy as np
|
|
||||||
import warnings
|
|
||||||
import time
|
|
||||||
from .main import SalaryAnalyticsPipeline
|
|
||||||
from .config import OUTPUT_PATHS, TABLE_NAME, BATCH_RESULTS_TABLE
|
|
||||||
from .data_loader import DataLoader
|
|
||||||
from .salary_predictor import SalaryPredictor
|
|
||||||
from .salary_earner_analyzer import SalaryEarnerAnalyzer
|
|
||||||
from .db_operations import DatabaseOperations
|
|
||||||
from .salary_detect import SalaryDetect
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
||||||
)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Suppress warnings
|
|
||||||
warnings.filterwarnings('ignore', category=RuntimeWarning, module='numpy')
|
|
||||||
pd.options.mode.chained_assignment = None
|
|
||||||
|
|
||||||
app = FastAPI(
|
|
||||||
title="Salary Analytics API",
|
|
||||||
description="API for analyzing and predicting salary patterns from transaction data",
|
|
||||||
version="1.0.0"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Add CORS middleware
|
|
||||||
app.add_middleware(
|
|
||||||
CORSMiddleware,
|
|
||||||
allow_origins=["*"], # Allows all origins
|
|
||||||
allow_credentials=True,
|
|
||||||
allow_methods=["*"], # Allows all methods
|
|
||||||
allow_headers=["*"], # Allows all headers
|
|
||||||
)
|
|
||||||
|
|
||||||
# Global pipeline instance
|
|
||||||
pipeline = SalaryAnalyticsPipeline()
|
|
||||||
|
|
||||||
# Global variables to store loaded data and models
|
|
||||||
data_loader = None
|
|
||||||
df = None
|
|
||||||
salary_predictor = None
|
|
||||||
salary_earner_analyzer = None
|
|
||||||
|
|
||||||
salary_detect = SalaryDetect()
|
|
||||||
|
|
||||||
class AnalysisResponse(BaseModel):
|
|
||||||
"""Response model for analysis endpoints."""
|
|
||||||
message: str
|
|
||||||
data: Optional[Dict] = None
|
|
||||||
file_path: Optional[str] = None
|
|
||||||
|
|
||||||
class BatchResponse(BaseModel):
|
|
||||||
"""Response model for batch processing."""
|
|
||||||
batch_number: int
|
|
||||||
total_batches: int
|
|
||||||
processed_rows: int
|
|
||||||
results_path: str
|
|
||||||
message: str
|
|
||||||
|
|
||||||
def check_data_loaded():
|
|
||||||
"""Check if data is loaded before running analytics."""
|
|
||||||
if pipeline.df is None:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400,
|
|
||||||
detail="No data loaded. Please load data first using the /load-data endpoint."
|
|
||||||
)
|
|
||||||
|
|
||||||
@app.on_event("startup")
|
|
||||||
async def startup_event():
|
|
||||||
"""Initialize the pipeline on startup."""
|
|
||||||
try:
|
|
||||||
logger.info("Initializing pipeline...")
|
|
||||||
|
|
||||||
# Start autonomous salary detection loop
|
|
||||||
salary_detect.start()
|
|
||||||
logger.info("Started autonomous salary detection loop.")
|
|
||||||
|
|
||||||
# Print network information
|
|
||||||
hostname = socket.gethostname()
|
|
||||||
ip_address = socket.gethostbyname(hostname)
|
|
||||||
logger.info(f"Server running on hostname: {hostname}")
|
|
||||||
logger.info(f"Server IP address: {ip_address}")
|
|
||||||
logger.info(f"Server is accessible at:")
|
|
||||||
logger.info(f"- http://localhost:8000")
|
|
||||||
logger.info(f"- http://127.0.0.1:8000")
|
|
||||||
logger.info(f"- http://{ip_address}:8000")
|
|
||||||
logger.info("Pipeline initialized successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error during startup: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
@app.get("/")
|
|
||||||
async def root():
|
|
||||||
"""Root endpoint."""
|
|
||||||
start_time = time.time()
|
|
||||||
logger.info("Root endpoint accessed")
|
|
||||||
response = {"message": "Welcome to Salary Analytics API"}
|
|
||||||
logger.info(f"Root endpoint completed in {time.time() - start_time:.2f} seconds")
|
|
||||||
return response
|
|
||||||
|
|
||||||
@app.get("/health")
|
|
||||||
async def health_check():
|
|
||||||
"""Health check endpoint."""
|
|
||||||
start_time = time.time()
|
|
||||||
logger.info("Health check endpoint accessed")
|
|
||||||
response = {"status": "healthy"}
|
|
||||||
logger.info(f"Health check completed in {time.time() - start_time:.2f} seconds")
|
|
||||||
return response
|
|
||||||
|
|
||||||
@app.post("/analyze/keyword", response_model=AnalysisResponse)
|
|
||||||
async def analyze_keyword():
|
|
||||||
"""Run keyword-based salary transaction analysis."""
|
|
||||||
start_time = time.time()
|
|
||||||
try:
|
|
||||||
check_data_loaded()
|
|
||||||
logger.info("Starting keyword analysis...")
|
|
||||||
data = pipeline.run_keyword_analysis()
|
|
||||||
logger.info(f"Keyword analysis completed. Found {len(data)} matches")
|
|
||||||
response = AnalysisResponse(
|
|
||||||
message="Keyword analysis completed successfully",
|
|
||||||
data={"count": len(data)}
|
|
||||||
)
|
|
||||||
logger.info(f"Keyword analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in keyword analysis: {str(e)}")
|
|
||||||
logger.info(f"Keyword analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
@app.post("/analyze/consistent-amount", response_model=AnalysisResponse)
|
|
||||||
async def analyze_consistent_amount():
|
|
||||||
"""Run consistent amount transaction analysis."""
|
|
||||||
start_time = time.time()
|
|
||||||
try:
|
|
||||||
check_data_loaded()
|
|
||||||
logger.info("Starting consistent amount analysis...")
|
|
||||||
data = pipeline.run_consistent_amount_analysis()
|
|
||||||
logger.info(f"Consistent amount analysis completed. Found {len(data)} matches")
|
|
||||||
response = AnalysisResponse(
|
|
||||||
message="Consistent amount analysis completed successfully",
|
|
||||||
data={"count": len(data)}
|
|
||||||
)
|
|
||||||
logger.info(f"Consistent amount analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in consistent amount analysis: {str(e)}")
|
|
||||||
logger.info(f"Consistent amount analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
@app.post("/analyze/transaction-type", response_model=AnalysisResponse)
|
|
||||||
async def analyze_transaction_type():
|
|
||||||
"""Run transaction type analysis."""
|
|
||||||
start_time = time.time()
|
|
||||||
try:
|
|
||||||
check_data_loaded()
|
|
||||||
logger.info("Starting transaction type analysis...")
|
|
||||||
data = pipeline.run_transaction_type_analysis()
|
|
||||||
logger.info(f"Transaction type analysis completed. Found {len(data)} matches")
|
|
||||||
response = AnalysisResponse(
|
|
||||||
message="Transaction type analysis completed successfully",
|
|
||||||
data={"count": len(data)}
|
|
||||||
)
|
|
||||||
logger.info(f"Transaction type analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in transaction type analysis: {str(e)}")
|
|
||||||
logger.info(f"Transaction type analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
@app.post("/generate/reports", response_model=AnalysisResponse)
|
|
||||||
async def generate_reports(background_tasks: BackgroundTasks):
|
|
||||||
"""Generate salary earner reports."""
|
|
||||||
start_time = time.time()
|
|
||||||
try:
|
|
||||||
check_data_loaded()
|
|
||||||
logger.info("Starting report generation...")
|
|
||||||
reports = pipeline.generate_salary_earner_reports()
|
|
||||||
logger.info("Reports generated successfully")
|
|
||||||
response = AnalysisResponse(
|
|
||||||
message="Reports generated successfully",
|
|
||||||
data={
|
|
||||||
"verified_salary_earners": len(reports['final_table']),
|
|
||||||
"likely_salary_earners": len(reports['likely_salary_earner']),
|
|
||||||
"high_earners": reports['total_high_earners']
|
|
||||||
}
|
|
||||||
)
|
|
||||||
logger.info(f"Report generation endpoint completed in {time.time() - start_time:.2f} seconds")
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in report generation: {str(e)}")
|
|
||||||
logger.info(f"Report generation endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
@app.post("/train/models", response_model=AnalysisResponse)
|
|
||||||
async def train_models():
|
|
||||||
"""Train salary prediction models."""
|
|
||||||
start_time = time.time()
|
|
||||||
try:
|
|
||||||
check_data_loaded()
|
|
||||||
logger.info("Starting model training...")
|
|
||||||
pipeline.train_salary_prediction_models()
|
|
||||||
logger.info("Models trained successfully")
|
|
||||||
response = AnalysisResponse(
|
|
||||||
message="Models trained successfully"
|
|
||||||
)
|
|
||||||
logger.info(f"Model training endpoint completed in {time.time() - start_time:.2f} seconds")
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in model training: {str(e)}")
|
|
||||||
logger.info(f"Model training endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
@app.get("/download/{report_type}")
|
|
||||||
async def download_report(report_type: str):
|
|
||||||
"""Download generated reports."""
|
|
||||||
start_time = time.time()
|
|
||||||
try:
|
|
||||||
check_data_loaded()
|
|
||||||
logger.info(f"Attempting to download report: {report_type}")
|
|
||||||
file_paths = {
|
|
||||||
"high_earners": OUTPUT_PATHS["high_earner_details"],
|
|
||||||
"likely_earners": OUTPUT_PATHS["likely_salary_earner"],
|
|
||||||
"final_table": OUTPUT_PATHS["final_table"],
|
|
||||||
"consistent_plot": OUTPUT_PATHS["consistent_earners_plot"],
|
|
||||||
"inconsistent_plot": OUTPUT_PATHS["inconsistent_earners_plot"],
|
|
||||||
"hypothesis_plot": OUTPUT_PATHS["hypothesis_overlap_plot"]
|
|
||||||
}
|
|
||||||
|
|
||||||
if report_type not in file_paths:
|
|
||||||
logger.error(f"Report type not found: {report_type}")
|
|
||||||
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=404, detail="Report type not found")
|
|
||||||
|
|
||||||
file_path = file_paths[report_type]
|
|
||||||
if not os.path.exists(file_path):
|
|
||||||
logger.error(f"Report file not found: {file_path}")
|
|
||||||
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=404, detail="Report file not found")
|
|
||||||
|
|
||||||
logger.info(f"Successfully found report file: {file_path}")
|
|
||||||
response = FileResponse(
|
|
||||||
path=file_path,
|
|
||||||
filename=os.path.basename(file_path),
|
|
||||||
media_type="application/octet-stream"
|
|
||||||
)
|
|
||||||
logger.info(f"Download endpoint completed in {time.time() - start_time:.2f} seconds")
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error downloading report: {str(e)}")
|
|
||||||
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
@app.post("/run/pipeline", response_model=AnalysisResponse)
|
|
||||||
async def run_full_pipeline():
|
|
||||||
"""Run the complete salary analytics pipeline."""
|
|
||||||
start_time = time.time()
|
|
||||||
try:
|
|
||||||
check_data_loaded()
|
|
||||||
logger.info("Starting full pipeline...")
|
|
||||||
success = pipeline.run_full_pipeline()
|
|
||||||
if not success:
|
|
||||||
logger.error("Pipeline failed")
|
|
||||||
logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail="Pipeline failed")
|
|
||||||
|
|
||||||
logger.info("Pipeline completed successfully")
|
|
||||||
response = AnalysisResponse(
|
|
||||||
message="Pipeline completed successfully"
|
|
||||||
)
|
|
||||||
logger.info(f"Full pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in pipeline: {str(e)}")
|
|
||||||
logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
@app.post("/load-data")
|
|
||||||
async def load_data(source: str = "db", file: Optional[UploadFile] = File(None)):
|
|
||||||
"""
|
|
||||||
Load data from either database or CSV file.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
source (str): Source of data ('db' or 'csv')
|
|
||||||
file (UploadFile, optional): CSV file to load (required if source is 'csv')
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
dict: Status of data loading
|
|
||||||
"""
|
|
||||||
start_time = time.time()
|
|
||||||
try:
|
|
||||||
if source not in ['db', 'csv']:
|
|
||||||
logger.error(f"Invalid source: {source}")
|
|
||||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
|
|
||||||
|
|
||||||
if source == 'csv' and not file:
|
|
||||||
logger.error("No file provided for CSV source")
|
|
||||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
|
||||||
|
|
||||||
if source == 'csv':
|
|
||||||
# Save uploaded file temporarily
|
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
|
|
||||||
content = await file.read()
|
|
||||||
temp_file.write(content)
|
|
||||||
temp_file_path = temp_file.name
|
|
||||||
|
|
||||||
try:
|
|
||||||
success = pipeline.load_data(source='csv', file_path=temp_file_path)
|
|
||||||
finally:
|
|
||||||
# Clean up temporary file
|
|
||||||
os.unlink(temp_file_path)
|
|
||||||
else:
|
|
||||||
success = pipeline.load_data(source='db')
|
|
||||||
|
|
||||||
if not success:
|
|
||||||
logger.error("Failed to load data")
|
|
||||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail="Failed to load data")
|
|
||||||
|
|
||||||
response = {
|
|
||||||
"status": "success",
|
|
||||||
"message": f"Successfully loaded {len(pipeline.df)} rows of data",
|
|
||||||
"columns": pipeline.df.columns.tolist(),
|
|
||||||
"row_count": len(pipeline.df)
|
|
||||||
}
|
|
||||||
logger.info(f"Load data endpoint completed in {time.time() - start_time:.2f} seconds")
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error loading data: {str(e)}")
|
|
||||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
async def get_file_if_csv(source: str, file: Optional[UploadFile] = File(None)):
|
|
||||||
"""Dependency to handle file upload only when source is csv."""
|
|
||||||
if source == 'csv' and not file:
|
|
||||||
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
|
||||||
return file
|
|
||||||
|
|
||||||
@app.post("/run/streaming-pipeline", response_model=List[BatchResponse])
|
|
||||||
async def run_streaming_pipeline(
|
|
||||||
source: str = "db",
|
|
||||||
batch_size: int = 10000,
|
|
||||||
file: Optional[Union[UploadFile, str]] = File(None)
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
Run the complete salary analytics pipeline in batches.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
source (str): Source of data ('db' or 'csv')
|
|
||||||
batch_size (int): Number of rows to process in each batch
|
|
||||||
file (UploadFile, optional): CSV file to load (required if source is 'csv')
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List[BatchResponse]: List of responses for each batch processed
|
|
||||||
"""
|
|
||||||
start_time = time.time()
|
|
||||||
try:
|
|
||||||
if source not in ['db', 'csv']:
|
|
||||||
logger.error(f"Invalid source: {source}")
|
|
||||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
|
|
||||||
|
|
||||||
if source == 'csv' and not file:
|
|
||||||
logger.error("No file provided for CSV source")
|
|
||||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
|
||||||
|
|
||||||
# Initialize data loader
|
|
||||||
data_loader = DataLoader()
|
|
||||||
data_loader.chunk_size = batch_size
|
|
||||||
|
|
||||||
# Create output directory for batch results
|
|
||||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
||||||
batch_output_dir = os.path.join(os.path.dirname(OUTPUT_PATHS['final_table']), f"batch_results_{timestamp}")
|
|
||||||
os.makedirs(batch_output_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# Initialize database operations
|
|
||||||
if not data_loader.connect():
|
|
||||||
logger.error("Failed to connect to database")
|
|
||||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail="Failed to connect to database")
|
|
||||||
|
|
||||||
db_ops = DatabaseOperations(data_loader.engine)
|
|
||||||
if not db_ops.create_batch_results_table():
|
|
||||||
logger.error("Failed to create batch results table")
|
|
||||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail="Failed to create batch results table")
|
|
||||||
|
|
||||||
responses = []
|
|
||||||
batch_number = 0
|
|
||||||
batch_start_time = time.time()
|
|
||||||
|
|
||||||
def preprocess_chunk(chunk):
|
|
||||||
"""Preprocess a chunk of data with the same logic as DataLoader."""
|
|
||||||
# Convert dates
|
|
||||||
chunk['trx_start_date'] = pd.to_datetime(chunk['trx_start_date'])
|
|
||||||
chunk['trx_end_date'] = pd.to_datetime(chunk['trx_end_date'])
|
|
||||||
|
|
||||||
# Rename columns
|
|
||||||
chunk = chunk.rename(columns={
|
|
||||||
'd1': 'trx_type',
|
|
||||||
'd2': 'trx_subtype',
|
|
||||||
'd3': 'initiated_by',
|
|
||||||
'd4': 'customer_id'
|
|
||||||
})
|
|
||||||
|
|
||||||
chunk = chunk.dropna()
|
|
||||||
|
|
||||||
return chunk
|
|
||||||
|
|
||||||
if source == 'csv':
|
|
||||||
# Save uploaded file temporarily
|
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
|
|
||||||
content = await file.read()
|
|
||||||
temp_file.write(content)
|
|
||||||
temp_file_path = temp_file.name
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Process CSV in chunks
|
|
||||||
for chunk in pd.read_csv(temp_file_path, chunksize=batch_size):
|
|
||||||
batch_number += 1
|
|
||||||
logger.info(f"Processing batch {batch_number}")
|
|
||||||
|
|
||||||
# Preprocess chunk
|
|
||||||
chunk = preprocess_chunk(chunk)
|
|
||||||
|
|
||||||
# Run pipeline on chunk
|
|
||||||
pipeline = SalaryAnalyticsPipeline()
|
|
||||||
pipeline.df = chunk
|
|
||||||
|
|
||||||
try:
|
|
||||||
batch_start_time = time.time()
|
|
||||||
# Run analyses
|
|
||||||
pipeline.run_keyword_analysis()
|
|
||||||
pipeline.run_consistent_amount_analysis()
|
|
||||||
pipeline.run_transaction_type_analysis()
|
|
||||||
|
|
||||||
# Generate reports
|
|
||||||
reports = pipeline.generate_salary_earner_reports()
|
|
||||||
|
|
||||||
# Add batch metadata to results
|
|
||||||
results_df = reports['final_table'].copy()
|
|
||||||
results_df['batch_number'] = batch_number
|
|
||||||
results_df['total_batches'] = -1 # Unknown for CSV
|
|
||||||
results_df['processed_at'] = datetime.now()
|
|
||||||
|
|
||||||
# Save batch results to CSV
|
|
||||||
batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
|
|
||||||
results_df.to_csv(batch_results_path, index=False)
|
|
||||||
|
|
||||||
# Save to database
|
|
||||||
db_ops.save_batch_to_db(
|
|
||||||
batch_number=batch_number,
|
|
||||||
total_batches=-1, # Unknown for CSV
|
|
||||||
results_df=results_df,
|
|
||||||
status="success"
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"Batch {batch_number} processed in {time.time() - batch_start_time:.2f} seconds")
|
|
||||||
|
|
||||||
responses.append(BatchResponse(
|
|
||||||
batch_number=batch_number,
|
|
||||||
total_batches=-1, # Unknown for CSV
|
|
||||||
processed_rows=len(chunk),
|
|
||||||
results_path=batch_results_path,
|
|
||||||
message=f"Successfully processed batch {batch_number}"
|
|
||||||
))
|
|
||||||
except Exception as e:
|
|
||||||
error_message = str(e)
|
|
||||||
logger.error(f"Error processing batch {batch_number}: {error_message}")
|
|
||||||
|
|
||||||
# Save error to database
|
|
||||||
db_ops.save_batch_to_db(
|
|
||||||
batch_number=batch_number,
|
|
||||||
total_batches=-1,
|
|
||||||
results_df=pd.DataFrame(), # Empty DataFrame for error case
|
|
||||||
status="error"
|
|
||||||
)
|
|
||||||
|
|
||||||
responses.append(BatchResponse(
|
|
||||||
batch_number=batch_number,
|
|
||||||
total_batches=-1,
|
|
||||||
processed_rows=len(chunk),
|
|
||||||
results_path="",
|
|
||||||
message=f"Error processing batch {batch_number}: {error_message}"
|
|
||||||
))
|
|
||||||
finally:
|
|
||||||
# Clean up temporary file
|
|
||||||
os.unlink(temp_file_path)
|
|
||||||
else:
|
|
||||||
# Process database in chunks
|
|
||||||
if not data_loader.connect():
|
|
||||||
raise HTTPException(status_code=500, detail="Failed to connect to database")
|
|
||||||
|
|
||||||
# Get total row count
|
|
||||||
with data_loader.engine.connect() as conn:
|
|
||||||
count_query = text(f"SELECT COUNT(*) FROM {TABLE_NAME}")
|
|
||||||
total_rows = conn.execute(count_query).scalar()
|
|
||||||
|
|
||||||
total_batches = (total_rows + batch_size - 1) // batch_size
|
|
||||||
offset = 0
|
|
||||||
|
|
||||||
while offset < total_rows:
|
|
||||||
batch_number += 1
|
|
||||||
logger.info(f"Processing batch {batch_number} of {total_batches}")
|
|
||||||
|
|
||||||
# Load chunk from database
|
|
||||||
query = f"SELECT * FROM {TABLE_NAME} LIMIT {batch_size} OFFSET {offset}"
|
|
||||||
chunk = pd.read_sql(query, data_loader.engine)
|
|
||||||
|
|
||||||
if chunk.empty:
|
|
||||||
break
|
|
||||||
|
|
||||||
# Preprocess chunk
|
|
||||||
chunk = preprocess_chunk(chunk)
|
|
||||||
|
|
||||||
# Run pipeline on chunk
|
|
||||||
pipeline = SalaryAnalyticsPipeline()
|
|
||||||
pipeline.df = chunk
|
|
||||||
|
|
||||||
try:
|
|
||||||
batch_start_time = time.time()
|
|
||||||
# Run analyses
|
|
||||||
pipeline.run_keyword_analysis()
|
|
||||||
pipeline.run_consistent_amount_analysis()
|
|
||||||
pipeline.run_transaction_type_analysis()
|
|
||||||
|
|
||||||
# Generate reports
|
|
||||||
reports = pipeline.generate_salary_earner_reports()
|
|
||||||
|
|
||||||
# Add batch metadata to results
|
|
||||||
results_df = reports['final_table'].copy()
|
|
||||||
results_df['batch_number'] = batch_number
|
|
||||||
results_df['total_batches'] = total_batches
|
|
||||||
results_df['processed_at'] = datetime.now()
|
|
||||||
|
|
||||||
# Save batch results to CSV
|
|
||||||
batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
|
|
||||||
results_df.to_csv(batch_results_path, index=False)
|
|
||||||
|
|
||||||
# Save to database
|
|
||||||
db_ops.save_batch_to_db(
|
|
||||||
batch_number=batch_number,
|
|
||||||
total_batches=total_batches,
|
|
||||||
results_df=results_df,
|
|
||||||
status="success"
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"Batch {batch_number} of {total_batches} processed in {time.time() - batch_start_time:.2f} seconds")
|
|
||||||
|
|
||||||
responses.append(BatchResponse(
|
|
||||||
batch_number=batch_number,
|
|
||||||
total_batches=total_batches,
|
|
||||||
processed_rows=len(chunk),
|
|
||||||
results_path=batch_results_path,
|
|
||||||
message=f"Successfully processed batch {batch_number} of {total_batches}"
|
|
||||||
))
|
|
||||||
except Exception as e:
|
|
||||||
error_message = str(e)
|
|
||||||
logger.error(f"Error processing batch {batch_number}: {error_message}")
|
|
||||||
|
|
||||||
# Save error to database
|
|
||||||
db_ops.save_batch_to_db(
|
|
||||||
batch_number=batch_number,
|
|
||||||
total_batches=total_batches,
|
|
||||||
results_df=pd.DataFrame(), # Empty DataFrame for error case
|
|
||||||
status="error"
|
|
||||||
)
|
|
||||||
|
|
||||||
responses.append(BatchResponse(
|
|
||||||
batch_number=batch_number,
|
|
||||||
total_batches=total_batches,
|
|
||||||
processed_rows=len(chunk),
|
|
||||||
results_path="",
|
|
||||||
message=f"Error processing batch {batch_number}: {error_message}"
|
|
||||||
))
|
|
||||||
|
|
||||||
offset += batch_size
|
|
||||||
|
|
||||||
logger.info(f"Streaming pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
|
|
||||||
return responses
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in streaming pipeline: {str(e)}")
|
|
||||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
Reference in New Issue
Block a user