[add]: oracle database migrations #2
+11
-5
@@ -1,20 +1,26 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
# Set the working directory in the container
|
||||
WORKDIR /app
|
||||
|
||||
# Copy the current directory contents into the container at /app
|
||||
COPY . /app
|
||||
|
||||
RUN apt-get update && apt-get install -y libpq-dev && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
COPY salary_analytics/ ./salary_analytics/
|
||||
|
||||
RUN mkdir -p output/csv output/plots output/models
|
||||
|
||||
ENV PYTHONPATH=/app
|
||||
ENV HOST=0.0.0.0
|
||||
ENV PORT=8000
|
||||
|
||||
# ENV FLASK_APP=wsgi.py
|
||||
|
||||
ENV FLASK_APP=run.py.
|
||||
ENV FLASK_RUN_HOST=0.0.0.0
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
CMD ["uvicorn", "salary_analytics.api:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||
# CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:8000", "wsgi:wsgi_app"]
|
||||
CMD ["uvicorn", "run:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||
@@ -0,0 +1,644 @@
|
||||
2025-09-10 10:21:11,016 - INFO - Initializing pipeline...
|
||||
2025-09-10 10:21:11,019 - INFO - [2025-09-10 10:21:11] Detecting salary...
|
||||
2025-09-10 10:21:11,019 - INFO - Started autonomous salary detection loop.
|
||||
2025-09-10 10:21:11,030 - INFO - Server running on hostname: 22bad35c69c3
|
||||
2025-09-10 10:21:11,035 - INFO - Server IP address: 172.25.0.2
|
||||
2025-09-10 10:21:11,036 - INFO - Server is accessible at:
|
||||
2025-09-10 10:21:11,037 - INFO - - http://localhost:8000
|
||||
2025-09-10 10:21:11,039 - INFO - - http://127.0.0.1:8000
|
||||
2025-09-10 10:21:11,040 - INFO - - http://172.25.0.2:8000
|
||||
2025-09-10 10:21:11,041 - INFO - Pipeline initialized successfully
|
||||
2025-09-10 10:21:11,532 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:21:11,533 - INFO - [2025-09-10 10:21:11] Salary detection complete
|
||||
2025-09-10 10:21:19,451 - INFO - Initializing SalaryAnalyticsPipeline
|
||||
2025-09-10 10:21:19,452 - INFO - Starting data loading process
|
||||
2025-09-10 10:21:19,453 - INFO - No database connection. Attempting to connect...
|
||||
2025-09-10 10:21:19,455 - INFO - Attempting to connect to database...
|
||||
2025-09-10 10:21:27,271 - INFO - Table customer_account_transaction_hx exists with 5354307 rows
|
||||
2025-09-10 10:21:27,488 - INFO - Connected successfully to database!
|
||||
2025-09-10 10:21:27,715 - INFO - Loading data from table: customer_account_transaction_hx
|
||||
2025-09-10 10:21:28,614 - INFO - Total rows to process: 5354307
|
||||
2025-09-10 10:21:28,852 - INFO - Loading chunk starting at offset 0
|
||||
2025-09-10 10:21:40,251 - INFO - Loading chunk starting at offset 10000
|
||||
2025-09-10 10:21:46,981 - INFO - Loading chunk starting at offset 20000
|
||||
2025-09-10 10:21:57,010 - INFO - Loading chunk starting at offset 30000
|
||||
2025-09-10 10:22:04,792 - INFO - Loading chunk starting at offset 40000
|
||||
2025-09-10 10:22:09,599 - INFO - Loading chunk starting at offset 50000
|
||||
2025-09-10 10:22:15,016 - INFO - Loading chunk starting at offset 60000
|
||||
2025-09-10 10:22:18,613 - INFO - Loading chunk starting at offset 70000
|
||||
2025-09-10 10:22:22,007 - INFO - Loading chunk starting at offset 80000
|
||||
2025-09-10 10:22:28,397 - INFO - Loading chunk starting at offset 90000
|
||||
2025-09-10 10:22:36,081 - INFO - Loading chunk starting at offset 100000
|
||||
2025-09-10 10:22:48,738 - INFO - Loading chunk starting at offset 110000
|
||||
2025-09-10 10:23:01,521 - INFO - Loading chunk starting at offset 120000
|
||||
2025-09-10 10:23:11,532 - INFO - [2025-09-10 10:23:11] Detecting salary...
|
||||
2025-09-10 10:23:13,504 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:23:13,504 - INFO - [2025-09-10 10:23:13] Salary detection complete
|
||||
2025-09-10 10:23:16,348 - INFO - Loading chunk starting at offset 130000
|
||||
2025-09-10 10:23:23,259 - INFO - Loading chunk starting at offset 140000
|
||||
2025-09-10 10:23:29,284 - INFO - Loading chunk starting at offset 150000
|
||||
2025-09-10 10:23:41,579 - INFO - Loading chunk starting at offset 160000
|
||||
2025-09-10 10:23:54,788 - INFO - Loading chunk starting at offset 170000
|
||||
2025-09-10 10:24:19,519 - INFO - Loading chunk starting at offset 180000
|
||||
2025-09-10 10:24:31,657 - INFO - Loading chunk starting at offset 190000
|
||||
2025-09-10 10:24:46,130 - INFO - Loading chunk starting at offset 200000
|
||||
2025-09-10 10:24:57,289 - INFO - Loading chunk starting at offset 210000
|
||||
2025-09-10 10:25:07,964 - INFO - Loading chunk starting at offset 220000
|
||||
2025-09-10 10:25:13,503 - INFO - [2025-09-10 10:25:13] Detecting salary...
|
||||
2025-09-10 10:25:15,477 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:25:15,478 - INFO - [2025-09-10 10:25:15] Salary detection complete
|
||||
2025-09-10 10:25:17,793 - INFO - Loading chunk starting at offset 230000
|
||||
2025-09-10 10:25:25,026 - INFO - Loading chunk starting at offset 240000
|
||||
2025-09-10 10:25:32,079 - INFO - Loading chunk starting at offset 250000
|
||||
2025-09-10 10:25:39,990 - INFO - Loading chunk starting at offset 260000
|
||||
2025-09-10 10:25:50,492 - INFO - Loading chunk starting at offset 270000
|
||||
2025-09-10 10:26:00,181 - INFO - Loading chunk starting at offset 280000
|
||||
2025-09-10 10:26:10,138 - INFO - Loading chunk starting at offset 290000
|
||||
2025-09-10 10:26:20,437 - INFO - Loading chunk starting at offset 300000
|
||||
2025-09-10 10:26:34,962 - INFO - Loading chunk starting at offset 310000
|
||||
2025-09-10 10:26:46,248 - INFO - Loading chunk starting at offset 320000
|
||||
2025-09-10 10:26:55,275 - INFO - Loading chunk starting at offset 330000
|
||||
2025-09-10 10:27:09,733 - INFO - Loading chunk starting at offset 340000
|
||||
2025-09-10 10:27:15,480 - INFO - [2025-09-10 10:27:15] Detecting salary...
|
||||
2025-09-10 10:27:16,553 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:27:16,553 - INFO - [2025-09-10 10:27:16] Salary detection complete
|
||||
2025-09-10 10:27:26,568 - INFO - Loading chunk starting at offset 350000
|
||||
2025-09-10 10:27:36,472 - INFO - Loading chunk starting at offset 360000
|
||||
2025-09-10 10:27:44,909 - INFO - Loading chunk starting at offset 370000
|
||||
2025-09-10 10:27:54,557 - INFO - Loading chunk starting at offset 380000
|
||||
2025-09-10 10:28:00,588 - INFO - Loading chunk starting at offset 390000
|
||||
2025-09-10 10:28:05,957 - INFO - Loading chunk starting at offset 400000
|
||||
2025-09-10 10:28:12,058 - INFO - Loading chunk starting at offset 410000
|
||||
2025-09-10 10:28:19,248 - INFO - Loading chunk starting at offset 420000
|
||||
2025-09-10 10:28:29,938 - INFO - Loading chunk starting at offset 430000
|
||||
2025-09-10 10:28:51,274 - INFO - Loading chunk starting at offset 440000
|
||||
2025-09-10 10:28:57,720 - INFO - Loading chunk starting at offset 450000
|
||||
2025-09-10 10:29:02,117 - INFO - Loading chunk starting at offset 460000
|
||||
2025-09-10 10:29:11,555 - INFO - Loading chunk starting at offset 470000
|
||||
2025-09-10 10:29:16,565 - INFO - [2025-09-10 10:29:16] Detecting salary...
|
||||
2025-09-10 10:29:17,452 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:29:17,455 - INFO - [2025-09-10 10:29:17] Salary detection complete
|
||||
2025-09-10 10:29:30,145 - INFO - Loading chunk starting at offset 480000
|
||||
2025-09-10 10:29:39,048 - INFO - Loading chunk starting at offset 490000
|
||||
2025-09-10 10:29:43,412 - INFO - Loading chunk starting at offset 500000
|
||||
2025-09-10 10:29:47,415 - INFO - Loading chunk starting at offset 510000
|
||||
2025-09-10 10:29:52,149 - INFO - Loading chunk starting at offset 520000
|
||||
2025-09-10 10:29:58,331 - INFO - Loading chunk starting at offset 530000
|
||||
2025-09-10 10:30:04,933 - INFO - Loading chunk starting at offset 540000
|
||||
2025-09-10 10:30:11,934 - INFO - Loading chunk starting at offset 550000
|
||||
2025-09-10 10:30:19,315 - INFO - Loading chunk starting at offset 560000
|
||||
2025-09-10 10:30:25,683 - INFO - Loading chunk starting at offset 570000
|
||||
2025-09-10 10:30:33,577 - INFO - Loading chunk starting at offset 580000
|
||||
2025-09-10 10:30:40,363 - INFO - Loading chunk starting at offset 590000
|
||||
2025-09-10 10:30:46,205 - INFO - Loading chunk starting at offset 600000
|
||||
2025-09-10 10:30:53,957 - INFO - Loading chunk starting at offset 610000
|
||||
2025-09-10 10:31:08,083 - INFO - Loading chunk starting at offset 620000
|
||||
2025-09-10 10:31:17,454 - INFO - [2025-09-10 10:31:17] Detecting salary...
|
||||
2025-09-10 10:31:17,989 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:31:17,989 - INFO - [2025-09-10 10:31:17] Salary detection complete
|
||||
2025-09-10 10:31:18,901 - INFO - Loading chunk starting at offset 630000
|
||||
2025-09-10 10:31:27,551 - INFO - Loading chunk starting at offset 640000
|
||||
2025-09-10 10:31:33,662 - INFO - Loading chunk starting at offset 650000
|
||||
2025-09-10 10:31:40,107 - INFO - Loading chunk starting at offset 660000
|
||||
2025-09-10 10:31:49,938 - INFO - Loading chunk starting at offset 670000
|
||||
2025-09-10 10:31:57,777 - INFO - Loading chunk starting at offset 680000
|
||||
2025-09-10 10:32:06,629 - INFO - Loading chunk starting at offset 690000
|
||||
2025-09-10 10:32:15,281 - INFO - Loading chunk starting at offset 700000
|
||||
2025-09-10 10:32:25,328 - INFO - Loading chunk starting at offset 710000
|
||||
2025-09-10 10:32:40,639 - INFO - Loading chunk starting at offset 720000
|
||||
2025-09-10 10:32:51,078 - INFO - Loading chunk starting at offset 730000
|
||||
2025-09-10 10:33:03,924 - INFO - Loading chunk starting at offset 740000
|
||||
2025-09-10 10:33:13,007 - INFO - Loading chunk starting at offset 750000
|
||||
2025-09-10 10:33:17,989 - INFO - [2025-09-10 10:33:17] Detecting salary...
|
||||
2025-09-10 10:33:19,698 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:33:20,039 - INFO - [2025-09-10 10:33:20] Salary detection complete
|
||||
2025-09-10 10:33:20,857 - INFO - Loading chunk starting at offset 760000
|
||||
2025-09-10 10:33:30,803 - INFO - Loading chunk starting at offset 770000
|
||||
2025-09-10 10:33:38,162 - INFO - Loading chunk starting at offset 780000
|
||||
2025-09-10 10:33:44,504 - INFO - Loading chunk starting at offset 790000
|
||||
2025-09-10 10:33:50,063 - INFO - Loading chunk starting at offset 800000
|
||||
2025-09-10 10:33:57,957 - INFO - Loading chunk starting at offset 810000
|
||||
2025-09-10 10:34:05,256 - INFO - Loading chunk starting at offset 820000
|
||||
2025-09-10 10:34:12,212 - INFO - Loading chunk starting at offset 830000
|
||||
2025-09-10 10:34:20,478 - INFO - Loading chunk starting at offset 840000
|
||||
2025-09-10 10:34:28,018 - INFO - Loading chunk starting at offset 850000
|
||||
2025-09-10 10:34:36,413 - INFO - Loading chunk starting at offset 860000
|
||||
2025-09-10 10:34:50,168 - INFO - Loading chunk starting at offset 870000
|
||||
2025-09-10 10:35:13,867 - INFO - Loading chunk starting at offset 880000
|
||||
2025-09-10 10:35:20,391 - INFO - [2025-09-10 10:35:20] Detecting salary...
|
||||
2025-09-10 10:35:21,791 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:35:21,794 - INFO - [2025-09-10 10:35:21] Salary detection complete
|
||||
2025-09-10 10:35:28,918 - INFO - Loading chunk starting at offset 890000
|
||||
2025-09-10 10:35:40,300 - INFO - Loading chunk starting at offset 900000
|
||||
2025-09-10 10:36:05,763 - INFO - Loading chunk starting at offset 910000
|
||||
2025-09-10 10:36:15,199 - INFO - Loading chunk starting at offset 920000
|
||||
2025-09-10 10:36:46,036 - INFO - Loading chunk starting at offset 930000
|
||||
2025-09-10 10:36:53,221 - INFO - Loading chunk starting at offset 940000
|
||||
2025-09-10 10:37:06,754 - INFO - Loading chunk starting at offset 950000
|
||||
2025-09-10 10:37:15,558 - INFO - Loading chunk starting at offset 960000
|
||||
2025-09-10 10:37:21,798 - INFO - [2025-09-10 10:37:21] Detecting salary...
|
||||
2025-09-10 10:37:22,954 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:37:22,955 - INFO - [2025-09-10 10:37:22] Salary detection complete
|
||||
2025-09-10 10:37:29,088 - INFO - Loading chunk starting at offset 970000
|
||||
2025-09-10 10:37:54,730 - INFO - Loading chunk starting at offset 980000
|
||||
2025-09-10 10:38:13,667 - INFO - Loading chunk starting at offset 990000
|
||||
2025-09-10 10:38:27,880 - INFO - Loading chunk starting at offset 1000000
|
||||
2025-09-10 10:39:07,546 - INFO - Loading chunk starting at offset 1010000
|
||||
2025-09-10 10:39:17,369 - INFO - Loading chunk starting at offset 1020000
|
||||
2025-09-10 10:39:23,176 - INFO - [2025-09-10 10:39:23] Detecting salary...
|
||||
2025-09-10 10:39:24,444 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:39:24,445 - INFO - [2025-09-10 10:39:24] Salary detection complete
|
||||
2025-09-10 10:39:26,048 - INFO - Loading chunk starting at offset 1030000
|
||||
2025-09-10 10:39:45,494 - INFO - Loading chunk starting at offset 1040000
|
||||
2025-09-10 10:39:50,480 - INFO - Loading chunk starting at offset 1050000
|
||||
2025-09-10 10:39:54,793 - INFO - Loading chunk starting at offset 1060000
|
||||
2025-09-10 10:39:59,415 - INFO - Loading chunk starting at offset 1070000
|
||||
2025-09-10 10:40:04,306 - INFO - Loading chunk starting at offset 1080000
|
||||
2025-09-10 10:40:10,517 - INFO - Loading chunk starting at offset 1090000
|
||||
2025-09-10 10:40:15,671 - INFO - Loading chunk starting at offset 1100000
|
||||
2025-09-10 10:40:21,662 - INFO - Loading chunk starting at offset 1110000
|
||||
2025-09-10 10:40:30,551 - INFO - Loading chunk starting at offset 1120000
|
||||
2025-09-10 10:40:47,391 - INFO - Loading chunk starting at offset 1130000
|
||||
2025-09-10 10:40:55,609 - INFO - Loading chunk starting at offset 1140000
|
||||
2025-09-10 10:41:04,496 - INFO - Loading chunk starting at offset 1150000
|
||||
2025-09-10 10:41:14,608 - INFO - Loading chunk starting at offset 1160000
|
||||
2025-09-10 10:41:24,447 - INFO - [2025-09-10 10:41:24] Detecting salary...
|
||||
2025-09-10 10:41:28,537 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:41:28,548 - INFO - [2025-09-10 10:41:28] Salary detection complete
|
||||
2025-09-10 10:42:00,867 - INFO - Loading chunk starting at offset 1170000
|
||||
2025-09-10 10:42:13,069 - INFO - Loading chunk starting at offset 1180000
|
||||
2025-09-10 10:42:21,593 - INFO - Loading chunk starting at offset 1190000
|
||||
2025-09-10 10:42:32,011 - INFO - Loading chunk starting at offset 1200000
|
||||
2025-09-10 10:42:37,982 - INFO - Loading chunk starting at offset 1210000
|
||||
2025-09-10 10:42:45,458 - INFO - Loading chunk starting at offset 1220000
|
||||
2025-09-10 10:42:54,545 - INFO - Loading chunk starting at offset 1230000
|
||||
2025-09-10 10:43:15,705 - INFO - Loading chunk starting at offset 1240000
|
||||
2025-09-10 10:43:28,549 - INFO - [2025-09-10 10:43:28] Detecting salary...
|
||||
2025-09-10 10:43:31,640 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:43:31,641 - INFO - [2025-09-10 10:43:31] Salary detection complete
|
||||
2025-09-10 10:43:43,344 - INFO - Loading chunk starting at offset 1250000
|
||||
2025-09-10 10:43:50,004 - INFO - Loading chunk starting at offset 1260000
|
||||
2025-09-10 10:43:59,430 - INFO - Loading chunk starting at offset 1270000
|
||||
2025-09-10 10:44:07,478 - INFO - Loading chunk starting at offset 1280000
|
||||
2025-09-10 10:44:18,927 - INFO - Loading chunk starting at offset 1290000
|
||||
2025-09-10 10:44:28,523 - INFO - Loading chunk starting at offset 1300000
|
||||
2025-09-10 10:44:38,629 - INFO - Loading chunk starting at offset 1310000
|
||||
2025-09-10 10:44:56,977 - INFO - Loading chunk starting at offset 1320000
|
||||
2025-09-10 10:45:09,203 - INFO - Loading chunk starting at offset 1330000
|
||||
2025-09-10 10:45:18,657 - INFO - Loading chunk starting at offset 1340000
|
||||
2025-09-10 10:45:27,201 - INFO - Loading chunk starting at offset 1350000
|
||||
2025-09-10 10:45:31,641 - INFO - [2025-09-10 10:45:31] Detecting salary...
|
||||
2025-09-10 10:45:32,261 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:45:32,262 - INFO - [2025-09-10 10:45:32] Salary detection complete
|
||||
2025-09-10 10:45:35,308 - INFO - Loading chunk starting at offset 1360000
|
||||
2025-09-10 10:45:44,783 - INFO - Loading chunk starting at offset 1370000
|
||||
2025-09-10 10:45:52,048 - INFO - Loading chunk starting at offset 1380000
|
||||
2025-09-10 10:45:58,491 - INFO - Loading chunk starting at offset 1390000
|
||||
2025-09-10 10:46:13,270 - INFO - Loading chunk starting at offset 1400000
|
||||
2025-09-10 10:46:25,221 - INFO - Loading chunk starting at offset 1410000
|
||||
2025-09-10 10:46:34,358 - INFO - Loading chunk starting at offset 1420000
|
||||
2025-09-10 10:46:38,777 - INFO - Loading chunk starting at offset 1430000
|
||||
2025-09-10 10:46:42,534 - INFO - Loading chunk starting at offset 1440000
|
||||
2025-09-10 10:46:46,147 - INFO - Loading chunk starting at offset 1450000
|
||||
2025-09-10 10:46:50,727 - INFO - Loading chunk starting at offset 1460000
|
||||
2025-09-10 10:46:55,939 - INFO - Loading chunk starting at offset 1470000
|
||||
2025-09-10 10:47:01,696 - INFO - Loading chunk starting at offset 1480000
|
||||
2025-09-10 10:47:07,411 - INFO - Loading chunk starting at offset 1490000
|
||||
2025-09-10 10:47:13,449 - INFO - Loading chunk starting at offset 1500000
|
||||
2025-09-10 10:47:23,058 - INFO - Loading chunk starting at offset 1510000
|
||||
2025-09-10 10:47:30,097 - INFO - Loading chunk starting at offset 1520000
|
||||
2025-09-10 10:47:32,259 - INFO - [2025-09-10 10:47:32] Detecting salary...
|
||||
2025-09-10 10:47:32,923 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:47:32,924 - INFO - [2025-09-10 10:47:32] Salary detection complete
|
||||
2025-09-10 10:47:36,423 - INFO - Loading chunk starting at offset 1530000
|
||||
2025-09-10 10:47:45,086 - INFO - Loading chunk starting at offset 1540000
|
||||
2025-09-10 10:47:57,079 - INFO - Loading chunk starting at offset 1550000
|
||||
2025-09-10 10:48:19,741 - INFO - Loading chunk starting at offset 1560000
|
||||
2025-09-10 10:48:41,300 - INFO - Loading chunk starting at offset 1570000
|
||||
2025-09-10 10:48:51,349 - INFO - Loading chunk starting at offset 1580000
|
||||
2025-09-10 10:48:58,892 - INFO - Loading chunk starting at offset 1590000
|
||||
2025-09-10 10:49:04,857 - INFO - Loading chunk starting at offset 1600000
|
||||
2025-09-10 10:49:10,299 - INFO - Loading chunk starting at offset 1610000
|
||||
2025-09-10 10:49:16,650 - INFO - Loading chunk starting at offset 1620000
|
||||
2025-09-10 10:49:23,107 - INFO - Loading chunk starting at offset 1630000
|
||||
2025-09-10 10:49:32,320 - INFO - Loading chunk starting at offset 1640000
|
||||
2025-09-10 10:49:32,927 - INFO - [2025-09-10 10:49:32] Detecting salary...
|
||||
2025-09-10 10:49:33,484 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:49:33,485 - INFO - [2025-09-10 10:49:33] Salary detection complete
|
||||
2025-09-10 10:49:40,707 - INFO - Loading chunk starting at offset 1650000
|
||||
2025-09-10 10:49:50,677 - INFO - Loading chunk starting at offset 1660000
|
||||
2025-09-10 10:49:59,664 - INFO - Loading chunk starting at offset 1670000
|
||||
2025-09-10 10:50:07,452 - INFO - Loading chunk starting at offset 1680000
|
||||
2025-09-10 10:50:13,583 - INFO - Loading chunk starting at offset 1690000
|
||||
2025-09-10 10:50:20,858 - INFO - Loading chunk starting at offset 1700000
|
||||
2025-09-10 10:50:27,519 - INFO - Loading chunk starting at offset 1710000
|
||||
2025-09-10 10:50:34,012 - INFO - Loading chunk starting at offset 1720000
|
||||
2025-09-10 10:50:39,628 - INFO - Loading chunk starting at offset 1730000
|
||||
2025-09-10 10:50:50,012 - INFO - Loading chunk starting at offset 1740000
|
||||
2025-09-10 10:51:06,647 - INFO - Loading chunk starting at offset 1750000
|
||||
2025-09-10 10:51:17,401 - INFO - Loading chunk starting at offset 1760000
|
||||
2025-09-10 10:51:26,360 - INFO - Loading chunk starting at offset 1770000
|
||||
2025-09-10 10:51:33,485 - INFO - [2025-09-10 10:51:33] Detecting salary...
|
||||
2025-09-10 10:51:55,392 - INFO - Initializing pipeline...
|
||||
2025-09-10 10:51:55,395 - INFO - [2025-09-10 10:51:55] Detecting salary...
|
||||
2025-09-10 10:51:55,395 - INFO - Started autonomous salary detection loop.
|
||||
2025-09-10 10:51:55,409 - INFO - Server running on hostname: 22bad35c69c3
|
||||
2025-09-10 10:51:55,416 - INFO - Server IP address: 172.25.0.2
|
||||
2025-09-10 10:51:55,424 - INFO - Server is accessible at:
|
||||
2025-09-10 10:51:55,426 - INFO - - http://localhost:8000
|
||||
2025-09-10 10:51:55,444 - INFO - - http://127.0.0.1:8000
|
||||
2025-09-10 10:51:55,455 - INFO - - http://172.25.0.2:8000
|
||||
2025-09-10 10:51:55,456 - INFO - Pipeline initialized successfully
|
||||
2025-09-10 10:51:56,024 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:51:56,024 - INFO - [2025-09-10 10:51:56] Salary detection complete
|
||||
2025-09-10 10:52:36,450 - INFO - Initializing SalaryAnalyticsPipeline
|
||||
2025-09-10 10:52:36,451 - INFO - Starting data loading process
|
||||
2025-09-10 10:52:36,452 - INFO - No database connection. Attempting to connect...
|
||||
2025-09-10 10:52:36,452 - INFO - Attempting to connect to database...
|
||||
2025-09-10 10:52:36,538 - ERROR - Error connecting to database: No module named 'oracledb'
|
||||
2025-09-10 10:52:36,539 - ERROR - Failed to establish database connection
|
||||
2025-09-10 10:52:36,539 - ERROR - Failed to load data
|
||||
2025-09-10 10:52:36,540 - ERROR - Failed to load data
|
||||
2025-09-10 10:52:36,541 - INFO - Load data endpoint failed after 0.09 seconds
|
||||
2025-09-10 10:52:36,541 - ERROR - Error loading data: 500: Failed to load data
|
||||
2025-09-10 10:52:36,542 - INFO - Load data endpoint failed after 0.09 seconds
|
||||
2025-09-10 10:52:58,149 - INFO - Shutting down Salary Analytics API...
|
||||
2025-09-10 10:53:10,697 - INFO - generated new fontManager
|
||||
2025-09-10 10:53:16,039 - INFO - Initializing pipeline...
|
||||
2025-09-10 10:53:16,041 - INFO - [2025-09-10 10:53:16] Detecting salary...
|
||||
2025-09-10 10:53:16,042 - INFO - Started autonomous salary detection loop.
|
||||
2025-09-10 10:53:16,055 - INFO - Server running on hostname: 1c5d2376fb2a
|
||||
2025-09-10 10:53:16,056 - INFO - Server IP address: 172.25.0.2
|
||||
2025-09-10 10:53:16,057 - INFO - Server is accessible at:
|
||||
2025-09-10 10:53:16,058 - INFO - - http://localhost:8000
|
||||
2025-09-10 10:53:16,059 - INFO - - http://127.0.0.1:8000
|
||||
2025-09-10 10:53:16,060 - INFO - - http://172.25.0.2:8000
|
||||
2025-09-10 10:53:16,062 - INFO - Pipeline initialized successfully
|
||||
2025-09-10 10:53:16,812 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 10:53:16,813 - INFO - [2025-09-10 10:53:16] Salary detection complete
|
||||
2025-09-10 10:53:25,477 - INFO - Initializing SalaryAnalyticsPipeline
|
||||
2025-09-10 10:53:25,482 - INFO - Starting data loading process
|
||||
2025-09-10 10:53:25,483 - INFO - No database connection. Attempting to connect...
|
||||
2025-09-10 10:53:25,484 - INFO - Attempting to connect to database...
|
||||
2025-09-10 10:53:25,679 - ERROR - Error connecting to database: No module named 'oracledb'
|
||||
2025-09-10 10:53:25,679 - ERROR - Failed to establish database connection
|
||||
2025-09-10 10:53:25,680 - ERROR - Failed to load data
|
||||
2025-09-10 10:53:25,680 - ERROR - Failed to load data
|
||||
2025-09-10 10:53:25,681 - INFO - Load data endpoint failed after 0.20 seconds
|
||||
2025-09-10 10:53:25,682 - ERROR - Error loading data: 500: Failed to load data
|
||||
2025-09-10 10:53:25,682 - INFO - Load data endpoint failed after 0.20 seconds
|
||||
2025-09-10 10:53:46,324 - INFO - Shutting down Salary Analytics API...
|
||||
2025-09-10 11:07:49,841 - INFO - generated new fontManager
|
||||
2025-09-10 11:07:59,771 - INFO - Initializing pipeline...
|
||||
2025-09-10 11:07:59,774 - INFO - [2025-09-10 11:07:59] Detecting salary...
|
||||
2025-09-10 11:07:59,774 - INFO - Started autonomous salary detection loop.
|
||||
2025-09-10 11:07:59,893 - INFO - Server running on hostname: 0b21809edf52
|
||||
2025-09-10 11:07:59,894 - INFO - Server IP address: 172.25.0.2
|
||||
2025-09-10 11:07:59,895 - INFO - Server is accessible at:
|
||||
2025-09-10 11:07:59,915 - INFO - - http://localhost:8000
|
||||
2025-09-10 11:07:59,917 - INFO - - http://127.0.0.1:8000
|
||||
2025-09-10 11:07:59,918 - INFO - - http://172.25.0.2:8000
|
||||
2025-09-10 11:07:59,919 - INFO - Pipeline initialized successfully
|
||||
2025-09-10 11:08:01,268 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:08:01,269 - INFO - [2025-09-10 11:08:01] Salary detection complete
|
||||
2025-09-10 11:08:15,668 - INFO - Initializing SalaryAnalyticsPipeline
|
||||
2025-09-10 11:08:15,669 - INFO - Starting data loading process
|
||||
2025-09-10 11:08:15,671 - INFO - No database connection. Attempting to connect...
|
||||
2025-09-10 11:08:15,672 - INFO - Attempting to connect to database...
|
||||
2025-09-10 11:08:20,076 - ERROR - Error connecting to database: (oracledb.exceptions.DatabaseError) ORA-00936: missing expression
|
||||
Help: https://docs.oracle.com/error-help/db/ora-00936/
|
||||
[SQL: SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = 'customer_account_transaction_hx')]
|
||||
(Background on this error at: https://sqlalche.me/e/20/4xp6)
|
||||
2025-09-10 11:08:20,086 - ERROR - Failed to establish database connection
|
||||
2025-09-10 11:08:20,087 - ERROR - Failed to load data
|
||||
2025-09-10 11:08:20,092 - ERROR - Failed to load data
|
||||
2025-09-10 11:08:20,119 - INFO - Load data endpoint failed after 4.45 seconds
|
||||
2025-09-10 11:08:20,123 - ERROR - Error loading data: 500: Failed to load data
|
||||
2025-09-10 11:08:20,124 - INFO - Load data endpoint failed after 4.46 seconds
|
||||
2025-09-10 11:10:01,280 - INFO - [2025-09-10 11:10:01] Detecting salary...
|
||||
2025-09-10 11:10:02,367 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:10:02,368 - INFO - [2025-09-10 11:10:02] Salary detection complete
|
||||
2025-09-10 11:12:02,395 - INFO - [2025-09-10 11:12:02] Detecting salary...
|
||||
2025-09-10 11:12:03,234 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:12:03,256 - INFO - [2025-09-10 11:12:03] Salary detection complete
|
||||
2025-09-10 11:14:03,279 - INFO - [2025-09-10 11:14:03] Detecting salary...
|
||||
2025-09-10 11:14:04,266 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:14:04,267 - INFO - [2025-09-10 11:14:04] Salary detection complete
|
||||
2025-09-10 11:16:04,268 - INFO - [2025-09-10 11:16:04] Detecting salary...
|
||||
2025-09-10 11:16:05,133 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:16:05,134 - INFO - [2025-09-10 11:16:05] Salary detection complete
|
||||
2025-09-10 11:18:05,134 - INFO - [2025-09-10 11:18:05] Detecting salary...
|
||||
2025-09-10 11:18:05,955 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:18:05,956 - INFO - [2025-09-10 11:18:05] Salary detection complete
|
||||
2025-09-10 11:20:05,855 - INFO - [2025-09-10 11:20:05] Detecting salary...
|
||||
2025-09-10 11:20:06,453 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:20:06,463 - INFO - [2025-09-10 11:20:06] Salary detection complete
|
||||
2025-09-10 11:22:06,512 - INFO - [2025-09-10 11:22:06] Detecting salary...
|
||||
2025-09-10 11:22:07,087 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:22:07,088 - INFO - [2025-09-10 11:22:07] Salary detection complete
|
||||
2025-09-10 11:24:07,092 - INFO - [2025-09-10 11:24:07] Detecting salary...
|
||||
2025-09-10 11:24:08,235 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:24:08,236 - INFO - [2025-09-10 11:24:08] Salary detection complete
|
||||
2025-09-10 11:25:13,551 - INFO - Shutting down Salary Analytics API...
|
||||
2025-09-10 11:25:52,852 - INFO - Initializing pipeline...
|
||||
2025-09-10 11:25:52,857 - INFO - [2025-09-10 11:25:52] Detecting salary...
|
||||
2025-09-10 11:25:52,857 - INFO - Started autonomous salary detection loop.
|
||||
2025-09-10 11:25:52,866 - INFO - Server running on hostname: 0b21809edf52
|
||||
2025-09-10 11:25:52,876 - INFO - Server IP address: 172.25.0.2
|
||||
2025-09-10 11:25:52,881 - INFO - Server is accessible at:
|
||||
2025-09-10 11:25:52,882 - INFO - - http://localhost:8000
|
||||
2025-09-10 11:25:52,883 - INFO - - http://127.0.0.1:8000
|
||||
2025-09-10 11:25:52,884 - INFO - - http://172.25.0.2:8000
|
||||
2025-09-10 11:25:52,884 - INFO - Pipeline initialized successfully
|
||||
2025-09-10 11:25:53,580 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:25:53,581 - INFO - [2025-09-10 11:25:53] Salary detection complete
|
||||
2025-09-10 11:27:53,578 - INFO - [2025-09-10 11:27:53] Detecting salary...
|
||||
2025-09-10 11:27:54,235 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:27:54,235 - INFO - [2025-09-10 11:27:54] Salary detection complete
|
||||
2025-09-10 11:29:54,235 - INFO - [2025-09-10 11:29:54] Detecting salary...
|
||||
2025-09-10 11:29:54,827 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-10 11:29:54,827 - INFO - [2025-09-10 11:29:54] Salary detection complete
|
||||
2025-09-10 11:30:20,979 - INFO - Shutting down Salary Analytics API...
|
||||
2025-09-10 11:31:18,351 - INFO - Initializing pipeline...
|
||||
2025-09-10 11:31:18,375 - INFO - [2025-09-10 11:31:18] Detecting salary...
|
||||
2025-09-10 11:31:18,376 - INFO - Started autonomous salary detection loop.
|
||||
2025-09-10 11:31:18,405 - INFO - Server running on hostname: 0b21809edf52
|
||||
2025-09-10 11:31:18,447 - INFO - Server IP address: 172.25.0.2
|
||||
2025-09-10 11:31:18,528 - INFO - Server is accessible at:
|
||||
2025-09-10 11:31:18,552 - INFO - - http://localhost:8000
|
||||
2025-09-10 11:31:18,552 - INFO - - http://127.0.0.1:8000
|
||||
2025-09-10 11:31:18,553 - INFO - - http://172.25.0.2:8000
|
||||
2025-09-10 11:31:18,554 - INFO - Pipeline initialized successfully
|
||||
2025-09-19 11:27:20,722 - INFO - Initializing pipeline...
|
||||
2025-09-19 11:27:20,724 - INFO - [2025-09-19 11:27:20] Detecting salary...
|
||||
2025-09-19 11:27:20,724 - INFO - Started autonomous salary detection loop.
|
||||
2025-09-19 11:27:20,750 - INFO - Server running on hostname: 0b21809edf52
|
||||
2025-09-19 11:27:20,752 - INFO - Server IP address: 172.25.0.2
|
||||
2025-09-19 11:27:20,753 - INFO - Server is accessible at:
|
||||
2025-09-19 11:27:20,759 - INFO - - http://localhost:8000
|
||||
2025-09-19 11:27:20,761 - INFO - - http://127.0.0.1:8000
|
||||
2025-09-19 11:27:20,766 - INFO - - http://172.25.0.2:8000
|
||||
2025-09-19 11:27:20,771 - INFO - Pipeline initialized successfully
|
||||
2025-09-19 11:27:24,622 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-19 11:27:24,628 - INFO - [2025-09-19 11:27:24] Salary detection complete
|
||||
2025-09-19 11:29:13,095 - INFO - Shutting down Salary Analytics API...
|
||||
2025-09-24 09:47:38,012 - INFO - Initializing pipeline...
|
||||
2025-09-24 09:47:38,017 - INFO - [2025-09-24 09:47:38] Detecting salary...
|
||||
2025-09-24 09:47:38,024 - INFO - Started autonomous salary detection loop.
|
||||
2025-09-24 09:47:38,077 - INFO - Server running on hostname: 0b21809edf52
|
||||
2025-09-24 09:47:38,079 - INFO - Server IP address: 172.25.0.2
|
||||
2025-09-24 09:47:38,080 - INFO - Server is accessible at:
|
||||
2025-09-24 09:47:38,081 - INFO - - http://localhost:8000
|
||||
2025-09-24 09:47:38,082 - INFO - - http://127.0.0.1:8000
|
||||
2025-09-24 09:47:38,083 - INFO - - http://172.25.0.2:8000
|
||||
2025-09-24 09:47:38,084 - INFO - Pipeline initialized successfully
|
||||
2025-09-24 09:47:41,272 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-24 09:47:41,273 - INFO - [2025-09-24 09:47:41] Salary detection complete
|
||||
2025-09-24 09:49:41,290 - INFO - [2025-09-24 09:49:41] Detecting salary...
|
||||
2025-09-24 09:49:43,791 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-24 09:49:43,791 - INFO - [2025-09-24 09:49:43] Salary detection complete
|
||||
2025-09-24 09:50:21,300 - INFO - Shutting down Salary Analytics API...
|
||||
2025-09-24 09:59:27,300 - INFO - generated new fontManager
|
||||
2025-09-24 09:59:30,871 - INFO - Initializing pipeline...
|
||||
2025-09-24 09:59:30,872 - INFO - [2025-09-24 09:59:30] Detecting salary...
|
||||
2025-09-24 09:59:30,872 - INFO - Started autonomous salary detection loop.
|
||||
2025-09-24 09:59:30,877 - INFO - Server running on hostname: bfe07c2f7da2
|
||||
2025-09-24 09:59:30,878 - INFO - Server IP address: 172.25.0.2
|
||||
2025-09-24 09:59:30,878 - INFO - Server is accessible at:
|
||||
2025-09-24 09:59:30,883 - INFO - - http://localhost:8000
|
||||
2025-09-24 09:59:30,884 - INFO - - http://127.0.0.1:8000
|
||||
2025-09-24 09:59:30,887 - INFO - - http://172.25.0.2:8000
|
||||
2025-09-24 09:59:30,889 - INFO - Pipeline initialized successfully
|
||||
2025-09-24 09:59:32,676 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
|
||||
"data": [],
|
||||
"error": {},
|
||||
"message": "AutoCall Add Salary Successful",
|
||||
"status": true,
|
||||
"statusCode": 200
|
||||
}
|
||||
|
||||
2025-09-24 09:59:32,705 - INFO - [2025-09-24 09:59:32] Salary detection complete
|
||||
@@ -1,17 +1,21 @@
|
||||
from flask import Flask
|
||||
import os
|
||||
from .extensions import db, migrate
|
||||
from app.extensions import db, migrate
|
||||
|
||||
|
||||
|
||||
def create_app():
|
||||
app = Flask(__name__)
|
||||
app.config.from_object('salary_analytics.config')
|
||||
|
||||
# Load configuration from config.py
|
||||
app.config.from_object('app.config')
|
||||
|
||||
# Initialize extensions
|
||||
db.init_app(app)
|
||||
migrate.init_app(app, db)
|
||||
|
||||
# Register blueprints or CLI commands here if needed
|
||||
from . import commands
|
||||
from app.api.commands import commands
|
||||
app.cli.add_command(commands.upload_xls_cli)
|
||||
|
||||
return app
|
||||
@@ -2,8 +2,8 @@ import click
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from flask.cli import with_appcontext
|
||||
from salary_analytics.app.extensions import db
|
||||
from salary_analytics.app.models import RawTransaction
|
||||
from app.extensions import db
|
||||
from app.models import RawTransaction
|
||||
|
||||
@click.group()
|
||||
def commands():
|
||||
@@ -22,20 +22,32 @@ os.makedirs(PLOTS_DIR, exist_ok=True)
|
||||
os.makedirs(CSV_DIR, exist_ok=True)
|
||||
os.makedirs(MODEL_DIR, exist_ok=True)
|
||||
|
||||
|
||||
# Database Configuration
|
||||
DB_CONFIG = {
|
||||
"user": os.getenv("DB_USER"), # Default value as fallback
|
||||
"password": os.getenv("DB_PASSWORD"),
|
||||
"name": os.getenv("DB_NAME"),
|
||||
"port": os.getenv("DB_PORT"),
|
||||
"host": os.getenv("DB_HOST")
|
||||
"user": os.getenv("DATABASE_USER"),
|
||||
"password": os.getenv("DATABASE_PASSWORD"),
|
||||
"name": os.getenv("DATABASE_NAME"),
|
||||
"port": os.getenv("DATABASE_PORT", 10532),
|
||||
"host": os.getenv("DATABASE_HOST", "firstadvancedev"),
|
||||
"sid": os.getenv("DATABASE_SID", "FREE"),
|
||||
"service_name": os.getenv("DATABASE_SERVICE_NAME", "firstadv")
|
||||
}
|
||||
|
||||
|
||||
# DNS = f"(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_CONFIG['host']})(PORT={DB_CONFIG['port']}))(CONNECT_DATA=(SID={DB_CONFIG['sid']})))"
|
||||
|
||||
# Database Connection
|
||||
# SQLALCHEMY_DATABASE_URI = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}")
|
||||
SQLALCHEMY_DATABASE_URI = ( f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DB_CONFIG['host']}:{DB_CONFIG['port']}/?service_name={DB_CONFIG['service_name']}")
|
||||
|
||||
|
||||
# SQLAlchemy Configuration
|
||||
SQLALCHEMY_DATABASE_URI = (
|
||||
f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@"
|
||||
f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}"
|
||||
)
|
||||
# SQLALCHEMY_DATABASE_URI = (
|
||||
# f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@"
|
||||
# f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}"
|
||||
# )
|
||||
|
||||
SQLALCHEMY_TRACK_MODIFICATIONS = False
|
||||
|
||||
# Table Configuration
|
||||
@@ -80,7 +92,7 @@ OUTPUT_PATHS = {
|
||||
}
|
||||
|
||||
SIMBRELLA_BASE_URL = os.getenv("SIMBRELLA_BASE_URL", "http://127.0.0.1:6337")
|
||||
SIMBRELLA_ENDPOINT_RAC_CHECKS = os.getenv("SIMBRELLA_ENDPOINT_RAC_CHECKS","api/rac-check")
|
||||
SIMBRELLA_ENDPOINT_RAC_CHECKS = os.getenv("SIMBRELLA_ENDPOINT_RAC_CHECKS", "api/rac-check")
|
||||
|
||||
# Salary Detect Endpoint Config
|
||||
SALARY_DETECT_URL = "http://www.simbrellang.net:5000/autocall/analytic-salary-detect"
|
||||
@@ -0,0 +1,4 @@
|
||||
from .raw_transaction import RawTransaction
|
||||
|
||||
|
||||
__all__ = ['RawTransaction']
|
||||
@@ -0,0 +1,18 @@
|
||||
from app.extensions import db
|
||||
|
||||
|
||||
class Account(db.Model):
|
||||
__tablename__ = "accounts"
|
||||
|
||||
customerid = db.Column(db.String(50), primary_key=True)
|
||||
accountid = db.Column(db.String(11), nullable=False)
|
||||
registrationdate = db.Column(db.DateTime)
|
||||
accountcurrencycode = db.Column(db.String(3))
|
||||
schemecode = db.Column(db.String(5))
|
||||
lastinflowtransactiondate = db.Column(db.DateTime)
|
||||
accountstatus = db.Column(db.String(50))
|
||||
accountstatusdate = db.Column(db.DateTime)
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return f"<Account {self.accountid}>"
|
||||
@@ -0,0 +1,97 @@
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Numeric, Boolean, func
|
||||
from sqlalchemy.orm import declarative_base, Session
|
||||
from datetime import datetime
|
||||
from app.utils.logger import logger
|
||||
from app.extensions import db
|
||||
|
||||
|
||||
class BatchResult(db.Model):
|
||||
__tablename__ = "salary_analytics_batch_results"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
batch_number = Column(Integer, nullable=False)
|
||||
total_batches = Column(Integer, nullable=False)
|
||||
processed_at = Column(DateTime, default=datetime.utcnow)
|
||||
accountid = Column(String, nullable=False)
|
||||
num_months = Column(Integer)
|
||||
least_inflow_6m = Column(Numeric)
|
||||
avg_monthly_salary = Column(Numeric)
|
||||
estimated_next_amount = Column(Numeric)
|
||||
estimated_next_date = Column(DateTime)
|
||||
is_45day_salary = Column(Boolean, default=False)
|
||||
is_2months_salary = Column(Boolean, default=False)
|
||||
status = Column(String, default="success")
|
||||
|
||||
|
||||
@classmethod
|
||||
def save_batch(cls, batch_number, total_batches, results_df, status="success"):
|
||||
"""Save batch results into DB using ORM bulk insert."""
|
||||
try:
|
||||
results_df["batch_number"] = batch_number
|
||||
results_df["total_batches"] = total_batches
|
||||
results_df["processed_at"] = datetime.utcnow()
|
||||
results_df["status"] = status
|
||||
|
||||
# Normalize boolean columns
|
||||
results_df["is_45day_salary"] = results_df.get("45daysalary", False)
|
||||
results_df["is_2months_salary"] = results_df.get("2monthssalary", False)
|
||||
|
||||
# Convert to list of ORM objects
|
||||
records = [
|
||||
cls(**row)
|
||||
for row in results_df.to_dict("records")
|
||||
]
|
||||
|
||||
db.session.bulk_save_objects(records)
|
||||
db.session.commit()
|
||||
logger.info(f"Saved batch {batch_number} successfully.")
|
||||
return True
|
||||
except Exception as e:
|
||||
db.session.rollback()
|
||||
logger.error(f"Error saving batch {batch_number}: {str(e)}")
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def get_batch_status(cls, batch_number: int):
|
||||
"""Return summary info about one batch."""
|
||||
try:
|
||||
result = (
|
||||
db.session.query(
|
||||
cls.batch_number,
|
||||
cls.total_batches,
|
||||
cls.processed_at,
|
||||
func.count().label("total_records"),
|
||||
func.sum(func.case((cls.status == "success", 1), else_=0)).label("successful_records"),
|
||||
func.sum(func.case((cls.status == "error", 1), else_=0)).label("failed_records"),
|
||||
)
|
||||
.filter(cls.batch_number == batch_number)
|
||||
.group_by(cls.batch_number, cls.total_batches, cls.processed_at)
|
||||
.order_by(cls.processed_at.desc())
|
||||
.first()
|
||||
)
|
||||
return dict(result._mapping) if result else None
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching batch {batch_number} status: {str(e)}")
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def get_all_batches(cls):
|
||||
"""Return summaries for all batches."""
|
||||
try:
|
||||
results = (
|
||||
db.session.query(
|
||||
cls.batch_number,
|
||||
cls.total_batches,
|
||||
cls.processed_at,
|
||||
func.count().label("total_records"),
|
||||
func.sum(func.case((cls.status == "success", 1), else_=0)).label("successful_records"),
|
||||
func.sum(func.case((cls.status == "error", 1), else_=0)).label("failed_records"),
|
||||
)
|
||||
.group_by(cls.batch_number, cls.total_batches, cls.processed_at)
|
||||
.order_by(cls.batch_number)
|
||||
.all()
|
||||
)
|
||||
return [dict(r._mapping) for r in results]
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching all batches: {str(e)}")
|
||||
return []
|
||||
@@ -0,0 +1,100 @@
|
||||
from venv import logger
|
||||
from sqlalchemy import Column, Integer, String, Float, DateTime, ForeignKey
|
||||
from sqlalchemy.orm import relationship
|
||||
from app.extensions import db
|
||||
import pandas as pd
|
||||
|
||||
class CustomerAccountTransactionHx(db.Model):
|
||||
__tablename__ = "customer_account_transaction_hx"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
accountid = Column(String(64), nullable=False, index=True)
|
||||
trx_type = Column(String(50), nullable=False)
|
||||
amount = Column(Float, nullable=False)
|
||||
description = Column(String(255))
|
||||
customer_id = Column(String(64))
|
||||
trx_start_date = Column(DateTime, nullable=False)
|
||||
trx_end_date = Column(DateTime)
|
||||
is_salary_related = Column(Integer, default=0)
|
||||
is_consistent_amount = Column(Integer, default=0)
|
||||
is_salary_type = Column(Integer, default=0)
|
||||
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_all(cls):
|
||||
"""Fetch all transactions."""
|
||||
return db.session.query(cls).all()
|
||||
|
||||
@classmethod
|
||||
def get_rows_count(cls):
|
||||
"""Return total number of transaction rows."""
|
||||
try:
|
||||
count = db.session.query(db.func.count(cls.id)).scalar()
|
||||
return count
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting row count: {str(e)}")
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def get_by_account(cls, accountid: str):
|
||||
"""Fetch transactions for a given account."""
|
||||
return db.session.query(cls).filter_by(accountid=accountid).all()
|
||||
|
||||
@classmethod
|
||||
def get_accounts(cls, limit=None):
|
||||
"""Fetch distinct account IDs."""
|
||||
query = db.session.query(cls.accountid).distinct()
|
||||
if limit:
|
||||
query = query.limit(limit)
|
||||
return [row.accountid for row in query.all()]
|
||||
|
||||
@classmethod
|
||||
def insert_transaction(cls, **kwargs):
|
||||
"""Insert a new transaction."""
|
||||
trx = cls(**kwargs)
|
||||
try:
|
||||
db.session.add(trx)
|
||||
db.session.commit()
|
||||
except Exception as e:
|
||||
logger.error(f"Error inserting transaction: {str(e)}")
|
||||
return None
|
||||
return trx
|
||||
|
||||
@classmethod
|
||||
def bulk_insert(cls, transactions: list[dict]):
|
||||
"""Insert multiple transactions at once."""
|
||||
objs = [cls(**trx) for trx in transactions]
|
||||
|
||||
try:
|
||||
db.session.bulk_save_objects(objs)
|
||||
db.session.commit()
|
||||
except Exception as e:
|
||||
logger.error(f"Error in bulk insert: {str(e)}")
|
||||
return None
|
||||
return objs
|
||||
|
||||
@classmethod
|
||||
def get_transactions_df(cls, accountids: list[str] = None):
|
||||
"""Return a Pandas DataFrame for ML model preparation."""
|
||||
query = db.session.query(cls)
|
||||
if accountids:
|
||||
query = query.filter(cls.accountid.in_(accountids))
|
||||
rows = query.all()
|
||||
|
||||
|
||||
df = pd.DataFrame([{
|
||||
"id": trx.id,
|
||||
"accountid": trx.accountid,
|
||||
"trx_type": trx.trx_type,
|
||||
"amount": trx.amount,
|
||||
"description": trx.description,
|
||||
"customer_id": trx.customer_id,
|
||||
"trx_start_date": trx.trx_start_date,
|
||||
"trx_end_date": trx.trx_end_date,
|
||||
"is_salary_related": trx.is_salary_related,
|
||||
"is_consistent_amount": trx.is_consistent_amount,
|
||||
"is_salary_type": trx.is_salary_type,
|
||||
} for trx in rows])
|
||||
|
||||
return df
|
||||
@@ -2,12 +2,10 @@
|
||||
Database operations module for salary analytics.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from sqlalchemy import text
|
||||
from .config import BATCH_RESULTS_TABLE
|
||||
from app.config import BATCH_RESULTS_TABLE
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from app.utils.logger import logger
|
||||
|
||||
class DatabaseOperations:
|
||||
def __init__(self, engine):
|
||||
@@ -1,4 +1,4 @@
|
||||
from .extensions import db
|
||||
from app.extensions import db
|
||||
|
||||
class RawTransaction(db.Model):
|
||||
__tablename__ = 'analytics_raw_transactions'
|
||||
@@ -0,0 +1,36 @@
|
||||
from app.extensions import db
|
||||
|
||||
class SimbrellaCustomer(db.Model):
|
||||
__tablename__ = "simbrella_customers"
|
||||
|
||||
customerid = db.Column(db.String(500), primary_key=True)
|
||||
acct_opn_date = db.Column(db.DateTime)
|
||||
gender = db.Column(db.String(20))
|
||||
birth_date = db.Column(db.DateTime)
|
||||
msisdn = db.Column(db.String(100))
|
||||
isbvnvalid = db.Column(db.String(100))
|
||||
datebvnvalidated = db.Column(db.DateTime)
|
||||
iscrmsvalid = db.Column(db.String(100))
|
||||
datecrmsvalidated = db.Column(db.DateTime)
|
||||
iscrcvalid = db.Column(db.String(100))
|
||||
datecrcvalidated = db.Column(db.DateTime)
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return f"<SimbrellaCustomer {self.customerid}>"
|
||||
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"customerid": self.customerid,
|
||||
"acct_opn_date": self.acct_opn_date,
|
||||
"gender": self.gender,
|
||||
"birth_date": self.birth_date,
|
||||
"msisdn": self.msisdn,
|
||||
"isbvnvalid": self.isbvnvalid,
|
||||
"datebvnvalidated": self.datebvnvalidated,
|
||||
"iscrmsvalid": self.iscrmsvalid,
|
||||
"datecrmsvalidated": self.datecrmsvalidated,
|
||||
"iscrcvalid": self.iscrcvalid,
|
||||
"datecrcvalidated": self.datecrcvalidated,
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
from app.extensions import db
|
||||
|
||||
|
||||
class TempSimbrellaLien(db.Model):
|
||||
__tablename__ = "tmp_simbrella_lien"
|
||||
|
||||
lienid = db.Column(db.String(33), primary_key=True)
|
||||
customerid = db.Column(db.String(500))
|
||||
accountid = db.Column(db.String(11))
|
||||
lien_start_date = db.Column(db.DateTime)
|
||||
action = db.Column(db.String(5))
|
||||
restriction_nature = db.Column(db.String(150))
|
||||
claim_amount = db.Column(db.Numeric(20, 4))
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return f"<TempSimbrellaLien {self.lienid}>"
|
||||
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"lienid": self.lienid,
|
||||
"customerid": self.customerid,
|
||||
"accountid": self.accountid,
|
||||
"lien_start_date": self.lien_start_date,
|
||||
"action": self.action,
|
||||
"restriction_nature": self.restriction_nature,
|
||||
"claim_amount": float(self.claim_amount) if self.claim_amount is not None else None,
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
from app.extensions import db
|
||||
|
||||
|
||||
class Transaction(db.Model):
|
||||
__tablename__ = "transactions"
|
||||
|
||||
tran_id = db.Column(db.String(50), primary_key=True)
|
||||
cif_id = db.Column(db.String(500))
|
||||
foracid = db.Column(db.String(150))
|
||||
acid = db.Column(db.String(150))
|
||||
tran_date = db.Column(db.DateTime)
|
||||
value_date = db.Column(db.DateTime)
|
||||
pstd_date = db.Column(db.DateTime)
|
||||
tran_sub_type = db.Column(db.String(50))
|
||||
part_tran_type = db.Column(db.String(50))
|
||||
tran_crncy_code = db.Column(db.String(50))
|
||||
tran_amt = db.Column(db.Numeric(38, 0))
|
||||
tran_particular = db.Column(db.String(250))
|
||||
origination_channel = db.Column(db.String(150))
|
||||
reversal_tran_id = db.Column(db.String(50))
|
||||
isreversal = db.Column(db.String(50))
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return f"<Transaction {self.tran_id}>"
|
||||
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"tran_id": self.tran_id,
|
||||
"cif_id": self.cif_id,
|
||||
"foracid": self.foracid,
|
||||
"acid": self.acid,
|
||||
"tran_date": self.tran_date,
|
||||
"value_date": self.value_date,
|
||||
"pstd_date": self.pstd_date,
|
||||
"tran_sub_type": self.tran_sub_type,
|
||||
"part_tran_type": self.part_tran_type,
|
||||
"tran_crncy_code": self.tran_crncy_code,
|
||||
"tran_amt": float(self.tran_amt) if self.tran_amt is not None else None,
|
||||
"tran_particular": self.tran_particular,
|
||||
"origination_channel": self.origination_channel,
|
||||
"reversal_tran_id": self.reversal_tran_id,
|
||||
"isreversal": self.isreversal,
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
from app.extensions import db
|
||||
|
||||
|
||||
class TransactionStg(db.Model):
|
||||
__tablename__ = "transaction_stg"
|
||||
|
||||
tran_id = db.Column(db.String(50), primary_key=True)
|
||||
cif_id = db.Column(db.String(500))
|
||||
foracid = db.Column(db.String(150))
|
||||
acid = db.Column(db.String(150))
|
||||
tran_date = db.Column(db.String(50)) # staging table keeps as string
|
||||
value_date = db.Column(db.String(50))
|
||||
pstd_date = db.Column(db.String(50))
|
||||
tran_sub_type = db.Column(db.String(50))
|
||||
part_tran_type = db.Column(db.String(50))
|
||||
tran_crncy_code = db.Column(db.String(50))
|
||||
tran_amt = db.Column(db.Numeric(38, 0))
|
||||
tran_particular = db.Column(db.String(250))
|
||||
origination_channel = db.Column(db.String(150))
|
||||
reversal_tran_id = db.Column(db.String(50))
|
||||
isreversal = db.Column(db.String(50))
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return f"<TransactionStg {self.tran_id}>"
|
||||
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"tran_id": self.tran_id,
|
||||
"cif_id": self.cif_id,
|
||||
"foracid": self.foracid,
|
||||
"acid": self.acid,
|
||||
"tran_date": self.tran_date,
|
||||
"value_date": self.value_date,
|
||||
"pstd_date": self.pstd_date,
|
||||
"tran_sub_type": self.tran_sub_type,
|
||||
"part_tran_type": self.part_tran_type,
|
||||
"tran_crncy_code": self.tran_crncy_code,
|
||||
"tran_amt": float(self.tran_amt) if self.tran_amt is not None else None,
|
||||
"tran_particular": self.tran_particular,
|
||||
"origination_channel": self.origination_channel,
|
||||
"reversal_tran_id": self.reversal_tran_id,
|
||||
"isreversal": self.isreversal,
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
from fastapi import FastAPI
|
||||
from app.salary_analytics.routes import analysis, reports, pipeline, load, base, train
|
||||
from app.salary_analytics.middlewares.middleware import add_middlewares
|
||||
from app.salary_analytics.events.lifecycle import register_events
|
||||
from app.utils.logger import logger
|
||||
import socket
|
||||
|
||||
"""
|
||||
Salary Analytics Package
|
||||
A package for analyzing and predicting salary patterns from transaction data.
|
||||
"""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
|
||||
|
||||
def create_app() -> FastAPI:
|
||||
app = FastAPI(
|
||||
title="Salary Analytics API",
|
||||
description="API for analyzing and predicting salary patterns from transaction data",
|
||||
version="1.0.0"
|
||||
)
|
||||
|
||||
# Middlewares
|
||||
add_middlewares(app)
|
||||
|
||||
# Events
|
||||
register_events(app)
|
||||
|
||||
# Routers
|
||||
app.include_router(base.router, tags=["Base"])
|
||||
app.include_router(analysis.router, prefix="/analyze", tags=["Analysis"])
|
||||
app.include_router(reports.router, tags=["Reports"])
|
||||
app.include_router(pipeline.router, tags=["Pipeline"])
|
||||
app.include_router(load.router, tags=["Data"])
|
||||
app.include_router(train.router, tags=["Model Training"])
|
||||
|
||||
|
||||
return app
|
||||
@@ -0,0 +1,64 @@
|
||||
from app.salary_analytics.services.main import SalaryAnalyticsPipeline
|
||||
from app.salary_analytics.services.data_loader import DataLoader
|
||||
|
||||
|
||||
class GlobalState:
|
||||
def __init__(self):
|
||||
self._pipeline = None
|
||||
self._data_loader = None
|
||||
self.df = None
|
||||
self.salary_predictor = None
|
||||
self.salary_earner_analyzer = None
|
||||
|
||||
# ---- Pipeline ----
|
||||
@property
|
||||
def pipeline(self):
|
||||
if self._pipeline is None:
|
||||
self._pipeline = SalaryAnalyticsPipeline()
|
||||
return self._pipeline
|
||||
|
||||
@pipeline.setter
|
||||
def pipeline(self, value):
|
||||
self._pipeline = value
|
||||
|
||||
# ---- Data Loader ----
|
||||
@property
|
||||
def data_loader(self):
|
||||
if self._data_loader is None:
|
||||
self._data_loader = DataLoader()
|
||||
return self._data_loader
|
||||
|
||||
@data_loader.setter
|
||||
def data_loader(self, value):
|
||||
self._data_loader = value
|
||||
|
||||
# ---- DataFrame ----
|
||||
@property
|
||||
def df(self):
|
||||
return self._df
|
||||
|
||||
@df.setter
|
||||
def df(self, value):
|
||||
self._df = value
|
||||
|
||||
# ---- Salary Predictor ----
|
||||
@property
|
||||
def salary_predictor(self):
|
||||
return self._salary_predictor
|
||||
|
||||
@salary_predictor.setter
|
||||
def salary_predictor(self, value):
|
||||
self._salary_predictor = value
|
||||
|
||||
# ---- Salary Earner Analyzer ----
|
||||
@property
|
||||
def salary_earner_analyzer(self):
|
||||
return self._salary_earner_analyzer
|
||||
|
||||
@salary_earner_analyzer.setter
|
||||
def salary_earner_analyzer(self, value):
|
||||
self._salary_earner_analyzer = value
|
||||
|
||||
|
||||
state = GlobalState()
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
import socket
|
||||
from fastapi import FastAPI
|
||||
from app.salary_analytics.integrations.salary_detect import SalaryDetect
|
||||
from app.utils.logger import logger
|
||||
|
||||
salary_detect = SalaryDetect()
|
||||
|
||||
|
||||
def register_events(app: FastAPI):
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
"""Initialize the pipeline on startup."""
|
||||
try:
|
||||
logger.info("Initializing pipeline...")
|
||||
|
||||
# Start autonomous salary detection loop
|
||||
salary_detect.start()
|
||||
logger.info("Started autonomous salary detection loop.")
|
||||
|
||||
# Print network information
|
||||
hostname = socket.gethostname()
|
||||
ip_address = socket.gethostbyname(hostname)
|
||||
logger.info(f"Server running on hostname: {hostname}")
|
||||
logger.info(f"Server IP address: {ip_address}")
|
||||
logger.info(f"Server is accessible at:")
|
||||
logger.info(f"- http://localhost:8000")
|
||||
logger.info(f"- http://127.0.0.1:8000")
|
||||
logger.info(f"- http://{ip_address}:8000")
|
||||
logger.info("Pipeline initialized successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Error during startup: {str(e)}")
|
||||
raise
|
||||
|
||||
@app.on_event("shutdown")
|
||||
async def shutdown_event():
|
||||
logger.info("Shutting down Salary Analytics API...")
|
||||
@@ -0,0 +1,12 @@
|
||||
from fastapi import HTTPException
|
||||
from app.salary_analytics.core.state import state
|
||||
|
||||
|
||||
def check_data_loaded():
|
||||
"""Raise HTTP 400 if no data is loaded into the pipeline."""
|
||||
if state.pipeline.df is None:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="No data loaded. Please load data first using the /load-data endpoint."
|
||||
)
|
||||
return True
|
||||
@@ -0,0 +1,17 @@
|
||||
from typing import Optional, Dict, List, Union
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class AnalysisResponse(BaseModel):
|
||||
"""Response model for analysis endpoints."""
|
||||
message: str
|
||||
data: Optional[Dict] = None
|
||||
file_path: Optional[str] = None
|
||||
|
||||
class BatchResponse(BaseModel):
|
||||
"""Response model for batch processing."""
|
||||
batch_number: int
|
||||
total_batches: int
|
||||
processed_rows: int
|
||||
results_path: str
|
||||
message: str
|
||||
@@ -1,10 +1,8 @@
|
||||
from django.conf import settings
|
||||
import httpx
|
||||
import json
|
||||
from salary_analytics.config import SIMBRELLA_BASE_URL, SIMBRELLA_ENDPOINT_RAC_CHECKS
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from app.config import SIMBRELLA_BASE_URL, SIMBRELLA_ENDPOINT_RAC_CHECKS
|
||||
from app.utils.logger import logger
|
||||
|
||||
class SimbrellaIntegration:
|
||||
BASE_URL = SIMBRELLA_BASE_URL
|
||||
+2
-5
@@ -1,11 +1,8 @@
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
import requests
|
||||
from .config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
from app.config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload
|
||||
from app.utils.logger import logger
|
||||
|
||||
class SalaryDetect:
|
||||
def __init__(self):
|
||||
@@ -0,0 +1,11 @@
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi import FastAPI
|
||||
|
||||
def add_middlewares(app: FastAPI):
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
@@ -0,0 +1,75 @@
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from app.salary_analytics.helpers.response_helpers import AnalysisResponse
|
||||
from app.salary_analytics.helpers.data_checks import check_data_loaded
|
||||
from app.utils.logger import logger
|
||||
import time
|
||||
from app.salary_analytics.core.state import state
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/keyword", response_model=AnalysisResponse)
|
||||
async def analyze_keyword():
|
||||
"""Run keyword-based salary transaction analysis."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting keyword analysis...")
|
||||
data = state.pipeline.run_keyword_analysis()
|
||||
logger.info(f"Keyword analysis completed. Found {len(data)} matches")
|
||||
response = AnalysisResponse(
|
||||
message="Keyword analysis completed successfully",
|
||||
data={"count": len(data)}
|
||||
)
|
||||
logger.info(f"Keyword analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in keyword analysis: {str(e)}")
|
||||
logger.info(f"Keyword analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
|
||||
@router.post("/consistent-amount", response_model=AnalysisResponse)
|
||||
async def analyze_consistent_amount():
|
||||
"""Run consistent amount transaction analysis."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting consistent amount analysis...")
|
||||
data = state.pipeline.run_consistent_amount_analysis()
|
||||
logger.info(f"Consistent amount analysis completed. Found {len(data)} matches")
|
||||
response = AnalysisResponse(
|
||||
message="Consistent amount analysis completed successfully",
|
||||
data={"count": len(data)}
|
||||
)
|
||||
logger.info(f"Consistent amount analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in consistent amount analysis: {str(e)}")
|
||||
logger.info(f"Consistent amount analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
|
||||
|
||||
@router.post("/transaction-type", response_model=AnalysisResponse)
|
||||
async def analyze_transaction_type():
|
||||
"""Run transaction type analysis."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting transaction type analysis...")
|
||||
data = state.pipeline.run_transaction_type_analysis()
|
||||
logger.info(f"Transaction type analysis completed. Found {len(data)} matches")
|
||||
response = AnalysisResponse(
|
||||
message="Transaction type analysis completed successfully",
|
||||
data={"count": len(data)}
|
||||
)
|
||||
logger.info(f"Transaction type analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in transaction type analysis: {str(e)}")
|
||||
logger.info(f"Transaction type analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
from fastapi import APIRouter
|
||||
from app.utils.logger import logger
|
||||
import time
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@router.get("/")
|
||||
async def root():
|
||||
"""Root endpoint."""
|
||||
start_time = time.time()
|
||||
logger.info("Root endpoint accessed")
|
||||
response = {"message": "Welcome to Salary Analytics API"}
|
||||
logger.info(f"Root endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
|
||||
|
||||
@router.get("/health")
|
||||
async def health_check():
|
||||
"""Health check endpoint."""
|
||||
start_time = time.time()
|
||||
logger.info("Health check endpoint accessed")
|
||||
response = {"status": "healthy"}
|
||||
logger.info(f"Health check completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
from fastapi import APIRouter, HTTPException, UploadFile, File
|
||||
from app.salary_analytics.core.state import state
|
||||
from app.utils.logger import logger
|
||||
import tempfile, os, time
|
||||
from typing import Optional
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
|
||||
@router.post("/load-data")
|
||||
async def load_data(source: str = "db", file: Optional[UploadFile] = File(None)):
|
||||
"""
|
||||
Load data from either database or CSV file.
|
||||
|
||||
Args:
|
||||
source (str): Source of data ('db' or 'csv')
|
||||
file (UploadFile, optional): CSV file to load (required if source is 'csv')
|
||||
|
||||
Returns:
|
||||
dict: Status of data loading
|
||||
"""
|
||||
start_time = time.time()
|
||||
try:
|
||||
if source not in ['db', 'csv']:
|
||||
logger.error(f"Invalid source: {source}")
|
||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
|
||||
|
||||
if source == 'csv' and not file:
|
||||
logger.error("No file provided for CSV source")
|
||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
||||
|
||||
if source == 'csv':
|
||||
# Save uploaded file temporarily
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
|
||||
content = await file.read()
|
||||
temp_file.write(content)
|
||||
temp_file_path = temp_file.name
|
||||
|
||||
try:
|
||||
success = state.pipeline.load_data(source='csv', file_path=temp_file_path)
|
||||
finally:
|
||||
# Clean up temporary file
|
||||
os.unlink(temp_file_path)
|
||||
else:
|
||||
success = state.pipeline.load_data(source='db')
|
||||
|
||||
if not success:
|
||||
logger.error("Failed to load data")
|
||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail="Failed to load data")
|
||||
|
||||
response = {
|
||||
"status": "success",
|
||||
"message": f"Successfully loaded {len(state.pipeline.df)} rows of data",
|
||||
"columns": state.pipeline.df.columns.tolist(),
|
||||
"row_count": len(state.pipeline.df)
|
||||
}
|
||||
logger.info(f"Load data endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading data: {str(e)}")
|
||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/load-data-with-file")
|
||||
async def get_file_if_csv(source: str, file: Optional[UploadFile] = File(None)):
|
||||
"""Dependency to handle file upload only when source is csv."""
|
||||
if source == 'csv' and not file:
|
||||
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
||||
return file
|
||||
@@ -0,0 +1,292 @@
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from app.salary_analytics.services.main import SalaryAnalyticsPipeline
|
||||
from app.salary_analytics.helpers.response_helpers import AnalysisResponse, BatchResponse
|
||||
from app.salary_analytics.helpers.data_checks import check_data_loaded
|
||||
from app.salary_analytics.services.data_loader import DataLoader
|
||||
from app.salary_analytics.core.state import state
|
||||
from app.models.db_operations import DatabaseOperations
|
||||
from app.config import OUTPUT_PATHS, TABLE_NAME
|
||||
from app.utils.logger import logger
|
||||
from typing import Optional, List, Union
|
||||
from sqlalchemy import text
|
||||
from datetime import datetime
|
||||
import pandas as pd, os, tempfile, time
|
||||
from typing import Optional, Union
|
||||
from fastapi import UploadFile, File
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/run/pipeline", response_model=AnalysisResponse)
|
||||
async def run_full_pipeline():
|
||||
"""Run the complete salary analytics pipeline."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting full pipeline...")
|
||||
success = state.pipeline.run_full_pipeline()
|
||||
if not success:
|
||||
logger.error("Pipeline failed")
|
||||
logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail="Pipeline failed")
|
||||
|
||||
logger.info("Pipeline completed successfully")
|
||||
response = AnalysisResponse(
|
||||
message="Pipeline completed successfully"
|
||||
)
|
||||
logger.info(f"Full pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in pipeline: {str(e)}")
|
||||
logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/run/streaming-pipeline", response_model=List[BatchResponse])
|
||||
async def run_streaming_pipeline(
|
||||
source: str = "db",
|
||||
batch_size: int = 10000,
|
||||
file: Optional[Union[UploadFile, str]] = File(None)
|
||||
):
|
||||
"""
|
||||
Run the complete salary analytics pipeline in batches.
|
||||
|
||||
Args:
|
||||
source (str): Source of data ('db' or 'csv')
|
||||
batch_size (int): Number of rows to process in each batch
|
||||
file (UploadFile, optional): CSV file to load (required if source is 'csv')
|
||||
|
||||
Returns:
|
||||
List[BatchResponse]: List of responses for each batch processed
|
||||
"""
|
||||
start_time = time.time()
|
||||
try:
|
||||
if source not in ['db', 'csv']:
|
||||
logger.error(f"Invalid source: {source}")
|
||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
|
||||
|
||||
if source == 'csv' and not file:
|
||||
logger.error("No file provided for CSV source")
|
||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
||||
|
||||
# Initialize data loader
|
||||
state.data_loader = DataLoader()
|
||||
state.data_loader.chunk_size = batch_size
|
||||
|
||||
# Create output directory for batch results
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
batch_output_dir = os.path.join(os.path.dirname(OUTPUT_PATHS['final_table']), f"batch_results_{timestamp}")
|
||||
os.makedirs(batch_output_dir, exist_ok=True)
|
||||
|
||||
# Initialize database operations
|
||||
if not state.data_loader.connect():
|
||||
logger.error("Failed to connect to database")
|
||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail="Failed to connect to database")
|
||||
|
||||
db_ops = DatabaseOperations(state.data_loader.engine)
|
||||
if not db_ops.create_batch_results_table():
|
||||
logger.error("Failed to create batch results table")
|
||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail="Failed to create batch results table")
|
||||
|
||||
responses = []
|
||||
batch_number = 0
|
||||
batch_start_time = time.time()
|
||||
|
||||
def preprocess_chunk(chunk):
|
||||
"""Preprocess a chunk of data with the same logic as DataLoader."""
|
||||
# Convert dates
|
||||
chunk['trx_start_date'] = pd.to_datetime(chunk['trx_start_date'])
|
||||
chunk['trx_end_date'] = pd.to_datetime(chunk['trx_end_date'])
|
||||
|
||||
# Rename columns
|
||||
chunk = chunk.rename(columns={
|
||||
'd1': 'trx_type',
|
||||
'd2': 'trx_subtype',
|
||||
'd3': 'initiated_by',
|
||||
'd4': 'customer_id'
|
||||
})
|
||||
|
||||
chunk = chunk.dropna()
|
||||
|
||||
return chunk
|
||||
|
||||
if source == 'csv':
|
||||
# Save uploaded file temporarily
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
|
||||
content = await file.read()
|
||||
temp_file.write(content)
|
||||
temp_file_path = temp_file.name
|
||||
|
||||
try:
|
||||
# Process CSV in chunks
|
||||
for chunk in pd.read_csv(temp_file_path, chunksize=batch_size):
|
||||
batch_number += 1
|
||||
logger.info(f"Processing batch {batch_number}")
|
||||
|
||||
# Preprocess chunk
|
||||
chunk = preprocess_chunk(chunk)
|
||||
|
||||
# Run pipeline on chunk
|
||||
state.pipeline = SalaryAnalyticsPipeline()
|
||||
state.pipeline.df = chunk
|
||||
|
||||
try:
|
||||
batch_start_time = time.time()
|
||||
# Run analyses
|
||||
state.pipeline.run_keyword_analysis()
|
||||
state.pipeline.run_consistent_amount_analysis()
|
||||
state.pipeline.run_transaction_type_analysis()
|
||||
|
||||
# Generate reports
|
||||
reports = state.pipeline.generate_salary_earner_reports()
|
||||
|
||||
# Add batch metadata to results
|
||||
results_df = reports['final_table'].copy()
|
||||
results_df['batch_number'] = batch_number
|
||||
results_df['total_batches'] = -1 # Unknown for CSV
|
||||
results_df['processed_at'] = datetime.now()
|
||||
|
||||
# Save batch results to CSV
|
||||
batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
|
||||
results_df.to_csv(batch_results_path, index=False)
|
||||
|
||||
# Save to database
|
||||
db_ops.save_batch_to_db(
|
||||
batch_number=batch_number,
|
||||
total_batches=-1, # Unknown for CSV
|
||||
results_df=results_df,
|
||||
status="success"
|
||||
)
|
||||
|
||||
logger.info(f"Batch {batch_number} processed in {time.time() - batch_start_time:.2f} seconds")
|
||||
|
||||
responses.append(BatchResponse(
|
||||
batch_number=batch_number,
|
||||
total_batches=-1, # Unknown for CSV
|
||||
processed_rows=len(chunk),
|
||||
results_path=batch_results_path,
|
||||
message=f"Successfully processed batch {batch_number}"
|
||||
))
|
||||
except Exception as e:
|
||||
error_message = str(e)
|
||||
logger.error(f"Error processing batch {batch_number}: {error_message}")
|
||||
|
||||
# Save error to database
|
||||
db_ops.save_batch_to_db(
|
||||
batch_number=batch_number,
|
||||
total_batches=-1,
|
||||
results_df=pd.DataFrame(), # Empty DataFrame for error case
|
||||
status="error"
|
||||
)
|
||||
|
||||
responses.append(BatchResponse(
|
||||
batch_number=batch_number,
|
||||
total_batches=-1,
|
||||
processed_rows=len(chunk),
|
||||
results_path="",
|
||||
message=f"Error processing batch {batch_number}: {error_message}"
|
||||
))
|
||||
finally:
|
||||
# Clean up temporary file
|
||||
os.unlink(temp_file_path)
|
||||
else:
|
||||
# Process database in chunks
|
||||
if not state.data_loader.connect():
|
||||
raise HTTPException(status_code=500, detail="Failed to connect to database")
|
||||
|
||||
# Get total row count
|
||||
with state.data_loader.engine.connect() as conn:
|
||||
count_query = text(f"SELECT COUNT(*) FROM {TABLE_NAME}")
|
||||
total_rows = conn.execute(count_query).scalar()
|
||||
|
||||
total_batches = (total_rows + batch_size - 1) // batch_size
|
||||
offset = 0
|
||||
|
||||
while offset < total_rows:
|
||||
batch_number += 1
|
||||
logger.info(f"Processing batch {batch_number} of {total_batches}")
|
||||
|
||||
# Load chunk from database
|
||||
query = f"SELECT * FROM {TABLE_NAME} LIMIT {batch_size} OFFSET {offset}"
|
||||
chunk = pd.read_sql(query, state.data_loader.engine)
|
||||
|
||||
if chunk.empty:
|
||||
break
|
||||
|
||||
# Preprocess chunk
|
||||
chunk = preprocess_chunk(chunk)
|
||||
|
||||
# Run pipeline on chunk
|
||||
pipeline = SalaryAnalyticsPipeline()
|
||||
state.pipeline.df = chunk
|
||||
|
||||
try:
|
||||
batch_start_time = time.time()
|
||||
# Run analyses
|
||||
state.pipeline.run_keyword_analysis()
|
||||
state.pipeline.run_consistent_amount_analysis()
|
||||
state.pipeline.run_transaction_type_analysis()
|
||||
|
||||
# Generate reports
|
||||
reports = state.pipeline.generate_salary_earner_reports()
|
||||
|
||||
# Add batch metadata to results
|
||||
results_df = reports['final_table'].copy()
|
||||
results_df['batch_number'] = batch_number
|
||||
results_df['total_batches'] = total_batches
|
||||
results_df['processed_at'] = datetime.now()
|
||||
|
||||
# Save batch results to CSV
|
||||
batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
|
||||
results_df.to_csv(batch_results_path, index=False)
|
||||
|
||||
# Save to database
|
||||
db_ops.save_batch_to_db(
|
||||
batch_number=batch_number,
|
||||
total_batches=total_batches,
|
||||
results_df=results_df,
|
||||
status="success"
|
||||
)
|
||||
|
||||
logger.info(f"Batch {batch_number} of {total_batches} processed in {time.time() - batch_start_time:.2f} seconds")
|
||||
|
||||
responses.append(BatchResponse(
|
||||
batch_number=batch_number,
|
||||
total_batches=total_batches,
|
||||
processed_rows=len(chunk),
|
||||
results_path=batch_results_path,
|
||||
message=f"Successfully processed batch {batch_number} of {total_batches}"
|
||||
))
|
||||
except Exception as e:
|
||||
error_message = str(e)
|
||||
logger.error(f"Error processing batch {batch_number}: {error_message}")
|
||||
|
||||
# Save error to database
|
||||
db_ops.save_batch_to_db(
|
||||
batch_number=batch_number,
|
||||
total_batches=total_batches,
|
||||
results_df=pd.DataFrame(), # Empty DataFrame for error case
|
||||
status="error"
|
||||
)
|
||||
|
||||
responses.append(BatchResponse(
|
||||
batch_number=batch_number,
|
||||
total_batches=total_batches,
|
||||
processed_rows=len(chunk),
|
||||
results_path="",
|
||||
message=f"Error processing batch {batch_number}: {error_message}"
|
||||
))
|
||||
|
||||
offset += batch_size
|
||||
|
||||
logger.info(f"Streaming pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return responses
|
||||
except Exception as e:
|
||||
logger.error(f"Error in streaming pipeline: {str(e)}")
|
||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||
from fastapi.responses import FileResponse
|
||||
from app.salary_analytics.helpers.response_helpers import AnalysisResponse
|
||||
from app.salary_analytics.helpers.data_checks import check_data_loaded
|
||||
from app.salary_analytics.core.state import state
|
||||
from app.config import OUTPUT_PATHS
|
||||
from app.utils.logger import logger
|
||||
import os, time
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/generate/reports", response_model=AnalysisResponse)
|
||||
async def generate_reports(background_tasks: BackgroundTasks):
|
||||
"""Generate salary earner reports."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting report generation...")
|
||||
reports = state.pipeline.generate_salary_earner_reports()
|
||||
logger.info("Reports generated successfully")
|
||||
response = AnalysisResponse(
|
||||
message="Reports generated successfully",
|
||||
data={
|
||||
"verified_salary_earners": len(reports['final_table']),
|
||||
"likely_salary_earners": len(reports['likely_salary_earner']),
|
||||
"high_earners": reports['total_high_earners']
|
||||
}
|
||||
)
|
||||
logger.info(f"Report generation endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in report generation: {str(e)}")
|
||||
logger.info(f"Report generation endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
|
||||
@router.get("/download/{report_type}")
|
||||
async def download_report(report_type: str):
|
||||
"""Download generated reports."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info(f"Attempting to download report: {report_type}")
|
||||
file_paths = {
|
||||
"high_earners": OUTPUT_PATHS["high_earner_details"],
|
||||
"likely_earners": OUTPUT_PATHS["likely_salary_earner"],
|
||||
"final_table": OUTPUT_PATHS["final_table"],
|
||||
"consistent_plot": OUTPUT_PATHS["consistent_earners_plot"],
|
||||
"inconsistent_plot": OUTPUT_PATHS["inconsistent_earners_plot"],
|
||||
"hypothesis_plot": OUTPUT_PATHS["hypothesis_overlap_plot"]
|
||||
}
|
||||
|
||||
if report_type not in file_paths:
|
||||
logger.error(f"Report type not found: {report_type}")
|
||||
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=404, detail="Report type not found")
|
||||
|
||||
file_path = file_paths[report_type]
|
||||
if not os.path.exists(file_path):
|
||||
logger.error(f"Report file not found: {file_path}")
|
||||
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=404, detail="Report file not found")
|
||||
|
||||
logger.info(f"Successfully found report file: {file_path}")
|
||||
response = FileResponse(
|
||||
path=file_path,
|
||||
filename=os.path.basename(file_path),
|
||||
media_type="application/octet-stream"
|
||||
)
|
||||
logger.info(f"Download endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error downloading report: {str(e)}")
|
||||
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
import time
|
||||
import logging
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from app.salary_analytics.services.main import SalaryAnalyticsPipeline
|
||||
from app.salary_analytics.helpers.data_checks import check_data_loaded
|
||||
from app.salary_analytics.helpers.response_helpers import AnalysisResponse
|
||||
from app.salary_analytics.core.state import state
|
||||
from app.utils.logger import logger
|
||||
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/train/models", response_model=AnalysisResponse)
|
||||
async def train_models():
|
||||
"""Train salary prediction models."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting model training...")
|
||||
state.pipeline.train_salary_prediction_models()
|
||||
logger.info("Models trained successfully")
|
||||
response = AnalysisResponse(
|
||||
message="Models trained successfully"
|
||||
)
|
||||
logger.info(f"Model training endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in model training: {str(e)}")
|
||||
logger.info(f"Model training endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
from .main import SalaryAnalyticsPipeline
|
||||
from .data_loader import DataLoader
|
||||
from .keyword_analyzer import KeywordAnalyzer
|
||||
from .consistent_amount_analyzer import ConsistentAmountAnalyzer
|
||||
from .transaction_type_analyzer import TransactionTypeAnalyzer
|
||||
from .salary_earner_analyzer import SalaryEarnerAnalyzer
|
||||
from .salary_predictor import SalaryPredictor
|
||||
|
||||
|
||||
"""
|
||||
Salary Analytics Package
|
||||
A package for analyzing and predicting salary patterns from transaction data.
|
||||
"""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
__all__ = [
|
||||
"SalaryAnalyticsPipeline",
|
||||
"DataLoader",
|
||||
"KeywordAnalyzer",
|
||||
"ConsistentAmountAnalyzer",
|
||||
"TransactionTypeAnalyzer",
|
||||
"SalaryEarnerAnalyzer",
|
||||
"SalaryPredictor"
|
||||
]
|
||||
+1
-1
@@ -3,7 +3,7 @@ Consistent amount transaction analysis module.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from .config import MODEL_CONFIG
|
||||
from app.config import MODEL_CONFIG
|
||||
|
||||
class ConsistentAmountAnalyzer:
|
||||
def __init__(self, df):
|
||||
@@ -7,9 +7,8 @@ import pandas as pd
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import os
|
||||
from .config import DB_CONFIG, TABLE_NAME
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from app.config import SQLALCHEMY_DATABASE_URI, TABLE_NAME
|
||||
from app.utils.logger import logger
|
||||
|
||||
class DataLoader:
|
||||
def __init__(self):
|
||||
@@ -21,8 +20,7 @@ class DataLoader:
|
||||
"""Establish database connection."""
|
||||
try:
|
||||
logger.info("Attempting to connect to database...")
|
||||
DATABASE_URL = f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}"
|
||||
self.engine = create_engine(DATABASE_URL)
|
||||
self.engine = create_engine(SQLALCHEMY_DATABASE_URI)
|
||||
with self.engine.connect() as conn:
|
||||
# First check if table exists
|
||||
check_table = text(f"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = '{TABLE_NAME}')")
|
||||
+1
-1
@@ -4,7 +4,7 @@ Keyword-based salary transaction analysis module.
|
||||
|
||||
import re
|
||||
import pandas as pd
|
||||
from .config import SALARY_KEYWORDS
|
||||
from app.config import SALARY_KEYWORDS
|
||||
|
||||
class KeywordAnalyzer:
|
||||
def __init__(self, df):
|
||||
@@ -9,8 +9,7 @@ from .consistent_amount_analyzer import ConsistentAmountAnalyzer
|
||||
from .transaction_type_analyzer import TransactionTypeAnalyzer
|
||||
from .salary_earner_analyzer import SalaryEarnerAnalyzer
|
||||
from .salary_predictor import SalaryPredictor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from app.utils.logger import logger
|
||||
|
||||
class SalaryAnalyticsPipeline:
|
||||
def __init__(self):
|
||||
+2
-9
@@ -6,15 +6,8 @@ import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib_venn import venn3
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
from .config import MODEL_CONFIG, OUTPUT_PATHS
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
from app.config import MODEL_CONFIG, OUTPUT_PATHS
|
||||
from app.utils.logger import logger
|
||||
|
||||
class SalaryEarnerAnalyzer:
|
||||
def __init__(self, df):
|
||||
+1
-1
@@ -9,7 +9,7 @@ from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
|
||||
from joblib import dump
|
||||
from .config import OUTPUT_PATHS
|
||||
from app.config import OUTPUT_PATHS
|
||||
|
||||
class SalaryPredictor:
|
||||
def __init__(self, df):
|
||||
+1
-1
@@ -3,7 +3,7 @@ Transaction type analysis module.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from .config import MODEL_CONFIG
|
||||
from app.config import MODEL_CONFIG
|
||||
|
||||
class TransactionTypeAnalyzer:
|
||||
def __init__(self, df):
|
||||
@@ -0,0 +1,13 @@
|
||||
import logging
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
handlers=[
|
||||
# logging.StreamHandler(),
|
||||
logging.FileHandler("app.log", mode='a') # Log to file
|
||||
]
|
||||
)
|
||||
|
||||
logger = logging.getLogger("DetectionService")
|
||||
+8
-7
@@ -1,16 +1,17 @@
|
||||
services:
|
||||
digifi-analytics:
|
||||
build: .
|
||||
env_file:
|
||||
- .env
|
||||
ports:
|
||||
- "${APP_PORT:-4800}:8000"
|
||||
volumes:
|
||||
- ./output:/app/output
|
||||
environment:
|
||||
- DB_USER=salaryloan
|
||||
- DB_PASSWORD=salaryloan
|
||||
- DB_NAME=salaryloan
|
||||
- DB_PORT=10532
|
||||
- DB_HOST=dev-data.simbrellang.net
|
||||
- FLASK_APP=${FLASK_APP}
|
||||
- FLASK_ENV=${FLASK_ENV}
|
||||
- DATABASE_URL=postgresql+psycopg2://${DATABASE_USER}:${DATABASE_PASSWORD}@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_NAME}
|
||||
volumes:
|
||||
- .:/app
|
||||
- ./output:/app/output
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- salary_network
|
||||
|
||||
+1
-1
@@ -19,7 +19,7 @@ if config.config_file_name is not None:
|
||||
# from myapp import Base
|
||||
# target_metadata = Base.metadata
|
||||
from flask import current_app
|
||||
from salary_analytics.app.extensions import db
|
||||
from app.extensions import db
|
||||
|
||||
config.set_main_option('sqlalchemy.url',
|
||||
current_app.config.get('SQLALCHEMY_DATABASE_URI'))
|
||||
|
||||
+7
-1
@@ -1,4 +1,8 @@
|
||||
# Database and ORM
|
||||
sqlalchemy>=2.0.0
|
||||
oracledb>=1.0.0
|
||||
|
||||
|
||||
pandas>=1.5.0
|
||||
numpy>=1.21.0
|
||||
matplotlib>=3.5.0
|
||||
@@ -17,4 +21,6 @@ openpyxl>=3.0.10
|
||||
Flask>=2.0.0
|
||||
Flask-SQLAlchemy>=3.0.0
|
||||
Flask-Migrate>=4.0.0
|
||||
alembic>=1.8.0
|
||||
alembic>=1.8.0
|
||||
requests>=2.26.0
|
||||
gunicorn
|
||||
@@ -1,4 +1,4 @@
|
||||
import os
|
||||
from salary_analytics.app import create_app
|
||||
from app.salary_analytics import create_app
|
||||
|
||||
app = create_app()
|
||||
@@ -1,6 +0,0 @@
|
||||
"""
|
||||
Salary Analytics Package
|
||||
A package for analyzing and predicting salary patterns from transaction data.
|
||||
"""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
@@ -1,605 +0,0 @@
|
||||
"""
|
||||
FastAPI application for salary analytics.
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI, HTTPException, BackgroundTasks, UploadFile, File, Depends
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Dict, List, Union
|
||||
import os
|
||||
import socket
|
||||
import logging
|
||||
import pandas as pd
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from sqlalchemy import text, Table, Column, Integer, String, Float, DateTime, MetaData
|
||||
import numpy as np
|
||||
import warnings
|
||||
import time
|
||||
from .main import SalaryAnalyticsPipeline
|
||||
from .config import OUTPUT_PATHS, TABLE_NAME, BATCH_RESULTS_TABLE
|
||||
from .data_loader import DataLoader
|
||||
from .salary_predictor import SalaryPredictor
|
||||
from .salary_earner_analyzer import SalaryEarnerAnalyzer
|
||||
from .db_operations import DatabaseOperations
|
||||
from .salary_detect import SalaryDetect
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Suppress warnings
|
||||
warnings.filterwarnings('ignore', category=RuntimeWarning, module='numpy')
|
||||
pd.options.mode.chained_assignment = None
|
||||
|
||||
app = FastAPI(
|
||||
title="Salary Analytics API",
|
||||
description="API for analyzing and predicting salary patterns from transaction data",
|
||||
version="1.0.0"
|
||||
)
|
||||
|
||||
# Add CORS middleware
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"], # Allows all origins
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"], # Allows all methods
|
||||
allow_headers=["*"], # Allows all headers
|
||||
)
|
||||
|
||||
# Global pipeline instance
|
||||
pipeline = SalaryAnalyticsPipeline()
|
||||
|
||||
# Global variables to store loaded data and models
|
||||
data_loader = None
|
||||
df = None
|
||||
salary_predictor = None
|
||||
salary_earner_analyzer = None
|
||||
|
||||
salary_detect = SalaryDetect()
|
||||
|
||||
class AnalysisResponse(BaseModel):
|
||||
"""Response model for analysis endpoints."""
|
||||
message: str
|
||||
data: Optional[Dict] = None
|
||||
file_path: Optional[str] = None
|
||||
|
||||
class BatchResponse(BaseModel):
|
||||
"""Response model for batch processing."""
|
||||
batch_number: int
|
||||
total_batches: int
|
||||
processed_rows: int
|
||||
results_path: str
|
||||
message: str
|
||||
|
||||
def check_data_loaded():
|
||||
"""Check if data is loaded before running analytics."""
|
||||
if pipeline.df is None:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="No data loaded. Please load data first using the /load-data endpoint."
|
||||
)
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
"""Initialize the pipeline on startup."""
|
||||
try:
|
||||
logger.info("Initializing pipeline...")
|
||||
|
||||
# Start autonomous salary detection loop
|
||||
salary_detect.start()
|
||||
logger.info("Started autonomous salary detection loop.")
|
||||
|
||||
# Print network information
|
||||
hostname = socket.gethostname()
|
||||
ip_address = socket.gethostbyname(hostname)
|
||||
logger.info(f"Server running on hostname: {hostname}")
|
||||
logger.info(f"Server IP address: {ip_address}")
|
||||
logger.info(f"Server is accessible at:")
|
||||
logger.info(f"- http://localhost:8000")
|
||||
logger.info(f"- http://127.0.0.1:8000")
|
||||
logger.info(f"- http://{ip_address}:8000")
|
||||
logger.info("Pipeline initialized successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Error during startup: {str(e)}")
|
||||
raise
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""Root endpoint."""
|
||||
start_time = time.time()
|
||||
logger.info("Root endpoint accessed")
|
||||
response = {"message": "Welcome to Salary Analytics API"}
|
||||
logger.info(f"Root endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""Health check endpoint."""
|
||||
start_time = time.time()
|
||||
logger.info("Health check endpoint accessed")
|
||||
response = {"status": "healthy"}
|
||||
logger.info(f"Health check completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
|
||||
@app.post("/analyze/keyword", response_model=AnalysisResponse)
|
||||
async def analyze_keyword():
|
||||
"""Run keyword-based salary transaction analysis."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting keyword analysis...")
|
||||
data = pipeline.run_keyword_analysis()
|
||||
logger.info(f"Keyword analysis completed. Found {len(data)} matches")
|
||||
response = AnalysisResponse(
|
||||
message="Keyword analysis completed successfully",
|
||||
data={"count": len(data)}
|
||||
)
|
||||
logger.info(f"Keyword analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in keyword analysis: {str(e)}")
|
||||
logger.info(f"Keyword analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/analyze/consistent-amount", response_model=AnalysisResponse)
|
||||
async def analyze_consistent_amount():
|
||||
"""Run consistent amount transaction analysis."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting consistent amount analysis...")
|
||||
data = pipeline.run_consistent_amount_analysis()
|
||||
logger.info(f"Consistent amount analysis completed. Found {len(data)} matches")
|
||||
response = AnalysisResponse(
|
||||
message="Consistent amount analysis completed successfully",
|
||||
data={"count": len(data)}
|
||||
)
|
||||
logger.info(f"Consistent amount analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in consistent amount analysis: {str(e)}")
|
||||
logger.info(f"Consistent amount analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/analyze/transaction-type", response_model=AnalysisResponse)
|
||||
async def analyze_transaction_type():
|
||||
"""Run transaction type analysis."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting transaction type analysis...")
|
||||
data = pipeline.run_transaction_type_analysis()
|
||||
logger.info(f"Transaction type analysis completed. Found {len(data)} matches")
|
||||
response = AnalysisResponse(
|
||||
message="Transaction type analysis completed successfully",
|
||||
data={"count": len(data)}
|
||||
)
|
||||
logger.info(f"Transaction type analysis endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in transaction type analysis: {str(e)}")
|
||||
logger.info(f"Transaction type analysis endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/generate/reports", response_model=AnalysisResponse)
|
||||
async def generate_reports(background_tasks: BackgroundTasks):
|
||||
"""Generate salary earner reports."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting report generation...")
|
||||
reports = pipeline.generate_salary_earner_reports()
|
||||
logger.info("Reports generated successfully")
|
||||
response = AnalysisResponse(
|
||||
message="Reports generated successfully",
|
||||
data={
|
||||
"verified_salary_earners": len(reports['final_table']),
|
||||
"likely_salary_earners": len(reports['likely_salary_earner']),
|
||||
"high_earners": reports['total_high_earners']
|
||||
}
|
||||
)
|
||||
logger.info(f"Report generation endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in report generation: {str(e)}")
|
||||
logger.info(f"Report generation endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/train/models", response_model=AnalysisResponse)
|
||||
async def train_models():
|
||||
"""Train salary prediction models."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting model training...")
|
||||
pipeline.train_salary_prediction_models()
|
||||
logger.info("Models trained successfully")
|
||||
response = AnalysisResponse(
|
||||
message="Models trained successfully"
|
||||
)
|
||||
logger.info(f"Model training endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in model training: {str(e)}")
|
||||
logger.info(f"Model training endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/download/{report_type}")
|
||||
async def download_report(report_type: str):
|
||||
"""Download generated reports."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info(f"Attempting to download report: {report_type}")
|
||||
file_paths = {
|
||||
"high_earners": OUTPUT_PATHS["high_earner_details"],
|
||||
"likely_earners": OUTPUT_PATHS["likely_salary_earner"],
|
||||
"final_table": OUTPUT_PATHS["final_table"],
|
||||
"consistent_plot": OUTPUT_PATHS["consistent_earners_plot"],
|
||||
"inconsistent_plot": OUTPUT_PATHS["inconsistent_earners_plot"],
|
||||
"hypothesis_plot": OUTPUT_PATHS["hypothesis_overlap_plot"]
|
||||
}
|
||||
|
||||
if report_type not in file_paths:
|
||||
logger.error(f"Report type not found: {report_type}")
|
||||
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=404, detail="Report type not found")
|
||||
|
||||
file_path = file_paths[report_type]
|
||||
if not os.path.exists(file_path):
|
||||
logger.error(f"Report file not found: {file_path}")
|
||||
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=404, detail="Report file not found")
|
||||
|
||||
logger.info(f"Successfully found report file: {file_path}")
|
||||
response = FileResponse(
|
||||
path=file_path,
|
||||
filename=os.path.basename(file_path),
|
||||
media_type="application/octet-stream"
|
||||
)
|
||||
logger.info(f"Download endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error downloading report: {str(e)}")
|
||||
logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/run/pipeline", response_model=AnalysisResponse)
|
||||
async def run_full_pipeline():
|
||||
"""Run the complete salary analytics pipeline."""
|
||||
start_time = time.time()
|
||||
try:
|
||||
check_data_loaded()
|
||||
logger.info("Starting full pipeline...")
|
||||
success = pipeline.run_full_pipeline()
|
||||
if not success:
|
||||
logger.error("Pipeline failed")
|
||||
logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail="Pipeline failed")
|
||||
|
||||
logger.info("Pipeline completed successfully")
|
||||
response = AnalysisResponse(
|
||||
message="Pipeline completed successfully"
|
||||
)
|
||||
logger.info(f"Full pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error in pipeline: {str(e)}")
|
||||
logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/load-data")
|
||||
async def load_data(source: str = "db", file: Optional[UploadFile] = File(None)):
|
||||
"""
|
||||
Load data from either database or CSV file.
|
||||
|
||||
Args:
|
||||
source (str): Source of data ('db' or 'csv')
|
||||
file (UploadFile, optional): CSV file to load (required if source is 'csv')
|
||||
|
||||
Returns:
|
||||
dict: Status of data loading
|
||||
"""
|
||||
start_time = time.time()
|
||||
try:
|
||||
if source not in ['db', 'csv']:
|
||||
logger.error(f"Invalid source: {source}")
|
||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
|
||||
|
||||
if source == 'csv' and not file:
|
||||
logger.error("No file provided for CSV source")
|
||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
||||
|
||||
if source == 'csv':
|
||||
# Save uploaded file temporarily
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
|
||||
content = await file.read()
|
||||
temp_file.write(content)
|
||||
temp_file_path = temp_file.name
|
||||
|
||||
try:
|
||||
success = pipeline.load_data(source='csv', file_path=temp_file_path)
|
||||
finally:
|
||||
# Clean up temporary file
|
||||
os.unlink(temp_file_path)
|
||||
else:
|
||||
success = pipeline.load_data(source='db')
|
||||
|
||||
if not success:
|
||||
logger.error("Failed to load data")
|
||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail="Failed to load data")
|
||||
|
||||
response = {
|
||||
"status": "success",
|
||||
"message": f"Successfully loaded {len(pipeline.df)} rows of data",
|
||||
"columns": pipeline.df.columns.tolist(),
|
||||
"row_count": len(pipeline.df)
|
||||
}
|
||||
logger.info(f"Load data endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading data: {str(e)}")
|
||||
logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
async def get_file_if_csv(source: str, file: Optional[UploadFile] = File(None)):
|
||||
"""Dependency to handle file upload only when source is csv."""
|
||||
if source == 'csv' and not file:
|
||||
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
||||
return file
|
||||
|
||||
@app.post("/run/streaming-pipeline", response_model=List[BatchResponse])
|
||||
async def run_streaming_pipeline(
|
||||
source: str = "db",
|
||||
batch_size: int = 10000,
|
||||
file: Optional[Union[UploadFile, str]] = File(None)
|
||||
):
|
||||
"""
|
||||
Run the complete salary analytics pipeline in batches.
|
||||
|
||||
Args:
|
||||
source (str): Source of data ('db' or 'csv')
|
||||
batch_size (int): Number of rows to process in each batch
|
||||
file (UploadFile, optional): CSV file to load (required if source is 'csv')
|
||||
|
||||
Returns:
|
||||
List[BatchResponse]: List of responses for each batch processed
|
||||
"""
|
||||
start_time = time.time()
|
||||
try:
|
||||
if source not in ['db', 'csv']:
|
||||
logger.error(f"Invalid source: {source}")
|
||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
|
||||
|
||||
if source == 'csv' and not file:
|
||||
logger.error("No file provided for CSV source")
|
||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
|
||||
|
||||
# Initialize data loader
|
||||
data_loader = DataLoader()
|
||||
data_loader.chunk_size = batch_size
|
||||
|
||||
# Create output directory for batch results
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
batch_output_dir = os.path.join(os.path.dirname(OUTPUT_PATHS['final_table']), f"batch_results_{timestamp}")
|
||||
os.makedirs(batch_output_dir, exist_ok=True)
|
||||
|
||||
# Initialize database operations
|
||||
if not data_loader.connect():
|
||||
logger.error("Failed to connect to database")
|
||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail="Failed to connect to database")
|
||||
|
||||
db_ops = DatabaseOperations(data_loader.engine)
|
||||
if not db_ops.create_batch_results_table():
|
||||
logger.error("Failed to create batch results table")
|
||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail="Failed to create batch results table")
|
||||
|
||||
responses = []
|
||||
batch_number = 0
|
||||
batch_start_time = time.time()
|
||||
|
||||
def preprocess_chunk(chunk):
|
||||
"""Preprocess a chunk of data with the same logic as DataLoader."""
|
||||
# Convert dates
|
||||
chunk['trx_start_date'] = pd.to_datetime(chunk['trx_start_date'])
|
||||
chunk['trx_end_date'] = pd.to_datetime(chunk['trx_end_date'])
|
||||
|
||||
# Rename columns
|
||||
chunk = chunk.rename(columns={
|
||||
'd1': 'trx_type',
|
||||
'd2': 'trx_subtype',
|
||||
'd3': 'initiated_by',
|
||||
'd4': 'customer_id'
|
||||
})
|
||||
|
||||
chunk = chunk.dropna()
|
||||
|
||||
return chunk
|
||||
|
||||
if source == 'csv':
|
||||
# Save uploaded file temporarily
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
|
||||
content = await file.read()
|
||||
temp_file.write(content)
|
||||
temp_file_path = temp_file.name
|
||||
|
||||
try:
|
||||
# Process CSV in chunks
|
||||
for chunk in pd.read_csv(temp_file_path, chunksize=batch_size):
|
||||
batch_number += 1
|
||||
logger.info(f"Processing batch {batch_number}")
|
||||
|
||||
# Preprocess chunk
|
||||
chunk = preprocess_chunk(chunk)
|
||||
|
||||
# Run pipeline on chunk
|
||||
pipeline = SalaryAnalyticsPipeline()
|
||||
pipeline.df = chunk
|
||||
|
||||
try:
|
||||
batch_start_time = time.time()
|
||||
# Run analyses
|
||||
pipeline.run_keyword_analysis()
|
||||
pipeline.run_consistent_amount_analysis()
|
||||
pipeline.run_transaction_type_analysis()
|
||||
|
||||
# Generate reports
|
||||
reports = pipeline.generate_salary_earner_reports()
|
||||
|
||||
# Add batch metadata to results
|
||||
results_df = reports['final_table'].copy()
|
||||
results_df['batch_number'] = batch_number
|
||||
results_df['total_batches'] = -1 # Unknown for CSV
|
||||
results_df['processed_at'] = datetime.now()
|
||||
|
||||
# Save batch results to CSV
|
||||
batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
|
||||
results_df.to_csv(batch_results_path, index=False)
|
||||
|
||||
# Save to database
|
||||
db_ops.save_batch_to_db(
|
||||
batch_number=batch_number,
|
||||
total_batches=-1, # Unknown for CSV
|
||||
results_df=results_df,
|
||||
status="success"
|
||||
)
|
||||
|
||||
logger.info(f"Batch {batch_number} processed in {time.time() - batch_start_time:.2f} seconds")
|
||||
|
||||
responses.append(BatchResponse(
|
||||
batch_number=batch_number,
|
||||
total_batches=-1, # Unknown for CSV
|
||||
processed_rows=len(chunk),
|
||||
results_path=batch_results_path,
|
||||
message=f"Successfully processed batch {batch_number}"
|
||||
))
|
||||
except Exception as e:
|
||||
error_message = str(e)
|
||||
logger.error(f"Error processing batch {batch_number}: {error_message}")
|
||||
|
||||
# Save error to database
|
||||
db_ops.save_batch_to_db(
|
||||
batch_number=batch_number,
|
||||
total_batches=-1,
|
||||
results_df=pd.DataFrame(), # Empty DataFrame for error case
|
||||
status="error"
|
||||
)
|
||||
|
||||
responses.append(BatchResponse(
|
||||
batch_number=batch_number,
|
||||
total_batches=-1,
|
||||
processed_rows=len(chunk),
|
||||
results_path="",
|
||||
message=f"Error processing batch {batch_number}: {error_message}"
|
||||
))
|
||||
finally:
|
||||
# Clean up temporary file
|
||||
os.unlink(temp_file_path)
|
||||
else:
|
||||
# Process database in chunks
|
||||
if not data_loader.connect():
|
||||
raise HTTPException(status_code=500, detail="Failed to connect to database")
|
||||
|
||||
# Get total row count
|
||||
with data_loader.engine.connect() as conn:
|
||||
count_query = text(f"SELECT COUNT(*) FROM {TABLE_NAME}")
|
||||
total_rows = conn.execute(count_query).scalar()
|
||||
|
||||
total_batches = (total_rows + batch_size - 1) // batch_size
|
||||
offset = 0
|
||||
|
||||
while offset < total_rows:
|
||||
batch_number += 1
|
||||
logger.info(f"Processing batch {batch_number} of {total_batches}")
|
||||
|
||||
# Load chunk from database
|
||||
query = f"SELECT * FROM {TABLE_NAME} LIMIT {batch_size} OFFSET {offset}"
|
||||
chunk = pd.read_sql(query, data_loader.engine)
|
||||
|
||||
if chunk.empty:
|
||||
break
|
||||
|
||||
# Preprocess chunk
|
||||
chunk = preprocess_chunk(chunk)
|
||||
|
||||
# Run pipeline on chunk
|
||||
pipeline = SalaryAnalyticsPipeline()
|
||||
pipeline.df = chunk
|
||||
|
||||
try:
|
||||
batch_start_time = time.time()
|
||||
# Run analyses
|
||||
pipeline.run_keyword_analysis()
|
||||
pipeline.run_consistent_amount_analysis()
|
||||
pipeline.run_transaction_type_analysis()
|
||||
|
||||
# Generate reports
|
||||
reports = pipeline.generate_salary_earner_reports()
|
||||
|
||||
# Add batch metadata to results
|
||||
results_df = reports['final_table'].copy()
|
||||
results_df['batch_number'] = batch_number
|
||||
results_df['total_batches'] = total_batches
|
||||
results_df['processed_at'] = datetime.now()
|
||||
|
||||
# Save batch results to CSV
|
||||
batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
|
||||
results_df.to_csv(batch_results_path, index=False)
|
||||
|
||||
# Save to database
|
||||
db_ops.save_batch_to_db(
|
||||
batch_number=batch_number,
|
||||
total_batches=total_batches,
|
||||
results_df=results_df,
|
||||
status="success"
|
||||
)
|
||||
|
||||
logger.info(f"Batch {batch_number} of {total_batches} processed in {time.time() - batch_start_time:.2f} seconds")
|
||||
|
||||
responses.append(BatchResponse(
|
||||
batch_number=batch_number,
|
||||
total_batches=total_batches,
|
||||
processed_rows=len(chunk),
|
||||
results_path=batch_results_path,
|
||||
message=f"Successfully processed batch {batch_number} of {total_batches}"
|
||||
))
|
||||
except Exception as e:
|
||||
error_message = str(e)
|
||||
logger.error(f"Error processing batch {batch_number}: {error_message}")
|
||||
|
||||
# Save error to database
|
||||
db_ops.save_batch_to_db(
|
||||
batch_number=batch_number,
|
||||
total_batches=total_batches,
|
||||
results_df=pd.DataFrame(), # Empty DataFrame for error case
|
||||
status="error"
|
||||
)
|
||||
|
||||
responses.append(BatchResponse(
|
||||
batch_number=batch_number,
|
||||
total_batches=total_batches,
|
||||
processed_rows=len(chunk),
|
||||
results_path="",
|
||||
message=f"Error processing batch {batch_number}: {error_message}"
|
||||
))
|
||||
|
||||
offset += batch_size
|
||||
|
||||
logger.info(f"Streaming pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
|
||||
return responses
|
||||
except Exception as e:
|
||||
logger.error(f"Error in streaming pipeline: {str(e)}")
|
||||
logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
Reference in New Issue
Block a user