Update config.py

[add]: Database config
Create temp_simbrella_lien.py
2025-09-24 14:48:57 +01:00 · 2025-09-24 10:59:43 +01:00 · 2025-09-19 12:28:49 +01:00 · 2025-09-19 12:28:16 +01:00 · 2025-09-19 12:25:42 +01:00 · 2025-09-10 12:31:31 +01:00
46 changed files with 1896 additions and 675 deletions
@@ -1,20 +1,26 @@
 FROM python:3.11-slim
 # Set the working directory in the container
 WORKDIR /app
 # Copy the current directory contents into the container at /app
 COPY . /app
 RUN apt-get update && apt-get install -y libpq-dev && rm -rf /var/lib/apt/lists/*
 COPY requirements.txt .
 RUN pip install -r requirements.txt
 COPY salary_analytics/ ./salary_analytics/
 RUN mkdir -p output/csv output/plots output/models
-ENV PYTHONPATH=/app
+
-ENV HOST=0.0.0.0
+# ENV FLASK_APP=wsgi.py
-ENV PORT=8000
+
 ENV FLASK_APP=run.py. 
 ENV FLASK_RUN_HOST=0.0.0.0
 EXPOSE 8000
-CMD ["uvicorn", "salary_analytics.api:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] 
+# CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:8000", "wsgi:wsgi_app"]
 CMD ["uvicorn", "run:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
@@ -0,0 +1,644 @@
 2025-09-10 10:21:11,016 - INFO - Initializing pipeline...
 2025-09-10 10:21:11,019 - INFO - [2025-09-10 10:21:11] Detecting salary...
 2025-09-10 10:21:11,019 - INFO - Started autonomous salary detection loop.
 2025-09-10 10:21:11,030 - INFO - Server running on hostname: 22bad35c69c3
 2025-09-10 10:21:11,035 - INFO - Server IP address: 172.25.0.2
 2025-09-10 10:21:11,036 - INFO - Server is accessible at:
 2025-09-10 10:21:11,037 - INFO - - http://localhost:8000
 2025-09-10 10:21:11,039 - INFO - - http://127.0.0.1:8000
 2025-09-10 10:21:11,040 - INFO - - http://172.25.0.2:8000
 2025-09-10 10:21:11,041 - INFO - Pipeline initialized successfully
 2025-09-10 10:21:11,532 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:21:11,533 - INFO - [2025-09-10 10:21:11] Salary detection complete
 2025-09-10 10:21:19,451 - INFO - Initializing SalaryAnalyticsPipeline
 2025-09-10 10:21:19,452 - INFO - Starting data loading process
 2025-09-10 10:21:19,453 - INFO - No database connection. Attempting to connect...
 2025-09-10 10:21:19,455 - INFO - Attempting to connect to database...
 2025-09-10 10:21:27,271 - INFO - Table customer_account_transaction_hx exists with 5354307 rows
 2025-09-10 10:21:27,488 - INFO - Connected successfully to database!
 2025-09-10 10:21:27,715 - INFO - Loading data from table: customer_account_transaction_hx
 2025-09-10 10:21:28,614 - INFO - Total rows to process: 5354307
 2025-09-10 10:21:28,852 - INFO - Loading chunk starting at offset 0
 2025-09-10 10:21:40,251 - INFO - Loading chunk starting at offset 10000
 2025-09-10 10:21:46,981 - INFO - Loading chunk starting at offset 20000
 2025-09-10 10:21:57,010 - INFO - Loading chunk starting at offset 30000
 2025-09-10 10:22:04,792 - INFO - Loading chunk starting at offset 40000
 2025-09-10 10:22:09,599 - INFO - Loading chunk starting at offset 50000
 2025-09-10 10:22:15,016 - INFO - Loading chunk starting at offset 60000
 2025-09-10 10:22:18,613 - INFO - Loading chunk starting at offset 70000
 2025-09-10 10:22:22,007 - INFO - Loading chunk starting at offset 80000
 2025-09-10 10:22:28,397 - INFO - Loading chunk starting at offset 90000
 2025-09-10 10:22:36,081 - INFO - Loading chunk starting at offset 100000
 2025-09-10 10:22:48,738 - INFO - Loading chunk starting at offset 110000
 2025-09-10 10:23:01,521 - INFO - Loading chunk starting at offset 120000
 2025-09-10 10:23:11,532 - INFO - [2025-09-10 10:23:11] Detecting salary...
 2025-09-10 10:23:13,504 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:23:13,504 - INFO - [2025-09-10 10:23:13] Salary detection complete
 2025-09-10 10:23:16,348 - INFO - Loading chunk starting at offset 130000
 2025-09-10 10:23:23,259 - INFO - Loading chunk starting at offset 140000
 2025-09-10 10:23:29,284 - INFO - Loading chunk starting at offset 150000
 2025-09-10 10:23:41,579 - INFO - Loading chunk starting at offset 160000
 2025-09-10 10:23:54,788 - INFO - Loading chunk starting at offset 170000
 2025-09-10 10:24:19,519 - INFO - Loading chunk starting at offset 180000
 2025-09-10 10:24:31,657 - INFO - Loading chunk starting at offset 190000
 2025-09-10 10:24:46,130 - INFO - Loading chunk starting at offset 200000
 2025-09-10 10:24:57,289 - INFO - Loading chunk starting at offset 210000
 2025-09-10 10:25:07,964 - INFO - Loading chunk starting at offset 220000
 2025-09-10 10:25:13,503 - INFO - [2025-09-10 10:25:13] Detecting salary...
 2025-09-10 10:25:15,477 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:25:15,478 - INFO - [2025-09-10 10:25:15] Salary detection complete
 2025-09-10 10:25:17,793 - INFO - Loading chunk starting at offset 230000
 2025-09-10 10:25:25,026 - INFO - Loading chunk starting at offset 240000
 2025-09-10 10:25:32,079 - INFO - Loading chunk starting at offset 250000
 2025-09-10 10:25:39,990 - INFO - Loading chunk starting at offset 260000
 2025-09-10 10:25:50,492 - INFO - Loading chunk starting at offset 270000
 2025-09-10 10:26:00,181 - INFO - Loading chunk starting at offset 280000
 2025-09-10 10:26:10,138 - INFO - Loading chunk starting at offset 290000
 2025-09-10 10:26:20,437 - INFO - Loading chunk starting at offset 300000
 2025-09-10 10:26:34,962 - INFO - Loading chunk starting at offset 310000
 2025-09-10 10:26:46,248 - INFO - Loading chunk starting at offset 320000
 2025-09-10 10:26:55,275 - INFO - Loading chunk starting at offset 330000
 2025-09-10 10:27:09,733 - INFO - Loading chunk starting at offset 340000
 2025-09-10 10:27:15,480 - INFO - [2025-09-10 10:27:15] Detecting salary...
 2025-09-10 10:27:16,553 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:27:16,553 - INFO - [2025-09-10 10:27:16] Salary detection complete
 2025-09-10 10:27:26,568 - INFO - Loading chunk starting at offset 350000
 2025-09-10 10:27:36,472 - INFO - Loading chunk starting at offset 360000
 2025-09-10 10:27:44,909 - INFO - Loading chunk starting at offset 370000
 2025-09-10 10:27:54,557 - INFO - Loading chunk starting at offset 380000
 2025-09-10 10:28:00,588 - INFO - Loading chunk starting at offset 390000
 2025-09-10 10:28:05,957 - INFO - Loading chunk starting at offset 400000
 2025-09-10 10:28:12,058 - INFO - Loading chunk starting at offset 410000
 2025-09-10 10:28:19,248 - INFO - Loading chunk starting at offset 420000
 2025-09-10 10:28:29,938 - INFO - Loading chunk starting at offset 430000
 2025-09-10 10:28:51,274 - INFO - Loading chunk starting at offset 440000
 2025-09-10 10:28:57,720 - INFO - Loading chunk starting at offset 450000
 2025-09-10 10:29:02,117 - INFO - Loading chunk starting at offset 460000
 2025-09-10 10:29:11,555 - INFO - Loading chunk starting at offset 470000
 2025-09-10 10:29:16,565 - INFO - [2025-09-10 10:29:16] Detecting salary...
 2025-09-10 10:29:17,452 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:29:17,455 - INFO - [2025-09-10 10:29:17] Salary detection complete
 2025-09-10 10:29:30,145 - INFO - Loading chunk starting at offset 480000
 2025-09-10 10:29:39,048 - INFO - Loading chunk starting at offset 490000
 2025-09-10 10:29:43,412 - INFO - Loading chunk starting at offset 500000
 2025-09-10 10:29:47,415 - INFO - Loading chunk starting at offset 510000
 2025-09-10 10:29:52,149 - INFO - Loading chunk starting at offset 520000
 2025-09-10 10:29:58,331 - INFO - Loading chunk starting at offset 530000
 2025-09-10 10:30:04,933 - INFO - Loading chunk starting at offset 540000
 2025-09-10 10:30:11,934 - INFO - Loading chunk starting at offset 550000
 2025-09-10 10:30:19,315 - INFO - Loading chunk starting at offset 560000
 2025-09-10 10:30:25,683 - INFO - Loading chunk starting at offset 570000
 2025-09-10 10:30:33,577 - INFO - Loading chunk starting at offset 580000
 2025-09-10 10:30:40,363 - INFO - Loading chunk starting at offset 590000
 2025-09-10 10:30:46,205 - INFO - Loading chunk starting at offset 600000
 2025-09-10 10:30:53,957 - INFO - Loading chunk starting at offset 610000
 2025-09-10 10:31:08,083 - INFO - Loading chunk starting at offset 620000
 2025-09-10 10:31:17,454 - INFO - [2025-09-10 10:31:17] Detecting salary...
 2025-09-10 10:31:17,989 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:31:17,989 - INFO - [2025-09-10 10:31:17] Salary detection complete
 2025-09-10 10:31:18,901 - INFO - Loading chunk starting at offset 630000
 2025-09-10 10:31:27,551 - INFO - Loading chunk starting at offset 640000
 2025-09-10 10:31:33,662 - INFO - Loading chunk starting at offset 650000
 2025-09-10 10:31:40,107 - INFO - Loading chunk starting at offset 660000
 2025-09-10 10:31:49,938 - INFO - Loading chunk starting at offset 670000
 2025-09-10 10:31:57,777 - INFO - Loading chunk starting at offset 680000
 2025-09-10 10:32:06,629 - INFO - Loading chunk starting at offset 690000
 2025-09-10 10:32:15,281 - INFO - Loading chunk starting at offset 700000
 2025-09-10 10:32:25,328 - INFO - Loading chunk starting at offset 710000
 2025-09-10 10:32:40,639 - INFO - Loading chunk starting at offset 720000
 2025-09-10 10:32:51,078 - INFO - Loading chunk starting at offset 730000
 2025-09-10 10:33:03,924 - INFO - Loading chunk starting at offset 740000
 2025-09-10 10:33:13,007 - INFO - Loading chunk starting at offset 750000
 2025-09-10 10:33:17,989 - INFO - [2025-09-10 10:33:17] Detecting salary...
 2025-09-10 10:33:19,698 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:33:20,039 - INFO - [2025-09-10 10:33:20] Salary detection complete
 2025-09-10 10:33:20,857 - INFO - Loading chunk starting at offset 760000
 2025-09-10 10:33:30,803 - INFO - Loading chunk starting at offset 770000
 2025-09-10 10:33:38,162 - INFO - Loading chunk starting at offset 780000
 2025-09-10 10:33:44,504 - INFO - Loading chunk starting at offset 790000
 2025-09-10 10:33:50,063 - INFO - Loading chunk starting at offset 800000
 2025-09-10 10:33:57,957 - INFO - Loading chunk starting at offset 810000
 2025-09-10 10:34:05,256 - INFO - Loading chunk starting at offset 820000
 2025-09-10 10:34:12,212 - INFO - Loading chunk starting at offset 830000
 2025-09-10 10:34:20,478 - INFO - Loading chunk starting at offset 840000
 2025-09-10 10:34:28,018 - INFO - Loading chunk starting at offset 850000
 2025-09-10 10:34:36,413 - INFO - Loading chunk starting at offset 860000
 2025-09-10 10:34:50,168 - INFO - Loading chunk starting at offset 870000
 2025-09-10 10:35:13,867 - INFO - Loading chunk starting at offset 880000
 2025-09-10 10:35:20,391 - INFO - [2025-09-10 10:35:20] Detecting salary...
 2025-09-10 10:35:21,791 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:35:21,794 - INFO - [2025-09-10 10:35:21] Salary detection complete
 2025-09-10 10:35:28,918 - INFO - Loading chunk starting at offset 890000
 2025-09-10 10:35:40,300 - INFO - Loading chunk starting at offset 900000
 2025-09-10 10:36:05,763 - INFO - Loading chunk starting at offset 910000
 2025-09-10 10:36:15,199 - INFO - Loading chunk starting at offset 920000
 2025-09-10 10:36:46,036 - INFO - Loading chunk starting at offset 930000
 2025-09-10 10:36:53,221 - INFO - Loading chunk starting at offset 940000
 2025-09-10 10:37:06,754 - INFO - Loading chunk starting at offset 950000
 2025-09-10 10:37:15,558 - INFO - Loading chunk starting at offset 960000
 2025-09-10 10:37:21,798 - INFO - [2025-09-10 10:37:21] Detecting salary...
 2025-09-10 10:37:22,954 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:37:22,955 - INFO - [2025-09-10 10:37:22] Salary detection complete
 2025-09-10 10:37:29,088 - INFO - Loading chunk starting at offset 970000
 2025-09-10 10:37:54,730 - INFO - Loading chunk starting at offset 980000
 2025-09-10 10:38:13,667 - INFO - Loading chunk starting at offset 990000
 2025-09-10 10:38:27,880 - INFO - Loading chunk starting at offset 1000000
 2025-09-10 10:39:07,546 - INFO - Loading chunk starting at offset 1010000
 2025-09-10 10:39:17,369 - INFO - Loading chunk starting at offset 1020000
 2025-09-10 10:39:23,176 - INFO - [2025-09-10 10:39:23] Detecting salary...
 2025-09-10 10:39:24,444 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:39:24,445 - INFO - [2025-09-10 10:39:24] Salary detection complete
 2025-09-10 10:39:26,048 - INFO - Loading chunk starting at offset 1030000
 2025-09-10 10:39:45,494 - INFO - Loading chunk starting at offset 1040000
 2025-09-10 10:39:50,480 - INFO - Loading chunk starting at offset 1050000
 2025-09-10 10:39:54,793 - INFO - Loading chunk starting at offset 1060000
 2025-09-10 10:39:59,415 - INFO - Loading chunk starting at offset 1070000
 2025-09-10 10:40:04,306 - INFO - Loading chunk starting at offset 1080000
 2025-09-10 10:40:10,517 - INFO - Loading chunk starting at offset 1090000
 2025-09-10 10:40:15,671 - INFO - Loading chunk starting at offset 1100000
 2025-09-10 10:40:21,662 - INFO - Loading chunk starting at offset 1110000
 2025-09-10 10:40:30,551 - INFO - Loading chunk starting at offset 1120000
 2025-09-10 10:40:47,391 - INFO - Loading chunk starting at offset 1130000
 2025-09-10 10:40:55,609 - INFO - Loading chunk starting at offset 1140000
 2025-09-10 10:41:04,496 - INFO - Loading chunk starting at offset 1150000
 2025-09-10 10:41:14,608 - INFO - Loading chunk starting at offset 1160000
 2025-09-10 10:41:24,447 - INFO - [2025-09-10 10:41:24] Detecting salary...
 2025-09-10 10:41:28,537 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:41:28,548 - INFO - [2025-09-10 10:41:28] Salary detection complete
 2025-09-10 10:42:00,867 - INFO - Loading chunk starting at offset 1170000
 2025-09-10 10:42:13,069 - INFO - Loading chunk starting at offset 1180000
 2025-09-10 10:42:21,593 - INFO - Loading chunk starting at offset 1190000
 2025-09-10 10:42:32,011 - INFO - Loading chunk starting at offset 1200000
 2025-09-10 10:42:37,982 - INFO - Loading chunk starting at offset 1210000
 2025-09-10 10:42:45,458 - INFO - Loading chunk starting at offset 1220000
 2025-09-10 10:42:54,545 - INFO - Loading chunk starting at offset 1230000
 2025-09-10 10:43:15,705 - INFO - Loading chunk starting at offset 1240000
 2025-09-10 10:43:28,549 - INFO - [2025-09-10 10:43:28] Detecting salary...
 2025-09-10 10:43:31,640 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:43:31,641 - INFO - [2025-09-10 10:43:31] Salary detection complete
 2025-09-10 10:43:43,344 - INFO - Loading chunk starting at offset 1250000
 2025-09-10 10:43:50,004 - INFO - Loading chunk starting at offset 1260000
 2025-09-10 10:43:59,430 - INFO - Loading chunk starting at offset 1270000
 2025-09-10 10:44:07,478 - INFO - Loading chunk starting at offset 1280000
 2025-09-10 10:44:18,927 - INFO - Loading chunk starting at offset 1290000
 2025-09-10 10:44:28,523 - INFO - Loading chunk starting at offset 1300000
 2025-09-10 10:44:38,629 - INFO - Loading chunk starting at offset 1310000
 2025-09-10 10:44:56,977 - INFO - Loading chunk starting at offset 1320000
 2025-09-10 10:45:09,203 - INFO - Loading chunk starting at offset 1330000
 2025-09-10 10:45:18,657 - INFO - Loading chunk starting at offset 1340000
 2025-09-10 10:45:27,201 - INFO - Loading chunk starting at offset 1350000
 2025-09-10 10:45:31,641 - INFO - [2025-09-10 10:45:31] Detecting salary...
 2025-09-10 10:45:32,261 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:45:32,262 - INFO - [2025-09-10 10:45:32] Salary detection complete
 2025-09-10 10:45:35,308 - INFO - Loading chunk starting at offset 1360000
 2025-09-10 10:45:44,783 - INFO - Loading chunk starting at offset 1370000
 2025-09-10 10:45:52,048 - INFO - Loading chunk starting at offset 1380000
 2025-09-10 10:45:58,491 - INFO - Loading chunk starting at offset 1390000
 2025-09-10 10:46:13,270 - INFO - Loading chunk starting at offset 1400000
 2025-09-10 10:46:25,221 - INFO - Loading chunk starting at offset 1410000
 2025-09-10 10:46:34,358 - INFO - Loading chunk starting at offset 1420000
 2025-09-10 10:46:38,777 - INFO - Loading chunk starting at offset 1430000
 2025-09-10 10:46:42,534 - INFO - Loading chunk starting at offset 1440000
 2025-09-10 10:46:46,147 - INFO - Loading chunk starting at offset 1450000
 2025-09-10 10:46:50,727 - INFO - Loading chunk starting at offset 1460000
 2025-09-10 10:46:55,939 - INFO - Loading chunk starting at offset 1470000
 2025-09-10 10:47:01,696 - INFO - Loading chunk starting at offset 1480000
 2025-09-10 10:47:07,411 - INFO - Loading chunk starting at offset 1490000
 2025-09-10 10:47:13,449 - INFO - Loading chunk starting at offset 1500000
 2025-09-10 10:47:23,058 - INFO - Loading chunk starting at offset 1510000
 2025-09-10 10:47:30,097 - INFO - Loading chunk starting at offset 1520000
 2025-09-10 10:47:32,259 - INFO - [2025-09-10 10:47:32] Detecting salary...
 2025-09-10 10:47:32,923 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:47:32,924 - INFO - [2025-09-10 10:47:32] Salary detection complete
 2025-09-10 10:47:36,423 - INFO - Loading chunk starting at offset 1530000
 2025-09-10 10:47:45,086 - INFO - Loading chunk starting at offset 1540000
 2025-09-10 10:47:57,079 - INFO - Loading chunk starting at offset 1550000
 2025-09-10 10:48:19,741 - INFO - Loading chunk starting at offset 1560000
 2025-09-10 10:48:41,300 - INFO - Loading chunk starting at offset 1570000
 2025-09-10 10:48:51,349 - INFO - Loading chunk starting at offset 1580000
 2025-09-10 10:48:58,892 - INFO - Loading chunk starting at offset 1590000
 2025-09-10 10:49:04,857 - INFO - Loading chunk starting at offset 1600000
 2025-09-10 10:49:10,299 - INFO - Loading chunk starting at offset 1610000
 2025-09-10 10:49:16,650 - INFO - Loading chunk starting at offset 1620000
 2025-09-10 10:49:23,107 - INFO - Loading chunk starting at offset 1630000
 2025-09-10 10:49:32,320 - INFO - Loading chunk starting at offset 1640000
 2025-09-10 10:49:32,927 - INFO - [2025-09-10 10:49:32] Detecting salary...
 2025-09-10 10:49:33,484 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:49:33,485 - INFO - [2025-09-10 10:49:33] Salary detection complete
 2025-09-10 10:49:40,707 - INFO - Loading chunk starting at offset 1650000
 2025-09-10 10:49:50,677 - INFO - Loading chunk starting at offset 1660000
 2025-09-10 10:49:59,664 - INFO - Loading chunk starting at offset 1670000
 2025-09-10 10:50:07,452 - INFO - Loading chunk starting at offset 1680000
 2025-09-10 10:50:13,583 - INFO - Loading chunk starting at offset 1690000
 2025-09-10 10:50:20,858 - INFO - Loading chunk starting at offset 1700000
 2025-09-10 10:50:27,519 - INFO - Loading chunk starting at offset 1710000
 2025-09-10 10:50:34,012 - INFO - Loading chunk starting at offset 1720000
 2025-09-10 10:50:39,628 - INFO - Loading chunk starting at offset 1730000
 2025-09-10 10:50:50,012 - INFO - Loading chunk starting at offset 1740000
 2025-09-10 10:51:06,647 - INFO - Loading chunk starting at offset 1750000
 2025-09-10 10:51:17,401 - INFO - Loading chunk starting at offset 1760000
 2025-09-10 10:51:26,360 - INFO - Loading chunk starting at offset 1770000
 2025-09-10 10:51:33,485 - INFO - [2025-09-10 10:51:33] Detecting salary...
 2025-09-10 10:51:55,392 - INFO - Initializing pipeline...
 2025-09-10 10:51:55,395 - INFO - [2025-09-10 10:51:55] Detecting salary...
 2025-09-10 10:51:55,395 - INFO - Started autonomous salary detection loop.
 2025-09-10 10:51:55,409 - INFO - Server running on hostname: 22bad35c69c3
 2025-09-10 10:51:55,416 - INFO - Server IP address: 172.25.0.2
 2025-09-10 10:51:55,424 - INFO - Server is accessible at:
 2025-09-10 10:51:55,426 - INFO - - http://localhost:8000
 2025-09-10 10:51:55,444 - INFO - - http://127.0.0.1:8000
 2025-09-10 10:51:55,455 - INFO - - http://172.25.0.2:8000
 2025-09-10 10:51:55,456 - INFO - Pipeline initialized successfully
 2025-09-10 10:51:56,024 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:51:56,024 - INFO - [2025-09-10 10:51:56] Salary detection complete
 2025-09-10 10:52:36,450 - INFO - Initializing SalaryAnalyticsPipeline
 2025-09-10 10:52:36,451 - INFO - Starting data loading process
 2025-09-10 10:52:36,452 - INFO - No database connection. Attempting to connect...
 2025-09-10 10:52:36,452 - INFO - Attempting to connect to database...
 2025-09-10 10:52:36,538 - ERROR - Error connecting to database: No module named 'oracledb'
 2025-09-10 10:52:36,539 - ERROR - Failed to establish database connection
 2025-09-10 10:52:36,539 - ERROR - Failed to load data
 2025-09-10 10:52:36,540 - ERROR - Failed to load data
 2025-09-10 10:52:36,541 - INFO - Load data endpoint failed after 0.09 seconds
 2025-09-10 10:52:36,541 - ERROR - Error loading data: 500: Failed to load data
 2025-09-10 10:52:36,542 - INFO - Load data endpoint failed after 0.09 seconds
 2025-09-10 10:52:58,149 - INFO - Shutting down Salary Analytics API...
 2025-09-10 10:53:10,697 - INFO - generated new fontManager
 2025-09-10 10:53:16,039 - INFO - Initializing pipeline...
 2025-09-10 10:53:16,041 - INFO - [2025-09-10 10:53:16] Detecting salary...
 2025-09-10 10:53:16,042 - INFO - Started autonomous salary detection loop.
 2025-09-10 10:53:16,055 - INFO - Server running on hostname: 1c5d2376fb2a
 2025-09-10 10:53:16,056 - INFO - Server IP address: 172.25.0.2
 2025-09-10 10:53:16,057 - INFO - Server is accessible at:
 2025-09-10 10:53:16,058 - INFO - - http://localhost:8000
 2025-09-10 10:53:16,059 - INFO - - http://127.0.0.1:8000
 2025-09-10 10:53:16,060 - INFO - - http://172.25.0.2:8000
 2025-09-10 10:53:16,062 - INFO - Pipeline initialized successfully
 2025-09-10 10:53:16,812 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 10:53:16,813 - INFO - [2025-09-10 10:53:16] Salary detection complete
 2025-09-10 10:53:25,477 - INFO - Initializing SalaryAnalyticsPipeline
 2025-09-10 10:53:25,482 - INFO - Starting data loading process
 2025-09-10 10:53:25,483 - INFO - No database connection. Attempting to connect...
 2025-09-10 10:53:25,484 - INFO - Attempting to connect to database...
 2025-09-10 10:53:25,679 - ERROR - Error connecting to database: No module named 'oracledb'
 2025-09-10 10:53:25,679 - ERROR - Failed to establish database connection
 2025-09-10 10:53:25,680 - ERROR - Failed to load data
 2025-09-10 10:53:25,680 - ERROR - Failed to load data
 2025-09-10 10:53:25,681 - INFO - Load data endpoint failed after 0.20 seconds
 2025-09-10 10:53:25,682 - ERROR - Error loading data: 500: Failed to load data
 2025-09-10 10:53:25,682 - INFO - Load data endpoint failed after 0.20 seconds
 2025-09-10 10:53:46,324 - INFO - Shutting down Salary Analytics API...
 2025-09-10 11:07:49,841 - INFO - generated new fontManager
 2025-09-10 11:07:59,771 - INFO - Initializing pipeline...
 2025-09-10 11:07:59,774 - INFO - [2025-09-10 11:07:59] Detecting salary...
 2025-09-10 11:07:59,774 - INFO - Started autonomous salary detection loop.
 2025-09-10 11:07:59,893 - INFO - Server running on hostname: 0b21809edf52
 2025-09-10 11:07:59,894 - INFO - Server IP address: 172.25.0.2
 2025-09-10 11:07:59,895 - INFO - Server is accessible at:
 2025-09-10 11:07:59,915 - INFO - - http://localhost:8000
 2025-09-10 11:07:59,917 - INFO - - http://127.0.0.1:8000
 2025-09-10 11:07:59,918 - INFO - - http://172.25.0.2:8000
 2025-09-10 11:07:59,919 - INFO - Pipeline initialized successfully
 2025-09-10 11:08:01,268 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:08:01,269 - INFO - [2025-09-10 11:08:01] Salary detection complete
 2025-09-10 11:08:15,668 - INFO - Initializing SalaryAnalyticsPipeline
 2025-09-10 11:08:15,669 - INFO - Starting data loading process
 2025-09-10 11:08:15,671 - INFO - No database connection. Attempting to connect...
 2025-09-10 11:08:15,672 - INFO - Attempting to connect to database...
 2025-09-10 11:08:20,076 - ERROR - Error connecting to database: (oracledb.exceptions.DatabaseError) ORA-00936: missing expression
 Help: https://docs.oracle.com/error-help/db/ora-00936/
 [SQL: SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = 'customer_account_transaction_hx')]
 (Background on this error at: https://sqlalche.me/e/20/4xp6)
 2025-09-10 11:08:20,086 - ERROR - Failed to establish database connection
 2025-09-10 11:08:20,087 - ERROR - Failed to load data
 2025-09-10 11:08:20,092 - ERROR - Failed to load data
 2025-09-10 11:08:20,119 - INFO - Load data endpoint failed after 4.45 seconds
 2025-09-10 11:08:20,123 - ERROR - Error loading data: 500: Failed to load data
 2025-09-10 11:08:20,124 - INFO - Load data endpoint failed after 4.46 seconds
 2025-09-10 11:10:01,280 - INFO - [2025-09-10 11:10:01] Detecting salary...
 2025-09-10 11:10:02,367 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:10:02,368 - INFO - [2025-09-10 11:10:02] Salary detection complete
 2025-09-10 11:12:02,395 - INFO - [2025-09-10 11:12:02] Detecting salary...
 2025-09-10 11:12:03,234 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:12:03,256 - INFO - [2025-09-10 11:12:03] Salary detection complete
 2025-09-10 11:14:03,279 - INFO - [2025-09-10 11:14:03] Detecting salary...
 2025-09-10 11:14:04,266 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:14:04,267 - INFO - [2025-09-10 11:14:04] Salary detection complete
 2025-09-10 11:16:04,268 - INFO - [2025-09-10 11:16:04] Detecting salary...
 2025-09-10 11:16:05,133 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:16:05,134 - INFO - [2025-09-10 11:16:05] Salary detection complete
 2025-09-10 11:18:05,134 - INFO - [2025-09-10 11:18:05] Detecting salary...
 2025-09-10 11:18:05,955 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:18:05,956 - INFO - [2025-09-10 11:18:05] Salary detection complete
 2025-09-10 11:20:05,855 - INFO - [2025-09-10 11:20:05] Detecting salary...
 2025-09-10 11:20:06,453 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:20:06,463 - INFO - [2025-09-10 11:20:06] Salary detection complete
 2025-09-10 11:22:06,512 - INFO - [2025-09-10 11:22:06] Detecting salary...
 2025-09-10 11:22:07,087 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:22:07,088 - INFO - [2025-09-10 11:22:07] Salary detection complete
 2025-09-10 11:24:07,092 - INFO - [2025-09-10 11:24:07] Detecting salary...
 2025-09-10 11:24:08,235 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:24:08,236 - INFO - [2025-09-10 11:24:08] Salary detection complete
 2025-09-10 11:25:13,551 - INFO - Shutting down Salary Analytics API...
 2025-09-10 11:25:52,852 - INFO - Initializing pipeline...
 2025-09-10 11:25:52,857 - INFO - [2025-09-10 11:25:52] Detecting salary...
 2025-09-10 11:25:52,857 - INFO - Started autonomous salary detection loop.
 2025-09-10 11:25:52,866 - INFO - Server running on hostname: 0b21809edf52
 2025-09-10 11:25:52,876 - INFO - Server IP address: 172.25.0.2
 2025-09-10 11:25:52,881 - INFO - Server is accessible at:
 2025-09-10 11:25:52,882 - INFO - - http://localhost:8000
 2025-09-10 11:25:52,883 - INFO - - http://127.0.0.1:8000
 2025-09-10 11:25:52,884 - INFO - - http://172.25.0.2:8000
 2025-09-10 11:25:52,884 - INFO - Pipeline initialized successfully
 2025-09-10 11:25:53,580 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:25:53,581 - INFO - [2025-09-10 11:25:53] Salary detection complete
 2025-09-10 11:27:53,578 - INFO - [2025-09-10 11:27:53] Detecting salary...
 2025-09-10 11:27:54,235 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:27:54,235 - INFO - [2025-09-10 11:27:54] Salary detection complete
 2025-09-10 11:29:54,235 - INFO - [2025-09-10 11:29:54] Detecting salary...
 2025-09-10 11:29:54,827 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-10 11:29:54,827 - INFO - [2025-09-10 11:29:54] Salary detection complete
 2025-09-10 11:30:20,979 - INFO - Shutting down Salary Analytics API...
 2025-09-10 11:31:18,351 - INFO - Initializing pipeline...
 2025-09-10 11:31:18,375 - INFO - [2025-09-10 11:31:18] Detecting salary...
 2025-09-10 11:31:18,376 - INFO - Started autonomous salary detection loop.
 2025-09-10 11:31:18,405 - INFO - Server running on hostname: 0b21809edf52
 2025-09-10 11:31:18,447 - INFO - Server IP address: 172.25.0.2
 2025-09-10 11:31:18,528 - INFO - Server is accessible at:
 2025-09-10 11:31:18,552 - INFO - - http://localhost:8000
 2025-09-10 11:31:18,552 - INFO - - http://127.0.0.1:8000
 2025-09-10 11:31:18,553 - INFO - - http://172.25.0.2:8000
 2025-09-10 11:31:18,554 - INFO - Pipeline initialized successfully
 2025-09-19 11:27:20,722 - INFO - Initializing pipeline...
 2025-09-19 11:27:20,724 - INFO - [2025-09-19 11:27:20] Detecting salary...
 2025-09-19 11:27:20,724 - INFO - Started autonomous salary detection loop.
 2025-09-19 11:27:20,750 - INFO - Server running on hostname: 0b21809edf52
 2025-09-19 11:27:20,752 - INFO - Server IP address: 172.25.0.2
 2025-09-19 11:27:20,753 - INFO - Server is accessible at:
 2025-09-19 11:27:20,759 - INFO - - http://localhost:8000
 2025-09-19 11:27:20,761 - INFO - - http://127.0.0.1:8000
 2025-09-19 11:27:20,766 - INFO - - http://172.25.0.2:8000
 2025-09-19 11:27:20,771 - INFO - Pipeline initialized successfully
 2025-09-19 11:27:24,622 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-19 11:27:24,628 - INFO - [2025-09-19 11:27:24] Salary detection complete
 2025-09-19 11:29:13,095 - INFO - Shutting down Salary Analytics API...
 2025-09-24 09:47:38,012 - INFO - Initializing pipeline...
 2025-09-24 09:47:38,017 - INFO - [2025-09-24 09:47:38] Detecting salary...
 2025-09-24 09:47:38,024 - INFO - Started autonomous salary detection loop.
 2025-09-24 09:47:38,077 - INFO - Server running on hostname: 0b21809edf52
 2025-09-24 09:47:38,079 - INFO - Server IP address: 172.25.0.2
 2025-09-24 09:47:38,080 - INFO - Server is accessible at:
 2025-09-24 09:47:38,081 - INFO - - http://localhost:8000
 2025-09-24 09:47:38,082 - INFO - - http://127.0.0.1:8000
 2025-09-24 09:47:38,083 - INFO - - http://172.25.0.2:8000
 2025-09-24 09:47:38,084 - INFO - Pipeline initialized successfully
 2025-09-24 09:47:41,272 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-24 09:47:41,273 - INFO - [2025-09-24 09:47:41] Salary detection complete
 2025-09-24 09:49:41,290 - INFO - [2025-09-24 09:49:41] Detecting salary...
 2025-09-24 09:49:43,791 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-24 09:49:43,791 - INFO - [2025-09-24 09:49:43] Salary detection complete
 2025-09-24 09:50:21,300 - INFO - Shutting down Salary Analytics API...
 2025-09-24 09:59:27,300 - INFO - generated new fontManager
 2025-09-24 09:59:30,871 - INFO - Initializing pipeline...
 2025-09-24 09:59:30,872 - INFO - [2025-09-24 09:59:30] Detecting salary...
 2025-09-24 09:59:30,872 - INFO - Started autonomous salary detection loop.
 2025-09-24 09:59:30,877 - INFO - Server running on hostname: bfe07c2f7da2
 2025-09-24 09:59:30,878 - INFO - Server IP address: 172.25.0.2
 2025-09-24 09:59:30,878 - INFO - Server is accessible at:
 2025-09-24 09:59:30,883 - INFO - - http://localhost:8000
 2025-09-24 09:59:30,884 - INFO - - http://127.0.0.1:8000
 2025-09-24 09:59:30,887 - INFO - - http://172.25.0.2:8000
 2025-09-24 09:59:30,889 - INFO - Pipeline initialized successfully
 2025-09-24 09:59:32,676 - INFO - POST http://www.simbrellang.net:5000/autocall/analytic-salary-detect status: 200, response: {
  "data": [],
  "error": {},
  "message": "AutoCall Add Salary Successful",
  "status": true,
  "statusCode": 200
 }
 2025-09-24 09:59:32,705 - INFO - [2025-09-24 09:59:32] Salary detection complete
@@ -1,17 +1,21 @@
 from flask import Flask
 import os
-from .extensions import db, migrate
+from app.extensions import db, migrate
 def create_app():
    app = Flask(__name__)
-    app.config.from_object('salary_analytics.config')
+    
    # Load configuration from config.py
    app.config.from_object('app.config')
    # Initialize extensions
    db.init_app(app)
    migrate.init_app(app, db)
    # Register blueprints or CLI commands here if needed
-    from . import commands
+    from app.api.commands import commands
    app.cli.add_command(commands.upload_xls_cli)
    return app 
@@ -2,8 +2,8 @@ import click
 import pandas as pd
 from datetime import datetime
 from flask.cli import with_appcontext
-from salary_analytics.app.extensions import db
+from app.extensions import db
-from salary_analytics.app.models import RawTransaction
+from app.models import RawTransaction
@click.group()
 def commands():
@@ -22,20 +22,32 @@ os.makedirs(PLOTS_DIR, exist_ok=True)
 os.makedirs(CSV_DIR, exist_ok=True)
 os.makedirs(MODEL_DIR, exist_ok=True)
 # Database Configuration
 DB_CONFIG = {
-    "user": os.getenv("DB_USER"),  # Default value as fallback
+    "user": os.getenv("DATABASE_USER"), 
-    "password": os.getenv("DB_PASSWORD"),
+    "password": os.getenv("DATABASE_PASSWORD"),
-    "name": os.getenv("DB_NAME"),
+    "name": os.getenv("DATABASE_NAME"),
-    "port": os.getenv("DB_PORT"),
+    "port": os.getenv("DATABASE_PORT", 10532),
-    "host": os.getenv("DB_HOST")
+    "host": os.getenv("DATABASE_HOST", "firstadvancedev"),
    "sid": os.getenv("DATABASE_SID", "FREE"),
    "service_name": os.getenv("DATABASE_SERVICE_NAME", "firstadv")
 }
 # DNS =  f"(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST={DB_CONFIG['host']})(PORT={DB_CONFIG['port']}))(CONNECT_DATA=(SID={DB_CONFIG['sid']})))"
 # Database Connection
 # SQLALCHEMY_DATABASE_URI = (f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DNS}")
 SQLALCHEMY_DATABASE_URI = ( f"oracle+oracledb://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DB_CONFIG['host']}:{DB_CONFIG['port']}/?service_name={DB_CONFIG['service_name']}")
 # SQLAlchemy Configuration
-SQLALCHEMY_DATABASE_URI = (
+# SQLALCHEMY_DATABASE_URI = (
-    f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@"
+#     f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@"
-    f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}"
+#     f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}"
-)
+# )
 SQLALCHEMY_TRACK_MODIFICATIONS = False
 # Table Configuration
@@ -80,7 +92,7 @@ OUTPUT_PATHS = {
 } 
 SIMBRELLA_BASE_URL = os.getenv("SIMBRELLA_BASE_URL", "http://127.0.0.1:6337")
-SIMBRELLA_ENDPOINT_RAC_CHECKS = os.getenv("SIMBRELLA_ENDPOINT_RAC_CHECKS","api/rac-check")
+SIMBRELLA_ENDPOINT_RAC_CHECKS = os.getenv("SIMBRELLA_ENDPOINT_RAC_CHECKS", "api/rac-check")
 # Salary Detect Endpoint Config
 SALARY_DETECT_URL = "http://www.simbrellang.net:5000/autocall/analytic-salary-detect"
@@ -0,0 +1,4 @@
 from .raw_transaction import RawTransaction
 __all__ = ['RawTransaction']
@@ -0,0 +1,18 @@
 from app.extensions import db
 class Account(db.Model):
    __tablename__ = "accounts"
    customerid = db.Column(db.String(50), primary_key=True)
    accountid = db.Column(db.String(11), nullable=False)
    registrationdate = db.Column(db.DateTime)
    accountcurrencycode = db.Column(db.String(3))
    schemecode = db.Column(db.String(5))
    lastinflowtransactiondate = db.Column(db.DateTime)
    accountstatus = db.Column(db.String(50))
    accountstatusdate = db.Column(db.DateTime)
    def __repr__(self):
        return f"<Account {self.accountid}>"
@@ -0,0 +1,97 @@
 from sqlalchemy import Column, Integer, String, DateTime, Numeric, Boolean, func
 from sqlalchemy.orm import declarative_base, Session
 from datetime import datetime
 from app.utils.logger import logger
 from app.extensions import db
 class BatchResult(db.Model):
    __tablename__ = "salary_analytics_batch_results"
    id = Column(Integer, primary_key=True, autoincrement=True)
    batch_number = Column(Integer, nullable=False)
    total_batches = Column(Integer, nullable=False)
    processed_at = Column(DateTime, default=datetime.utcnow)
    accountid = Column(String, nullable=False)
    num_months = Column(Integer)
    least_inflow_6m = Column(Numeric)
    avg_monthly_salary = Column(Numeric)
    estimated_next_amount = Column(Numeric)
    estimated_next_date = Column(DateTime)
    is_45day_salary = Column(Boolean, default=False)
    is_2months_salary = Column(Boolean, default=False)
    status = Column(String, default="success")
    @classmethod
    def save_batch(cls, batch_number, total_batches, results_df, status="success"):
        """Save batch results into DB using ORM bulk insert."""
        try:
            results_df["batch_number"] = batch_number
            results_df["total_batches"] = total_batches
            results_df["processed_at"] = datetime.utcnow()
            results_df["status"] = status
            # Normalize boolean columns
            results_df["is_45day_salary"] = results_df.get("45daysalary", False)
            results_df["is_2months_salary"] = results_df.get("2monthssalary", False)
            # Convert to list of ORM objects
            records = [
                cls(**row)
                for row in results_df.to_dict("records")
            ]
            db.session.bulk_save_objects(records)
            db.session.commit()
            logger.info(f"Saved batch {batch_number} successfully.")
            return True
        except Exception as e:
            db.session.rollback()
            logger.error(f"Error saving batch {batch_number}: {str(e)}")
            return False
    @classmethod
    def get_batch_status(cls, batch_number: int):
        """Return summary info about one batch."""
        try:
            result = (
                db.session.query(
                    cls.batch_number,
                    cls.total_batches,
                    cls.processed_at,
                    func.count().label("total_records"),
                    func.sum(func.case((cls.status == "success", 1), else_=0)).label("successful_records"),
                    func.sum(func.case((cls.status == "error", 1), else_=0)).label("failed_records"),
                )
                .filter(cls.batch_number == batch_number)
                .group_by(cls.batch_number, cls.total_batches, cls.processed_at)
                .order_by(cls.processed_at.desc())
                .first()
            )
            return dict(result._mapping) if result else None
        except Exception as e:
            logger.error(f"Error fetching batch {batch_number} status: {str(e)}")
            return None
    @classmethod
    def get_all_batches(cls):
        """Return summaries for all batches."""
        try:
            results = (
                db.session.query(
                    cls.batch_number,
                    cls.total_batches,
                    cls.processed_at,
                    func.count().label("total_records"),
                    func.sum(func.case((cls.status == "success", 1), else_=0)).label("successful_records"),
                    func.sum(func.case((cls.status == "error", 1), else_=0)).label("failed_records"),
                )
                .group_by(cls.batch_number, cls.total_batches, cls.processed_at)
                .order_by(cls.batch_number)
                .all()
            )
            return [dict(r._mapping) for r in results]
        except Exception as e:
            logger.error(f"Error fetching all batches: {str(e)}")
            return []
@@ -0,0 +1,100 @@
 from venv import logger
 from sqlalchemy import Column, Integer, String, Float, DateTime, ForeignKey
 from sqlalchemy.orm import relationship
 from app.extensions import db 
 import pandas as pd
 class CustomerAccountTransactionHx(db.Model):
    __tablename__ = "customer_account_transaction_hx"
    id = Column(Integer, primary_key=True, autoincrement=True)
    accountid = Column(String(64), nullable=False, index=True)
    trx_type = Column(String(50), nullable=False)
    amount = Column(Float, nullable=False)
    description = Column(String(255))
    customer_id = Column(String(64))
    trx_start_date = Column(DateTime, nullable=False)
    trx_end_date = Column(DateTime)
    is_salary_related = Column(Integer, default=0)  
    is_consistent_amount = Column(Integer, default=0)
    is_salary_type = Column(Integer, default=0)
    @classmethod
    def get_all(cls):
        """Fetch all transactions."""
        return db.session.query(cls).all()
    @classmethod
    def get_rows_count(cls):
        """Return total number of transaction rows."""
        try:
            count = db.session.query(db.func.count(cls.id)).scalar()
            return count
        except Exception as e:
            logger.error(f"Error getting row count: {str(e)}")
            return None
    @classmethod
    def get_by_account(cls, accountid: str):
        """Fetch transactions for a given account."""
        return db.session.query(cls).filter_by(accountid=accountid).all()
    @classmethod
    def get_accounts(cls, limit=None):
        """Fetch distinct account IDs."""
        query = db.session.query(cls.accountid).distinct()
        if limit:
            query = query.limit(limit)
        return [row.accountid for row in query.all()]
    @classmethod
    def insert_transaction(cls, **kwargs):
        """Insert a new transaction."""
        trx = cls(**kwargs)
        try:
            db.session.add(trx)
            db.session.commit()
        except Exception as e:
            logger.error(f"Error inserting transaction: {str(e)}")
            return None
        return trx
    @classmethod
    def bulk_insert(cls, transactions: list[dict]):
        """Insert multiple transactions at once."""
        objs = [cls(**trx) for trx in transactions]
        try:
            db.session.bulk_save_objects(objs)
            db.session.commit()
        except Exception as e:
            logger.error(f"Error in bulk insert: {str(e)}")
            return None
        return objs
    @classmethod
    def get_transactions_df(cls, accountids: list[str] = None):
        """Return a Pandas DataFrame for ML model preparation."""
        query = db.session.query(cls)
        if accountids:
            query = query.filter(cls.accountid.in_(accountids))
        rows = query.all()
        df = pd.DataFrame([{
            "id": trx.id,
            "accountid": trx.accountid,
            "trx_type": trx.trx_type,
            "amount": trx.amount,
            "description": trx.description,
            "customer_id": trx.customer_id,
            "trx_start_date": trx.trx_start_date,
            "trx_end_date": trx.trx_end_date,
            "is_salary_related": trx.is_salary_related,
            "is_consistent_amount": trx.is_consistent_amount,
            "is_salary_type": trx.is_salary_type,
        } for trx in rows])
        return df
@@ -2,12 +2,10 @@
 Database operations module for salary analytics.
 """
 import logging
 from sqlalchemy import text
-from .config import BATCH_RESULTS_TABLE
+from app.config import BATCH_RESULTS_TABLE
 from datetime import datetime
-
+from app.utils.logger import logger
 logger = logging.getLogger(__name__)
 class DatabaseOperations:
    def __init__(self, engine):
@@ -1,4 +1,4 @@
-from .extensions import db
+from app.extensions import db
 class RawTransaction(db.Model):
    __tablename__ = 'analytics_raw_transactions'
@@ -0,0 +1,36 @@
 from app.extensions import db
 class SimbrellaCustomer(db.Model):
    __tablename__ = "simbrella_customers"
    customerid = db.Column(db.String(500), primary_key=True)
    acct_opn_date = db.Column(db.DateTime)
    gender = db.Column(db.String(20))
    birth_date = db.Column(db.DateTime)
    msisdn = db.Column(db.String(100))
    isbvnvalid = db.Column(db.String(100))
    datebvnvalidated = db.Column(db.DateTime)
    iscrmsvalid = db.Column(db.String(100))
    datecrmsvalidated = db.Column(db.DateTime)
    iscrcvalid = db.Column(db.String(100))
    datecrcvalidated = db.Column(db.DateTime)
    def __repr__(self):
        return f"<SimbrellaCustomer {self.customerid}>"
    def to_dict(self):
        return {
            "customerid": self.customerid,
            "acct_opn_date": self.acct_opn_date,
            "gender": self.gender,
            "birth_date": self.birth_date,
            "msisdn": self.msisdn,
            "isbvnvalid": self.isbvnvalid,
            "datebvnvalidated": self.datebvnvalidated,
            "iscrmsvalid": self.iscrmsvalid,
            "datecrmsvalidated": self.datecrmsvalidated,
            "iscrcvalid": self.iscrcvalid,
            "datecrcvalidated": self.datecrcvalidated,
        }
@@ -0,0 +1,29 @@
 from app.extensions import db
 class TempSimbrellaLien(db.Model):
    __tablename__ = "tmp_simbrella_lien"
    lienid = db.Column(db.String(33), primary_key=True)
    customerid = db.Column(db.String(500))
    accountid = db.Column(db.String(11))
    lien_start_date = db.Column(db.DateTime)
    action = db.Column(db.String(5))
    restriction_nature = db.Column(db.String(150))
    claim_amount = db.Column(db.Numeric(20, 4))
    def __repr__(self):
        return f"<TempSimbrellaLien {self.lienid}>"
    def to_dict(self):
        return {
            "lienid": self.lienid,
            "customerid": self.customerid,
            "accountid": self.accountid,
            "lien_start_date": self.lien_start_date,
            "action": self.action,
            "restriction_nature": self.restriction_nature,
            "claim_amount": float(self.claim_amount) if self.claim_amount is not None else None,
        }
@@ -0,0 +1,45 @@
 from app.extensions import db
 class Transaction(db.Model):
    __tablename__ = "transactions"
    tran_id = db.Column(db.String(50), primary_key=True)
    cif_id = db.Column(db.String(500))
    foracid = db.Column(db.String(150))
    acid = db.Column(db.String(150))
    tran_date = db.Column(db.DateTime)
    value_date = db.Column(db.DateTime)
    pstd_date = db.Column(db.DateTime)
    tran_sub_type = db.Column(db.String(50))
    part_tran_type = db.Column(db.String(50))
    tran_crncy_code = db.Column(db.String(50))
    tran_amt = db.Column(db.Numeric(38, 0))
    tran_particular = db.Column(db.String(250))
    origination_channel = db.Column(db.String(150))
    reversal_tran_id = db.Column(db.String(50))
    isreversal = db.Column(db.String(50))
 def __repr__(self):
    return f"<Transaction {self.tran_id}>" 
 def to_dict(self):
    return {
        "tran_id": self.tran_id,
        "cif_id": self.cif_id,
        "foracid": self.foracid,
        "acid": self.acid,
        "tran_date": self.tran_date,
        "value_date": self.value_date,
        "pstd_date": self.pstd_date,
        "tran_sub_type": self.tran_sub_type,
        "part_tran_type": self.part_tran_type,
        "tran_crncy_code": self.tran_crncy_code,
        "tran_amt": float(self.tran_amt) if self.tran_amt is not None else None,
        "tran_particular": self.tran_particular,
        "origination_channel": self.origination_channel,
        "reversal_tran_id": self.reversal_tran_id,
        "isreversal": self.isreversal,
    }
@@ -0,0 +1,45 @@
 from app.extensions import db
 class TransactionStg(db.Model):
    __tablename__ = "transaction_stg"
    tran_id = db.Column(db.String(50), primary_key=True)
    cif_id = db.Column(db.String(500))
    foracid = db.Column(db.String(150))
    acid = db.Column(db.String(150))
    tran_date = db.Column(db.String(50))  # staging table keeps as string
    value_date = db.Column(db.String(50))
    pstd_date = db.Column(db.String(50))
    tran_sub_type = db.Column(db.String(50))
    part_tran_type = db.Column(db.String(50))
    tran_crncy_code = db.Column(db.String(50))
    tran_amt = db.Column(db.Numeric(38, 0))
    tran_particular = db.Column(db.String(250))
    origination_channel = db.Column(db.String(150))
    reversal_tran_id = db.Column(db.String(50))
    isreversal = db.Column(db.String(50))
    def __repr__(self):
        return f"<TransactionStg {self.tran_id}>"
    def to_dict(self):
        return {
            "tran_id": self.tran_id,
            "cif_id": self.cif_id,
            "foracid": self.foracid,
            "acid": self.acid,
            "tran_date": self.tran_date,
            "value_date": self.value_date,
            "pstd_date": self.pstd_date,
            "tran_sub_type": self.tran_sub_type,
            "part_tran_type": self.part_tran_type,
            "tran_crncy_code": self.tran_crncy_code,
            "tran_amt": float(self.tran_amt) if self.tran_amt is not None else None,
            "tran_particular": self.tran_particular,
            "origination_channel": self.origination_channel,
            "reversal_tran_id": self.reversal_tran_id,
            "isreversal": self.isreversal,
        }
@@ -0,0 +1,38 @@
 from fastapi import FastAPI
 from app.salary_analytics.routes import analysis, reports, pipeline, load, base, train
 from app.salary_analytics.middlewares.middleware import add_middlewares
 from app.salary_analytics.events.lifecycle import register_events
 from app.utils.logger import logger
 import socket
 """
 Salary Analytics Package
 A package for analyzing and predicting salary patterns from transaction data.
 """
 __version__ = "0.1.0" 
 def create_app() -> FastAPI:
    app = FastAPI(
        title="Salary Analytics API",
        description="API for analyzing and predicting salary patterns from transaction data",
        version="1.0.0"
    )
    # Middlewares
    add_middlewares(app)
    # Events
    register_events(app)
    # Routers
    app.include_router(base.router, tags=["Base"])
    app.include_router(analysis.router, prefix="/analyze", tags=["Analysis"])
    app.include_router(reports.router, tags=["Reports"])
    app.include_router(pipeline.router, tags=["Pipeline"])
    app.include_router(load.router, tags=["Data"])
    app.include_router(train.router, tags=["Model Training"])
    return app
@@ -0,0 +1,64 @@
 from app.salary_analytics.services.main import SalaryAnalyticsPipeline
 from app.salary_analytics.services.data_loader import DataLoader
 class GlobalState:
    def __init__(self):
        self._pipeline = None
        self._data_loader = None
        self.df = None
        self.salary_predictor = None
        self.salary_earner_analyzer = None
    # ---- Pipeline ----
    @property
    def pipeline(self):
        if self._pipeline is None:
            self._pipeline = SalaryAnalyticsPipeline()
        return self._pipeline
    @pipeline.setter
    def pipeline(self, value):
        self._pipeline = value
    # ---- Data Loader ----
    @property
    def data_loader(self):
        if self._data_loader is None:
            self._data_loader = DataLoader()
        return self._data_loader
    @data_loader.setter
    def data_loader(self, value):
        self._data_loader = value
    # ---- DataFrame ----
    @property
    def df(self):
        return self._df
    @df.setter
    def df(self, value):
        self._df = value
    # ---- Salary Predictor ----
    @property
    def salary_predictor(self):
        return self._salary_predictor
    @salary_predictor.setter
    def salary_predictor(self, value):
        self._salary_predictor = value
    # ---- Salary Earner Analyzer ----
    @property
    def salary_earner_analyzer(self):
        return self._salary_earner_analyzer
    @salary_earner_analyzer.setter
    def salary_earner_analyzer(self, value):
        self._salary_earner_analyzer = value
 state = GlobalState()
@@ -0,0 +1,36 @@
 import socket
 from fastapi import FastAPI
 from app.salary_analytics.integrations.salary_detect import SalaryDetect
 from app.utils.logger import logger
 salary_detect = SalaryDetect()
 def register_events(app: FastAPI):
    @app.on_event("startup")
    async def startup_event():
        """Initialize the pipeline on startup."""
        try:
            logger.info("Initializing pipeline...")
            # Start autonomous salary detection loop
            salary_detect.start()
            logger.info("Started autonomous salary detection loop.")
            # Print network information
            hostname = socket.gethostname()
            ip_address = socket.gethostbyname(hostname)
            logger.info(f"Server running on hostname: {hostname}")
            logger.info(f"Server IP address: {ip_address}")
            logger.info(f"Server is accessible at:")
            logger.info(f"- http://localhost:8000")
            logger.info(f"- http://127.0.0.1:8000")
            logger.info(f"- http://{ip_address}:8000")
            logger.info("Pipeline initialized successfully")
        except Exception as e:
            logger.error(f"Error during startup: {str(e)}")
            raise
    @app.on_event("shutdown")
    async def shutdown_event():
        logger.info("Shutting down Salary Analytics API...")
@@ -0,0 +1,12 @@
 from fastapi import HTTPException
 from app.salary_analytics.core.state import state
 def check_data_loaded():
    """Raise HTTP 400 if no data is loaded into the pipeline."""
    if state.pipeline.df is None:
        raise HTTPException(
            status_code=400,
            detail="No data loaded. Please load data first using the /load-data endpoint."
        )
    return True
@@ -0,0 +1,17 @@
 from typing import Optional, Dict, List, Union
 from pydantic import BaseModel
 class AnalysisResponse(BaseModel):
    """Response model for analysis endpoints."""
    message: str
    data: Optional[Dict] = None
    file_path: Optional[str] = None
 class BatchResponse(BaseModel):
    """Response model for batch processing."""
    batch_number: int
    total_batches: int
    processed_rows: int
    results_path: str
    message: str
@@ -1,10 +1,8 @@
 from django.conf import settings
 import httpx
 import json
-from salary_analytics.config import SIMBRELLA_BASE_URL, SIMBRELLA_ENDPOINT_RAC_CHECKS
+from app.config import SIMBRELLA_BASE_URL, SIMBRELLA_ENDPOINT_RAC_CHECKS
-import logging
+from app.utils.logger import logger
 logger = logging.getLogger(__name__)
 class SimbrellaIntegration:
    BASE_URL = SIMBRELLA_BASE_URL
@@ -1,11 +1,8 @@
 import time
 import logging
 import threading
 import requests
-from .config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload
+from app.config import SALARY_DETECT_URL, SALARY_DETECT_HEADERS, get_random_salary_payload
-
+from app.utils.logger import logger
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class SalaryDetect:
    def __init__(self):
@@ -0,0 +1,11 @@
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi import FastAPI
 def add_middlewares(app: FastAPI):
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
@@ -0,0 +1,75 @@
 from fastapi import APIRouter, HTTPException
 from app.salary_analytics.helpers.response_helpers import AnalysisResponse
 from app.salary_analytics.helpers.data_checks import check_data_loaded
 from app.utils.logger import logger
 import time
 from app.salary_analytics.core.state import state
 router = APIRouter()
@router.post("/keyword", response_model=AnalysisResponse)
 async def analyze_keyword():
    """Run keyword-based salary transaction analysis."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting keyword analysis...")
        data = state.pipeline.run_keyword_analysis()
        logger.info(f"Keyword analysis completed. Found {len(data)} matches")
        response = AnalysisResponse(
            message="Keyword analysis completed successfully",
            data={"count": len(data)}
        )
        logger.info(f"Keyword analysis endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in keyword analysis: {str(e)}")
        logger.info(f"Keyword analysis endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/consistent-amount", response_model=AnalysisResponse)
 async def analyze_consistent_amount():
    """Run consistent amount transaction analysis."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting consistent amount analysis...")
        data = state.pipeline.run_consistent_amount_analysis()
        logger.info(f"Consistent amount analysis completed. Found {len(data)} matches")
        response = AnalysisResponse(
            message="Consistent amount analysis completed successfully",
            data={"count": len(data)}
        )
        logger.info(f"Consistent amount analysis endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in consistent amount analysis: {str(e)}")
        logger.info(f"Consistent amount analysis endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/transaction-type", response_model=AnalysisResponse)
 async def analyze_transaction_type():
    """Run transaction type analysis."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting transaction type analysis...")
        data = state.pipeline.run_transaction_type_analysis()
        logger.info(f"Transaction type analysis completed. Found {len(data)} matches")
        response = AnalysisResponse(
            message="Transaction type analysis completed successfully",
            data={"count": len(data)}
        )
        logger.info(f"Transaction type analysis endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in transaction type analysis: {str(e)}")
        logger.info(f"Transaction type analysis endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@@ -0,0 +1,28 @@
 from fastapi import APIRouter
 from app.utils.logger import logger
 import time
 router = APIRouter()
@router.get("/")
 async def root():
    """Root endpoint."""
    start_time = time.time()
    logger.info("Root endpoint accessed")
    response = {"message": "Welcome to Salary Analytics API"}
    logger.info(f"Root endpoint completed in {time.time() - start_time:.2f} seconds")
    return response
@router.get("/health")
 async def health_check():
    """Health check endpoint."""
    start_time = time.time()
    logger.info("Health check endpoint accessed")
    response = {"status": "healthy"}
    logger.info(f"Health check completed in {time.time() - start_time:.2f} seconds")
    return response
@@ -0,0 +1,74 @@
 from fastapi import APIRouter, HTTPException, UploadFile, File
 from app.salary_analytics.core.state import state
 from app.utils.logger import logger
 import tempfile, os, time
 from typing import Optional
 router = APIRouter()
@router.post("/load-data")
 async def load_data(source: str = "db", file: Optional[UploadFile] = File(None)):
    """
    Load data from either database or CSV file.
    Args:
        source (str): Source of data ('db' or 'csv')
        file (UploadFile, optional): CSV file to load (required if source is 'csv')
    Returns:
        dict: Status of data loading
    """
    start_time = time.time()
    try:
        if source not in ['db', 'csv']:
            logger.error(f"Invalid source: {source}")
            logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
        if source == 'csv' and not file:
            logger.error("No file provided for CSV source")
            logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
        if source == 'csv':
            # Save uploaded file temporarily
            with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
                content = await file.read()
                temp_file.write(content)
                temp_file_path = temp_file.name
            try:
                success = state.pipeline.load_data(source='csv', file_path=temp_file_path)
            finally:
                # Clean up temporary file
                os.unlink(temp_file_path)
        else:
            success = state.pipeline.load_data(source='db')
        if not success:
            logger.error("Failed to load data")
            logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=500, detail="Failed to load data")
        response = {
            "status": "success",
            "message": f"Successfully loaded {len(state.pipeline.df)} rows of data",
            "columns": state.pipeline.df.columns.tolist(),
            "row_count": len(state.pipeline.df)
        }
        logger.info(f"Load data endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error loading data: {str(e)}")
        logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/load-data-with-file")
 async def get_file_if_csv(source: str, file: Optional[UploadFile] = File(None)):
    """Dependency to handle file upload only when source is csv."""
    if source == 'csv' and not file:
        raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
    return file
@@ -0,0 +1,292 @@
 from fastapi import APIRouter, HTTPException
 from app.salary_analytics.services.main import SalaryAnalyticsPipeline
 from app.salary_analytics.helpers.response_helpers import AnalysisResponse, BatchResponse
 from app.salary_analytics.helpers.data_checks import check_data_loaded
 from app.salary_analytics.services.data_loader import DataLoader
 from app.salary_analytics.core.state import state
 from app.models.db_operations import DatabaseOperations
 from app.config import OUTPUT_PATHS, TABLE_NAME
 from app.utils.logger import logger
 from typing import Optional, List, Union
 from sqlalchemy import text
 from datetime import datetime
 import pandas as pd, os, tempfile, time
 from typing import Optional, Union
 from fastapi import UploadFile, File
 router = APIRouter()
@router.post("/run/pipeline", response_model=AnalysisResponse)
 async def run_full_pipeline():
    """Run the complete salary analytics pipeline."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting full pipeline...")
        success = state.pipeline.run_full_pipeline()
        if not success:
            logger.error("Pipeline failed")
            logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=500, detail="Pipeline failed")
        logger.info("Pipeline completed successfully")
        response = AnalysisResponse(
            message="Pipeline completed successfully"
        )
        logger.info(f"Full pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in pipeline: {str(e)}")
        logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/run/streaming-pipeline", response_model=List[BatchResponse])
 async def run_streaming_pipeline(
    source: str = "db",
    batch_size: int = 10000,
    file: Optional[Union[UploadFile, str]] = File(None)
 ):
    """
    Run the complete salary analytics pipeline in batches.
    Args:
        source (str): Source of data ('db' or 'csv')
        batch_size (int): Number of rows to process in each batch
        file (UploadFile, optional): CSV file to load (required if source is 'csv')
    Returns:
        List[BatchResponse]: List of responses for each batch processed
    """
    start_time = time.time()
    try:
        if source not in ['db', 'csv']:
            logger.error(f"Invalid source: {source}")
            logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
        if source == 'csv' and not file:
            logger.error("No file provided for CSV source")
            logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
        # Initialize data loader
        state.data_loader = DataLoader()
        state.data_loader.chunk_size = batch_size
        # Create output directory for batch results
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        batch_output_dir = os.path.join(os.path.dirname(OUTPUT_PATHS['final_table']), f"batch_results_{timestamp}")
        os.makedirs(batch_output_dir, exist_ok=True)
        # Initialize database operations
        if not state.data_loader.connect():
            logger.error("Failed to connect to database")
            logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=500, detail="Failed to connect to database")
        db_ops = DatabaseOperations(state.data_loader.engine)
        if not db_ops.create_batch_results_table():
            logger.error("Failed to create batch results table")
            logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=500, detail="Failed to create batch results table")
        responses = []
        batch_number = 0
        batch_start_time = time.time()
        def preprocess_chunk(chunk):
            """Preprocess a chunk of data with the same logic as DataLoader."""
            # Convert dates
            chunk['trx_start_date'] = pd.to_datetime(chunk['trx_start_date'])
            chunk['trx_end_date'] = pd.to_datetime(chunk['trx_end_date'])
            # Rename columns
            chunk = chunk.rename(columns={
                'd1': 'trx_type',
                'd2': 'trx_subtype',
                'd3': 'initiated_by',
                'd4': 'customer_id'
            })
            chunk = chunk.dropna()
            return chunk
        if source == 'csv':
            # Save uploaded file temporarily
            with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
                content = await file.read()
                temp_file.write(content)
                temp_file_path = temp_file.name
            try:
                # Process CSV in chunks
                for chunk in pd.read_csv(temp_file_path, chunksize=batch_size):
                    batch_number += 1
                    logger.info(f"Processing batch {batch_number}")
                    # Preprocess chunk
                    chunk = preprocess_chunk(chunk)
                    # Run pipeline on chunk
                    state.pipeline = SalaryAnalyticsPipeline()
                    state.pipeline.df = chunk
                    try:
                        batch_start_time = time.time()
                        # Run analyses
                        state.pipeline.run_keyword_analysis()
                        state.pipeline.run_consistent_amount_analysis()
                        state.pipeline.run_transaction_type_analysis()
                        # Generate reports
                        reports = state.pipeline.generate_salary_earner_reports()
                        # Add batch metadata to results
                        results_df = reports['final_table'].copy()
                        results_df['batch_number'] = batch_number
                        results_df['total_batches'] = -1  # Unknown for CSV
                        results_df['processed_at'] = datetime.now()
                        # Save batch results to CSV
                        batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
                        results_df.to_csv(batch_results_path, index=False)
                        # Save to database
                        db_ops.save_batch_to_db(
                            batch_number=batch_number,
                            total_batches=-1,  # Unknown for CSV
                            results_df=results_df,
                            status="success"
                        )
                        logger.info(f"Batch {batch_number} processed in {time.time() - batch_start_time:.2f} seconds")
                        responses.append(BatchResponse(
                            batch_number=batch_number,
                            total_batches=-1,  # Unknown for CSV
                            processed_rows=len(chunk),
                            results_path=batch_results_path,
                            message=f"Successfully processed batch {batch_number}"
                        ))
                    except Exception as e:
                        error_message = str(e)
                        logger.error(f"Error processing batch {batch_number}: {error_message}")
                        # Save error to database
                        db_ops.save_batch_to_db(
                            batch_number=batch_number,
                            total_batches=-1,
                            results_df=pd.DataFrame(),  # Empty DataFrame for error case
                            status="error"
                        )
                        responses.append(BatchResponse(
                            batch_number=batch_number,
                            total_batches=-1,
                            processed_rows=len(chunk),
                            results_path="",
                            message=f"Error processing batch {batch_number}: {error_message}"
                        ))
            finally:
                # Clean up temporary file
                os.unlink(temp_file_path)
        else:
            # Process database in chunks
            if not state.data_loader.connect():
                raise HTTPException(status_code=500, detail="Failed to connect to database")
            # Get total row count
            with state.data_loader.engine.connect() as conn:
                count_query = text(f"SELECT COUNT(*) FROM {TABLE_NAME}")
                total_rows = conn.execute(count_query).scalar()
            total_batches = (total_rows + batch_size - 1) // batch_size
            offset = 0
            while offset < total_rows:
                batch_number += 1
                logger.info(f"Processing batch {batch_number} of {total_batches}")
                # Load chunk from database
                query = f"SELECT * FROM {TABLE_NAME} LIMIT {batch_size} OFFSET {offset}"
                chunk = pd.read_sql(query, state.data_loader.engine)
                if chunk.empty:
                    break
                # Preprocess chunk
                chunk = preprocess_chunk(chunk)
                # Run pipeline on chunk
                pipeline = SalaryAnalyticsPipeline()
                state.pipeline.df = chunk
                try:
                    batch_start_time = time.time()
                    # Run analyses
                    state.pipeline.run_keyword_analysis()
                    state.pipeline.run_consistent_amount_analysis()
                    state.pipeline.run_transaction_type_analysis()
                    # Generate reports
                    reports = state.pipeline.generate_salary_earner_reports()
                    # Add batch metadata to results
                    results_df = reports['final_table'].copy()
                    results_df['batch_number'] = batch_number
                    results_df['total_batches'] = total_batches
                    results_df['processed_at'] = datetime.now()
                    # Save batch results to CSV
                    batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
                    results_df.to_csv(batch_results_path, index=False)
                    # Save to database
                    db_ops.save_batch_to_db(
                        batch_number=batch_number,
                        total_batches=total_batches,
                        results_df=results_df,
                        status="success"
                    )
                    logger.info(f"Batch {batch_number} of {total_batches} processed in {time.time() - batch_start_time:.2f} seconds")
                    responses.append(BatchResponse(
                        batch_number=batch_number,
                        total_batches=total_batches,
                        processed_rows=len(chunk),
                        results_path=batch_results_path,
                        message=f"Successfully processed batch {batch_number} of {total_batches}"
                    ))
                except Exception as e:
                    error_message = str(e)
                    logger.error(f"Error processing batch {batch_number}: {error_message}")
                    # Save error to database
                    db_ops.save_batch_to_db(
                        batch_number=batch_number,
                        total_batches=total_batches,
                        results_df=pd.DataFrame(),  # Empty DataFrame for error case
                        status="error"
                    )
                    responses.append(BatchResponse(
                        batch_number=batch_number,
                        total_batches=total_batches,
                        processed_rows=len(chunk),
                        results_path="",
                        message=f"Error processing batch {batch_number}: {error_message}"
                    ))
                offset += batch_size
        logger.info(f"Streaming pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
        return responses
    except Exception as e:
        logger.error(f"Error in streaming pipeline: {str(e)}")
        logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e)) 
@@ -0,0 +1,78 @@
 from fastapi import APIRouter, HTTPException, BackgroundTasks
 from fastapi.responses import FileResponse
 from app.salary_analytics.helpers.response_helpers import AnalysisResponse
 from app.salary_analytics.helpers.data_checks import check_data_loaded
 from app.salary_analytics.core.state import state
 from app.config import OUTPUT_PATHS
 from app.utils.logger import logger
 import os, time
 router = APIRouter()
@router.post("/generate/reports", response_model=AnalysisResponse)
 async def generate_reports(background_tasks: BackgroundTasks):
    """Generate salary earner reports."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting report generation...")
        reports = state.pipeline.generate_salary_earner_reports()
        logger.info("Reports generated successfully")
        response = AnalysisResponse(
            message="Reports generated successfully",
            data={
                "verified_salary_earners": len(reports['final_table']),
                "likely_salary_earners": len(reports['likely_salary_earner']),
                "high_earners": reports['total_high_earners']
            }
        )
        logger.info(f"Report generation endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in report generation: {str(e)}")
        logger.info(f"Report generation endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/download/{report_type}")
 async def download_report(report_type: str):
    """Download generated reports."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info(f"Attempting to download report: {report_type}")
        file_paths = {
            "high_earners": OUTPUT_PATHS["high_earner_details"],
            "likely_earners": OUTPUT_PATHS["likely_salary_earner"],
            "final_table": OUTPUT_PATHS["final_table"],
            "consistent_plot": OUTPUT_PATHS["consistent_earners_plot"],
            "inconsistent_plot": OUTPUT_PATHS["inconsistent_earners_plot"],
            "hypothesis_plot": OUTPUT_PATHS["hypothesis_overlap_plot"]
        }
        if report_type not in file_paths:
            logger.error(f"Report type not found: {report_type}")
            logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=404, detail="Report type not found")
        file_path = file_paths[report_type]
        if not os.path.exists(file_path):
            logger.error(f"Report file not found: {file_path}")
            logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=404, detail="Report file not found")
        logger.info(f"Successfully found report file: {file_path}")
        response = FileResponse(
            path=file_path,
            filename=os.path.basename(file_path),
            media_type="application/octet-stream"
        )
        logger.info(f"Download endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error downloading report: {str(e)}")
        logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@@ -0,0 +1,33 @@
 import time
 import logging
 from fastapi import APIRouter, HTTPException
 from app.salary_analytics.services.main import SalaryAnalyticsPipeline
 from app.salary_analytics.helpers.data_checks import check_data_loaded
 from app.salary_analytics.helpers.response_helpers import AnalysisResponse
 from app.salary_analytics.core.state import state
 from app.utils.logger import logger
 router = APIRouter()
@router.post("/train/models", response_model=AnalysisResponse)
 async def train_models():
    """Train salary prediction models."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting model training...")
        state.pipeline.train_salary_prediction_models()
        logger.info("Models trained successfully")
        response = AnalysisResponse(
            message="Models trained successfully"
        )
        logger.info(f"Model training endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in model training: {str(e)}")
        logger.info(f"Model training endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@@ -0,0 +1,24 @@
 from .main import SalaryAnalyticsPipeline
 from .data_loader import DataLoader
 from .keyword_analyzer import KeywordAnalyzer
 from .consistent_amount_analyzer import ConsistentAmountAnalyzer
 from .transaction_type_analyzer import TransactionTypeAnalyzer
 from .salary_earner_analyzer import SalaryEarnerAnalyzer
 from .salary_predictor import SalaryPredictor
 """
 Salary Analytics Package
 A package for analyzing and predicting salary patterns from transaction data.
 """
 __version__ = "0.1.0" 
 __all__ = [
    "SalaryAnalyticsPipeline",
    "DataLoader",
    "KeywordAnalyzer",
    "ConsistentAmountAnalyzer",
    "TransactionTypeAnalyzer",
    "SalaryEarnerAnalyzer",
    "SalaryPredictor"
 ]
@@ -3,7 +3,7 @@ Consistent amount transaction analysis module.
 """
 import pandas as pd
-from .config import MODEL_CONFIG
+from app.config import MODEL_CONFIG
 class ConsistentAmountAnalyzer:
    def __init__(self, df):
@@ -7,9 +7,8 @@ import pandas as pd
 from datetime import datetime
 import logging
 import os
-from .config import DB_CONFIG, TABLE_NAME
+from app.config import SQLALCHEMY_DATABASE_URI, TABLE_NAME
-
+from app.utils.logger import logger
 logger = logging.getLogger(__name__)
 class DataLoader:
    def __init__(self):
@@ -21,8 +20,7 @@ class DataLoader:
        """Establish database connection."""
        try:
            logger.info("Attempting to connect to database...")
-            DATABASE_URL = f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}"
+            self.engine = create_engine(SQLALCHEMY_DATABASE_URI)
            self.engine = create_engine(DATABASE_URL)
            with self.engine.connect() as conn:
                # First check if table exists
                check_table = text(f"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = '{TABLE_NAME}')")
@@ -4,7 +4,7 @@ Keyword-based salary transaction analysis module.
 import re
 import pandas as pd
-from .config import SALARY_KEYWORDS
+from app.config import SALARY_KEYWORDS
 class KeywordAnalyzer:
    def __init__(self, df):
@@ -9,8 +9,7 @@ from .consistent_amount_analyzer import ConsistentAmountAnalyzer
 from .transaction_type_analyzer import TransactionTypeAnalyzer
 from .salary_earner_analyzer import SalaryEarnerAnalyzer
 from .salary_predictor import SalaryPredictor
-
+from app.utils.logger import logger
 logger = logging.getLogger(__name__)
 class SalaryAnalyticsPipeline:
    def __init__(self):
@@ -6,15 +6,8 @@ import pandas as pd
 import matplotlib.pyplot as plt
 from matplotlib_venn import venn3
 from datetime import datetime, timedelta
-import logging
+from app.config import MODEL_CONFIG, OUTPUT_PATHS
-from .config import MODEL_CONFIG, OUTPUT_PATHS
+from app.utils.logger import logger
 # Configure logging
 logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
 class SalaryEarnerAnalyzer:
    def __init__(self, df):
@@ -9,7 +9,7 @@ from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 from joblib import dump
-from .config import OUTPUT_PATHS
+from app.config import OUTPUT_PATHS
 class SalaryPredictor:
    def __init__(self, df):
@@ -3,7 +3,7 @@ Transaction type analysis module.
 """
 import pandas as pd
-from .config import MODEL_CONFIG
+from app.config import MODEL_CONFIG
 class TransactionTypeAnalyzer:
    def __init__(self, df):
@@ -0,0 +1,13 @@
 import logging
 # Configure logging
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        # logging.StreamHandler(),  
        logging.FileHandler("app.log", mode='a')  # Log to file
    ]
 )
 logger = logging.getLogger("DetectionService")
@@ -1,16 +1,17 @@
 services:
  digifi-analytics:
    build: .
    env_file:
      - .env
    ports:
      - "${APP_PORT:-4800}:8000"
    volumes:
      - ./output:/app/output
    environment:
-      - DB_USER=salaryloan
+      - FLASK_APP=${FLASK_APP}
-      - DB_PASSWORD=salaryloan
+      - FLASK_ENV=${FLASK_ENV}
-      - DB_NAME=salaryloan
+      - DATABASE_URL=postgresql+psycopg2://${DATABASE_USER}:${DATABASE_PASSWORD}@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_NAME}
-      - DB_PORT=10532
+    volumes:
-      - DB_HOST=dev-data.simbrellang.net
+      - .:/app
      - ./output:/app/output
    restart: unless-stopped
    networks:
      - salary_network
@@ -19,7 +19,7 @@ if config.config_file_name is not None:
 # from myapp import Base
 # target_metadata = Base.metadata
 from flask import current_app
-from salary_analytics.app.extensions import db
+from app.extensions import db
 config.set_main_option('sqlalchemy.url',
                       current_app.config.get('SQLALCHEMY_DATABASE_URI'))
@@ -1,4 +1,8 @@
 # Database and ORM
 sqlalchemy>=2.0.0
 oracledb>=1.0.0
 pandas>=1.5.0
 numpy>=1.21.0
 matplotlib>=3.5.0
@@ -17,4 +21,6 @@ openpyxl>=3.0.10
 Flask>=2.0.0
 Flask-SQLAlchemy>=3.0.0
 Flask-Migrate>=4.0.0
-alembic>=1.8.0
+alembic>=1.8.0
 requests>=2.26.0
 gunicorn
@@ -1,4 +1,4 @@
 import os
-from salary_analytics.app import create_app
+from app.salary_analytics import create_app
 app = create_app() 
@@ -1,6 +0,0 @@
 """
 Salary Analytics Package
 A package for analyzing and predicting salary patterns from transaction data.
 """
 __version__ = "0.1.0" 
@@ -1,605 +0,0 @@
 """
 FastAPI application for salary analytics.
 """
 from fastapi import FastAPI, HTTPException, BackgroundTasks, UploadFile, File, Depends
 from fastapi.responses import FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from typing import Optional, Dict, List, Union
 import os
 import socket
 import logging
 import pandas as pd
 import tempfile
 from datetime import datetime
 from sqlalchemy import text, Table, Column, Integer, String, Float, DateTime, MetaData
 import numpy as np
 import warnings
 import time
 from .main import SalaryAnalyticsPipeline
 from .config import OUTPUT_PATHS, TABLE_NAME, BATCH_RESULTS_TABLE
 from .data_loader import DataLoader
 from .salary_predictor import SalaryPredictor
 from .salary_earner_analyzer import SalaryEarnerAnalyzer
 from .db_operations import DatabaseOperations
 from .salary_detect import SalaryDetect
 # Configure logging
 logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
 # Suppress warnings
 warnings.filterwarnings('ignore', category=RuntimeWarning, module='numpy')
 pd.options.mode.chained_assignment = None
 app = FastAPI(
    title="Salary Analytics API",
    description="API for analyzing and predicting salary patterns from transaction data",
    version="1.0.0"
 )
 # Add CORS middleware
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
 )
 # Global pipeline instance
 pipeline = SalaryAnalyticsPipeline()
 # Global variables to store loaded data and models
 data_loader = None
 df = None
 salary_predictor = None
 salary_earner_analyzer = None
 salary_detect = SalaryDetect()
 class AnalysisResponse(BaseModel):
    """Response model for analysis endpoints."""
    message: str
    data: Optional[Dict] = None
    file_path: Optional[str] = None
 class BatchResponse(BaseModel):
    """Response model for batch processing."""
    batch_number: int
    total_batches: int
    processed_rows: int
    results_path: str
    message: str
 def check_data_loaded():
    """Check if data is loaded before running analytics."""
    if pipeline.df is None:
        raise HTTPException(
            status_code=400,
            detail="No data loaded. Please load data first using the /load-data endpoint."
        )
@app.on_event("startup")
 async def startup_event():
    """Initialize the pipeline on startup."""
    try:
        logger.info("Initializing pipeline...")
        # Start autonomous salary detection loop
        salary_detect.start()
        logger.info("Started autonomous salary detection loop.")
        # Print network information
        hostname = socket.gethostname()
        ip_address = socket.gethostbyname(hostname)
        logger.info(f"Server running on hostname: {hostname}")
        logger.info(f"Server IP address: {ip_address}")
        logger.info(f"Server is accessible at:")
        logger.info(f"- http://localhost:8000")
        logger.info(f"- http://127.0.0.1:8000")
        logger.info(f"- http://{ip_address}:8000")
        logger.info("Pipeline initialized successfully")
    except Exception as e:
        logger.error(f"Error during startup: {str(e)}")
        raise
@app.get("/")
 async def root():
    """Root endpoint."""
    start_time = time.time()
    logger.info("Root endpoint accessed")
    response = {"message": "Welcome to Salary Analytics API"}
    logger.info(f"Root endpoint completed in {time.time() - start_time:.2f} seconds")
    return response
@app.get("/health")
 async def health_check():
    """Health check endpoint."""
    start_time = time.time()
    logger.info("Health check endpoint accessed")
    response = {"status": "healthy"}
    logger.info(f"Health check completed in {time.time() - start_time:.2f} seconds")
    return response
@app.post("/analyze/keyword", response_model=AnalysisResponse)
 async def analyze_keyword():
    """Run keyword-based salary transaction analysis."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting keyword analysis...")
        data = pipeline.run_keyword_analysis()
        logger.info(f"Keyword analysis completed. Found {len(data)} matches")
        response = AnalysisResponse(
            message="Keyword analysis completed successfully",
            data={"count": len(data)}
        )
        logger.info(f"Keyword analysis endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in keyword analysis: {str(e)}")
        logger.info(f"Keyword analysis endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/analyze/consistent-amount", response_model=AnalysisResponse)
 async def analyze_consistent_amount():
    """Run consistent amount transaction analysis."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting consistent amount analysis...")
        data = pipeline.run_consistent_amount_analysis()
        logger.info(f"Consistent amount analysis completed. Found {len(data)} matches")
        response = AnalysisResponse(
            message="Consistent amount analysis completed successfully",
            data={"count": len(data)}
        )
        logger.info(f"Consistent amount analysis endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in consistent amount analysis: {str(e)}")
        logger.info(f"Consistent amount analysis endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/analyze/transaction-type", response_model=AnalysisResponse)
 async def analyze_transaction_type():
    """Run transaction type analysis."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting transaction type analysis...")
        data = pipeline.run_transaction_type_analysis()
        logger.info(f"Transaction type analysis completed. Found {len(data)} matches")
        response = AnalysisResponse(
            message="Transaction type analysis completed successfully",
            data={"count": len(data)}
        )
        logger.info(f"Transaction type analysis endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in transaction type analysis: {str(e)}")
        logger.info(f"Transaction type analysis endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/generate/reports", response_model=AnalysisResponse)
 async def generate_reports(background_tasks: BackgroundTasks):
    """Generate salary earner reports."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting report generation...")
        reports = pipeline.generate_salary_earner_reports()
        logger.info("Reports generated successfully")
        response = AnalysisResponse(
            message="Reports generated successfully",
            data={
                "verified_salary_earners": len(reports['final_table']),
                "likely_salary_earners": len(reports['likely_salary_earner']),
                "high_earners": reports['total_high_earners']
            }
        )
        logger.info(f"Report generation endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in report generation: {str(e)}")
        logger.info(f"Report generation endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/train/models", response_model=AnalysisResponse)
 async def train_models():
    """Train salary prediction models."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting model training...")
        pipeline.train_salary_prediction_models()
        logger.info("Models trained successfully")
        response = AnalysisResponse(
            message="Models trained successfully"
        )
        logger.info(f"Model training endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in model training: {str(e)}")
        logger.info(f"Model training endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/download/{report_type}")
 async def download_report(report_type: str):
    """Download generated reports."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info(f"Attempting to download report: {report_type}")
        file_paths = {
            "high_earners": OUTPUT_PATHS["high_earner_details"],
            "likely_earners": OUTPUT_PATHS["likely_salary_earner"],
            "final_table": OUTPUT_PATHS["final_table"],
            "consistent_plot": OUTPUT_PATHS["consistent_earners_plot"],
            "inconsistent_plot": OUTPUT_PATHS["inconsistent_earners_plot"],
            "hypothesis_plot": OUTPUT_PATHS["hypothesis_overlap_plot"]
        }
        if report_type not in file_paths:
            logger.error(f"Report type not found: {report_type}")
            logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=404, detail="Report type not found")
        file_path = file_paths[report_type]
        if not os.path.exists(file_path):
            logger.error(f"Report file not found: {file_path}")
            logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=404, detail="Report file not found")
        logger.info(f"Successfully found report file: {file_path}")
        response = FileResponse(
            path=file_path,
            filename=os.path.basename(file_path),
            media_type="application/octet-stream"
        )
        logger.info(f"Download endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error downloading report: {str(e)}")
        logger.info(f"Download endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/run/pipeline", response_model=AnalysisResponse)
 async def run_full_pipeline():
    """Run the complete salary analytics pipeline."""
    start_time = time.time()
    try:
        check_data_loaded()
        logger.info("Starting full pipeline...")
        success = pipeline.run_full_pipeline()
        if not success:
            logger.error("Pipeline failed")
            logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=500, detail="Pipeline failed")
        logger.info("Pipeline completed successfully")
        response = AnalysisResponse(
            message="Pipeline completed successfully"
        )
        logger.info(f"Full pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error in pipeline: {str(e)}")
        logger.info(f"Full pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/load-data")
 async def load_data(source: str = "db", file: Optional[UploadFile] = File(None)):
    """
    Load data from either database or CSV file.
    Args:
        source (str): Source of data ('db' or 'csv')
        file (UploadFile, optional): CSV file to load (required if source is 'csv')
    Returns:
        dict: Status of data loading
    """
    start_time = time.time()
    try:
        if source not in ['db', 'csv']:
            logger.error(f"Invalid source: {source}")
            logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
        if source == 'csv' and not file:
            logger.error("No file provided for CSV source")
            logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
        if source == 'csv':
            # Save uploaded file temporarily
            with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
                content = await file.read()
                temp_file.write(content)
                temp_file_path = temp_file.name
            try:
                success = pipeline.load_data(source='csv', file_path=temp_file_path)
            finally:
                # Clean up temporary file
                os.unlink(temp_file_path)
        else:
            success = pipeline.load_data(source='db')
        if not success:
            logger.error("Failed to load data")
            logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=500, detail="Failed to load data")
        response = {
            "status": "success",
            "message": f"Successfully loaded {len(pipeline.df)} rows of data",
            "columns": pipeline.df.columns.tolist(),
            "row_count": len(pipeline.df)
        }
        logger.info(f"Load data endpoint completed in {time.time() - start_time:.2f} seconds")
        return response
    except Exception as e:
        logger.error(f"Error loading data: {str(e)}")
        logger.info(f"Load data endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e))
 async def get_file_if_csv(source: str, file: Optional[UploadFile] = File(None)):
    """Dependency to handle file upload only when source is csv."""
    if source == 'csv' and not file:
        raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
    return file
@app.post("/run/streaming-pipeline", response_model=List[BatchResponse])
 async def run_streaming_pipeline(
    source: str = "db",
    batch_size: int = 10000,
    file: Optional[Union[UploadFile, str]] = File(None)
 ):
    """
    Run the complete salary analytics pipeline in batches.
    Args:
        source (str): Source of data ('db' or 'csv')
        batch_size (int): Number of rows to process in each batch
        file (UploadFile, optional): CSV file to load (required if source is 'csv')
    Returns:
        List[BatchResponse]: List of responses for each batch processed
    """
    start_time = time.time()
    try:
        if source not in ['db', 'csv']:
            logger.error(f"Invalid source: {source}")
            logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=400, detail="Source must be either 'db' or 'csv'")
        if source == 'csv' and not file:
            logger.error("No file provided for CSV source")
            logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=400, detail="File must be provided when loading from CSV")
        # Initialize data loader
        data_loader = DataLoader()
        data_loader.chunk_size = batch_size
        # Create output directory for batch results
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        batch_output_dir = os.path.join(os.path.dirname(OUTPUT_PATHS['final_table']), f"batch_results_{timestamp}")
        os.makedirs(batch_output_dir, exist_ok=True)
        # Initialize database operations
        if not data_loader.connect():
            logger.error("Failed to connect to database")
            logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=500, detail="Failed to connect to database")
        db_ops = DatabaseOperations(data_loader.engine)
        if not db_ops.create_batch_results_table():
            logger.error("Failed to create batch results table")
            logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
            raise HTTPException(status_code=500, detail="Failed to create batch results table")
        responses = []
        batch_number = 0
        batch_start_time = time.time()
        def preprocess_chunk(chunk):
            """Preprocess a chunk of data with the same logic as DataLoader."""
            # Convert dates
            chunk['trx_start_date'] = pd.to_datetime(chunk['trx_start_date'])
            chunk['trx_end_date'] = pd.to_datetime(chunk['trx_end_date'])
            # Rename columns
            chunk = chunk.rename(columns={
                'd1': 'trx_type',
                'd2': 'trx_subtype',
                'd3': 'initiated_by',
                'd4': 'customer_id'
            })
            chunk = chunk.dropna()
            return chunk
        if source == 'csv':
            # Save uploaded file temporarily
            with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
                content = await file.read()
                temp_file.write(content)
                temp_file_path = temp_file.name
            try:
                # Process CSV in chunks
                for chunk in pd.read_csv(temp_file_path, chunksize=batch_size):
                    batch_number += 1
                    logger.info(f"Processing batch {batch_number}")
                    # Preprocess chunk
                    chunk = preprocess_chunk(chunk)
                    # Run pipeline on chunk
                    pipeline = SalaryAnalyticsPipeline()
                    pipeline.df = chunk
                    try:
                        batch_start_time = time.time()
                        # Run analyses
                        pipeline.run_keyword_analysis()
                        pipeline.run_consistent_amount_analysis()
                        pipeline.run_transaction_type_analysis()
                        # Generate reports
                        reports = pipeline.generate_salary_earner_reports()
                        # Add batch metadata to results
                        results_df = reports['final_table'].copy()
                        results_df['batch_number'] = batch_number
                        results_df['total_batches'] = -1  # Unknown for CSV
                        results_df['processed_at'] = datetime.now()
                        # Save batch results to CSV
                        batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
                        results_df.to_csv(batch_results_path, index=False)
                        # Save to database
                        db_ops.save_batch_to_db(
                            batch_number=batch_number,
                            total_batches=-1,  # Unknown for CSV
                            results_df=results_df,
                            status="success"
                        )
                        logger.info(f"Batch {batch_number} processed in {time.time() - batch_start_time:.2f} seconds")
                        responses.append(BatchResponse(
                            batch_number=batch_number,
                            total_batches=-1,  # Unknown for CSV
                            processed_rows=len(chunk),
                            results_path=batch_results_path,
                            message=f"Successfully processed batch {batch_number}"
                        ))
                    except Exception as e:
                        error_message = str(e)
                        logger.error(f"Error processing batch {batch_number}: {error_message}")
                        # Save error to database
                        db_ops.save_batch_to_db(
                            batch_number=batch_number,
                            total_batches=-1,
                            results_df=pd.DataFrame(),  # Empty DataFrame for error case
                            status="error"
                        )
                        responses.append(BatchResponse(
                            batch_number=batch_number,
                            total_batches=-1,
                            processed_rows=len(chunk),
                            results_path="",
                            message=f"Error processing batch {batch_number}: {error_message}"
                        ))
            finally:
                # Clean up temporary file
                os.unlink(temp_file_path)
        else:
            # Process database in chunks
            if not data_loader.connect():
                raise HTTPException(status_code=500, detail="Failed to connect to database")
            # Get total row count
            with data_loader.engine.connect() as conn:
                count_query = text(f"SELECT COUNT(*) FROM {TABLE_NAME}")
                total_rows = conn.execute(count_query).scalar()
            total_batches = (total_rows + batch_size - 1) // batch_size
            offset = 0
            while offset < total_rows:
                batch_number += 1
                logger.info(f"Processing batch {batch_number} of {total_batches}")
                # Load chunk from database
                query = f"SELECT * FROM {TABLE_NAME} LIMIT {batch_size} OFFSET {offset}"
                chunk = pd.read_sql(query, data_loader.engine)
                if chunk.empty:
                    break
                # Preprocess chunk
                chunk = preprocess_chunk(chunk)
                # Run pipeline on chunk
                pipeline = SalaryAnalyticsPipeline()
                pipeline.df = chunk
                try:
                    batch_start_time = time.time()
                    # Run analyses
                    pipeline.run_keyword_analysis()
                    pipeline.run_consistent_amount_analysis()
                    pipeline.run_transaction_type_analysis()
                    # Generate reports
                    reports = pipeline.generate_salary_earner_reports()
                    # Add batch metadata to results
                    results_df = reports['final_table'].copy()
                    results_df['batch_number'] = batch_number
                    results_df['total_batches'] = total_batches
                    results_df['processed_at'] = datetime.now()
                    # Save batch results to CSV
                    batch_results_path = os.path.join(batch_output_dir, f"batch_{batch_number}_results.csv")
                    results_df.to_csv(batch_results_path, index=False)
                    # Save to database
                    db_ops.save_batch_to_db(
                        batch_number=batch_number,
                        total_batches=total_batches,
                        results_df=results_df,
                        status="success"
                    )
                    logger.info(f"Batch {batch_number} of {total_batches} processed in {time.time() - batch_start_time:.2f} seconds")
                    responses.append(BatchResponse(
                        batch_number=batch_number,
                        total_batches=total_batches,
                        processed_rows=len(chunk),
                        results_path=batch_results_path,
                        message=f"Successfully processed batch {batch_number} of {total_batches}"
                    ))
                except Exception as e:
                    error_message = str(e)
                    logger.error(f"Error processing batch {batch_number}: {error_message}")
                    # Save error to database
                    db_ops.save_batch_to_db(
                        batch_number=batch_number,
                        total_batches=total_batches,
                        results_df=pd.DataFrame(),  # Empty DataFrame for error case
                        status="error"
                    )
                    responses.append(BatchResponse(
                        batch_number=batch_number,
                        total_batches=total_batches,
                        processed_rows=len(chunk),
                        results_path="",
                        message=f"Error processing batch {batch_number}: {error_message}"
                    ))
                offset += batch_size
        logger.info(f"Streaming pipeline endpoint completed in {time.time() - start_time:.2f} seconds")
        return responses
    except Exception as e:
        logger.error(f"Error in streaming pipeline: {str(e)}")
        logger.info(f"Streaming pipeline endpoint failed after {time.time() - start_time:.2f} seconds")
        raise HTTPException(status_code=500, detail=str(e)) 
@@ -0,0 +1,7 @@
 from app import create_app
 app = create_app()
 if __name__ != "__main__":
    # Expose WSGI app instance for Gunicorn
    wsgi_app = app
Author	SHA1	Message	Date
VivianDee	77165e2adf	Update config.py	2025-09-24 14:48:57 +01:00
VivianDee	46d2754ad9	[add]: Database config	2025-09-24 10:59:43 +01:00
VivianDee	43f2ed5252	Create temp_simbrella_lien.py	2025-09-19 12:28:49 +01:00
VivianDee	f25297611b	Cleanup	2025-09-19 12:28:16 +01:00
VivianDee	495ba93a4b	[add]: First Analytics models	2025-09-19 12:25:42 +01:00
VivianDee	fdd7959370	Update config.py	2025-09-10 12:31:31 +01:00
VivianDee	02094df42c	Update config.py	2025-09-10 12:30:46 +01:00
VivianDee	bc071360cc	Update app.log	2025-09-10 12:30:36 +01:00
VivianDee	ec5f158823	Update config.py	2025-09-10 12:29:55 +01:00
VivianDee	93073012e5	Update app.log	2025-09-10 12:25:16 +01:00
VivianDee	8236462b83	[add]: database connection	2025-09-10 12:02:36 +01:00
VivianDee	95732a470d	[add]: Final commit before model fix	2025-09-10 11:36:32 +01:00
VivianDee	239d19fbac	[add]: Database config	2025-09-09 12:22:05 +01:00
VivianDee	ebe40cda19	[add]: database configuration fix	2025-09-09 12:06:37 +01:00
VivianDee	00d89e460f	[add]: docker config	2025-09-09 11:28:27 +01:00
VivianDee	80cc543cdd	[add]: refactor/clean up	2025-09-09 10:58:08 +01:00
VivianDee	4e22161088	[add]: refactor/clean up	2025-09-09 10:57:47 +01:00
VivianDee	d9b6a7e92e	[add]: refactoring and cleanup	2025-09-07 23:44:25 +01:00
VivianDee	6de9583aaf	[add]: code refractoring and cleanup	2025-09-07 23:07:40 +01:00
ameye	2cc3d70f4f	Merge branch 'development' of DigiFi/digifi-Analytics into master	2025-08-22 11:15:41 +00:00
		`@@ -0,0 +1,4 @@`
							`from .raw_transaction import RawTransaction`


							`__all__ = ['RawTransaction']`