From 9c429caa565b9b485b691047ee7ec6f29ca138f8 Mon Sep 17 00:00:00 2001 From: Joshua Salako Date: Fri, 2 May 2025 14:25:31 +0100 Subject: [PATCH] Implement streaming pipeline endpoint for batch processing - Added `/run/streaming-pipeline` endpoint to process data in batches from either a database or CSV file. - Introduced `BatchResponse` model for structured responses. - Updated README with new endpoint details, including parameters and example usage. - Enhanced error handling and logging during batch processing. - Ensured data preprocessing and NaN handling in analysis functions. --- README.md | 32 +++ .../__pycache__/api.cpython-311.pyc | Bin 17046 -> 25064 bytes .../__pycache__/config.cpython-311.pyc | Bin 2697 -> 3691 bytes .../__pycache__/data_loader.cpython-311.pyc | Bin 10181 -> 10300 bytes .../__pycache__/main.cpython-311.pyc | Bin 9871 -> 9871 bytes .../salary_earner_analyzer.cpython-311.pyc | Bin 8276 -> 8880 bytes .../salary_predictor.cpython-311.pyc | Bin 10835 -> 11520 bytes salary_analytics/api.py | 183 +++++++++++++++++- salary_analytics/data_loader.py | 2 + salary_analytics/salary_earner_analyzer.py | 40 +++- 10 files changed, 246 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index cbc96a5..91850a6 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,32 @@ uvicorn salary_analytics.api:app --reload 6. **Pipeline** - `POST /run/pipeline`: Run complete pipeline + - `POST /run/streaming-pipeline`: Run pipeline in batches + - Parameters: + - `source`: Data source ('db' or 'csv') + - `file`: CSV file (required if source is 'csv') + - `batch_size`: Number of rows to process in each batch (default: 10000) + - Example: + ```bash + # Run streaming pipeline from database + curl -X POST "http://localhost:8000/run/streaming-pipeline?source=db&batch_size=5000" + + # Run streaming pipeline from CSV + curl -X POST "http://localhost:8000/run/streaming-pipeline?source=csv&batch_size=5000" -F "file=@path/to/your/file.csv" + ``` + - Response: + ```json + [ + { + "batch_number": 1, + "total_batches": 10, + "processed_rows": 5000, + "results_path": "/app/output/csv/batch_results_20240315_123456/batch_1_results.csv", + "message": "Successfully processed batch 1 of 10" + }, + // ... more batch responses ... + ] + ``` ### Workflow @@ -127,6 +153,12 @@ uvicorn salary_analytics.api:app --reload 3. Run any of the analysis endpoints 4. Generate and download reports as needed +For large datasets, use the streaming pipeline endpoint: +1. Start the API server +2. Run the streaming pipeline with appropriate batch size +3. Monitor batch processing progress +4. Access results in the batch results directory + Note: All analysis endpoints require data to be loaded first. If you try to run any analysis without loading data, you'll receive a 400 error with a message to load data first. ## Docker Deployment diff --git a/salary_analytics/__pycache__/api.cpython-311.pyc b/salary_analytics/__pycache__/api.cpython-311.pyc index 939b1f5484e841baf58e16cc5c546cab4e347314..374ad32cc2419cd20d985fa180ab38af80261fac 100644 GIT binary patch delta 9014 zcmdTqZB!iBbu;_TvcN73yTixA0L#a+fIxu22#}D3BrJp!K*+{YH*02)R$6w~vx6nk zC9A}$O>D=(9wk=%QIj@x6$hUvhosdXH%^`ASjQh{mb10CQ>#% z6=0`|#)|+q^Vxyosgm&$f?EQmQ_gYcRM~hLspkaBr?_#B;JJZ{smk$6g69QdRa35U z7fop>q2OF~T1LQY(l~Ejw7sgKsPDj?x>mVs({-WlRT|piPF>?}7)`_H^94M!Soo?I z+TdPos~74QJn0%m$sf|@m|*7ZXK3C*AYhB^t9s}Jcj_V&mb?m;@1#l8cmr=7FNeDk z?h3f8G}IxAFSew|KVUZxD2o8ou$0Zqur|(!D%Q+w|J8WtQE*Pc+upyh#f!LO0B?bIS}pYcsTN zGeg^!8G6(Ft?ip;=;b?vZoX5{PwM#luIM+dWKX)^K$l<@b`-6d&=y_qNsq&K3wtLG ze9xA#`gr3A<=!h_)4kH5RZYjjl0Ovm1ytPu|AeIK2K^Cf$W5yz-X{r?e@XycCkYoN zc|c!LqERi!PK+KuG3q_uH+pzPHIMe~8|?QE^&RbBx}+b`SY05Rn2o@Kpj!UWc+^~p zD1u#bt7({POjjoWS-aBy_3=SjH8^8}PSEq(ZfGW$j$j6$>DMp=Vq`TYHD{ksnm9Ww zM8ct9M3`}Wfv3juC2BytZU2NxDY6G9~751wgKErjF^PES27h-!`$l6(O#p$V~w zn!Cy>@Ln-=A);DDAu=71BHpl1IxAMe+(T}oSOdQz?jvFy0yh9v>kmpI?hs-^IIFYsayh;Tsuo2*W{ap}LZv^u(8 z{!LD85elp3L?oXmO^3b0qXI1IexMC5?auvxrmN(9YprW95X3$No95App^zjFLCfdm z)7ECXQGUky{o143a9Yj^zJPSrO9T^#fn!qk+jf93-?05eV?afVE?Hn|_)b8F+}T86 zaWB%z%)0?o8F4!3Juh6k5E6N>4-EQJ#2-<$0U<~%PwYqbg9yko(E;{tjY!OO0PqOD z8aPgDlK+z_)SrS%K>lxM-BL?oSZh21)tQ$4@_(>J@-wVieuvF9i~&tNDgTu9I3Gat z(`1uti(H;@G9&nH1rAC?A(=}_n@=> zcg*`0XVo7x?>{=r^yrXJ!@U0nbN;f-s-;iLzp1GF9TRm`x=xa{kyUQ9K(|)pVPMrL z3gM6_MMR=-^mh(5j!o7xLQoK4$ML3lf_GbvUFHhTS_O< ztv7p#4ZUP?IuP)NS39gry1_t+kQ&t(nV!JSG9_2qLvYx~)8J=Ir*{5KpAW7y2P9QH z5qVVX0PNTDvCe8UI)(TZQvE__W9zSVP0;>rjcR~BS&#(P5SgBw^j{ouYs4=jD^UUt z1dogm9+3a3vxt5|zSrq797m2S*?r&SapLh=L@T(FiI<^CHB1LV1Ls9zhPbziR|#1P z1>l{Q+5q+Z+f>Hu40FfZ-E>i%yj z14b2mQ}8NjOl$KOR(KVt=G1#7;-3*F(9F}96_Cf^{@7rVztcLYk+kyX>}>w5HmZqg zy{KPuNk;jLW=6hMoBLd|!)DBk6lQhuZ`}@g&QU1;YkQ7tsW;PE5REap5|i!^FGZ8w z%&{>(E2?{udRdbhkIm|(oORZ7__8KfRL`3ivom>V>I^k&;4QO8KIaNGYl>35{yY(M zULzio)&mjh+}fCYE}yq(g>ect1l;Y^Y*v(B$d!Gr%KXPD-uB1ZG3t5kVztQFys7g~Q>KytTxuV%o&0>D$0uE5~^ywF7Em0$1 zFasja8l{>vKSeDt7|3dwC}`Zxuo^0Aidr&-n6*eAd`kA&oTw$5&E-w&%3LTxeH2IY_}oQDCM)Q~ zuuzK4y%KuC>IKVVC3N_7ebXvns3M&EWzAx7Dlw5myX<`UNYj>uL(S zKwqFPY9FL7&{~1r zY!p0t@x#w@ix{{CSZhU;+3vCO zl_*>K0=}xK2B0fi2;7EQyR^|o-D$q4edDq-RRT_GIfri^a*3MKc{+II|CLJ~$TaaN z^ZU4DTEQC)xrU~KPi=9_FZ>_4D0|+M68+?3qD9ONgtW+EQw(<{m{ORnd#{+4vD1uYO)$3 z+39-E^mqbKL=&F|e;_fH_zYreF_rjLsLte6KUh6g&3mg4S0Ak&5hsDB=BA_;kF6%G zqnOVN7hzs+=)8!p?}-iD6NTYU0o)%a0X$D_+`3RfsSDoxGz1kz74VC})u(c4kjq5z z8sv(~KPqP{w7ibj^9J7dl5WwIxp~b}QEEYp8(0icIGMEL

tFN=K|WP8 zVNJRe7SwFQH~lcFSp2b|U-AP#?|t}^nmsWckwQ~~==Jk%ovPtm;NA&$E8J}monmN- z4PhR}36Y`kutYV8LJ%Z_2v-b+gFaBao-81OicBXv;kETa(hIAbX@+Pr1~RXPlWW0w zQ+FUV;R{51n>KgZ5938Hz?he*Um2)c_qT#KPAI#NB-#h#HAfRQN9X#L{K~kc>W-`C zTSGV6Z|}|9n&2QgN3ldVcV^!DO*3SzMbeu1m&>Yge-L_4XAC!2IAx zl)ljM34*!)6$@2ZjNH!-T^*Y1zgxi0rOp@*it`|X_$&ZCGs;RW6~bN-!?qdQ@wLO2 z)zcT%kTT5~3#2pTGzBvP)Dq~Q7cTzdw{XokM^bqQCMuWK-C=bMw_a?)S$866MbL%- z-#0Ukeo=&@p7leX)jl(tE>5LHOqgbz2Yrxs!MioYO@x9$VM0QbkAVq2?1QtT!jb;L z{{5re{$nSGM(gU`+`-{vN4Xgt+Tcv?YGeH>)yNGF932?tW=!0%g9k_Y0duBgfZbs2 zfpY2mpo?>RL`?8RjMh~%QJ5DK1u>S36m2kcK{Y|gNi-*t*od>(pgR}x2UR2Q7ty{% z40=`5l<&L%l!*9ssEW@cxQ5^x2wp(&Jpif+?27kdYPJ_?eF$s_NS5>=g+WDHoAK1bV^jSP!ZtHQ(@JbGM>=16rPs6uy8dCtcaZW zMN&5LS2)I>B6tPC0)ihPz*t$F0-)xvZjKud^Dyg+sXAN*xPE9lD0v^57Q{=cIdvw8 zJ3X;C)exGTj0lpdC+!gjbi~)hHdpFk!;T5}njNe;w)pqZu?l_o9PGY7hri2uYNhx= zjoj0ix>MqOspFOI z7rI~9EE>O?_gdZ?mN!o)I!EGlqlvmvWlU7ggb|L3@sdcQB%+wuJ0|nf*$U&jE-9wA zxT!5+Y6IjhW1q`PI@ppq%Ze^Ht7wH<0~}?^pFi>Bo-fN!bp3g(A*cRdoaHY)biM4& zF2y+%cMc_-Ln~CRt>hyF^ZiMNTj+~3)$@aQnBwP;Ja+mT>n z9SYl#MADvRc2At`OR#+k+eaw9%WQ9)-J4+dD(qgEp~`c+a$vb~AYOSSQF-K=5vFKr zUOf5TQ?H#;y6#s7L&^ysLQ>+5;Y4FtVe6A@`9e;DZAwk5>^iI*KcV34V+nRliLqmM z*^1k2-7;JEx-;o+Pu4fw@ibkZ{&v@5SJJ&Z=^B1>P;m`IS;;ok7k@+n_yhr9MJ0e0 zm8>8j)qB6nrAn(;C{rO2)8GzU_KM{Ni&A~^W)uLF_ae&3=&eWJeN?HAVLi@1kYFEB z*awoIA&MvI8oL=#Tw_q)n^1;FZcV;BnW}!aWJJ78ASLa+d-q~1dXN*4l|AsZe>F-| zwt}l!*P7y1cf#sctZqQ=eNB@=0xEVeP)zylf|})mns`B7qM&YtqH|8uckER+v`Y6w ziv3jFekx%4IrRvZdT>zlO%MK^X z4$o&NofWT~dg0V{Pu#gH;oNoG*|Y5Ii97csoO|ZYD|+mk8X*NgBBOmm;70nV;V3_0 zj%~wXqRQ&7*T@%Dny#Tl`Y? zLjU#M2~SU)?M<+~^LltUU)>E_wH5yZQ-njf6}IkrVVvEOV0X;x!Td{G<0Wm0lD2tM zvaD*}bTu11%;vbul-_1qmYJ3~)0$vf6{art3) zn4!G(c3J;&S%19jP@?S6{2`d4zG2bv-O|@em0bsw!{?RHokmDX+#N``0}4}YG`S{HY^zMQNpARNgTf6%d9Uhi->sT=Yf`n}L#}lR*D)}* zd*Eg^2mUgY&2g)Cx~i)*{%^@7>8}H<+w+5wo3`(beSb_T z?12(D^(IWcim6xrYE?}v0dJiv`&wgnNIH3-rlW;3LGHgvGEXmY9PPjH=*X< zm--xMob*Bb5cZ4m54*!LJk&|WRr>%_jgUh?$Z|$CgndEY7a!kP2BygjR`+%?R*_Pc?;t8$QRW+A~7x zLmT;ars~2Wh&N6l_vaBzAi%FFlc1Q5IY`6NRGDK5yOyV(^tG-o4Vl>$#g*I z6@Lk>5U)@XMSisRLTRl+eQJ@ldzy!mlvBwpNvcH2EJ@0(WR@h=plnx`PVOn9>ulg3 z^ky(W0A5eqz}M4F)}`RyVol%_{Lh8{3HD2@Vy?Pz;HERtcIYx2rw=FS!z&s=LwB#h kzj)By*9%g=H@i~5%J}INvJrlyr8EQdrk*JV^0D=Q02Dj?*#H0l delta 2719 zcmb7GO>7%Q6yDv~j+;1%<21ic>UCl_b`s~O%}+@}_$j3ov}tG>pl*V@^-K~sYp#iY1mS|p3ZzP0Dtaj@Cn_P{8^>wlq^cP2ynQq8 z&Ad0?y!Y1kZp)usl|$Fd%1R_?kM35@o(fCSA2`Uj;&tYsZu=_kq5^B+^lUNrd);h0bjj1~x2o?O>sMn}q{;{l1<83;kq2 z?I*p|K5Zj?%l3kkJVT1drSJgnwqNromh*TzlQ2@6ZrNT+#51G(k=QSad1O30;22ljDnt`2MdCu@=bt){mFz(lNgw~qdBOon3B&BI5|`W>LS9U?0cTal zXK8#cMzo9;(+!PK!uG-Z5dXXMfE><7Ttzk+0^HqNFVxG-jK(ta=@>mvQyKOgs2|suy|iSk^Q#uiUKyD_x0y(Uh=g)#pZ2a%m1=jsqV4A z2{y0bDfy&&8|4viHIY)22BEr|VOk=UNX-nYY!loi+pjzp3XD7n>9CwL zjU|2kvRz{1m@s0p84z60-U|I_b4&uuPVnhZZ&Ip=XNz%3uF#JJ!|VZz38=&}Wnf0O;PYuM(lN9#mcy(-}jD>&jh^C@umXfk$!9@)EBe4h1jak`W*# z{@1}Pf+k7|`w&G)?q|;qFDkBe2u8z$EXV_=FKg%k8vtSz>&DCs1iIx&Ybm0cmXapy z1C-d3kOHh6Sx4MRO($j;MwnIlxMnlv#u>M%=^9&zr5Ppg(J&aRvI`V37tK2Ep1M%uUz2ut_!m>%fxA+8ZR>MKQrEUVS^r!0 Ta+bvElya#<9)5sCeBk~A5G_r7 diff --git a/salary_analytics/__pycache__/config.cpython-311.pyc b/salary_analytics/__pycache__/config.cpython-311.pyc index 66e50b5fc9437f03259c9fea33cd4c4f05aea6e3..7495a52e8ce1108b6795a7f3dc130852bbb2967d 100644 GIT binary patch delta 1755 zcma))&rj1}7{}kXY+cuu4aV57Dk2)Qs6-*+L1F@dC_31}z#qLdW$!Bt$GWV;D2W*+ z9@LPisfh-cuy{}ty?OEA#s6SSc4*$C9=VX{k%R9xhIEUvw|(2U&-e3u-=|Oe)VnR+ z`5=l;K*rdvXXyu7{j5GThPb!*wc7!}YXE@^AO-0ZB~#?ir05k!W+*@d#Jnfyr$!#2 z0HT&jPJI+@Mi=FA9Uik+IR+s1W%0DjVX@$Q4n-O-dawr;J>L@9launu%Tspk~! zE@mgjmHGJOEOzQ@O3gkXCTYx*U~DWNzZspKs1Y+`)04GDEIK=fZRgKj7#^xUUya7+ z*8K3xcP7u|*L(>rtrKIE&MCFc2A8#kRB~Z0kWBwe@*fcNgqlLxMpDp{WYNM->{&`K zE-6UOrcqXbD5oZkc{ZEJ?gM_c8gRG8rylD%K-HbG$AZ;^VJ~JC($j>ZU|Q3$z24p| zLwNCO7lJ(rN14>xuABm5}bzo0( zE5<|4cWcK1#NH?48&fZmMX9cu?!XKB75C=sN2%-{sklc>!P`Xa2Yh8AToJD7T0=p$_3dP%{b5mviwTl0m8I%gAw$5yxDLyC-O_zf+mEepS2o-!= y(xy~Qmd;-<2O^a~#0(uNT-+Mn9R0|ZhNIjU?SF1i?ru(FZaIpIWI5VkIbo_zfn~Qf@(Z!TV(;@Exfc# z{5gDPSb#G?FaZfNC1Mhr>@aQYlBHRa)S#~x2{{&QuJJsp@m+W{e3}BANA@}}zWX&d zg*dphO%Vh#Kifje{*>x|_n$CU3@( zDS$CXXSk&WpLgoDO1}PX=z1~*^iSPR20;`=t-weE?@GK0%DZ4fUf4x-zT>76^}%`0^Pcy4f6jBxyVjR& z%rwK4A?1iK%va7lWtPhIPqYZ#L!}_IUT_*d;!*f+ctugN0lsL>;I^~$btu_!@FbpfJ~jfEK==woX*Gtm|+ zlL8FTRdAK{=tQ;0V{W@fHWvzNgw5J^fdMVq5{gjBRwclGBsnn1-*70Y&dK|vzD@$ zNX@zfF`jzQ>tZ4_ixoJNJuN=d;4xsWGn8KDHk{IDv@)+cHE-P=`kEGtuY*w1SmYUP zcgZ7hO4(>exz|M73?+v3jY_zH+MCAg8BVm*aX2gA9he=Nzp2*I7ZelzI z?Y?@YcElItiC9t_Tp}z1bH2E1i_V%y>(GHCBwmLbu*W~n_Yk={l>F*aFD&}4xF1&h Xcbd;>a77;d8<*ARKmL=IOiHgmNWk7{ delta 861 zcmZ`%O=uHA6rQ)6Y_-Ym=BJzHr%h@$|D|ACTeZd(Eoe)ljcq+B29Y+UhL)IZA{2^0 zc+sOgMLZdc&_hAcKtU*o1|e7vo_i559<)M=6-p1{%$B4V9hh&v`QGX3ak zm5@b}k?^_Gfw!d`jc!SHuwc%86ytKpB-MNwE9Qh2JRz&H!ZK=l(MG*~d?N>N&eJT{ z^>Lz-d<4Um9%wJ6EENC;hHOc^Wm`HJ;Rv}ys)is&?KcET3|5JR&+G-x+FVkb2B3>0 z?^@3UhB#8;8i#?em}5h6z7jW5J_)s(>DoXiPv@_R(j#S+gGC&1uJDGUjE7vq^5?v= z@6A|n!Q}%hK6Q1-!F`?9V%4kv;mRr2nx3)OSh0!Lmon`~6ul42ZdLZM+`7-9_aW%n z>qA}F`*0Z1(*xajRp>13dX@p)xYU~RvB%NQF^Drb&tQ~6lEDR3z3s|4r5i$adU`&c z-!SG1X>CE9z)A0E;S!C^kK>9r0Y`Ahy9G}C?km$C;#ylw=yZzi-@T>HwstrkA@8MA zErY#$Ic6S-kaDkKu&<%q$1{Es81FwDrJMCgV_SQdo0YX=?7e6YzhZM z318Ugtdcs6L7eUDu!t{1kE3a3*Dqy%yf(vtJ&nftyL%u$mz^riru(%l&W0ZjT;`y; PO8$*oy1xCNsD9{QNW8%9 diff --git a/salary_analytics/__pycache__/main.cpython-311.pyc b/salary_analytics/__pycache__/main.cpython-311.pyc index 4f1a3ec44893c33145e870944df2c8c6a9088771..6727b9c21e8b6ed5a662f056b4744f3815fa6e45 100644 GIT binary patch delta 23 dcmeD8?f2zd&dbZi00c+Zie!9{+{o9c1^`dW2JZj> delta 23 dcmeD8?f2zd&dbZi00eojgflv&Hu5#90RT)K25tZV diff --git a/salary_analytics/__pycache__/salary_earner_analyzer.cpython-311.pyc b/salary_analytics/__pycache__/salary_earner_analyzer.cpython-311.pyc index d72e45e7e093680f68d707f48184809dff91aaaa..828d0ba2db733ac67cd868e5c2069958194ed693 100644 GIT binary patch delta 1775 zcmZ`(Pi)gx8258x$B7*~X+qj2O`L}QODRPRhSrkO0fUKg;Ln)Eq@l>fX-P?(=*5AS zIjgvAl~!q4-^7^4A=`n0-~cLz-8XI1E^DetjXX`{$_0Y9-L|p!T#~I~`(*#}{J!t| z<@@(u{ndL%#IM|L7lq}_H~tIn_uUsC(qIierVpd{e1l$_&Jf+E7@%rHSA9NoPms_Y zCyEAr1-q8*h8+fUC%S6e5#rm%6g9`T*Q#R{=o0$g)(<%J$QB0J_Bl=5J}H*DSxR+& z3Cd1`tGb9PL>bPzuA6|O?%;P*%ACQyNyT|IYyWP)&2%cMRKBF?8PybZIG-%)3e=OT zqNhz($q~(MV6w&1`C?j+^H9JfrahG}ETBEiUYE(LFkjG=xC>>NquWh>CX?0EFs&xl z8PlDD89kFyvdL6FTP%|5M5E{da|--{SawL*5GU8g$(op`iwQKwex3;QUXOpGE%7au z>OBP3dJf`e$=wL_Zv;lx10(m`Yk`qk;6OcaV2N)^!8I-jeV7!6F|0P(Fb(#hzqkt6 zjy`fW!7wVgp3p;fr0_3?et3t*6}FKY&vi-x!Lq0`=nHNaTs>I`dApJt6aMg5)6^u-ZmMkb$6p@g2Zzgjomq{q|rlR zJT}+q@EobBC0OZ5;1&7prO@M^2zK=(bduk5nUmTp!@0GukXAX|dCcGWbE@iT$HkUi zh6|_kx_kJ=T%(4I4B$V3l--6~bzZazRVF36?+(4_ChDCWC`xsqXZ!#fcZj0OU$myZ zEE#mwYejHU>^d&o;I>8#s0vvAaZ7exo$TnXwq*~VZ8x56ui>fs2xpiwysCItx``Wm z2fv$A_81;K-5&IlI1wdvgAojm9$<(A)2=EDMM#XHi~;SI0^OE=Cov+We?*jV4u;X& zQhCu1PvB2@lE5hfr!kl|C7X@Yu-{r=$X|~8O~)+EmkQ?>OeUQx=nHU!7~3raEq@u3 za+tPUMl%^LuWJgX&=yR4F0E)drp(NYNvk?MMy%LeTFodL9Ks)irae<6);VP!pTMD+?H$cX zeRV0;qHOM|h8(R36`?7Q-8x&B_if0N>+)nxPSoW@MZgZ8ZI!djiyP9wx-?LehU(JL zhBUe^jn<^Gx->>8#jB(lLqY@i~ciLOSZ**&{vJxfBv7y5L1W%}0G*GDSTHQ%AS@6c+*7Xjcjn(~Jx ztT`Y}JY&rOzOmwVNKQPAVK8}^#+O?%qp9h6t8;M%dWg1&R{WPhcGbCUf&pQ4I2dDK z6f00Zc!ec(geCMO*b9b`GZc#(WP=<}c$9#3Q?hzKnJQj_9NuZO&Y&})?O+kjhmHe% z_1jR2#@r*3UE3^1%eO=bEWQs=HZt{+j|sJ?4vg}QzeR0@;6A3OMRlNnevTZ$arMhG lm_!HV{oq4XmV3Yox+X79pQC}RMg2EEwf2AeAFXxY{taObwA=sy delta 1273 zcmZ`(OKcle6rDF;kL|G?J56nm$F)NOxXnijrEMTIh?0^f-Bc77RV8ccnI?AXPv49y zr?R07Bm&AJa2G8ih%CxSLPACEwkuFui4CqSH5wM}0v4tcO#unPiub0qOH_D^J8$ke z_uMn@&dimW_RE1U6vfMsb@yImu@?O%aEC=U{K_5*H#nQ4z{6EX8!WVf{V>f3;5|-7 z5`4z(QRObqFa@EzH>3q*D{zlX6PChj$n&UjmVcee$nY(HU6iq((BV5_FPrfID;^ds z4^1qI$IuP9CO!@K#gAjb@Y>|+TR(U-93h}1>$zOHTC(#-^SY$5XbgUm*N_HP?_HFJ zH+}cn0tc`8pZA?T$udlXYw$J)7yYA3&l<7GLCe1r$fRwGzvH~hrV@UVB? z^Epxf%I!eAAZ87%TDD5}4_6@oYADXvLmR>FGf1I&h|JRmXG4?6MhMLb*tnLpY#rNK zL$}S&Rg%mghd1AeJM`>2WN=KzgGb=cP{SW` zqy=2AR!%MBLHN2adWbKYdWi_;PoH*J!^W==wj&lzBTv#?QLm9Um5Nz1aEc=GtGcZp z#(L3oL<5&AB|Xyz(eSY1+7(=FJPUK-Rl({b1@KEaFO89n)kJV2lF&3c_<$jyhs>b{nGpsgNQIl383UaGC{x*Z$6 z6&r2E#@n&+l{whoKLzjiZz`J+_2Tr0)2+x*J2KR~up^1kJbXH!zCc~!BP1MmPmLJP z@K_@E4xcMmmK`59Ez{2CONLp)gCw63aER6{!0!X+(D7zr=cIr};BsQnOB6(_dW!Jj z`^0&152Y=_ycR<#Ftx!``rXS{(G#~t+SO8DskDw{Gw))jYABAaDP#+?=8A|(bA zBeL~1BDY*J5b^=LYZh;n4HG0{TY3MoB|^1kc_BMreu4%mbwbgwM4jc|4evJ57FadC zMSXCXY#=uzl)rS|Q`;UGc)p__r$ zl$@Sc4(X$8@|0$r&B^IgN*{dc8lWOr!T;w?-vv9DrJOr<_8fMTBLDwEG!bw0(dou+)Bqdf=MX zYb`YsvPw$iBCIuukPkdYzlW=VnRwC3vNo+m=Gd&775S|ZA##(q{?gb8l@_P z@S)^!Hna^T@QoxH=8W)}tsO?B0PB#XqZO7Uk=k(NHkB;0sbOg|d@e<(1%8$moIe!p zTlB0bE6nVPzJf=tTZts<(OpH==QJ?4{O?U|^X?SIhWYo_7XODT^%PbpcZsr(hut-! z>=fqgx&#-TJilaTuk{Kq))esuw-n)+BKC&59ZV5lm&{L)205I&SHT$(0p1S2+N1HQ ze6BECD5i78L^dy{Gn%l&Q%EHrhc6X#ykDDq0&0u9Cs)um>3H7o-&TGgn;TQE(H8s6f1%a;Uq!<;dKN7!ObAz z(A!^gL#0!MuR4=-5qd)>So^;dy5EePg-HYa7*dY@VIp?noOnL4-lHBIss@i%xZx@{ zta8KGIsTkj751zjQG1809S>EwXqAhqT=Y68sIKi-7~gwic$20o1lJ!``=Zs(hbvsH z%EeSJw#n16{=C{BtA<7@+-Q{>Rk=}}XL}wDhD+?5{3N{ZP#f-J5J6W%*FfLk8uMKy zivD+N#W=Q;4i0GTO2rNspP?J%_E^ipgh-oaP9)@fDv{7k^XxE_1!X_;YsMV=UoR?? z@I_aEo`Ijbjt$M_k7s6&>ym3GcF61(QIJjnG$2|EVDK*|IN^t(2ev# zq4AR<>cQx0J}!C*Bzo~BC@LaO5BU(3`BKo6z4ZT`m0(nV&iU+jez))cwsW1Y583bA zY&nFFw~rkszpP2ySJPBFa5yMxQp_25b-E}qk!GUFmxyM)XCh>VzuD2LXr^|e=A<7@ zXQH%}Qag;9MyNd_n(vsY6=uY%vph7wbTuQ+(Zj9t@0$Lg_(J4he$n8 zpMPtk6_b338V=nO3n!AgaH5ALlB$LIF;IyJAc{bCj`C(#oV#fQymD4=FGP1WqL^bL zk3$uh2OWRbVq8GTMid%ckzfUy+Ynxa%AuRX-Q8@fVa3Bky3o_BMTgjCcw-(8bWCeKjBl&qPPZm#ibXcmb1RhTPL;DJBlm`K`wM+nhG@RT-{<^J}h zNC?lw7%6gU*`CVk)t0$)mvg7G%1aK1I3n{2X65s^ytG1I=JFnkmeWokuhc7|q2|Kp zvWxJn{3(2>nA~S$r??`sBiaxtgahG3;90PK1VfA?@bAF#5iSnZ&J>ufJWGe5K5&|E z>)pT(3A-fTQ1}#JaAVy9%&959$$P7UH3e8_+#^HSkTbg8= (datetime.now() - timedelta(days=180))] - least_inflow = last_6_months['amount'].min() - avg_salary = group['amount'].mean() - - # Calculate days since last transaction + if last_6_months.empty: + least_inflow = 0 + else: + least_inflow = last_6_months['amount'].min() + + # Handle average salary calculation + if group['amount'].notna().any(): + avg_salary = group['amount'].mean() + else: + avg_salary = 0 + + # Calculate days_since_last_trx with NaN handling group['days_since_last_trx'] = group['trx_start_date'].diff().dt.days median_interval = group['days_since_last_trx'].median() + if pd.isna(median_interval): + median_interval = 30 # Default to 30 days if no interval data last_date = group['trx_start_date'].max() next_date = last_date + timedelta(days=median_interval) next_amount = avg_salary - # Boolean flags + # Boolean flags with NaN handling days_since_last = (datetime.now() - last_date).days has_45d = days_since_last <= 45 has_2m = len(group[group['trx_start_date'] >= (datetime.now() - timedelta(days=60))]) >= 2 @@ -78,7 +100,9 @@ class SalaryEarnerAnalyzer: }) final_df = pd.DataFrame(results) - final_df = final_df.dropna() + # Drop rows where all numeric columns are NaN + numeric_cols = ['num_months', 'least_inflow_6m', 'avg_monthly_salary', 'estimated_next_amount'] + final_df = final_df.dropna(subset=numeric_cols, how='all') return final_df def analyze_salary_earners(self, final_df):