Added new salary-related terms and improved image outputs in salary.ipynb
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
"""
|
||||
Consistent amount transaction analysis module.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from .config import MODEL_CONFIG
|
||||
|
||||
class ConsistentAmountAnalyzer:
    """Flag accounts whose ``initiated_by == 'C'`` amounts have low variance.

    The analyzer reads three columns from the supplied frame: ``accountid``
    (grouping key), ``initiated_by`` (only rows equal to ``'C'`` contribute
    to the statistics) and ``amount``.

    Attributes:
        df: The transaction frame. After
            :meth:`identify_consistent_amount_accounts` runs it is replaced
            by a copy carrying an extra boolean ``is_consistent_amount``
            column.
        const_df: Snapshot of the flagged frame, or ``None`` until the
            identification step has run.
    """

    def __init__(self, df: pd.DataFrame) -> None:
        self.df = df
        self.const_df = None

    def calculate_coefficient_of_variation(self, group: pd.DataFrame) -> float:
        """Return std / mean of the ``'C'``-initiated amounts in *group*.

        The standard deviation is the population one (``ddof=0``), so a
        single transaction yields 0 rather than NaN.

        Returns:
            The coefficient of variation, or NaN when the mean is zero or
            when *group* has no ``'C'``-initiated rows.
        """
        amounts = group.loc[group['initiated_by'] == 'C', 'amount']
        if amounts.empty:
            return float('nan')

        mean = amounts.mean()
        if pd.isna(mean) or mean == 0:
            return float('nan')

        # NOTE(review): a negative mean yields a negative CV, which always
        # passes a non-negative threshold. Confirm amounts are non-negative
        # or switch to abs(mean).
        return amounts.std(ddof=0) / mean

    def _is_consistent(self, group: pd.DataFrame, cv_threshold: float) -> bool:
        """Return True when the group's CV is defined and <= *cv_threshold*."""
        cv = self.calculate_coefficient_of_variation(group)
        if pd.isna(cv):
            return False
        return bool(cv <= cv_threshold)

    def flag_consistent_amounts(self, group, cv_threshold=None):
        """Flag every row of one account's *group* as consistent or not.

        Args:
            group: Transactions of a single account.
            cv_threshold: Maximum coefficient of variation still counted as
                consistent. Defaults to ``MODEL_CONFIG['cv_threshold']``.

        Returns:
            A boolean Series named ``is_consistent_amount`` aligned with
            ``group.index``; every element carries the same value.
        """
        if cv_threshold is None:
            cv_threshold = MODEL_CONFIG['cv_threshold']

        # calculate_coefficient_of_variation already restricts itself to the
        # 'C'-initiated rows, so no pre-filtering is needed here.
        flag = self._is_consistent(group, cv_threshold)
        return pd.Series(
            flag,
            index=group.index,
            name='is_consistent_amount',
            dtype=bool,
        )

    def identify_consistent_amount_accounts(self, cv_threshold=None):
        """Add an ``is_consistent_amount`` column to the transaction frame.

        One flag is computed per ``accountid`` and broadcast to all rows of
        that account. Rows whose ``accountid`` is missing are flagged False.

        Args:
            cv_threshold: Maximum coefficient of variation still counted as
                consistent. Defaults to ``MODEL_CONFIG['cv_threshold']``.

        Returns:
            The flagged DataFrame (also stored on ``self.df``; a copy is
            stored on ``self.const_df``). The frame originally passed to the
            constructor is not modified.
        """
        if cv_threshold is None:
            cv_threshold = MODEL_CONFIG['cv_threshold']

        # Compute one scalar per account instead of returning a Series from
        # groupby.apply: the shape of that result depends on how many groups
        # there are, and it would replace the frame rather than extend it.
        flag_by_account = {
            account_id: self._is_consistent(group, cv_threshold)
            for account_id, group in self.df.groupby('accountid')
        }

        flagged = self.df.copy()
        flagged['is_consistent_amount'] = (
            flagged['accountid']
            .map(lambda account_id: flag_by_account.get(account_id, False))
            .astype(bool)
        )

        self.df = flagged
        self.const_df = flagged.copy()
        return self.df

    def get_consistent_amount_data(self):
        """Return the ``'C'``-initiated rows of accounts flagged consistent.

        Runs :meth:`identify_consistent_amount_accounts` with the default
        threshold first if it has not been run yet.
        """
        if self.const_df is None:
            self.identify_consistent_amount_accounts()

        flagged = self.const_df
        keep = flagged['is_consistent_amount'] & (flagged['initiated_by'] == 'C')
        return flagged[keep]
|
||||
Reference in New Issue
Block a user