Files
AnalysisTesting/salary_analytics/consistent_amount_analyzer.py
salakojoshua1234_gmail.com 8acfb436f3 Enhance API with data loading functionality and update README.
- Added `/load-data` endpoint to load transaction data from either a database or a CSV file.
- Updated `SalaryAnalyticsPipeline` and `DataLoader` to support loading from CSV.
- Implemented data validation and error handling for loading processes.
- Revised README to include new data loading instructions and workflow steps.
- Added checks to ensure data is loaded before running analysis endpoints.
2025-05-01 22:57:55 +01:00

64 lines
2.2 KiB
Python

"""
Consistent amount transaction analysis module.
"""
import pandas as pd
from .config import MODEL_CONFIG
class ConsistentAmountAnalyzer:
    """Flag accounts whose ``initiated_by == 'C'`` transaction amounts vary little.

    The input frame must provide the columns ``accountid``, ``initiated_by``
    and ``amount``. Variation is measured per account with the coefficient of
    variation (population standard deviation divided by the mean) of the
    ``amount`` values on rows where ``initiated_by == 'C'``.
    """

    def __init__(self, df: pd.DataFrame):
        # Source transactions; never mutated, a flagged copy is built instead.
        self.df = df
        # Flagged copy of ``df``; populated by
        # identify_consistent_amount_accounts().
        self.const_df = None

    def calculate_coefficient_of_variation(self, group: pd.DataFrame) -> float:
        """Return std/mean of ``amount`` over the group's ``'C'`` rows.

        Args:
            group: Transactions (typically those of a single account).

        Returns:
            The coefficient of variation, using the population standard
            deviation (``ddof=0``). NaN when the mean is zero or when the
            group has no ``'C'`` rows.
        """
        amounts = group.loc[group['initiated_by'] == 'C', 'amount']
        mean = amounts.mean()
        if mean == 0:
            # Division by zero would yield +/-inf; report "undefined" instead.
            return float('nan')
        return amounts.std(ddof=0) / mean

    def _is_group_consistent(self, group: pd.DataFrame, cv_threshold) -> bool:
        """Return True when the group's CV magnitude is within the threshold."""
        cv = self.calculate_coefficient_of_variation(group)
        if pd.isna(cv):
            # Undefined CV (no 'C' rows or zero mean) is never "consistent".
            return False
        # Compare the magnitude: a negative mean produces a negative CV, which
        # would otherwise pass any non-negative threshold regardless of spread.
        return bool(abs(cv) <= cv_threshold)

    def flag_consistent_amounts(self, group: pd.DataFrame, cv_threshold=None) -> pd.Series:
        """Flag every row of ``group`` with the group's consistency verdict.

        Args:
            group: Transactions of one account.
            cv_threshold: Maximum CV magnitude regarded as consistent.
                Defaults to ``MODEL_CONFIG['cv_threshold']``.

        Returns:
            A boolean Series named ``is_consistent_amount`` that shares
            ``group``'s index and holds the same value on every row.
        """
        if cv_threshold is None:
            cv_threshold = MODEL_CONFIG['cv_threshold']
        verdict = self._is_group_consistent(group, cv_threshold)
        return pd.Series(
            verdict,
            index=group.index,
            name='is_consistent_amount',
            dtype=bool,
        )

    def identify_consistent_amount_accounts(self, cv_threshold=None) -> pd.DataFrame:
        """Build a copy of the data with an ``is_consistent_amount`` column.

        Args:
            cv_threshold: Maximum CV magnitude regarded as consistent.
                Defaults to ``MODEL_CONFIG['cv_threshold']``.

        Returns:
            The flagged copy, also stored on ``self.const_df``. Rows whose
            ``accountid`` is missing are flagged False.
        """
        if cv_threshold is None:
            cv_threshold = MODEL_CONFIG['cv_threshold']

        flagged = self.df.copy()

        # Decide once per account, then broadcast by account id. This avoids
        # groupby.apply with Series results, whose output shape changes when
        # there is only one group, and it does not depend on the row index
        # being unique.
        consistent_accounts = [
            account_id
            for account_id, account_rows in flagged.groupby('accountid')
            if self._is_group_consistent(account_rows, cv_threshold)
        ]
        flagged['is_consistent_amount'] = flagged['accountid'].isin(
            consistent_accounts
        )

        # Publish only after the column exists so a failure above cannot
        # leave a half-built frame behind.
        self.const_df = flagged
        return self.const_df

    def get_consistent_amount_data(self) -> pd.DataFrame:
        """Return the ``'C'`` rows that belong to consistent-amount accounts.

        Runs identify_consistent_amount_accounts() with the default threshold
        first if no flagged frame has been built yet.
        """
        if self.const_df is None:
            self.identify_consistent_amount_accounts()
        flagged = self.const_df
        keep = flagged['is_consistent_amount'] & (flagged['initiated_by'] == 'C')
        return flagged[keep]