""" Keyword-based salary transaction analysis module. """ import re import pandas as pd from .config import SALARY_KEYWORDS class KeywordAnalyzer: def __init__(self, df): self.df = df self.desc_df = None def identify_salary_transactions(self): """ Identifies potential salary-related transactions based on keywords and month-year patterns in the 'description' column. """ month_year_patterns = [ r"\b(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s?\d{2,4}\b", r"\b(?:JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER)\s?\d{2,4}\b" ] escaped_keywords = [re.escape(keyword.lower()) for keyword in SALARY_KEYWORDS] combined_pattern = ( r'\b(?:' + '|'.join(escaped_keywords) + r')\b|' + '|'.join(month_year_patterns) ) self.df['is_salary_related'] = self.df['description'].str.lower().str.contains( combined_pattern, na=False, regex=True ) self.desc_df = self.df.copy() return self.df def get_salary_related_data(self): """Get transactions identified as salary-related.""" if self.desc_df is None: self.identify_salary_transactions() return self.desc_df[ (self.desc_df['is_salary_related'] == True) & (self.desc_df['initiated_by'] == 'C') ]