Files
CHIEFSOFT\ameye e869785624 first commit
2025-05-17 03:52:41 -04:00

47 lines
1.4 KiB
Python

"""
Keyword-based salary transaction analysis module.
"""
import re
import pandas as pd
from .config import SALARY_KEYWORDS
class KeywordAnalyzer:
    """Flag potentially salary-related transactions from their descriptions.

    A row is flagged when its ``description`` contains one of the salary
    keywords as a whole word, or a month name/abbreviation followed by an
    optional whitespace character and 2-4 digits (e.g. ``"MAY 2025"`` or
    ``"jan24"``). Matching is case-insensitive.

    Args:
        df: DataFrame holding a ``description`` column; ``initiated_by`` is
            additionally required by :meth:`get_salary_related_data`.
        keywords: Optional iterable of keyword strings. When ``None`` (the
            default) the module-level ``SALARY_KEYWORDS`` is used.
    """

    # Month tokens are written in upper case; the match itself is performed
    # case-insensitively so they also hit lower/mixed-case descriptions.
    _MONTH_YEAR_PATTERNS = (
        r"\b(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s?\d{2,4}\b",
        r"\b(?:JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER)\s?\d{2,4}\b",
    )

    def __init__(self, df, keywords=None):
        self.df = df
        self.keywords = keywords
        # Snapshot of ``df`` taken after flagging; None until the analysis ran.
        self.desc_df = None

    def _build_pattern(self):
        """Return the combined keyword / month-year regular expression."""
        # Resolve the default lazily so the configured list is only needed
        # when the caller did not supply explicit keywords.
        keywords = SALARY_KEYWORDS if self.keywords is None else self.keywords

        # Empty keywords are skipped: an empty alternative would match at
        # every word boundary and flag almost every row.
        escaped_keywords = [
            re.escape(keyword.lower()) for keyword in keywords if keyword
        ]

        alternatives = []
        if escaped_keywords:
            alternatives.append(r'\b(?:' + '|'.join(escaped_keywords) + r')\b')
        alternatives.extend(self._MONTH_YEAR_PATTERNS)
        return '|'.join(alternatives)

    def identify_salary_transactions(self):
        """
        Identifies potential salary-related transactions based on keywords
        and month-year patterns in the 'description' column.

        Adds a boolean ``is_salary_related`` column to ``self.df`` in place
        (missing descriptions count as ``False``) and stores a copy of the
        result in ``self.desc_df``.

        Returns:
            ``self.df``, the same DataFrame object, with the new column.
        """
        # case=False is required: the month patterns are upper case, so a
        # case-sensitive match against lower-cased text could never succeed.
        self.df['is_salary_related'] = self.df['description'].str.contains(
            self._build_pattern(),
            case=False,
            na=False,
            regex=True,
        )
        self.desc_df = self.df.copy()
        return self.df

    def get_salary_related_data(self):
        """Get transactions identified as salary-related.

        Runs :meth:`identify_salary_transactions` first if it has not been
        run yet.

        Returns:
            The rows of ``self.desc_df`` that are flagged as salary-related
            and whose ``initiated_by`` value equals ``'C'``.
        """
        if self.desc_df is None:
            self.identify_salary_transactions()

        flagged = self.desc_df['is_salary_related']
        # NOTE(review): 'C' presumably marks one specific initiator type;
        # confirm its meaning against the data source.
        initiated_by_c = self.desc_df['initiated_by'] == 'C'
        return self.desc_df[flagged & initiated_by_c]