Enhance salary analytics API with database operations and performance logging

- Introduced `DatabaseOperations` class for managing batch results in the database.
- Added functionality to create a batch results table and save batch processing results.
- Updated API endpoints to log execution time and handle batch processing errors more effectively.
- Improved response handling in analysis endpoints and added batch metadata to results.
- Suppressed warnings and improved logging throughout the application.
This commit is contained in:
2025-05-10 16:56:23 +01:00
parent 305e5da4ec
commit 1a4e539626
4 changed files with 298 additions and 23 deletions
+11 -3
View File
@@ -6,8 +6,16 @@ import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_venn import venn3
from datetime import datetime, timedelta
import logging
from .config import MODEL_CONFIG, OUTPUT_PATHS
# Root logging setup: INFO threshold; each record is rendered as
# "timestamp - logger name - level - message".
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Logger for this module, named after the module itself.
logger = logging.getLogger(__name__)
class SalaryEarnerAnalyzer:
def __init__(self, df):
self.df = df
@@ -124,7 +132,7 @@ class SalaryEarnerAnalyzer:
# Generate final table
self.final_table = self.generate_salary_earners_table(all_three_hypotheses)
print(f"Found {self.final_table['accountid'].nunique()} verified salary earners")
logger.info(f"Found {self.final_table['accountid'].nunique()} verified salary earners")
# Generate likely salary earner table
green_section = self.filter_venn_section(
@@ -142,11 +150,11 @@ class SalaryEarnerAnalyzer:
self.likely_salary_earner = pd.concat([yellow_section, green_section])
self.likely_salary_earner = self.likely_salary_earner.drop_duplicates(subset=['id'])
self.likely_salary_earner = self.generate_salary_earners_table(self.likely_salary_earner)
print(f"Found {self.likely_salary_earner['accountid'].nunique()} likely salary earners")
logger.info(f"Found {self.likely_salary_earner['accountid'].nunique()} likely salary earners")
# Analyze high earners
self.high_earner_details, total_high_earners = self.analyze_salary_earners(self.final_table)
print(f"\nTotal High Earners: {total_high_earners}")
logger.info(f"\nTotal High Earners: {total_high_earners}")
# Plot hypothesis overlap
self.plot_hypothesis_overlap(