Enhance salary analytics API with database operations and performance logging

- Introduced `DatabaseOperations` class for managing batch results in the database.
- Added functionality to create a batch results table and save batch processing results.
- Updated API endpoints to log execution time and handle batch processing errors more effectively.
- Improved response handling in analysis endpoints and added batch metadata to results.
- Suppressed warnings and improved logging throughout the application.
2025-05-10 16:56:23 +01:00
parent 305e5da4ec
commit 1a4e539626
4 changed files with 298 additions and 23 deletions
@@ -6,8 +6,16 @@ import pandas as pd
 import matplotlib.pyplot as plt
 from matplotlib_venn import venn3
 from datetime import datetime, timedelta
+import logging
 from .config import MODEL_CONFIG, OUTPUT_PATHS

+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
 class SalaryEarnerAnalyzer:
    def __init__(self, df):
        self.df = df
@@ -124,7 +132,7 @@ class SalaryEarnerAnalyzer:

        # Generate final table
        self.final_table = self.generate_salary_earners_table(all_three_hypotheses)
-        print(f"Found {self.final_table['accountid'].nunique()} verified salary earners")
+        logger.info(f"Found {self.final_table['accountid'].nunique()} verified salary earners")

        # Generate likely salary earner table
        green_section = self.filter_venn_section(
@@ -142,11 +150,11 @@ class SalaryEarnerAnalyzer:
        self.likely_salary_earner = pd.concat([yellow_section, green_section])
        self.likely_salary_earner = self.likely_salary_earner.drop_duplicates(subset=['id'])
        self.likely_salary_earner = self.generate_salary_earners_table(self.likely_salary_earner)
-        print(f"Found {self.likely_salary_earner['accountid'].nunique()} likely salary earners")
+        logger.info(f"Found {self.likely_salary_earner['accountid'].nunique()} likely salary earners")

        # Analyze high earners
        self.high_earner_details, total_high_earners = self.analyze_salary_earners(self.final_table)
-        print(f"\nTotal High Earners: {total_high_earners}")
+        logger.info(f"\nTotal High Earners: {total_high_earners}")

        # Plot hypothesis overlap
        self.plot_hypothesis_overlap(