Files
AnalysisTesting/salary_analytics/api.py
T

212 lines
7.8 KiB
Python

"""
FastAPI application for salary analytics.
"""
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, Dict
import os
import socket
import logging
from .main import SalaryAnalyticsPipeline
from .config import OUTPUT_PATHS
# Set up root logging at import time: INFO level, with the timestamp, logger
# name and severity placed in front of every message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# The FastAPI application object that every route in this module is
# registered on, created with its API metadata.
app = FastAPI(
    version="1.0.0",
    title="Salary Analytics API",
    description="API for analyzing and predicting salary patterns from transaction data",
)
# Add CORS middleware.
# Every origin is allowed to call the API, so credentialed requests (cookies,
# HTTP authentication) are switched off: the FastAPI/Starlette documentation
# states that wildcard origins must not be combined with
# allow_credentials=True, and browsers refuse a wildcard
# Access-Control-Allow-Origin on credentialed requests anyway.
# To support credentials, replace the wildcard with an explicit list of
# trusted origins and set allow_credentials back to True.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=False,  # Must stay False while origins is a wildcard
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)
# Global pipeline instance shared by the endpoint handlers in this module.
# It stays None until startup_event() replaces it with the
# SalaryAnalyticsPipeline it creates.
pipeline: Optional[SalaryAnalyticsPipeline] = None
class AnalysisResponse(BaseModel):
    """Response body shared by the analysis, report, training and pipeline endpoints.

    Only ``message`` is required; ``data`` and ``file_path`` default to
    ``None`` when an endpoint does not supply them.
    """

    # Human-readable outcome of the request.
    message: str
    # Optional summary payload; the endpoints visible here store counts in it.
    data: Optional[Dict] = None
    # Optional path to a file; not set by the endpoints visible here.
    file_path: Optional[str] = None
@app.on_event("startup")
async def startup_event():
    """Initialize the pipeline on startup.

    Creates the module-level ``pipeline``, loads its data and then logs the
    addresses the server is expected to be reachable on.

    Raises:
        RuntimeError: If ``pipeline.load_data()`` returns a falsy value.
        Exception: Any other error raised while creating the pipeline or
            loading its data is logged (with traceback) and re-raised.
    """
    global pipeline
    try:
        logger.info("Initializing pipeline...")
        pipeline = SalaryAnalyticsPipeline()
        if not pipeline.load_data():
            logger.error("Failed to load data during startup")
            raise RuntimeError("Failed to load data during startup")
    except Exception as e:
        # logger.exception records the traceback next to the message.
        logger.exception(f"Error during startup: {str(e)}")
        raise

    # Print network information. This is informational only, so a hostname
    # that cannot be resolved (socket.gaierror is an OSError) is reported as
    # a warning instead of aborting application startup.
    try:
        hostname = socket.gethostname()
        ip_address = socket.gethostbyname(hostname)
    except OSError as e:
        logger.warning(f"Could not determine server network information: {str(e)}")
    else:
        logger.info(f"Server running on hostname: {hostname}")
        logger.info(f"Server IP address: {ip_address}")
        # NOTE(review): port 8000 is hard-coded in these messages; the real
        # port depends on how the server is launched - confirm.
        logger.info("Server is accessible at:")
        logger.info("- http://localhost:8000")
        logger.info("- http://127.0.0.1:8000")
        logger.info(f"- http://{ip_address}:8000")
    logger.info("Pipeline initialized successfully")
@app.get("/")
async def root():
    """Log the access and return the API's welcome message."""
    logger.info("Root endpoint accessed")
    welcome = {"message": "Welcome to Salary Analytics API"}
    return welcome
@app.get("/health")
async def health_check():
    """Log the access and report a fixed ``healthy`` status."""
    logger.info("Health check endpoint accessed")
    status_body = {"status": "healthy"}
    return status_body
@app.post("/analyze/keyword", response_model=AnalysisResponse)
async def analyze_keyword():
    """Run the pipeline's keyword analysis and report how many matches it found.

    Returns:
        An ``AnalysisResponse`` whose ``data`` holds the number of matches
        under the ``"count"`` key.

    Raises:
        HTTPException: Status 500 carrying the error text when anything in
            the analysis fails.
    """
    try:
        logger.info("Starting keyword analysis...")
        data = pipeline.run_keyword_analysis()
        logger.info(f"Keyword analysis completed. Found {len(data)} matches")
        summary = {"count": len(data)}
        response = AnalysisResponse(
            data=summary,
            message="Keyword analysis completed successfully",
        )
        return response
    except Exception as e:
        logger.error(f"Error in keyword analysis: {str(e)}")
        failure = HTTPException(detail=str(e), status_code=500)
        raise failure
@app.post("/analyze/consistent-amount", response_model=AnalysisResponse)
async def analyze_consistent_amount():
    """Run the pipeline's consistent-amount analysis and report the match count.

    Returns:
        An ``AnalysisResponse`` whose ``data`` holds the number of matches
        under the ``"count"`` key.

    Raises:
        HTTPException: Status 500 carrying the error text when anything in
            the analysis fails.
    """
    try:
        logger.info("Starting consistent amount analysis...")
        data = pipeline.run_consistent_amount_analysis()
        logger.info(f"Consistent amount analysis completed. Found {len(data)} matches")
        summary = {"count": len(data)}
        response = AnalysisResponse(
            data=summary,
            message="Consistent amount analysis completed successfully",
        )
        return response
    except Exception as e:
        logger.error(f"Error in consistent amount analysis: {str(e)}")
        failure = HTTPException(detail=str(e), status_code=500)
        raise failure
@app.post("/analyze/transaction-type", response_model=AnalysisResponse)
async def analyze_transaction_type():
    """Run the pipeline's transaction-type analysis and report the match count.

    Returns:
        An ``AnalysisResponse`` whose ``data`` holds the number of matches
        under the ``"count"`` key.

    Raises:
        HTTPException: Status 500 carrying the error text when anything in
            the analysis fails.
    """
    try:
        logger.info("Starting transaction type analysis...")
        data = pipeline.run_transaction_type_analysis()
        logger.info(f"Transaction type analysis completed. Found {len(data)} matches")
        summary = {"count": len(data)}
        response = AnalysisResponse(
            data=summary,
            message="Transaction type analysis completed successfully",
        )
        return response
    except Exception as e:
        logger.error(f"Error in transaction type analysis: {str(e)}")
        failure = HTTPException(detail=str(e), status_code=500)
        raise failure
@app.post("/generate/reports", response_model=AnalysisResponse)
async def generate_reports(background_tasks: BackgroundTasks):
    """Generate the salary earner reports and return their headline counts.

    Args:
        background_tasks: Accepted as part of the endpoint signature; this
            function does not use it.

    Returns:
        An ``AnalysisResponse`` whose ``data`` holds the sizes of the
        ``final_table`` and ``likely_salary_earner`` reports plus the
        ``total_high_earners`` value taken from the pipeline's result.

    Raises:
        HTTPException: Status 500 carrying the error text when report
            generation or summarising fails.
    """
    try:
        logger.info("Starting report generation...")
        reports = pipeline.generate_salary_earner_reports()
        logger.info("Reports generated successfully")
        summary = {
            "verified_salary_earners": len(reports['final_table']),
            "likely_salary_earners": len(reports['likely_salary_earner']),
            "high_earners": reports['total_high_earners'],
        }
        response = AnalysisResponse(
            data=summary,
            message="Reports generated successfully",
        )
        return response
    except Exception as e:
        logger.error(f"Error in report generation: {str(e)}")
        failure = HTTPException(detail=str(e), status_code=500)
        raise failure
@app.post("/train/models", response_model=AnalysisResponse)
async def train_models():
    """Ask the pipeline to train the salary prediction models.

    Returns:
        An ``AnalysisResponse`` containing only a success message.

    Raises:
        HTTPException: Status 500 carrying the error text when training
            fails.
    """
    try:
        logger.info("Starting model training...")
        pipeline.train_salary_prediction_models()
        logger.info("Models trained successfully")
        response = AnalysisResponse(message="Models trained successfully")
        return response
    except Exception as e:
        logger.error(f"Error in model training: {str(e)}")
        failure = HTTPException(detail=str(e), status_code=500)
        raise failure
@app.get("/download/{report_type}")
async def download_report(report_type: str):
    """Download generated reports.

    Args:
        report_type: One of ``high_earners``, ``likely_earners``,
            ``final_table``, ``consistent_plot``, ``inconsistent_plot`` or
            ``hypothesis_plot``.

    Returns:
        A ``FileResponse`` for the requested file, sent as
        ``application/octet-stream`` under the file's base name.

    Raises:
        HTTPException: Status 404 when the report type is unknown or its
            file does not exist; status 500 for any other failure.
    """
    # Public report name -> key in OUTPUT_PATHS. The path is looked up only
    # for the requested report, so a missing key for one report cannot break
    # downloads of the others.
    report_keys = {
        "high_earners": "high_earner_details",
        "likely_earners": "likely_salary_earner",
        "final_table": "final_table",
        "consistent_plot": "consistent_earners_plot",
        "inconsistent_plot": "inconsistent_earners_plot",
        "hypothesis_plot": "hypothesis_overlap_plot",
    }
    try:
        logger.info(f"Attempting to download report: {report_type}")
        if report_type not in report_keys:
            logger.error(f"Report type not found: {report_type}")
            raise HTTPException(status_code=404, detail="Report type not found")
        file_path = OUTPUT_PATHS[report_keys[report_type]]
        # isfile (not exists): a directory at this path cannot be served.
        if not os.path.isfile(file_path):
            logger.error(f"Report file not found: {file_path}")
            raise HTTPException(status_code=404, detail="Report file not found")
        logger.info(f"Successfully found report file: {file_path}")
        return FileResponse(
            path=file_path,
            filename=os.path.basename(file_path),
            media_type="application/octet-stream",
        )
    except HTTPException:
        # The 404s raised above are deliberate responses; let them reach the
        # client unchanged instead of being rewritten into a 500 below.
        raise
    except Exception as e:
        logger.exception(f"Error downloading report: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/run/pipeline", response_model=AnalysisResponse)
async def run_full_pipeline():
    """Run the complete salary analytics pipeline.

    Returns:
        An ``AnalysisResponse`` containing only a success message.

    Raises:
        HTTPException: Status 500 with detail ``"Pipeline failed"`` when the
            pipeline reports failure, or status 500 carrying the error text
            when the pipeline raises.
    """
    # Only the pipeline call is guarded, so the "Pipeline failed" error
    # raised below is not caught, logged a second time and re-wrapped.
    try:
        logger.info("Starting full pipeline...")
        success = pipeline.run_full_pipeline()
    except Exception as e:
        logger.exception(f"Error in pipeline: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e

    if not success:
        logger.error("Pipeline failed")
        raise HTTPException(status_code=500, detail="Pipeline failed")

    logger.info("Pipeline completed successfully")
    return AnalysisResponse(message="Pipeline completed successfully")