Added new salary-related terms and improved image outputs in salary.ipynb
This commit is contained in:
@@ -0,0 +1,212 @@
|
||||
"""
|
||||
FastAPI application for salary analytics.
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Dict
|
||||
import os
|
||||
import socket
|
||||
import logging
|
||||
|
||||
from .main import SalaryAnalyticsPipeline
|
||||
from .config import OUTPUT_PATHS
|
||||
|
||||
# Logging setup: INFO-and-above records, each prefixed with a timestamp,
# the emitting logger's name and the severity level.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-scoped logger used by every endpoint in this file.
logger = logging.getLogger(__name__)
|
||||
|
||||
# The ASGI application object; every route below is registered on it.
# The metadata passed here is what FastAPI exposes in the generated API docs.
app = FastAPI(
    version="1.0.0",
    title="Salary Analytics API",
    description="API for analyzing and predicting salary patterns from transaction data",
)
|
||||
|
||||
# Cross-origin access is fully open: any origin, any HTTP method, any header.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# discouraged (the FastAPI CORS docs ask for explicit origins when
# credentials are enabled) — confirm whether credentials are really needed
# and restrict the origin list before exposing this service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],   # every origin
    allow_methods=["*"],   # every HTTP method
    allow_headers=["*"],   # every request header
)
|
||||
|
||||
# Global pipeline instance
|
||||
# Assigned by startup_event(); remains None until that startup hook has run.
pipeline: "Optional[SalaryAnalyticsPipeline]" = None
|
||||
|
||||
class AnalysisResponse(BaseModel):
    """Response body shared by the analysis, report, training and pipeline endpoints.

    Only ``message`` is required; the other two fields default to ``None``.
    """

    # Human-readable outcome of the request.
    message: str
    # Optional summary payload (the endpoints in this file put counts here).
    data: Optional[Dict] = None
    # Optional path to a generated file; not set by any endpoint in this file.
    file_path: Optional[str] = None
|
||||
|
||||
def _log_network_info() -> None:
    """Log the hostname, its resolved IP and the URLs the server is expected at.

    This output is purely informational, so a hostname that cannot be
    resolved (common inside containers) is reported as a warning instead of
    propagating and aborting application startup.
    """
    hostname = socket.gethostname()
    try:
        ip_address = socket.gethostbyname(hostname)
    except OSError as exc:  # socket.gaierror / socket.herror subclass OSError
        logger.warning(
            f"Could not resolve IP address for hostname {hostname}: {exc}"
        )
        ip_address = None

    logger.info(f"Server running on hostname: {hostname}")
    if ip_address is not None:
        logger.info(f"Server IP address: {ip_address}")
    # NOTE(review): port 8000 is hard-coded in these messages; the actual
    # port is whatever the ASGI server was started with — confirm.
    logger.info("Server is accessible at:")
    logger.info("- http://localhost:8000")
    logger.info("- http://127.0.0.1:8000")
    if ip_address is not None:
        logger.info(f"- http://{ip_address}:8000")


@app.on_event("startup")
async def startup_event():
    """Create the global pipeline and load its data when the app starts.

    Raises:
        RuntimeError: if ``pipeline.load_data()`` returns a falsy value.
        Exception: any error raised while constructing the pipeline or
            loading data is logged and re-raised, which stops startup.
    """
    global pipeline
    try:
        logger.info("Initializing pipeline...")
        pipeline = SalaryAnalyticsPipeline()
        if not pipeline.load_data():
            logger.error("Failed to load data during startup")
            raise RuntimeError("Failed to load data during startup")

        # Informational only; never allowed to fail startup.
        _log_network_info()
        logger.info("Pipeline initialized successfully")
    except Exception as e:
        logger.error(f"Error during startup: {str(e)}")
        raise
|
||||
|
||||
@app.get("/")
async def root():
    """Log the access and return a static welcome payload."""
    logger.info("Root endpoint accessed")
    welcome = {"message": "Welcome to Salary Analytics API"}
    return welcome
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Log the access and report a fixed ``healthy`` status.

    The status is constant; no pipeline state is inspected here.
    """
    logger.info("Health check endpoint accessed")
    status = {"status": "healthy"}
    return status
|
||||
|
||||
@app.post("/analyze/keyword", response_model=AnalysisResponse)
async def analyze_keyword():
    """Run the pipeline's keyword analysis and report how many matches it found.

    Returns:
        AnalysisResponse whose ``data`` is ``{"count": <number of matches>}``.

    Raises:
        HTTPException: status 500 carrying the error text if anything fails.
    """
    try:
        logger.info("Starting keyword analysis...")
        data = pipeline.run_keyword_analysis()
        logger.info(f"Keyword analysis completed. Found {len(data)} matches")
        payload = {"count": len(data)}
        response = AnalysisResponse(
            data=payload,
            message="Keyword analysis completed successfully",
        )
        return response
    except Exception as e:
        # Any failure is logged and surfaced to the client as a 500.
        logger.error(f"Error in keyword analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/analyze/consistent-amount", response_model=AnalysisResponse)
async def analyze_consistent_amount():
    """Run the pipeline's consistent-amount analysis and report the match count.

    Returns:
        AnalysisResponse whose ``data`` is ``{"count": <number of matches>}``.

    Raises:
        HTTPException: status 500 carrying the error text if anything fails.
    """
    try:
        logger.info("Starting consistent amount analysis...")
        data = pipeline.run_consistent_amount_analysis()
        logger.info(f"Consistent amount analysis completed. Found {len(data)} matches")
        payload = {"count": len(data)}
        response = AnalysisResponse(
            data=payload,
            message="Consistent amount analysis completed successfully",
        )
        return response
    except Exception as e:
        # Any failure is logged and surfaced to the client as a 500.
        logger.error(f"Error in consistent amount analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/analyze/transaction-type", response_model=AnalysisResponse)
async def analyze_transaction_type():
    """Run the pipeline's transaction-type analysis and report the match count.

    Returns:
        AnalysisResponse whose ``data`` is ``{"count": <number of matches>}``.

    Raises:
        HTTPException: status 500 carrying the error text if anything fails.
    """
    try:
        logger.info("Starting transaction type analysis...")
        data = pipeline.run_transaction_type_analysis()
        logger.info(f"Transaction type analysis completed. Found {len(data)} matches")
        payload = {"count": len(data)}
        response = AnalysisResponse(
            data=payload,
            message="Transaction type analysis completed successfully",
        )
        return response
    except Exception as e:
        # Any failure is logged and surfaced to the client as a 500.
        logger.error(f"Error in transaction type analysis: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/generate/reports", response_model=AnalysisResponse)
async def generate_reports(background_tasks: BackgroundTasks):
    """Generate the salary earner reports and return summary counts.

    Args:
        background_tasks: injected by FastAPI; not used by this handler.

    Returns:
        AnalysisResponse whose ``data`` holds the sizes of the
        ``final_table`` and ``likely_salary_earner`` reports plus the
        ``total_high_earners`` value returned by the pipeline.

    Raises:
        HTTPException: status 500 carrying the error text if anything fails.
    """
    try:
        logger.info("Starting report generation...")
        reports = pipeline.generate_salary_earner_reports()
        logger.info("Reports generated successfully")

        summary = {
            "verified_salary_earners": len(reports['final_table']),
            "likely_salary_earners": len(reports['likely_salary_earner']),
            "high_earners": reports['total_high_earners'],
        }
        response = AnalysisResponse(
            data=summary,
            message="Reports generated successfully",
        )
        return response
    except Exception as e:
        # Any failure (including a missing report key) becomes a 500.
        logger.error(f"Error in report generation: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/train/models", response_model=AnalysisResponse)
async def train_models():
    """Train the salary prediction models through the global pipeline.

    Returns:
        AnalysisResponse containing only a success message.

    Raises:
        HTTPException: status 500 carrying the error text if training fails.
    """
    try:
        logger.info("Starting model training...")
        pipeline.train_salary_prediction_models()
        logger.info("Models trained successfully")
        response = AnalysisResponse(message="Models trained successfully")
        return response
    except Exception as e:
        # Any failure is logged and surfaced to the client as a 500.
        logger.error(f"Error in model training: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/download/{report_type}")
async def download_report(report_type: str):
    """Send a previously generated report or plot file to the client.

    Args:
        report_type: one of ``high_earners``, ``likely_earners``,
            ``final_table``, ``consistent_plot``, ``inconsistent_plot`` or
            ``hypothesis_plot``.

    Returns:
        FileResponse streaming the file as ``application/octet-stream``,
        named after the file's base name.

    Raises:
        HTTPException: 404 when the report type is unknown or its file has
            not been generated; 500 for any unexpected error.
    """
    # Public report name -> key in OUTPUT_PATHS. Only the requested key is
    # looked up, so one missing config entry cannot break other downloads.
    output_keys = {
        "high_earners": "high_earner_details",
        "likely_earners": "likely_salary_earner",
        "final_table": "final_table",
        "consistent_plot": "consistent_earners_plot",
        "inconsistent_plot": "inconsistent_earners_plot",
        "hypothesis_plot": "hypothesis_overlap_plot",
    }

    try:
        logger.info(f"Attempting to download report: {report_type}")

        if report_type not in output_keys:
            logger.error(f"Report type not found: {report_type}")
            raise HTTPException(status_code=404, detail="Report type not found")

        file_path = OUTPUT_PATHS[output_keys[report_type]]
        # isfile (not exists): a directory at this path cannot be served.
        if not os.path.isfile(file_path):
            logger.error(f"Report file not found: {file_path}")
            raise HTTPException(status_code=404, detail="Report file not found")

        logger.info(f"Successfully found report file: {file_path}")
        return FileResponse(
            path=file_path,
            filename=os.path.basename(file_path),
            media_type="application/octet-stream",
        )
    except HTTPException:
        # Deliberate 404s must reach the client unchanged; without this
        # clause the generic handler below would turn them into 500s.
        raise
    except Exception as e:
        logger.error(f"Error downloading report: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/run/pipeline", response_model=AnalysisResponse)
async def run_full_pipeline():
    """Run the complete salary analytics pipeline.

    Returns:
        AnalysisResponse containing only a success message.

    Raises:
        HTTPException: status 500 with detail ``"Pipeline failed"`` when the
            pipeline reports failure, or with the error text when it raises.
    """
    try:
        logger.info("Starting full pipeline...")
        success = pipeline.run_full_pipeline()
        if not success:
            logger.error("Pipeline failed")
            raise HTTPException(status_code=500, detail="Pipeline failed")

        logger.info("Pipeline completed successfully")
        return AnalysisResponse(message="Pipeline completed successfully")
    except HTTPException:
        # Already a well-formed HTTP error; re-raise as-is so it is not
        # logged twice and its detail is not rewritten by the handler below.
        raise
    except Exception as e:
        logger.error(f"Error in pipeline: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
Reference in New Issue
Block a user