diff --git a/.gitignore b/.gitignore index 547f431..18e8883 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,7 @@ output/models/inconsistent_scaler.joblib .env __pycache__ __pycache__/* +output/* +.idea/* + diff --git a/salary_analytics/api.py b/salary_analytics/api.py index ed4e7e5..2407d31 100644 --- a/salary_analytics/api.py +++ b/salary_analytics/api.py @@ -298,8 +298,11 @@ async def get_file_if_csv(source: str, file: Optional[UploadFile] = File(None)): async def run_streaming_pipeline( source: str = "db", batch_size: int = 10000, - file: Optional[UploadFile] = Depends(get_file_if_csv) -): + UploadFile: str=''): + + file = None + if len(UploadFile) > 0 : + file : Optional[UploadFile] = Depends(get_file_if_csv) """ Run the complete salary analytics pipeline in batches. @@ -326,7 +329,9 @@ async def run_streaming_pipeline( responses = [] batch_number = 0 - + + print("Sal************************", source) + def preprocess_chunk(chunk): """Preprocess a chunk of data with the same logic as DataLoader.""" # Convert dates