Enhance XLS upload functionality and update requirements. Add Flask, Flask-SQLAlchemy, and Alembic to the requirements; modify the database schema in upload_xls.py for improved data handling; and add SQLAlchemy configuration to config.py.

This commit is contained in:
2025-06-09 15:34:18 +01:00
parent f478a52a2f
commit c00bb71d2a
14 changed files with 427 additions and 33 deletions
+17
View File
@@ -0,0 +1,17 @@
from flask import Flask
import os
from .extensions import db, migrate
def create_app():
    """Build and return the configured Flask application.

    Loads settings from the ``salary_analytics.config`` module, binds the
    shared SQLAlchemy and Flask-Migrate extension objects to the new app,
    and registers the ``upload-xls`` command on the app's CLI.

    Returns:
        Flask: The initialised application instance.
    """
    flask_app = Flask(__name__)
    flask_app.config.from_object('salary_analytics.config')

    # Bind the module-level extension instances to this application.
    db.init_app(flask_app)
    migrate.init_app(flask_app, db)

    # Register blueprints or CLI commands here if needed.
    # The command module is imported inside the factory rather than at module
    # top (presumably to avoid an import cycle -- TODO confirm).
    from .commands import upload_xls_cli
    flask_app.cli.add_command(upload_xls_cli)

    return flask_app
+70
View File
@@ -0,0 +1,70 @@
import click
import pandas as pd
from datetime import datetime
from flask.cli import with_appcontext
from salary_analytics.app.extensions import db
from salary_analytics.app.models import RawTransaction
@click.group()
def commands():
    """Management commands for the salary analytics application."""
    # Intentionally empty: the group only serves as a container that
    # sub-commands attach to via ``@commands.command(...)``.
def _none_if_missing(value):
    """Return ``None`` for pandas missing markers (NaN/NaT/None), else *value*."""
    return None if pd.isna(value) else value


# NOTE: the docstring below doubles as the command's ``--help`` text, so
# implementation notes live in comments instead:
#   * Empty spreadsheet cells are stored as NULL rather than as NaN / "nan".
#   * On any failure the session is rolled back and a ``click.ClickException``
#     is raised, so the command exits with a non-zero status.
@commands.command('upload-xls')
@click.argument('xls_path')
@with_appcontext
def upload_xls_cli(xls_path):
    """Uploads data from an XLS file to the analytics_raw_transactions table.

    Args:
        xls_path (str): The path to the XLS file.
    """
    print(f"Attempting to upload data from {xls_path}...")
    try:
        # Read every cell as text; empty cells come back as NaN.
        df = pd.read_excel(xls_path, dtype=str)

        # Convert date columns to datetime.
        # NOTE(review): blank or unparseable dates are replaced with the
        # import time (behaviour kept from the original) even though the
        # model columns are nullable -- confirm this is intended.
        fallback_timestamp = pd.Timestamp.now()
        for col in ("ENTRY_DATE", "VALUE_DATE", "PSTD_DATE", "TRAN_DATE"):
            if col in df.columns:
                parsed = pd.to_datetime(df[col], errors='coerce')
                df[col] = parsed.fillna(fallback_timestamp)

        # Convert numeric columns, dropping thousands separators first.
        for col in ("TRAN_AMT", "BALANCE"):
            if col in df.columns:
                without_commas = df[col].str.replace(",", "", regex=False)
                df[col] = pd.to_numeric(without_commas, errors='coerce')

        # Prepare data for insertion into the database.
        records = []
        for raw_row in df.to_dict('records'):
            # Map NaN/NaT to None so missing cells are written as NULL.
            row = {key: _none_if_missing(val) for key, val in raw_row.items()}
            records.append(
                RawTransaction(
                    cust_id=row.get('CUST_ID'),
                    accountid=row.get('ACCOUNTID'),
                    tran_id=row.get('TRAN_ID'),
                    entry_date=row.get('ENTRY_DATE'),
                    value_date=row.get('VALUE_DATE'),
                    pstd_date=row.get('PSTD_DATE'),
                    tran_date=row.get('TRAN_DATE'),
                    tran_sub_ty=row.get('TRAN_SUB_TY'),
                    part_tran_ty=row.get('PART_TRAN_TY'),
                    channel=row.get('CHANNEL'),
                    tran_amt=row.get('TRAN_AMT'),
                    balance=row.get('BALANCE'),
                    isreverse=row.get('ISREVERSE'),
                    reverse=row.get('REVERSE'),
                    # Truncate to the 100-char column width; a missing value
                    # becomes '' instead of the literal text "nan".
                    tran_particular=(row.get('TRAN_PARTICULAR') or '')[:100],
                )
            )

        db.session.add_all(records)
        db.session.commit()
    except Exception as e:
        db.session.rollback()
        # Surface the failure through click so the exit status is non-zero.
        raise click.ClickException(f"Error uploading data: {str(e)}") from e

    print(f"Successfully uploaded {len(records)} records to analytics_raw_transactions")
+5
View File
@@ -0,0 +1,5 @@
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate
# Shared extension instances, created without an application; they are bound
# to a concrete app later via ``init_app`` in the application factory.
db = SQLAlchemy()
migrate = Migrate()
+24
View File
@@ -0,0 +1,24 @@
from .extensions import db
class RawTransaction(db.Model):
    """ORM model for one row of the ``analytics_raw_transactions`` table."""

    __tablename__ = 'analytics_raw_transactions'

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)

    # Identifiers.
    cust_id = db.Column(db.String(10))
    accountid = db.Column(db.String(10))
    tran_id = db.Column(db.String(12))

    # Transaction timestamps (all nullable).
    entry_date = db.Column(db.TIMESTAMP, nullable=True)
    value_date = db.Column(db.TIMESTAMP, nullable=True)
    pstd_date = db.Column(db.TIMESTAMP, nullable=True)
    tran_date = db.Column(db.TIMESTAMP, nullable=True)

    # Short code / label columns.
    tran_sub_ty = db.Column(db.String(4))
    part_tran_ty = db.Column(db.String(4))
    channel = db.Column(db.String(32))

    # Monetary values, stored with two decimal places.
    tran_amt = db.Column(db.Numeric(20, 2))
    balance = db.Column(db.Numeric(20, 2))

    isreverse = db.Column(db.String(4))
    reverse = db.Column(db.String(4))

    # Free-text description, limited to 100 characters.
    tran_particular = db.Column(db.String(100))

    def __repr__(self):
        """Return a short debug representation keyed on ``tran_id``."""
        tran_id = self.tran_id
        return f'<RawTransaction {tran_id}>'
+7
View File
@@ -30,6 +30,13 @@ DB_CONFIG = {
"host": os.getenv("DB_HOST")
}
# SQLAlchemy Configuration
from urllib.parse import quote

# Percent-encode the credentials: characters such as '@', ':', '/' or '#' in
# a user name or password would otherwise be read as URI delimiters and
# produce a wrong or unparsable connection string. ``str()`` keeps the
# previous rendering for values that are not strings (e.g. an unset variable).
_db_user = quote(str(DB_CONFIG['user']), safe='')
_db_password = quote(str(DB_CONFIG['password']), safe='')

SQLALCHEMY_DATABASE_URI = (
    f"postgresql://{_db_user}:{_db_password}@"
    f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['name']}"
)
# Turn off Flask-SQLAlchemy's modification tracking.
SQLALCHEMY_TRACK_MODIFICATIONS = False

# Table Configuration
TABLE_NAME = "customer_account_transaction_hx"
BATCH_RESULTS_TABLE = "salary_analytics_batch_results"