Files
FloatBackOfffice/CRONS/weather/update_weather_data.php
dev-chiefworks f76abffdcd first commit
2022-05-31 16:21:53 -04:00

398 lines
13 KiB
PHP

<?php
echo "[" . date("Y-m-d H:i:s") . "] update_weather_data job is starting.\n";
require '../../backend.php';
require 'GearmanForClient.php';
$db_host = $savvyext->cfgReadChar('database.host');
$db_name = $savvyext->cfgReadChar('database.name');
$db_user = $savvyext->cfgReadChar('database.user');
$db_pass = $savvyext->cfgReadChar('database.pass');
$db_port = $savvyext->cfgReadLong('database.port');
$connstr = "host=${db_host} port=${db_port} dbname=${db_name} user=${db_user} password=${db_pass}";
$con = pg_connect($connstr) or die("Could not connect to server\n");
/*
// Init
INSERT INTO global_settings (key,description,value) VALUES ('last_weather_parsedemail_item','',0);
INSERT INTO global_settings (key,description,value) VALUES ('last_weather_quote','',0);
// Reset
TRUNCATE TABLE weather;
UPDATE global_settings SET value='0' WHERE key='last_weather_parsedemail_item';
UPDATE global_settings SET value='0' WHERE key='last_weather_quote';
*/
// Box the data set (select min id from weather AND select max id ON parsedemail_item AND quotes)
$min_parsedemail_item_id = get_global_setting('last_weather_parsedemail_item', 0);
$min_quote_id = get_global_setting('last_weather_quote', 0);
$max_parsedemail_item_id = get_max_id('parsedemail_item');
$max_quote_id = get_max_id('quotes');
// 1) check if have weather data for coor and date
// if exist, don't call weather API. Just link and instert trip with existing weather data
process_existing_weather_data();
// 2) get coor and date that doesn't exist in weather table
$chunk_data = [];
$i = 0;
$rs = retrieve_coor_and_date_with_non_existing_weather_data();
if ($rs && pg_num_rows($rs) > 0) {
echo "[" . date("Y-m-d H:i:s") . "] Processing fire job to fetchApiData worker.\n";
} else {
echo "[" . date("Y-m-d H:i:s") . "] There is no job to fire to fetchApiData worker.\n";
}
while ($rs && $row = pg_fetch_array($rs)) {
$chunk_data[] = $row;
$i++;
if ($i == 300) {
$data = process_group_data_by_date($chunk_data);
foreach ($data as $item) {
GearmanForClient::getInstance()->fetchApiData($item);
}
$chunk_data = [];
$i = 0;
}
}
// process the rest of chunk_data
// Why we do this: because above chunk_data just process when $i == 300
// Some cases some last records have about < 300.
if (!empty($chunk_data)) {
$data = process_group_data_by_date($chunk_data);
foreach ($data as $item) {
GearmanForClient::getInstance()->fetchApiData($item);
}
}
update_global_setting('last_weather_parsedemail_item',$max_parsedemail_item_id);
update_global_setting('last_weather_quote',$max_quote_id);
pg_close($con);
echo "[" . date("Y-m-d H:i:s") . "] update_weather_data job complete.\n";
/*********************************** Function ***********************************/
// query to group travel_date based on root_id
function query_group_travel_date_based_on_root_id() : string
{
global $min_parsedemail_item_id, $min_quote_id, $max_parsedemail_item_id, $max_quote_id;
$date_group_query = "
(
SELECT
tpc.root_id,
tpc.root_type,
to_char(pitm.travel_date, 'YYYY-MM-DD') AS travel_date
FROM
trip_price_comparison tpc
INNER JOIN parsedemail_item pitm ON pitm.id = tpc.data_source_id
AND tpc.data_source = 1
WHERE
tpc.data_source_id > ${min_parsedemail_item_id}
AND tpc.data_source_id <= ${max_parsedemail_item_id}
AND pitm.travel_date IS NOT NULL
GROUP BY
tpc.root_id,
tpc.root_type,
to_char(pitm.travel_date, 'YYYY-MM-DD')
)
UNION
(
SELECT
tpc.root_id,
tpc.root_type,
to_char(q.travel_date, 'YYYY-MM-DD') AS travel_date
FROM
trip_price_comparison tpc
INNER JOIN quotes q ON q.id = tpc.data_source_id
AND tpc.data_source = 2
WHERE
tpc.data_source_id > ${min_quote_id}
AND tpc.data_source_id <= ${max_quote_id}
AND q.travel_date IS NOT NULL
GROUP BY
tpc.root_id,
tpc.root_type,
to_char(q.travel_date, 'YYYY-MM-DD')
)
";
// process to get lat, long , timezone for root_id
$query = " SELECT
sub.*,
a.country,
a.latitude AS location_start_lat,
a.longitude AS location_start_lng,
a.geometry AS start_geometry,
at.timezone
FROM (
SELECT
subtable.*,
CASE WHEN subtable.root_type = 1 THEN
parsedemail_item.location_start_id
WHEN subtable.root_type = 2 THEN
quotes.location_start_id
END AS location_start_id
FROM (" . $date_group_query . ") AS subtable
LEFT JOIN parsedemail_item ON (parsedemail_item.id = subtable.root_id
AND subtable.root_type = 1)
LEFT JOIN quotes ON (quotes.id = subtable.root_id
AND subtable.root_type = 2)) AS sub
JOIN address a ON a.id = sub.location_start_id
JOIN address_timezone at ON a.timezone = at.id
";
return $query;
}
function process_existing_weather_data()
{
global $con;
// check if have weather data for coor and date
// if exist, don't call weather API. Just link and instert trip with existing weather data
$query = query_group_travel_date_based_on_root_id();
$exist_query = "SELECT
*
FROM (" . $query . ") AS sub2
WHERE
EXISTS (
SELECT
1
FROM
weather
WHERE
weather.date = to_date(sub2.travel_date, 'YYYY-MM-DD')
AND weather.latitude = sub2.location_start_lat
AND weather.longitude = sub2.location_start_lng)
";
$rs = pg_query($con, $exist_query);
if (!$rs) {
echo "Cannot execute query: $exist_query\n";
}
if ($rs && pg_num_rows($rs) > 0) {
echo "[" . date("Y-m-d H:i:s") . "] Processing fire job to processExistingWeatherDataWorker worker.\n";
} else {
echo "[" . date("Y-m-d H:i:s") . "] There is no job to fire to processExistingWeatherDataWorker worker.\n";
}
while ($rs && $row = pg_fetch_array($rs)) {
GearmanForClient::getInstance()->processExistingWeatherData($row);
}
}
function retrieve_coor_and_date_with_non_existing_weather_data()
{
global $con;
$query = query_group_travel_date_based_on_root_id();
$non_exist_query = "SELECT
*
FROM (" . $query . ") AS sub2
WHERE
NOT EXISTS (
SELECT
1
FROM
weather
WHERE
weather.date = to_date(sub2.travel_date, 'YYYY-MM-DD')
AND weather.latitude = sub2.location_start_lat
AND weather.longitude = sub2.location_start_lng)
";
$rs = pg_query($con, $non_exist_query);
if (!$rs) {
echo "Cannot execute query: $non_exist_query\n";
}
return $rs;
}
function process_group_data_by_date(array $data) : array
{
/** Group array by YYYY-MM (year, month)
* Why we have group with YYYY-MM format:
* Because when use worldweatheronline API
* In API document write:
* If you wish to retrieve weather between two dates, use this parameter to specify the ending date.
* Important: the enddate parameter must have the same month and year as the date parameter.
*/
$result = array();
foreach ($data as $element) {
$root_id = $element['root_id'];
$root_type = $element['root_type'];
$travel_date = $element['travel_date'];
$month_year = substr($travel_date, 0, 7); // just get YYYY-MM
$result[$root_id][$root_type][$month_year][] = $element;
}
/** result sample
*$result = [
* '32232' => [ // root_id
* '1' => [ // root_type
* '2020-04' => [
* [
* 'root_id' => 32232,
* 'root_type' => 1,
* 'travel_date' => '2020-04-01',
* 'location_start_lat' => 34.43242342,
* 'location_start_lng' => 34.43242342,
* 'timezone' => 'Asia/Ho_Chi_Minh'
* ],
* [
* 'root_id' => 32232,
* 'root_type' => 1,
* 'travel_date' => '2020-04-05',
* 'location_start_lat' => 34.43242342,
* 'location_start_lng' => 34.43242342,
* 'timezone' => 'Asia/Ho_Chi_Minh'
* ],
* ],
* '2020-07' => [
* [
* 'root_id' => 32232,
* 'root_type' => 1,
* 'travel_date' => '2020-07-03',
* 'location_start_lat' => 34.43242342,
* 'location_start_lng' => 34.43242342,
* 'timezone' => 'Asia/Ho_Chi_Minh'
* ],
* [
* 'root_id' => 32232,
* 'root_type' => 1,
* 'travel_date' => '2020-07-04',
* 'location_start_lat' => 34.43242342,
* 'location_start_lng' => 34.43242342,
* 'timezone' => 'Asia/Ho_Chi_Minh'
* ],
* ],
* ]
* ]
*];
*/
$returnData = [];
foreach ($result as $root_id => $root_type) {
foreach ($root_type as $key => $month_year_array) {
foreach ($month_year_array as $month_year) {
$date_list = array_column($month_year, 'travel_date');
$date_list = array_unique($date_list); // Removes duplicate values from an array
$min_max_date = get_min_max_date($date_list); // format: YYYY-MM-DDDD
$returnData[] = [
'root_id' => $month_year[0]['root_id'],
'root_type' => $month_year[0]['root_type'],
'date_list' => $date_list,
'min_max_date' => $min_max_date,
'location_start_lat' => $month_year[0]['location_start_lat'],
'location_start_lng' => $month_year[0]['location_start_lng'],
'start_geometry' => $month_year[0]['start_geometry'],
'timezone' => $month_year[0]['timezone']
];
}
}
}
/** returnData sample
* $returnData = [
* [
* 'root_id' => 32232,
* 'root_type' => 1,
* 'min_max_date' => [
* 'min_date' => '2016-11-14',
* 'max_date' => '2016-11-27'
* ],
* 'date_list' => [
* '2016-11-14',
* '2016-11-18',
* '2016-11-27',
* ],
* 'location_start_lat' => 34.43242342,
* 'location_start_lng' => 36.43243342,
* 'start_geometry' => '0101000020E6100000DA5E6633629A5EC0052C5ED152E44240',
* 'timezone' => 'Asia/Ho_Chi_Minh'
* ]
* .
* .
* .
* ]
*
*/
return $returnData;
}
function get_global_setting($key, $val = 0)
{
global $con;
$q = "SELECT value FROM global_settings WHERE lower(key)=lower('" . pg_escape_string($key) . "')";
$r = pg_query($con, $q);
if ($r && pg_num_rows($r) && $f = pg_fetch_row($r)) {
return $f[0];
}
return $val;
}
function update_global_setting($key, $val)
{
global $con;
$q = "UPDATE global_settings SET value='" . pg_escape_string($val) . "' WHERE lower(key)=lower('" . pg_escape_string($key) . "')";
$r = pg_query($con, $q);
if ($r) {
return pg_affected_rows($r);
}
return 0;
}
function get_max_id($what)
{
global $con;
$q = "SELECT max(id) FROM ${what}";
$r = pg_query($con, $q);
if ($r && pg_num_rows($r) && $f = pg_fetch_row($r)) {
return $f[0];
}
return 0;
}
function get_min_max_date(array $date_list) : array
{
usort($date_list, function($a, $b) {
$dateTimestamp1 = strtotime($a);
$dateTimestamp2 = strtotime($b);
return $dateTimestamp1 < $dateTimestamp2 ? -1: 1;
});
$min_date = '';
$max_date = '';
$leng = count($date_list);
if ($leng == 1) {
$min_date = $date_list[0];
$max_date = $date_list[0];
} else {
$min_date = $date_list[0];
$max_date = $date_list[$leng - 1];
}
return [
'min_date' => $min_date,
'max_date' => $max_date
];
}