398 lines
13 KiB
PHP
398 lines
13 KiB
PHP
<?php
|
|
|
|
echo "[" . date("Y-m-d H:i:s") . "] update_weather_data job is starting.\n";
|
|
|
|
require '../../backend.php';
|
|
require 'GearmanForClient.php';
|
|
|
|
$db_host = $savvyext->cfgReadChar('database.host');
|
|
$db_name = $savvyext->cfgReadChar('database.name');
|
|
$db_user = $savvyext->cfgReadChar('database.user');
|
|
$db_pass = $savvyext->cfgReadChar('database.pass');
|
|
$db_port = $savvyext->cfgReadLong('database.port');
|
|
|
|
$connstr = "host=${db_host} port=${db_port} dbname=${db_name} user=${db_user} password=${db_pass}";
|
|
$con = pg_connect($connstr) or die("Could not connect to server\n");
|
|
|
|
/*
|
|
// Init
|
|
INSERT INTO global_settings (key,description,value) VALUES ('last_weather_parsedemail_item','',0);
|
|
INSERT INTO global_settings (key,description,value) VALUES ('last_weather_quote','',0);
|
|
// Reset
|
|
TRUNCATE TABLE weather;
|
|
UPDATE global_settings SET value='0' WHERE key='last_weather_parsedemail_item';
|
|
UPDATE global_settings SET value='0' WHERE key='last_weather_quote';
|
|
*/
|
|
|
|
// Box the data set (select min id from weather AND select max id ON parsedemail_item AND quotes)
|
|
$min_parsedemail_item_id = get_global_setting('last_weather_parsedemail_item', 0);
|
|
$min_quote_id = get_global_setting('last_weather_quote', 0);
|
|
$max_parsedemail_item_id = get_max_id('parsedemail_item');
|
|
$max_quote_id = get_max_id('quotes');
|
|
|
|
|
|
// 1) check if have weather data for coor and date
|
|
// if exist, don't call weather API. Just link and instert trip with existing weather data
|
|
process_existing_weather_data();
|
|
|
|
// 2) get coor and date that doesn't exist in weather table
|
|
$chunk_data = [];
|
|
$i = 0;
|
|
|
|
$rs = retrieve_coor_and_date_with_non_existing_weather_data();
|
|
if ($rs && pg_num_rows($rs) > 0) {
|
|
echo "[" . date("Y-m-d H:i:s") . "] Processing fire job to fetchApiData worker.\n";
|
|
} else {
|
|
echo "[" . date("Y-m-d H:i:s") . "] There is no job to fire to fetchApiData worker.\n";
|
|
}
|
|
|
|
while ($rs && $row = pg_fetch_array($rs)) {
|
|
$chunk_data[] = $row;
|
|
$i++;
|
|
|
|
if ($i == 300) {
|
|
$data = process_group_data_by_date($chunk_data);
|
|
foreach ($data as $item) {
|
|
GearmanForClient::getInstance()->fetchApiData($item);
|
|
}
|
|
|
|
$chunk_data = [];
|
|
$i = 0;
|
|
}
|
|
}
|
|
|
|
// process the rest of chunk_data
|
|
// Why we do this: because above chunk_data just process when $i == 300
|
|
// Some cases some last records have about < 300.
|
|
if (!empty($chunk_data)) {
|
|
$data = process_group_data_by_date($chunk_data);
|
|
foreach ($data as $item) {
|
|
GearmanForClient::getInstance()->fetchApiData($item);
|
|
}
|
|
}
|
|
|
|
update_global_setting('last_weather_parsedemail_item',$max_parsedemail_item_id);
|
|
update_global_setting('last_weather_quote',$max_quote_id);
|
|
|
|
pg_close($con);
|
|
|
|
echo "[" . date("Y-m-d H:i:s") . "] update_weather_data job complete.\n";
|
|
|
|
/*********************************** Function ***********************************/
|
|
|
|
// query to group travel_date based on root_id
|
|
function query_group_travel_date_based_on_root_id() : string
|
|
{
|
|
global $min_parsedemail_item_id, $min_quote_id, $max_parsedemail_item_id, $max_quote_id;
|
|
|
|
$date_group_query = "
|
|
(
|
|
SELECT
|
|
tpc.root_id,
|
|
tpc.root_type,
|
|
to_char(pitm.travel_date, 'YYYY-MM-DD') AS travel_date
|
|
FROM
|
|
trip_price_comparison tpc
|
|
INNER JOIN parsedemail_item pitm ON pitm.id = tpc.data_source_id
|
|
AND tpc.data_source = 1
|
|
WHERE
|
|
tpc.data_source_id > ${min_parsedemail_item_id}
|
|
AND tpc.data_source_id <= ${max_parsedemail_item_id}
|
|
AND pitm.travel_date IS NOT NULL
|
|
GROUP BY
|
|
tpc.root_id,
|
|
tpc.root_type,
|
|
to_char(pitm.travel_date, 'YYYY-MM-DD')
|
|
)
|
|
UNION
|
|
(
|
|
SELECT
|
|
tpc.root_id,
|
|
tpc.root_type,
|
|
to_char(q.travel_date, 'YYYY-MM-DD') AS travel_date
|
|
FROM
|
|
trip_price_comparison tpc
|
|
INNER JOIN quotes q ON q.id = tpc.data_source_id
|
|
AND tpc.data_source = 2
|
|
WHERE
|
|
tpc.data_source_id > ${min_quote_id}
|
|
AND tpc.data_source_id <= ${max_quote_id}
|
|
AND q.travel_date IS NOT NULL
|
|
GROUP BY
|
|
tpc.root_id,
|
|
tpc.root_type,
|
|
to_char(q.travel_date, 'YYYY-MM-DD')
|
|
)
|
|
";
|
|
|
|
// process to get lat, long , timezone for root_id
|
|
$query = " SELECT
|
|
sub.*,
|
|
a.country,
|
|
a.latitude AS location_start_lat,
|
|
a.longitude AS location_start_lng,
|
|
a.geometry AS start_geometry,
|
|
at.timezone
|
|
FROM (
|
|
SELECT
|
|
subtable.*,
|
|
CASE WHEN subtable.root_type = 1 THEN
|
|
parsedemail_item.location_start_id
|
|
WHEN subtable.root_type = 2 THEN
|
|
quotes.location_start_id
|
|
END AS location_start_id
|
|
FROM (" . $date_group_query . ") AS subtable
|
|
LEFT JOIN parsedemail_item ON (parsedemail_item.id = subtable.root_id
|
|
AND subtable.root_type = 1)
|
|
LEFT JOIN quotes ON (quotes.id = subtable.root_id
|
|
AND subtable.root_type = 2)) AS sub
|
|
JOIN address a ON a.id = sub.location_start_id
|
|
JOIN address_timezone at ON a.timezone = at.id
|
|
";
|
|
|
|
return $query;
|
|
}
|
|
|
|
|
|
function process_existing_weather_data()
|
|
{
|
|
global $con;
|
|
|
|
// check if have weather data for coor and date
|
|
// if exist, don't call weather API. Just link and instert trip with existing weather data
|
|
$query = query_group_travel_date_based_on_root_id();
|
|
$exist_query = "SELECT
|
|
*
|
|
FROM (" . $query . ") AS sub2
|
|
WHERE
|
|
EXISTS (
|
|
SELECT
|
|
1
|
|
FROM
|
|
weather
|
|
WHERE
|
|
weather.date = to_date(sub2.travel_date, 'YYYY-MM-DD')
|
|
AND weather.latitude = sub2.location_start_lat
|
|
AND weather.longitude = sub2.location_start_lng)
|
|
";
|
|
|
|
$rs = pg_query($con, $exist_query);
|
|
if (!$rs) {
|
|
echo "Cannot execute query: $exist_query\n";
|
|
}
|
|
|
|
if ($rs && pg_num_rows($rs) > 0) {
|
|
echo "[" . date("Y-m-d H:i:s") . "] Processing fire job to processExistingWeatherDataWorker worker.\n";
|
|
} else {
|
|
echo "[" . date("Y-m-d H:i:s") . "] There is no job to fire to processExistingWeatherDataWorker worker.\n";
|
|
}
|
|
|
|
while ($rs && $row = pg_fetch_array($rs)) {
|
|
GearmanForClient::getInstance()->processExistingWeatherData($row);
|
|
}
|
|
}
|
|
|
|
function retrieve_coor_and_date_with_non_existing_weather_data()
|
|
{
|
|
global $con;
|
|
|
|
$query = query_group_travel_date_based_on_root_id();
|
|
$non_exist_query = "SELECT
|
|
*
|
|
FROM (" . $query . ") AS sub2
|
|
WHERE
|
|
NOT EXISTS (
|
|
SELECT
|
|
1
|
|
FROM
|
|
weather
|
|
WHERE
|
|
weather.date = to_date(sub2.travel_date, 'YYYY-MM-DD')
|
|
AND weather.latitude = sub2.location_start_lat
|
|
AND weather.longitude = sub2.location_start_lng)
|
|
";
|
|
|
|
$rs = pg_query($con, $non_exist_query);
|
|
if (!$rs) {
|
|
echo "Cannot execute query: $non_exist_query\n";
|
|
}
|
|
|
|
return $rs;
|
|
}
|
|
|
|
function process_group_data_by_date(array $data) : array
|
|
{
|
|
/** Group array by YYYY-MM (year, month)
|
|
* Why we have group with YYYY-MM format:
|
|
* Because when use worldweatheronline API
|
|
* In API document write:
|
|
* If you wish to retrieve weather between two dates, use this parameter to specify the ending date.
|
|
* Important: the enddate parameter must have the same month and year as the date parameter.
|
|
*/
|
|
|
|
$result = array();
|
|
foreach ($data as $element) {
|
|
$root_id = $element['root_id'];
|
|
$root_type = $element['root_type'];
|
|
$travel_date = $element['travel_date'];
|
|
$month_year = substr($travel_date, 0, 7); // just get YYYY-MM
|
|
|
|
$result[$root_id][$root_type][$month_year][] = $element;
|
|
}
|
|
|
|
/** result sample
|
|
*$result = [
|
|
* '32232' => [ // root_id
|
|
* '1' => [ // root_type
|
|
* '2020-04' => [
|
|
* [
|
|
* 'root_id' => 32232,
|
|
* 'root_type' => 1,
|
|
* 'travel_date' => '2020-04-01',
|
|
* 'location_start_lat' => 34.43242342,
|
|
* 'location_start_lng' => 34.43242342,
|
|
* 'timezone' => 'Asia/Ho_Chi_Minh'
|
|
* ],
|
|
* [
|
|
* 'root_id' => 32232,
|
|
* 'root_type' => 1,
|
|
* 'travel_date' => '2020-04-05',
|
|
* 'location_start_lat' => 34.43242342,
|
|
* 'location_start_lng' => 34.43242342,
|
|
* 'timezone' => 'Asia/Ho_Chi_Minh'
|
|
* ],
|
|
* ],
|
|
* '2020-07' => [
|
|
* [
|
|
* 'root_id' => 32232,
|
|
* 'root_type' => 1,
|
|
* 'travel_date' => '2020-07-03',
|
|
* 'location_start_lat' => 34.43242342,
|
|
* 'location_start_lng' => 34.43242342,
|
|
* 'timezone' => 'Asia/Ho_Chi_Minh'
|
|
* ],
|
|
* [
|
|
* 'root_id' => 32232,
|
|
* 'root_type' => 1,
|
|
* 'travel_date' => '2020-07-04',
|
|
* 'location_start_lat' => 34.43242342,
|
|
* 'location_start_lng' => 34.43242342,
|
|
* 'timezone' => 'Asia/Ho_Chi_Minh'
|
|
* ],
|
|
* ],
|
|
* ]
|
|
* ]
|
|
*];
|
|
*/
|
|
|
|
$returnData = [];
|
|
foreach ($result as $root_id => $root_type) {
|
|
foreach ($root_type as $key => $month_year_array) {
|
|
foreach ($month_year_array as $month_year) {
|
|
$date_list = array_column($month_year, 'travel_date');
|
|
$date_list = array_unique($date_list); // Removes duplicate values from an array
|
|
$min_max_date = get_min_max_date($date_list); // format: YYYY-MM-DDDD
|
|
|
|
$returnData[] = [
|
|
'root_id' => $month_year[0]['root_id'],
|
|
'root_type' => $month_year[0]['root_type'],
|
|
'date_list' => $date_list,
|
|
'min_max_date' => $min_max_date,
|
|
'location_start_lat' => $month_year[0]['location_start_lat'],
|
|
'location_start_lng' => $month_year[0]['location_start_lng'],
|
|
'start_geometry' => $month_year[0]['start_geometry'],
|
|
'timezone' => $month_year[0]['timezone']
|
|
];
|
|
}
|
|
}
|
|
}
|
|
|
|
/** returnData sample
|
|
* $returnData = [
|
|
* [
|
|
* 'root_id' => 32232,
|
|
* 'root_type' => 1,
|
|
* 'min_max_date' => [
|
|
* 'min_date' => '2016-11-14',
|
|
* 'max_date' => '2016-11-27'
|
|
* ],
|
|
* 'date_list' => [
|
|
* '2016-11-14',
|
|
* '2016-11-18',
|
|
* '2016-11-27',
|
|
* ],
|
|
* 'location_start_lat' => 34.43242342,
|
|
* 'location_start_lng' => 36.43243342,
|
|
* 'start_geometry' => '0101000020E6100000DA5E6633629A5EC0052C5ED152E44240',
|
|
* 'timezone' => 'Asia/Ho_Chi_Minh'
|
|
* ]
|
|
* .
|
|
* .
|
|
* .
|
|
* ]
|
|
*
|
|
*/
|
|
|
|
return $returnData;
|
|
}
|
|
|
|
function get_global_setting($key, $val = 0)
|
|
{
|
|
global $con;
|
|
$q = "SELECT value FROM global_settings WHERE lower(key)=lower('" . pg_escape_string($key) . "')";
|
|
$r = pg_query($con, $q);
|
|
if ($r && pg_num_rows($r) && $f = pg_fetch_row($r)) {
|
|
return $f[0];
|
|
}
|
|
return $val;
|
|
}
|
|
|
|
function update_global_setting($key, $val)
|
|
{
|
|
global $con;
|
|
$q = "UPDATE global_settings SET value='" . pg_escape_string($val) . "' WHERE lower(key)=lower('" . pg_escape_string($key) . "')";
|
|
$r = pg_query($con, $q);
|
|
if ($r) {
|
|
return pg_affected_rows($r);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
function get_max_id($what)
|
|
{
|
|
global $con;
|
|
$q = "SELECT max(id) FROM ${what}";
|
|
$r = pg_query($con, $q);
|
|
if ($r && pg_num_rows($r) && $f = pg_fetch_row($r)) {
|
|
return $f[0];
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
function get_min_max_date(array $date_list) : array
|
|
{
|
|
usort($date_list, function($a, $b) {
|
|
$dateTimestamp1 = strtotime($a);
|
|
$dateTimestamp2 = strtotime($b);
|
|
|
|
return $dateTimestamp1 < $dateTimestamp2 ? -1: 1;
|
|
});
|
|
|
|
$min_date = '';
|
|
$max_date = '';
|
|
|
|
$leng = count($date_list);
|
|
if ($leng == 1) {
|
|
$min_date = $date_list[0];
|
|
$max_date = $date_list[0];
|
|
} else {
|
|
$min_date = $date_list[0];
|
|
$max_date = $date_list[$leng - 1];
|
|
}
|
|
|
|
return [
|
|
'min_date' => $min_date,
|
|
'max_date' => $max_date
|
|
];
|
|
}
|