first commit
This commit is contained in:
@@ -0,0 +1,397 @@
|
||||
<?php
|
||||
|
||||
echo "[" . date("Y-m-d H:i:s") . "] update_weather_data job is starting.\n";
|
||||
|
||||
require '../../backend.php';
|
||||
require 'GearmanForClient.php';
|
||||
|
||||
$db_host = $savvyext->cfgReadChar('database.host');
|
||||
$db_name = $savvyext->cfgReadChar('database.name');
|
||||
$db_user = $savvyext->cfgReadChar('database.user');
|
||||
$db_pass = $savvyext->cfgReadChar('database.pass');
|
||||
$db_port = $savvyext->cfgReadLong('database.port');
|
||||
|
||||
$connstr = "host=${db_host} port=${db_port} dbname=${db_name} user=${db_user} password=${db_pass}";
|
||||
$con = pg_connect($connstr) or die("Could not connect to server\n");
|
||||
|
||||
/*
|
||||
// Init
|
||||
INSERT INTO global_settings (key,description,value) VALUES ('last_weather_parsedemail_item','',0);
|
||||
INSERT INTO global_settings (key,description,value) VALUES ('last_weather_quote','',0);
|
||||
// Reset
|
||||
TRUNCATE TABLE weather;
|
||||
UPDATE global_settings SET value='0' WHERE key='last_weather_parsedemail_item';
|
||||
UPDATE global_settings SET value='0' WHERE key='last_weather_quote';
|
||||
*/
|
||||
|
||||
// Box the data set (select min id from weather AND select max id ON parsedemail_item AND quotes)
|
||||
$min_parsedemail_item_id = get_global_setting('last_weather_parsedemail_item', 0);
|
||||
$min_quote_id = get_global_setting('last_weather_quote', 0);
|
||||
$max_parsedemail_item_id = get_max_id('parsedemail_item');
|
||||
$max_quote_id = get_max_id('quotes');
|
||||
|
||||
|
||||
// 1) check if have weather data for coor and date
|
||||
// if exist, don't call weather API. Just link and instert trip with existing weather data
|
||||
process_existing_weather_data();
|
||||
|
||||
// 2) get coor and date that doesn't exist in weather table
|
||||
$chunk_data = [];
|
||||
$i = 0;
|
||||
|
||||
$rs = retrieve_coor_and_date_with_non_existing_weather_data();
|
||||
if ($rs && pg_num_rows($rs) > 0) {
|
||||
echo "[" . date("Y-m-d H:i:s") . "] Processing fire job to fetchApiData worker.\n";
|
||||
} else {
|
||||
echo "[" . date("Y-m-d H:i:s") . "] There is no job to fire to fetchApiData worker.\n";
|
||||
}
|
||||
|
||||
while ($rs && $row = pg_fetch_array($rs)) {
|
||||
$chunk_data[] = $row;
|
||||
$i++;
|
||||
|
||||
if ($i == 300) {
|
||||
$data = process_group_data_by_date($chunk_data);
|
||||
foreach ($data as $item) {
|
||||
GearmanForClient::getInstance()->fetchApiData($item);
|
||||
}
|
||||
|
||||
$chunk_data = [];
|
||||
$i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// process the rest of chunk_data
|
||||
// Why we do this: because above chunk_data just process when $i == 300
|
||||
// Some cases some last records have about < 300.
|
||||
if (!empty($chunk_data)) {
|
||||
$data = process_group_data_by_date($chunk_data);
|
||||
foreach ($data as $item) {
|
||||
GearmanForClient::getInstance()->fetchApiData($item);
|
||||
}
|
||||
}
|
||||
|
||||
update_global_setting('last_weather_parsedemail_item',$max_parsedemail_item_id);
|
||||
update_global_setting('last_weather_quote',$max_quote_id);
|
||||
|
||||
pg_close($con);
|
||||
|
||||
echo "[" . date("Y-m-d H:i:s") . "] update_weather_data job complete.\n";
|
||||
|
||||
/*********************************** Function ***********************************/
|
||||
|
||||
// query to group travel_date based on root_id
|
||||
function query_group_travel_date_based_on_root_id() : string
|
||||
{
|
||||
global $min_parsedemail_item_id, $min_quote_id, $max_parsedemail_item_id, $max_quote_id;
|
||||
|
||||
$date_group_query = "
|
||||
(
|
||||
SELECT
|
||||
tpc.root_id,
|
||||
tpc.root_type,
|
||||
to_char(pitm.travel_date, 'YYYY-MM-DD') AS travel_date
|
||||
FROM
|
||||
trip_price_comparison tpc
|
||||
INNER JOIN parsedemail_item pitm ON pitm.id = tpc.data_source_id
|
||||
AND tpc.data_source = 1
|
||||
WHERE
|
||||
tpc.data_source_id > ${min_parsedemail_item_id}
|
||||
AND tpc.data_source_id <= ${max_parsedemail_item_id}
|
||||
AND pitm.travel_date IS NOT NULL
|
||||
GROUP BY
|
||||
tpc.root_id,
|
||||
tpc.root_type,
|
||||
to_char(pitm.travel_date, 'YYYY-MM-DD')
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT
|
||||
tpc.root_id,
|
||||
tpc.root_type,
|
||||
to_char(q.travel_date, 'YYYY-MM-DD') AS travel_date
|
||||
FROM
|
||||
trip_price_comparison tpc
|
||||
INNER JOIN quotes q ON q.id = tpc.data_source_id
|
||||
AND tpc.data_source = 2
|
||||
WHERE
|
||||
tpc.data_source_id > ${min_quote_id}
|
||||
AND tpc.data_source_id <= ${max_quote_id}
|
||||
AND q.travel_date IS NOT NULL
|
||||
GROUP BY
|
||||
tpc.root_id,
|
||||
tpc.root_type,
|
||||
to_char(q.travel_date, 'YYYY-MM-DD')
|
||||
)
|
||||
";
|
||||
|
||||
// process to get lat, long , timezone for root_id
|
||||
$query = " SELECT
|
||||
sub.*,
|
||||
a.country,
|
||||
a.latitude AS location_start_lat,
|
||||
a.longitude AS location_start_lng,
|
||||
a.geometry AS start_geometry,
|
||||
at.timezone
|
||||
FROM (
|
||||
SELECT
|
||||
subtable.*,
|
||||
CASE WHEN subtable.root_type = 1 THEN
|
||||
parsedemail_item.location_start_id
|
||||
WHEN subtable.root_type = 2 THEN
|
||||
quotes.location_start_id
|
||||
END AS location_start_id
|
||||
FROM (" . $date_group_query . ") AS subtable
|
||||
LEFT JOIN parsedemail_item ON (parsedemail_item.id = subtable.root_id
|
||||
AND subtable.root_type = 1)
|
||||
LEFT JOIN quotes ON (quotes.id = subtable.root_id
|
||||
AND subtable.root_type = 2)) AS sub
|
||||
JOIN address a ON a.id = sub.location_start_id
|
||||
JOIN address_timezone at ON a.timezone = at.id
|
||||
";
|
||||
|
||||
return $query;
|
||||
}
|
||||
|
||||
|
||||
function process_existing_weather_data()
|
||||
{
|
||||
global $con;
|
||||
|
||||
// check if have weather data for coor and date
|
||||
// if exist, don't call weather API. Just link and instert trip with existing weather data
|
||||
$query = query_group_travel_date_based_on_root_id();
|
||||
$exist_query = "SELECT
|
||||
*
|
||||
FROM (" . $query . ") AS sub2
|
||||
WHERE
|
||||
EXISTS (
|
||||
SELECT
|
||||
1
|
||||
FROM
|
||||
weather
|
||||
WHERE
|
||||
weather.date = to_date(sub2.travel_date, 'YYYY-MM-DD')
|
||||
AND weather.latitude = sub2.location_start_lat
|
||||
AND weather.longitude = sub2.location_start_lng)
|
||||
";
|
||||
|
||||
$rs = pg_query($con, $exist_query);
|
||||
if (!$rs) {
|
||||
echo "Cannot execute query: $exist_query\n";
|
||||
}
|
||||
|
||||
if ($rs && pg_num_rows($rs) > 0) {
|
||||
echo "[" . date("Y-m-d H:i:s") . "] Processing fire job to processExistingWeatherDataWorker worker.\n";
|
||||
} else {
|
||||
echo "[" . date("Y-m-d H:i:s") . "] There is no job to fire to processExistingWeatherDataWorker worker.\n";
|
||||
}
|
||||
|
||||
while ($rs && $row = pg_fetch_array($rs)) {
|
||||
GearmanForClient::getInstance()->processExistingWeatherData($row);
|
||||
}
|
||||
}
|
||||
|
||||
function retrieve_coor_and_date_with_non_existing_weather_data()
|
||||
{
|
||||
global $con;
|
||||
|
||||
$query = query_group_travel_date_based_on_root_id();
|
||||
$non_exist_query = "SELECT
|
||||
*
|
||||
FROM (" . $query . ") AS sub2
|
||||
WHERE
|
||||
NOT EXISTS (
|
||||
SELECT
|
||||
1
|
||||
FROM
|
||||
weather
|
||||
WHERE
|
||||
weather.date = to_date(sub2.travel_date, 'YYYY-MM-DD')
|
||||
AND weather.latitude = sub2.location_start_lat
|
||||
AND weather.longitude = sub2.location_start_lng)
|
||||
";
|
||||
|
||||
$rs = pg_query($con, $non_exist_query);
|
||||
if (!$rs) {
|
||||
echo "Cannot execute query: $non_exist_query\n";
|
||||
}
|
||||
|
||||
return $rs;
|
||||
}
|
||||
|
||||
function process_group_data_by_date(array $data) : array
|
||||
{
|
||||
/** Group array by YYYY-MM (year, month)
|
||||
* Why we have group with YYYY-MM format:
|
||||
* Because when use worldweatheronline API
|
||||
* In API document write:
|
||||
* If you wish to retrieve weather between two dates, use this parameter to specify the ending date.
|
||||
* Important: the enddate parameter must have the same month and year as the date parameter.
|
||||
*/
|
||||
|
||||
$result = array();
|
||||
foreach ($data as $element) {
|
||||
$root_id = $element['root_id'];
|
||||
$root_type = $element['root_type'];
|
||||
$travel_date = $element['travel_date'];
|
||||
$month_year = substr($travel_date, 0, 7); // just get YYYY-MM
|
||||
|
||||
$result[$root_id][$root_type][$month_year][] = $element;
|
||||
}
|
||||
|
||||
/** result sample
|
||||
*$result = [
|
||||
* '32232' => [ // root_id
|
||||
* '1' => [ // root_type
|
||||
* '2020-04' => [
|
||||
* [
|
||||
* 'root_id' => 32232,
|
||||
* 'root_type' => 1,
|
||||
* 'travel_date' => '2020-04-01',
|
||||
* 'location_start_lat' => 34.43242342,
|
||||
* 'location_start_lng' => 34.43242342,
|
||||
* 'timezone' => 'Asia/Ho_Chi_Minh'
|
||||
* ],
|
||||
* [
|
||||
* 'root_id' => 32232,
|
||||
* 'root_type' => 1,
|
||||
* 'travel_date' => '2020-04-05',
|
||||
* 'location_start_lat' => 34.43242342,
|
||||
* 'location_start_lng' => 34.43242342,
|
||||
* 'timezone' => 'Asia/Ho_Chi_Minh'
|
||||
* ],
|
||||
* ],
|
||||
* '2020-07' => [
|
||||
* [
|
||||
* 'root_id' => 32232,
|
||||
* 'root_type' => 1,
|
||||
* 'travel_date' => '2020-07-03',
|
||||
* 'location_start_lat' => 34.43242342,
|
||||
* 'location_start_lng' => 34.43242342,
|
||||
* 'timezone' => 'Asia/Ho_Chi_Minh'
|
||||
* ],
|
||||
* [
|
||||
* 'root_id' => 32232,
|
||||
* 'root_type' => 1,
|
||||
* 'travel_date' => '2020-07-04',
|
||||
* 'location_start_lat' => 34.43242342,
|
||||
* 'location_start_lng' => 34.43242342,
|
||||
* 'timezone' => 'Asia/Ho_Chi_Minh'
|
||||
* ],
|
||||
* ],
|
||||
* ]
|
||||
* ]
|
||||
*];
|
||||
*/
|
||||
|
||||
$returnData = [];
|
||||
foreach ($result as $root_id => $root_type) {
|
||||
foreach ($root_type as $key => $month_year_array) {
|
||||
foreach ($month_year_array as $month_year) {
|
||||
$date_list = array_column($month_year, 'travel_date');
|
||||
$date_list = array_unique($date_list); // Removes duplicate values from an array
|
||||
$min_max_date = get_min_max_date($date_list); // format: YYYY-MM-DDDD
|
||||
|
||||
$returnData[] = [
|
||||
'root_id' => $month_year[0]['root_id'],
|
||||
'root_type' => $month_year[0]['root_type'],
|
||||
'date_list' => $date_list,
|
||||
'min_max_date' => $min_max_date,
|
||||
'location_start_lat' => $month_year[0]['location_start_lat'],
|
||||
'location_start_lng' => $month_year[0]['location_start_lng'],
|
||||
'start_geometry' => $month_year[0]['start_geometry'],
|
||||
'timezone' => $month_year[0]['timezone']
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** returnData sample
|
||||
* $returnData = [
|
||||
* [
|
||||
* 'root_id' => 32232,
|
||||
* 'root_type' => 1,
|
||||
* 'min_max_date' => [
|
||||
* 'min_date' => '2016-11-14',
|
||||
* 'max_date' => '2016-11-27'
|
||||
* ],
|
||||
* 'date_list' => [
|
||||
* '2016-11-14',
|
||||
* '2016-11-18',
|
||||
* '2016-11-27',
|
||||
* ],
|
||||
* 'location_start_lat' => 34.43242342,
|
||||
* 'location_start_lng' => 36.43243342,
|
||||
* 'start_geometry' => '0101000020E6100000DA5E6633629A5EC0052C5ED152E44240',
|
||||
* 'timezone' => 'Asia/Ho_Chi_Minh'
|
||||
* ]
|
||||
* .
|
||||
* .
|
||||
* .
|
||||
* ]
|
||||
*
|
||||
*/
|
||||
|
||||
return $returnData;
|
||||
}
|
||||
|
||||
function get_global_setting($key, $val = 0)
|
||||
{
|
||||
global $con;
|
||||
$q = "SELECT value FROM global_settings WHERE lower(key)=lower('" . pg_escape_string($key) . "')";
|
||||
$r = pg_query($con, $q);
|
||||
if ($r && pg_num_rows($r) && $f = pg_fetch_row($r)) {
|
||||
return $f[0];
|
||||
}
|
||||
return $val;
|
||||
}
|
||||
|
||||
function update_global_setting($key, $val)
|
||||
{
|
||||
global $con;
|
||||
$q = "UPDATE global_settings SET value='" . pg_escape_string($val) . "' WHERE lower(key)=lower('" . pg_escape_string($key) . "')";
|
||||
$r = pg_query($con, $q);
|
||||
if ($r) {
|
||||
return pg_affected_rows($r);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
function get_max_id($what)
|
||||
{
|
||||
global $con;
|
||||
$q = "SELECT max(id) FROM ${what}";
|
||||
$r = pg_query($con, $q);
|
||||
if ($r && pg_num_rows($r) && $f = pg_fetch_row($r)) {
|
||||
return $f[0];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
function get_min_max_date(array $date_list) : array
|
||||
{
|
||||
usort($date_list, function($a, $b) {
|
||||
$dateTimestamp1 = strtotime($a);
|
||||
$dateTimestamp2 = strtotime($b);
|
||||
|
||||
return $dateTimestamp1 < $dateTimestamp2 ? -1: 1;
|
||||
});
|
||||
|
||||
$min_date = '';
|
||||
$max_date = '';
|
||||
|
||||
$leng = count($date_list);
|
||||
if ($leng == 1) {
|
||||
$min_date = $date_list[0];
|
||||
$max_date = $date_list[0];
|
||||
} else {
|
||||
$min_date = $date_list[0];
|
||||
$max_date = $date_list[$leng - 1];
|
||||
}
|
||||
|
||||
return [
|
||||
'min_date' => $min_date,
|
||||
'max_date' => $max_date
|
||||
];
|
||||
}
|
||||
Reference in New Issue
Block a user