cfgReadChar('database.host'); $db_name = $savvyext->cfgReadChar('database.name'); $db_user = $savvyext->cfgReadChar('database.user'); $db_pass = $savvyext->cfgReadChar('database.pass'); $db_port = $savvyext->cfgReadLong('database.port'); $connstr = "host=${db_host} port=${db_port} dbname=${db_name} user=${db_user} password=${db_pass}"; $con = pg_connect($connstr) or die("Could not connect to server\n"); /* // Init INSERT INTO global_settings (key,description,value) VALUES ('last_weather_parsedemail_item','',0); INSERT INTO global_settings (key,description,value) VALUES ('last_weather_quote','',0); // Reset TRUNCATE TABLE weather; UPDATE global_settings SET value='0' WHERE key='last_weather_parsedemail_item'; UPDATE global_settings SET value='0' WHERE key='last_weather_quote'; */ // Box the data set (select min id from weather AND select max id ON parsedemail_item AND quotes) $min_parsedemail_item_id = get_global_setting('last_weather_parsedemail_item', 0); $min_quote_id = get_global_setting('last_weather_quote', 0); $max_parsedemail_item_id = get_max_id('parsedemail_item'); $max_quote_id = get_max_id('quotes'); // 1) check if have weather data for coor and date // if exist, don't call weather API. Just link and instert trip with existing weather data process_existing_weather_data(); // 2) get coor and date that doesn't exist in weather table $chunk_data = []; $i = 0; $rs = retrieve_coor_and_date_with_non_existing_weather_data(); if ($rs && pg_num_rows($rs) > 0) { echo "[" . date("Y-m-d H:i:s") . "] Processing fire job to fetchApiData worker.\n"; } else { echo "[" . date("Y-m-d H:i:s") . "] There is no job to fire to fetchApiData worker.\n"; } while ($rs && $row = pg_fetch_array($rs)) { $chunk_data[] = $row; $i++; if ($i == 300) { $data = process_group_data_by_date($chunk_data); foreach ($data as $item) { GearmanForClient::getInstance()->fetchApiData($item); } $chunk_data = []; $i = 0; } } // process the rest of chunk_data // Why we do this: because above chunk_data just process when $i == 300 // Some cases some last records have about < 300. if (!empty($chunk_data)) { $data = process_group_data_by_date($chunk_data); foreach ($data as $item) { GearmanForClient::getInstance()->fetchApiData($item); } } update_global_setting('last_weather_parsedemail_item',$max_parsedemail_item_id); update_global_setting('last_weather_quote',$max_quote_id); pg_close($con); echo "[" . date("Y-m-d H:i:s") . "] update_weather_data job complete.\n"; /*********************************** Function ***********************************/ // query to group travel_date based on root_id function query_group_travel_date_based_on_root_id() : string { global $min_parsedemail_item_id, $min_quote_id, $max_parsedemail_item_id, $max_quote_id; $date_group_query = " ( SELECT tpc.root_id, tpc.root_type, to_char(pitm.travel_date, 'YYYY-MM-DD') AS travel_date FROM trip_price_comparison tpc INNER JOIN parsedemail_item pitm ON pitm.id = tpc.data_source_id AND tpc.data_source = 1 WHERE tpc.data_source_id > ${min_parsedemail_item_id} AND tpc.data_source_id <= ${max_parsedemail_item_id} AND pitm.travel_date IS NOT NULL GROUP BY tpc.root_id, tpc.root_type, to_char(pitm.travel_date, 'YYYY-MM-DD') ) UNION ( SELECT tpc.root_id, tpc.root_type, to_char(q.travel_date, 'YYYY-MM-DD') AS travel_date FROM trip_price_comparison tpc INNER JOIN quotes q ON q.id = tpc.data_source_id AND tpc.data_source = 2 WHERE tpc.data_source_id > ${min_quote_id} AND tpc.data_source_id <= ${max_quote_id} AND q.travel_date IS NOT NULL GROUP BY tpc.root_id, tpc.root_type, to_char(q.travel_date, 'YYYY-MM-DD') ) "; // process to get lat, long , timezone for root_id $query = " SELECT sub.*, a.country, a.latitude AS location_start_lat, a.longitude AS location_start_lng, a.geometry AS start_geometry, at.timezone FROM ( SELECT subtable.*, CASE WHEN subtable.root_type = 1 THEN parsedemail_item.location_start_id WHEN subtable.root_type = 2 THEN quotes.location_start_id END AS location_start_id FROM (" . $date_group_query . ") AS subtable LEFT JOIN parsedemail_item ON (parsedemail_item.id = subtable.root_id AND subtable.root_type = 1) LEFT JOIN quotes ON (quotes.id = subtable.root_id AND subtable.root_type = 2)) AS sub JOIN address a ON a.id = sub.location_start_id JOIN address_timezone at ON a.timezone = at.id "; return $query; } function process_existing_weather_data() { global $con; // check if have weather data for coor and date // if exist, don't call weather API. Just link and instert trip with existing weather data $query = query_group_travel_date_based_on_root_id(); $exist_query = "SELECT * FROM (" . $query . ") AS sub2 WHERE EXISTS ( SELECT 1 FROM weather WHERE weather.date = to_date(sub2.travel_date, 'YYYY-MM-DD') AND weather.latitude = sub2.location_start_lat AND weather.longitude = sub2.location_start_lng) "; $rs = pg_query($con, $exist_query); if (!$rs) { echo "Cannot execute query: $exist_query\n"; } if ($rs && pg_num_rows($rs) > 0) { echo "[" . date("Y-m-d H:i:s") . "] Processing fire job to processExistingWeatherDataWorker worker.\n"; } else { echo "[" . date("Y-m-d H:i:s") . "] There is no job to fire to processExistingWeatherDataWorker worker.\n"; } while ($rs && $row = pg_fetch_array($rs)) { GearmanForClient::getInstance()->processExistingWeatherData($row); } } function retrieve_coor_and_date_with_non_existing_weather_data() { global $con; $query = query_group_travel_date_based_on_root_id(); $non_exist_query = "SELECT * FROM (" . $query . ") AS sub2 WHERE NOT EXISTS ( SELECT 1 FROM weather WHERE weather.date = to_date(sub2.travel_date, 'YYYY-MM-DD') AND weather.latitude = sub2.location_start_lat AND weather.longitude = sub2.location_start_lng) "; $rs = pg_query($con, $non_exist_query); if (!$rs) { echo "Cannot execute query: $non_exist_query\n"; } return $rs; } function process_group_data_by_date(array $data) : array { /** Group array by YYYY-MM (year, month) * Why we have group with YYYY-MM format: * Because when use worldweatheronline API * In API document write: * If you wish to retrieve weather between two dates, use this parameter to specify the ending date. * Important: the enddate parameter must have the same month and year as the date parameter. */ $result = array(); foreach ($data as $element) { $root_id = $element['root_id']; $root_type = $element['root_type']; $travel_date = $element['travel_date']; $month_year = substr($travel_date, 0, 7); // just get YYYY-MM $result[$root_id][$root_type][$month_year][] = $element; } /** result sample *$result = [ * '32232' => [ // root_id * '1' => [ // root_type * '2020-04' => [ * [ * 'root_id' => 32232, * 'root_type' => 1, * 'travel_date' => '2020-04-01', * 'location_start_lat' => 34.43242342, * 'location_start_lng' => 34.43242342, * 'timezone' => 'Asia/Ho_Chi_Minh' * ], * [ * 'root_id' => 32232, * 'root_type' => 1, * 'travel_date' => '2020-04-05', * 'location_start_lat' => 34.43242342, * 'location_start_lng' => 34.43242342, * 'timezone' => 'Asia/Ho_Chi_Minh' * ], * ], * '2020-07' => [ * [ * 'root_id' => 32232, * 'root_type' => 1, * 'travel_date' => '2020-07-03', * 'location_start_lat' => 34.43242342, * 'location_start_lng' => 34.43242342, * 'timezone' => 'Asia/Ho_Chi_Minh' * ], * [ * 'root_id' => 32232, * 'root_type' => 1, * 'travel_date' => '2020-07-04', * 'location_start_lat' => 34.43242342, * 'location_start_lng' => 34.43242342, * 'timezone' => 'Asia/Ho_Chi_Minh' * ], * ], * ] * ] *]; */ $returnData = []; foreach ($result as $root_id => $root_type) { foreach ($root_type as $key => $month_year_array) { foreach ($month_year_array as $month_year) { $date_list = array_column($month_year, 'travel_date'); $date_list = array_unique($date_list); // Removes duplicate values from an array $min_max_date = get_min_max_date($date_list); // format: YYYY-MM-DDDD $returnData[] = [ 'root_id' => $month_year[0]['root_id'], 'root_type' => $month_year[0]['root_type'], 'date_list' => $date_list, 'min_max_date' => $min_max_date, 'location_start_lat' => $month_year[0]['location_start_lat'], 'location_start_lng' => $month_year[0]['location_start_lng'], 'start_geometry' => $month_year[0]['start_geometry'], 'timezone' => $month_year[0]['timezone'] ]; } } } /** returnData sample * $returnData = [ * [ * 'root_id' => 32232, * 'root_type' => 1, * 'min_max_date' => [ * 'min_date' => '2016-11-14', * 'max_date' => '2016-11-27' * ], * 'date_list' => [ * '2016-11-14', * '2016-11-18', * '2016-11-27', * ], * 'location_start_lat' => 34.43242342, * 'location_start_lng' => 36.43243342, * 'start_geometry' => '0101000020E6100000DA5E6633629A5EC0052C5ED152E44240', * 'timezone' => 'Asia/Ho_Chi_Minh' * ] * . * . * . * ] * */ return $returnData; } function get_global_setting($key, $val = 0) { global $con; $q = "SELECT value FROM global_settings WHERE lower(key)=lower('" . pg_escape_string($key) . "')"; $r = pg_query($con, $q); if ($r && pg_num_rows($r) && $f = pg_fetch_row($r)) { return $f[0]; } return $val; } function update_global_setting($key, $val) { global $con; $q = "UPDATE global_settings SET value='" . pg_escape_string($val) . "' WHERE lower(key)=lower('" . pg_escape_string($key) . "')"; $r = pg_query($con, $q); if ($r) { return pg_affected_rows($r); } return 0; } function get_max_id($what) { global $con; $q = "SELECT max(id) FROM ${what}"; $r = pg_query($con, $q); if ($r && pg_num_rows($r) && $f = pg_fetch_row($r)) { return $f[0]; } return 0; } function get_min_max_date(array $date_list) : array { usort($date_list, function($a, $b) { $dateTimestamp1 = strtotime($a); $dateTimestamp2 = strtotime($b); return $dateTimestamp1 < $dateTimestamp2 ? -1: 1; }); $min_date = ''; $max_date = ''; $leng = count($date_list); if ($leng == 1) { $min_date = $date_list[0]; $max_date = $date_list[0]; } else { $min_date = $date_list[0]; $max_date = $date_list[$leng - 1]; } return [ 'min_date' => $min_date, 'max_date' => $max_date ]; }