first commit

This commit is contained in:
dev-chiefworks
2022-05-31 16:21:53 -04:00
commit f76abffdcd
5978 changed files with 1078901 additions and 0 deletions
+55
View File
@@ -0,0 +1,55 @@
<?php
/**
 * Thin client that submits background jobs to a Gearman job server.
 *
 * Instances are cached per (queue, server, port) so repeated calls to
 * getInstance() reuse the same GearmanClient.
 */
class GearmanForClient
{
    /** @var array Instances created by getInstance(), keyed by "queue|server|port". */
    public static $instances = array();

    /** @var GearmanClient */
    private $gmc;

    /** @var string */
    private $queue;

    /**
     * Fetch (and create if needed) the client for the given server and queue.
     *
     * @param string $server
     * @param int $port
     * @param string $queue
     * @return self
     */
    public static function getInstance($server = '127.0.0.1', $port = 4730, $queue = 'log')
    {
        // The separators keep distinct triples apart: without them
        // ('10.0.0.1', 14730) and ('10.0.0.11', 4730) produce the same key.
        $hash = $queue . '|' . $server . '|' . $port;
        if (!array_key_exists($hash, self::$instances)) {
            self::$instances[$hash] = new self($queue, $server, $port);
        }
        return self::$instances[$hash];
    }

    /**
     * @param string $queue  Stored on the instance; not read by the methods below.
     * @param string $server Gearman job server host.
     * @param int $port      Gearman job server port.
     */
    public function __construct($queue, $server, $port)
    {
        $this->gmc = new GearmanClient();
        $this->queue = $queue;
        $this->gmc->addServer($server, $port);
    }

    /**
     * Queue a background 'fetchApiData' job with the JSON-encoded payload.
     *
     * @param mixed $data
     */
    public function fetchApiData($data)
    {
        $this->gmc->doBackground('fetchApiData', json_encode($data));
    }

    /**
     * Queue a background 'processExistingWeatherDataWorker' job with the
     * JSON-encoded payload.
     *
     * @param mixed $data
     */
    public function processExistingWeatherData($data)
    {
        $this->gmc->doBackground('processExistingWeatherDataWorker', json_encode($data));
    }
}
+182
View File
@@ -0,0 +1,182 @@
-- Table estimates: one row per estimate, optionally tied to a parsedemail_item row.
-- created_at / updated_at are NOT NULL and have no default, so every writer must supply them.
CREATE TABLE estimates (
id bigserial,
price numeric,
surge_price numeric,
trip_time character varying,
distance integer,
is_holiday boolean DEFAULT false,
day_of_week integer,
weather_conditions text,
created_at timestamp(6) without time zone NOT NULL,
updated_at timestamp(6) without time zone NOT NULL,
-- Nullable foreign key to parsedemail_item(id).
parsedemail_item_id bigint REFERENCES parsedemail_item(id),
PRIMARY KEY (id)
);
-- Table holidays: at most one row per (date, country); country must exist in country(code).
CREATE TABLE holidays (
id serial,
date date NOT NULL,
note text,
active boolean DEFAULT true,
-- Two-character country code, foreign key to country(code).
country character varying (2) NOT NULL REFERENCES country(code),
created_at timestamp without time zone DEFAULT now() NOT NULL,
PRIMARY KEY (id),
UNIQUE (date,country)
);
-- Table trip_holidays: links a source row, identified by (data_source_id, data_source),
-- to a holiday. The UNIQUE constraint allows at most one link per source row.
-- NOTE(review): data_source presumably uses the same 1/2 encoding as trip_price_comparison - confirm.
CREATE TABLE trip_holidays (
id bigserial,
data_source_id bigint,
data_source smallint,
created timestamp without time zone DEFAULT now() NOT NULL,
holiday_id integer NOT NULL REFERENCES holidays(id),
normal_cost numeric,
PRIMARY KEY(id),
UNIQUE (data_source_id, data_source)
);
-- Table trip_price_comparison: associates a source row with a "root" row.
-- The weather scripts join data_source = 1 to parsedemail_item.id and data_source = 2 to quotes.id,
-- and resolve root_type = 1 / 2 against parsedemail_item / quotes in the same way.
CREATE TABLE trip_price_comparison (
id bigserial,
data_source_id bigint,
root_id bigint,
cost numeric,
average numeric,
created timestamp without time zone DEFAULT now(),
data_source smallint,
root_type smallint,
PRIMARY KEY(id),
-- root_type is not part of the uniqueness key.
UNIQUE(data_source_id,data_source,root_id)
);
-- Table trip_surge_price: one row per data_source_id.
-- NOTE(review): uniqueness is on data_source_id alone, not (data_source_id, data_source) as in
-- trip_holidays; confirm ids from different sources cannot collide here.
CREATE TABLE trip_surge_price (
id bigserial,
data_source_id bigint,
created timestamp without time zone DEFAULT now(),
data_source smallint,
PRIMARY KEY(id),
UNIQUE (data_source_id)
);
-- Table weather: weather observations keyed by coordinates, date and time.
-- The fetchApiData worker inserts one row per hourly API entry and looks rows up
-- by (date, latitude, longitude).
CREATE TABLE weather (
    id bigserial,
    temp_c numeric,
    avg_temp_c numeric,
    total_snow_cm numeric,
    wind_speed_kmph varchar,
    weather_code varchar,
    precip_mm varchar,
    humidity varchar,
    visibility varchar,
    pressure varchar,
    heat_index_c varchar,
    wind_chill_c varchar,
    wind_gust_kmph varchar,
    feels_like_c varchar,
    latitude float8,
    longitude float8,
    geometry geometry,
    created timestamp without time zone DEFAULT now(),
    date date,
    dewpoint float4,
    precipitation float4,
    precipitation_3 float4,
    precipitation_6 float4,
    winddirection int4,
    condition int4,
    time timestamp,
    -- The comma after this column was missing, which made the statement a syntax error.
    time_local timestamp,
    PRIMARY KEY(id)
);
-- Table trip_weather: links a source row (data_source_id, data_source) to the
-- weather row matching its travel hour.
CREATE TABLE trip_weather(
    id bigserial,
    data_source_id bigint,
    data_source smallint,
    weather_id bigint REFERENCES weather(id),
    created timestamp without time zone DEFAULT now(),
    PRIMARY KEY(id),
    -- The workers insert with ON CONFLICT (data_source_id, data_source), which
    -- PostgreSQL only accepts when a unique constraint covers exactly these
    -- columns. The composite key also keeps ids from the two source tables
    -- (data_source 1 and 2) from colliding.
    UNIQUE (data_source_id, data_source)
);
-- Table country_weather_stations: weather station records per country.
-- Note that latitude / longitude are stored as varchar here, unlike the float8
-- columns used by weather and address.
CREATE TABLE country_weather_stations (
id bigserial,
country_code bigint,
station_id bigint,
station_name varchar,
latitude varchar,
longitude varchar,
region varchar,
created timestamp without time zone DEFAULT now(),
PRIMARY KEY(id)
);
-- Table weather_services: credentials and endpoint for external weather APIs.
-- The fetchApiData worker reads service_name, api_key and url from one row with active = TRUE.
CREATE TABLE weather_services (
id bigserial,
service_name varchar,
api_key varchar,
url varchar,
active boolean DEFAULT true,
used boolean DEFAULT false,
stop_time timestamp without time zone,
remaining_requests int4,
PRIMARY KEY(id)
);
-- Table address: geocoded addresses. The weather scripts read latitude, longitude,
-- geometry and country from here for a trip's start location.
CREATE TABLE address (
id bigserial,
address varchar(200),
latitude float8,
longitude float8,
-- Integer key; the weather scripts join it to address_timezone.id.
timezone int4,
geocoding_date date,
postal varchar(40),
country varchar(2),
geometry geometry,
description varchar,
PRIMARY KEY(id)
);
-- Table trackedemail_item: (id, member_id) pairs.
-- NOTE(review): member_id is declared bigserial, so it gets its own sequence default;
-- a plain bigint may have been intended - confirm.
CREATE TABLE trackedemail_item (
id bigserial,
member_id bigserial,
PRIMARY KEY(id)
);
-- View union_trip_and_quote_view_table: parsedemail_item rows (data_source = 1) and
-- quotes rows (data_source = 2) with a positive, non-NULL cost, in one common shape.
-- UNION (not UNION ALL) removes exact duplicate rows from the combined result.
CREATE VIEW union_trip_and_quote_view_table AS
(
SELECT
trip.id AS data_source_id,
trip.cost,
trip.location_start_id,
trip.location_end_id,
trip.travel_date as travel_date,
1 AS data_source
FROM
parsedemail_item AS trip
WHERE
trip.cost IS NOT NULL AND trip.cost>0
)
UNION
(
SELECT
quotes.id AS data_source_id,
quotes.cost,
quotes.location_start_id,
quotes.location_end_id,
quotes.travel_date as travel_date,
2 AS data_source
FROM
quotes
WHERE
quotes.cost IS NOT NULL AND quotes.cost>0
);
-- Table global_settings: key/value settings with an integer value.
-- The update_weather_data script stores its progress under the keys
-- 'last_weather_parsedemail_item' and 'last_weather_quote'.
-- Keys are looked up case-insensitively (lower(key)); no UNIQUE constraint enforces one row per key.
CREATE TABLE global_settings (
id bigserial,
key varchar,
description varchar,
value int4,
status smallint,
added date,
PRIMARY KEY(id)
);
@@ -0,0 +1,137 @@
<?php
require '../../backend.php';

// Build the PostgreSQL connection string from the application configuration.
$db_host = $savvyext->cfgReadChar('database.host');
$db_name = $savvyext->cfgReadChar('database.name');
$db_user = $savvyext->cfgReadChar('database.user');
$db_pass = $savvyext->cfgReadChar('database.pass');
$db_port = $savvyext->cfgReadLong('database.port');
// "{$var}" is used instead of "${var}", which PHP 8.2 deprecates.
$connstr = "host={$db_host} port={$db_port} dbname={$db_name} user={$db_user} password={$db_pass}";
$con = null;

// Register this process as the handler for processExistingWeatherDataWorker jobs.
$worker = new GearmanWorker();
$worker->addServers("127.0.0.1:4730");
$worker->addFunction("processExistingWeatherDataWorker", function (GearmanJob $job) {
    global $con, $connstr;
    $con = pg_connect($connstr) or die("Could not connect to server\n");
    $workload = json_decode($job->workload(), true);
    if (is_array($workload)) {
        execute($workload);
    } else {
        // A payload that is not a JSON object/array cannot be processed; skip
        // it instead of crashing the worker on the typed parameters downstream.
        echo "Skipping job: workload is not valid JSON.\n";
    }
    echo "Waiting for job...\n";
});

// Process jobs until the Gearman client library reports a failure.
while ($worker->work()) {
    if ($worker->returnCode() != GEARMAN_SUCCESS) {
        echo "return_code: " . $worker->returnCode() . "\n";
        break;
    }
}
/* ********************** FUNCTION ********************** */
/**
 * Handle one job: look up the weather rows stored for the trip's start
 * coordinates and travel date and, when any exist, link the matching trips.
 *
 * @param array $workload Decoded job payload; reads travel_date,
 *                        location_start_lat, location_start_lng, root_id, root_type.
 */
function execute($workload)
{
    $stored_weather = get_weather_data_by_coor_and_date_in_weather_table(
        $workload['travel_date'],
        $workload['location_start_lat'],
        $workload['location_start_lng']
    );
    if (empty($stored_weather)) {
        return;
    }
    insert_trip_weather_with_existing_weather_data(
        $workload['root_id'],
        $workload['root_type'],
        $stored_weather,
        $workload['travel_date']
    );
}
/**
 * Fetch the id and time of every weather row stored for a date and coordinate pair.
 *
 * Values are sent as query parameters rather than concatenated into the SQL text.
 *
 * @param string $travel_date        Date compared against weather.date.
 * @param float  $location_start_lat Compared for equality against weather.latitude.
 * @param float  $location_start_lng Compared for equality against weather.longitude.
 * @return array List of ['id' => ..., 'time' => ...]; empty when nothing matches
 *               or the query fails.
 */
function get_weather_data_by_coor_and_date_in_weather_table(string $travel_date, float $location_start_lat, float $location_start_lng): array
{
    global $con;

    $query = "
        SELECT
            id, time
        FROM
            weather
        WHERE
            date = \$1
            AND latitude = \$2
            AND longitude = \$3
    ";
    $rs = pg_query_params($con, $query, [$travel_date, $location_start_lat, $location_start_lng]);
    if (!$rs) {
        echo "Cannot execute query: $query\n";
        return [];
    }

    $result = [];
    while ($row = pg_fetch_object($rs)) {
        $result[] = [
            'id' => $row->id,
            'time' => $row->time
        ];
    }
    return $result;
}
/**
 * Link every trip/quote of a root on a given date to the stored weather row
 * for its travel hour.
 *
 * Trips are found through trip_price_comparison (data_source 1 = parsedemail_item,
 * 2 = quotes). Each travel timestamp is truncated to the hour and matched against
 * the 'time' of the supplied weather rows; matches are inserted into trip_weather,
 * and already-linked rows are left untouched (ON CONFLICT DO NOTHING).
 *
 * @param int    $root_id
 * @param int    $root_type
 * @param array  $weather_records_for_specific_date List of ['id' => ..., 'time' => 'YYYY-MM-DD HH:00:00'].
 * @param string $date Travel date, YYYY-MM-DD.
 */
function insert_trip_weather_with_existing_weather_data(int $root_id, int $root_type, array $weather_records_for_specific_date, string $date)
{
    global $con;

    // NOTE(review): the quotes branch matches on q.completed while the dispatcher
    // groups quotes by q.travel_date - confirm which column is intended.
    $find_trip_query = "
        (SELECT
            tpc.data_source_id,
            tpc.data_source,
            pitm.travel_date
        FROM
            trip_price_comparison tpc
            INNER JOIN parsedemail_item pitm ON pitm.id = tpc.data_source_id
                AND tpc.data_source = 1
                AND root_id = \$1
                AND root_type = \$2
        WHERE
            pitm.travel_date IS NOT NULL
            AND to_char(pitm.travel_date, 'YYYY-MM-DD') = \$3)
        UNION (
        SELECT
            tpc.data_source_id,
            tpc.data_source,
            q.completed as travel_date
        FROM
            trip_price_comparison tpc
            INNER JOIN quotes q ON q.id = tpc.data_source_id
                AND tpc.data_source = 2
                AND root_id = \$1
                AND root_type = \$2
        WHERE
            q.completed IS NOT NULL
            AND to_char(q.completed, 'YYYY-MM-DD') = \$3)
    ";
    $rs = pg_query_params($con, $find_trip_query, [$root_id, $root_type, $date]);
    if (!$rs) {
        echo "Cannot execute query: $find_trip_query\n";
    }
    echo "[" . date("Y-m-d H:i:s") . "] Processing trip_weather with root_id = {$root_id}, root_type = {$root_type}, date = {$date}\n";
    if (!$rs) {
        return;
    }

    // Index weather ids by hour once; the first row wins for a duplicated hour,
    // matching what a linear search over the list would return.
    $weather_id_by_time = [];
    foreach ($weather_records_for_specific_date as $record) {
        if (!isset($weather_id_by_time[$record['time']])) {
            $weather_id_by_time[$record['time']] = $record['id'];
        }
    }

    $insert_query = "INSERT INTO trip_weather(data_source_id, data_source, weather_id)
        VALUES (\$1, \$2, \$3)
        ON CONFLICT (data_source_id,data_source) DO NOTHING";

    while ($row = pg_fetch_object($rs)) {
        // Truncate 'YYYY-MM-DD HH:II:SS' to 'YYYY-MM-DD HH:00:00' so it can be
        // compared with the hourly weather timestamps.
        $travel_hour = substr($row->travel_date, 0, 13) . ':00:00';
        if (!isset($weather_id_by_time[$travel_hour])) {
            continue;
        }
        $insert_rs = pg_query_params($con, $insert_query, [
            $row->data_source_id,
            $row->data_source,
            $weather_id_by_time[$travel_hour],
        ]);
        if (!$insert_rs) {
            echo "Cannot execute query: $insert_query\n";
        }
    }
}
@@ -0,0 +1,100 @@
<?php
/**
 * Purpose of this script: remove duplicated records from the weather table,
 * keeping only the record with the lowest id in each duplicate group.
 * Keep in mind: records referenced by the trip_weather table are never deleted.
 */
echo "[" . date("Y-m-d H:i:s") . "] script is starting.\n";
require('../../backend.php');

$db_host = $savvyext->cfgReadChar('database.host');
$db_name = $savvyext->cfgReadChar('database.name');
$db_user = $savvyext->cfgReadChar('database.user');
$db_pass = $savvyext->cfgReadChar('database.pass');
$db_port = $savvyext->cfgReadLong('database.port');
// "{$var}" is used instead of "${var}", which PHP 8.2 deprecates.
$connstr = "host={$db_host} port={$db_port} dbname={$db_name} user={$db_user} password={$db_pass}";
$con = pg_connect($connstr) or die ("Could not connect to server\n");

// Find groups of unlinked weather rows sharing (date, time, latitude, longitude).
// NOT EXISTS is used instead of NOT IN: trip_weather.weather_id is nullable, and
// a single NULL in the subquery makes "id NOT IN (...)" match no row at all.
$query = "
    SELECT
        date,
        time,
        latitude,
        longitude,
        min(id) as lowest_weather_id,
        COUNT(*) as count
    FROM
        weather
    WHERE
        NOT EXISTS (
            SELECT 1
            FROM
                trip_weather
            WHERE
                trip_weather.weather_id = weather.id
        )
    GROUP BY
        date,
        time,
        latitude,
        longitude
    HAVING COUNT(*) > 1;
";
$rs = pg_query($con, $query) or die("Cannot execute query: $query\n");
$rows = pg_num_rows($rs);
if ($rows == 0) {
    echo "[" . date("Y-m-d H:i:s") . "] Does not any records to delete.\n";
} else {
    while ($row = pg_fetch_assoc($rs)) {
        removeDuplicatedWeatherData(
            $row['date'],
            $row['time'],
            $row['latitude'],
            $row['longitude'],
            $row['count'],
            $row['lowest_weather_id']
        );
    }
}
pg_close($con);
echo "[" . date("Y-m-d H:i:s") . "] Script completed.\n";
/********************* FUNCTIONs ************************* */
/**
 * Delete the duplicates of one (date, time, latitude, longitude) group from the
 * weather table, keeping the row with the given lowest id and any row that is
 * referenced by trip_weather.
 *
 * @param string $date
 * @param string $time
 * @param string $lat
 * @param string $long
 * @param int    $duplicated_row_num Size of the duplicate group; kept for interface
 *                                   compatibility, the log reports the actual count.
 * @param int    $lowest_weather_id  Id of the row to keep.
 */
function removeDuplicatedWeatherData(string $date, string $time, string $lat, string $long, int $duplicated_row_num, int $lowest_weather_id)
{
    global $con;

    // NOT EXISTS (rather than NOT IN) stays correct when trip_weather.weather_id
    // contains NULLs; values travel as parameters instead of SQL text.
    $query = "
        DELETE FROM weather a
        WHERE a.date = \$1
            AND a.time = \$2
            AND a.latitude = \$3
            AND a.longitude = \$4
            AND a.id <> \$5
            AND NOT EXISTS (
                SELECT 1
                FROM
                    trip_weather
                WHERE
                    trip_weather.weather_id = a.id
            )
    ";
    $rs = pg_query_params($con, $query, [$date, $time, $lat, $long, $lowest_weather_id])
        or die("Cannot execute query: $query\n");

    // Report what was really removed instead of assuming "group size - 1".
    $deleted_row_num = pg_affected_rows($rs);
    echo "[" . date("Y-m-d H:i:s") . "] Deleted $deleted_row_num records with date = $date, time = $time, COOR = ($lat, $long).\n";
}
?>
+397
View File
@@ -0,0 +1,397 @@
<?php
echo "[" . date("Y-m-d H:i:s") . "] update_weather_data job is starting.\n";
require '../../backend.php';
require 'GearmanForClient.php';

$db_host = $savvyext->cfgReadChar('database.host');
$db_name = $savvyext->cfgReadChar('database.name');
$db_user = $savvyext->cfgReadChar('database.user');
$db_pass = $savvyext->cfgReadChar('database.pass');
$db_port = $savvyext->cfgReadLong('database.port');
// "{$var}" is used instead of "${var}", which PHP 8.2 deprecates.
$connstr = "host={$db_host} port={$db_port} dbname={$db_name} user={$db_user} password={$db_pass}";
$con = pg_connect($connstr) or die("Could not connect to server\n");

/*
// Init
INSERT INTO global_settings (key,description,value) VALUES ('last_weather_parsedemail_item','',0);
INSERT INTO global_settings (key,description,value) VALUES ('last_weather_quote','',0);
// Reset
TRUNCATE TABLE weather;
UPDATE global_settings SET value='0' WHERE key='last_weather_parsedemail_item';
UPDATE global_settings SET value='0' WHERE key='last_weather_quote';
*/

// Box the data set: rows after the stored watermarks, up to the current max ids
// of parsedemail_item and quotes.
$min_parsedemail_item_id = get_global_setting('last_weather_parsedemail_item', 0);
$min_quote_id = get_global_setting('last_weather_quote', 0);
$max_parsedemail_item_id = get_max_id('parsedemail_item');
$max_quote_id = get_max_id('quotes');

// 1) Where weather data already exists for the coordinates and date, do not call
//    the weather API; just link the trips to the existing weather rows.
process_existing_weather_data();

// 2) Fetch the coordinates and dates that have no rows in the weather table yet.
$rs = retrieve_coor_and_date_with_non_existing_weather_data();
if ($rs && pg_num_rows($rs) > 0) {
    echo "[" . date("Y-m-d H:i:s") . "] Processing fire job to fetchApiData worker.\n";
} else {
    echo "[" . date("Y-m-d H:i:s") . "] There is no job to fire to fetchApiData worker.\n";
}

// Group one chunk of rows and queue a fetchApiData job per group.
$dispatch_chunk = function (array $chunk) {
    foreach (process_group_data_by_date($chunk) as $item) {
        GearmanForClient::getInstance()->fetchApiData($item);
    }
};

// Rows are handed over in chunks of 300 so grouping never holds the full result set.
$chunk_data = [];
while ($rs && $row = pg_fetch_array($rs)) {
    $chunk_data[] = $row;
    if (count($chunk_data) == 300) {
        $dispatch_chunk($chunk_data);
        $chunk_data = [];
    }
}
// The last chunk usually holds fewer than 300 rows and still has to be sent.
if (!empty($chunk_data)) {
    $dispatch_chunk($chunk_data);
}

if ($rs) {
    update_global_setting('last_weather_parsedemail_item', $max_parsedemail_item_id);
    update_global_setting('last_weather_quote', $max_quote_id);
} else {
    // Advancing the watermarks after a failed retrieval would permanently skip
    // the rows in this id window; leave them so the next run retries.
    echo "[" . date("Y-m-d H:i:s") . "] Retrieval query failed; watermarks were not advanced.\n";
}
pg_close($con);
echo "[" . date("Y-m-d H:i:s") . "] update_weather_data job complete.\n";
/*********************************** Function ***********************************/
// query to group travel_date based on root_id
/**
 * Build the SQL that lists (root_id, root_type, travel_date) combinations inside
 * the current id window, together with the start address' country, coordinates,
 * geometry and timezone.
 *
 * The id window is read from the globals $min_parsedemail_item_id, $min_quote_id,
 * $max_parsedemail_item_id and $max_quote_id.
 *
 * @return string SQL text, without a trailing semicolon so it can be used as a subquery.
 */
function query_group_travel_date_based_on_root_id() : string
{
    global $min_parsedemail_item_id, $min_quote_id, $max_parsedemail_item_id, $max_quote_id;

    // Force the bounds to integers: a NULL bound (e.g. max(id) of an empty table)
    // or any non-numeric value would otherwise yield broken or unsafe SQL.
    $min_item = (int) $min_parsedemail_item_id;
    $max_item = (int) $max_parsedemail_item_id;
    $min_quote = (int) $min_quote_id;
    $max_quote = (int) $max_quote_id;

    $date_group_query = "
        (
            SELECT
                tpc.root_id,
                tpc.root_type,
                to_char(pitm.travel_date, 'YYYY-MM-DD') AS travel_date
            FROM
                trip_price_comparison tpc
                INNER JOIN parsedemail_item pitm ON pitm.id = tpc.data_source_id
                    AND tpc.data_source = 1
            WHERE
                tpc.data_source_id > {$min_item}
                AND tpc.data_source_id <= {$max_item}
                AND pitm.travel_date IS NOT NULL
            GROUP BY
                tpc.root_id,
                tpc.root_type,
                to_char(pitm.travel_date, 'YYYY-MM-DD')
        )
        UNION
        (
            SELECT
                tpc.root_id,
                tpc.root_type,
                to_char(q.travel_date, 'YYYY-MM-DD') AS travel_date
            FROM
                trip_price_comparison tpc
                INNER JOIN quotes q ON q.id = tpc.data_source_id
                    AND tpc.data_source = 2
            WHERE
                tpc.data_source_id > {$min_quote}
                AND tpc.data_source_id <= {$max_quote}
                AND q.travel_date IS NOT NULL
            GROUP BY
                tpc.root_id,
                tpc.root_type,
                to_char(q.travel_date, 'YYYY-MM-DD')
        )
    ";

    // Resolve each root to its start address to obtain lat, long and timezone.
    $query = " SELECT
            sub.*,
            a.country,
            a.latitude AS location_start_lat,
            a.longitude AS location_start_lng,
            a.geometry AS start_geometry,
            at.timezone
        FROM (
            SELECT
                subtable.*,
                CASE WHEN subtable.root_type = 1 THEN
                    parsedemail_item.location_start_id
                WHEN subtable.root_type = 2 THEN
                    quotes.location_start_id
                END AS location_start_id
            FROM (" . $date_group_query . ") AS subtable
            LEFT JOIN parsedemail_item ON (parsedemail_item.id = subtable.root_id
                AND subtable.root_type = 1)
            LEFT JOIN quotes ON (quotes.id = subtable.root_id
                AND subtable.root_type = 2)) AS sub
        JOIN address a ON a.id = sub.location_start_id
        JOIN address_timezone at ON a.timezone = at.id
    ";
    return $query;
}
/**
 * Queue a processExistingWeatherDataWorker job for every grouped trip row whose
 * date and start coordinates already have rows in the weather table, so those
 * trips are linked without calling the weather API.
 */
function process_existing_weather_data()
{
    global $con;

    $exist_query = "SELECT
*
FROM (" . query_group_travel_date_based_on_root_id() . ") AS sub2
WHERE
EXISTS (
SELECT
1
FROM
weather
WHERE
weather.date = to_date(sub2.travel_date, 'YYYY-MM-DD')
AND weather.latitude = sub2.location_start_lat
AND weather.longitude = sub2.location_start_lng)
";
    $result = pg_query($con, $exist_query);
    if (false == $result) {
        echo "Cannot execute query: $exist_query\n";
    }

    $has_rows = $result && pg_num_rows($result) > 0;
    if (!$has_rows) {
        echo "[" . date("Y-m-d H:i:s") . "] There is no job to fire to processExistingWeatherDataWorker worker.\n";
    } else {
        echo "[" . date("Y-m-d H:i:s") . "] Processing fire job to processExistingWeatherDataWorker worker.\n";
    }

    if (!$result) {
        return;
    }
    // One background job per matching row.
    while ($candidate = pg_fetch_array($result)) {
        GearmanForClient::getInstance()->processExistingWeatherData($candidate);
    }
}
/**
 * Run the query selecting the grouped trip rows whose date and start coordinates
 * have no rows in the weather table.
 *
 * @return mixed The pg_query() result, or false when the query failed.
 */
function retrieve_coor_and_date_with_non_existing_weather_data()
{
    global $con;

    $non_exist_query = "SELECT
*
FROM (" . query_group_travel_date_based_on_root_id() . ") AS sub2
WHERE
NOT EXISTS (
SELECT
1
FROM
weather
WHERE
weather.date = to_date(sub2.travel_date, 'YYYY-MM-DD')
AND weather.latitude = sub2.location_start_lat
AND weather.longitude = sub2.location_start_lng)
";
    $result = pg_query($con, $non_exist_query);
    if (false == $result) {
        echo "Cannot execute query: $non_exist_query\n";
    }
    return $result;
}
/**
 * Collapse trip rows into one work item per (root_id, root_type, year-month).
 *
 * Why group by YYYY-MM: the worldweatheronline API documentation states that
 * when retrieving weather between two dates, the enddate parameter must have the
 * same month and year as the date parameter.
 *
 * Input rows carry root_id, root_type, travel_date (YYYY-MM-DD),
 * location_start_lat, location_start_lng, start_geometry and timezone.
 *
 * Returned sample:
 *   [
 *     [
 *       'root_id' => 32232,
 *       'root_type' => 1,
 *       'date_list' => ['2016-11-14', '2016-11-18', '2016-11-27'],
 *       'min_max_date' => ['min_date' => '2016-11-14', 'max_date' => '2016-11-27'],
 *       'location_start_lat' => 34.43242342,
 *       'location_start_lng' => 36.43243342,
 *       'start_geometry' => '0101000020E6100000DA5E6633629A5EC0052C5ED152E44240',
 *       'timezone' => 'Asia/Ho_Chi_Minh'
 *     ],
 *     ...
 *   ]
 *
 * @param array $data Rows as described above.
 * @return array List of work items; location and timezone come from the first row of each group.
 */
function process_group_data_by_date(array $data) : array
{
    // Bucket the rows as [root_id][root_type]['YYYY-MM'] => list of rows.
    $grouped = [];
    foreach ($data as $element) {
        $month_year = substr($element['travel_date'], 0, 7); // just get YYYY-MM
        $grouped[$element['root_id']][$element['root_type']][$month_year][] = $element;
    }

    $returnData = [];
    foreach ($grouped as $rows_by_type) {
        foreach ($rows_by_type as $rows_by_month) {
            foreach ($rows_by_month as $rows) {
                // array_unique() keeps the original keys; re-index so the list has
                // no gaps and is JSON-encoded as an array rather than an object.
                $date_list = array_values(array_unique(array_column($rows, 'travel_date')));
                $first = $rows[0];
                $returnData[] = [
                    'root_id' => $first['root_id'],
                    'root_type' => $first['root_type'],
                    'date_list' => $date_list,
                    'min_max_date' => get_min_max_date($date_list), // dates are YYYY-MM-DD
                    'location_start_lat' => $first['location_start_lat'],
                    'location_start_lng' => $first['location_start_lng'],
                    'start_geometry' => $first['start_geometry'],
                    'timezone' => $first['timezone']
                ];
            }
        }
    }
    return $returnData;
}
/**
 * Read a value from global_settings, matching the key case-insensitively.
 *
 * @param string $key Setting key.
 * @param mixed  $val Fallback returned when the key is missing, its value is
 *                    NULL, or the query fails.
 * @return mixed The stored value as returned by PostgreSQL, or $val.
 */
function get_global_setting($key, $val = 0)
{
    global $con;

    // The key is sent as a parameter instead of being escaped into the SQL text.
    $q = "SELECT value FROM global_settings WHERE lower(key)=lower(\$1)";
    $r = pg_query_params($con, $q, [$key]);
    if ($r && pg_num_rows($r) && $f = pg_fetch_row($r)) {
        // A NULL value would otherwise leak into the id-window queries as an empty bound.
        return $f[0] !== null ? $f[0] : $val;
    }
    return $val;
}
/**
 * Store a value in global_settings for the given key (matched case-insensitively).
 *
 * @param string $key Setting key.
 * @param mixed  $val New value; NULL is ignored because there is nothing to store.
 * @return int Number of rows updated, 0 on failure or when $val is NULL.
 */
function update_global_setting($key, $val)
{
    global $con;

    if ($val === null) {
        // Previously this produced value='' and a failing statement for the int4 column.
        return 0;
    }
    $q = "UPDATE global_settings SET value=\$1 WHERE lower(key)=lower(\$2)";
    $r = pg_query_params($con, $q, [(string) $val, $key]);
    if ($r) {
        return pg_affected_rows($r);
    }
    return 0;
}
/**
 * Return the highest id in a table.
 *
 * @param string $what Table name. It is placed in the SQL text as-is (identifiers
 *                     cannot be bound as parameters), so pass trusted names only.
 * @return mixed max(id) as returned by PostgreSQL, or 0 when the table is empty
 *               or the query fails.
 */
function get_max_id($what)
{
    global $con;

    $q = "SELECT max(id) FROM {$what}";
    $r = pg_query($con, $q);
    if ($r && pg_num_rows($r) && $f = pg_fetch_row($r)) {
        // max() over an empty table yields one row holding NULL; report 0 so
        // callers never interpolate an empty upper bound into their queries.
        return $f[0] !== null ? $f[0] : 0;
    }
    return 0;
}
/**
 * Find the earliest and latest date in a list of date strings.
 *
 * Dates are compared by their strtotime() value. Keys of the input are ignored.
 *
 * @param array $date_list Date strings, e.g. 'YYYY-MM-DD'.
 * @return array ['min_date' => string, 'max_date' => string]; both are '' for an empty list.
 */
function get_min_max_date(array $date_list) : array
{
    if (empty($date_list)) {
        // Nothing to compare; avoid reading undefined offsets.
        return [
            'min_date' => '',
            'max_date' => ''
        ];
    }

    // usort() re-indexes from 0, so first/last can be read by position afterwards.
    usort($date_list, function ($a, $b) {
        return strtotime($a) <=> strtotime($b);
    });

    return [
        'min_date' => $date_list[0],
        'max_date' => $date_list[count($date_list) - 1]
    ];
}
@@ -0,0 +1,393 @@
<?php
require '../../backend.php';

// Build the PostgreSQL connection string from the application configuration.
$db_host = $savvyext->cfgReadChar('database.host');
$db_name = $savvyext->cfgReadChar('database.name');
$db_user = $savvyext->cfgReadChar('database.user');
$db_pass = $savvyext->cfgReadChar('database.pass');
$db_port = $savvyext->cfgReadLong('database.port');
// "{$var}" is used instead of "${var}", which PHP 8.2 deprecates.
$connstr = "host={$db_host} port={$db_port} dbname={$db_name} user={$db_user} password={$db_pass}";
$con = null;

// Register this process as the handler for fetchApiData jobs.
$worker = new GearmanWorker();
$worker->addServers("127.0.0.1:4730");
$worker->addFunction("fetchApiData", function (GearmanJob $job) {
    global $con, $connstr;
    $con = pg_connect($connstr) or die("Could not connect to server\n");
    $workload = json_decode($job->workload(), true);
    if (is_array($workload)) {
        execute($workload);
    } else {
        // A payload that is not a JSON object/array cannot be processed; skip
        // it instead of failing on missing keys downstream.
        echo "Skipping job: workload is not valid JSON.\n";
    }
    echo "Waiting for job...\n";
});

// Process jobs until the Gearman client library reports a failure.
while ($worker->work()) {
    if ($worker->returnCode() != GEARMAN_SUCCESS) {
        echo "return_code: " . $worker->returnCode() . "\n";
        break;
    }
}
/* ********************** FUNCTION ********************** */
/**
 * Handle one fetchApiData job: pick an active weather API account and fetch,
 * store and link the weather for the workload. The worker process exits when
 * no account is available.
 *
 * @param array $workload Decoded job payload.
 */
function execute($workload)
{
    $account = get_api_account();
    if (empty($account)) {
        echo "The API key not found.\n";
        exit();
    }

    insert_weather([
        'workload' => $workload,
        'api_account' => $account,
    ]);
}
/**
 * Fetch hourly weather for a workload from the weather API, store the hours of
 * every requested date that is not in the weather table yet, and link the trips
 * of that date to their weather rows.
 *
 * Workload sample:
 *   [
 *     'root_id' => 32232,
 *     'root_type' => 1,
 *     'min_max_date' => ['min_date' => '2016-11-14', 'max_date' => '2016-11-27'],
 *     'date_list' => ['2016-11-14', '2016-11-18', '2016-11-27'],
 *     'location_start_lat' => 34.43242342,
 *     'location_start_lng' => 36.43243342,
 *     'start_geometry' => '0101000020E6100000DA5E6633629A5EC0052C5ED152E44240',
 *     'timezone' => 'Asia/Ho_Chi_Minh'
 *   ]
 *
 * @param array $data ['workload' => array as above, 'api_account' => row of weather_services].
 */
function insert_weather(array $data)
{
    global $con;

    $api_service_info = $data['api_account'];
    $workload = $data['workload'];
    $weather_api_key = trim($api_service_info['api_key']);
    $weather_url = trim($api_service_info['url']);

    $location_start_lat = $workload['location_start_lat'];
    $location_start_lng = $workload['location_start_lng'];
    $min_max_date = $workload['min_max_date'];
    $date_list = $workload['date_list'];
    $start_geometry = $workload['start_geometry'];
    $root_id = $workload['root_id'];
    $root_type = $workload['root_type'];

    if (!is_numeric($location_start_lat) || !is_numeric($location_start_lng)) {
        // The lookups below take float coordinates; a missing coordinate would
        // abort the worker with a TypeError.
        echo "[" . date("Y-m-d H:i:s") . "] Skipping ROOT_ID = {$root_id}: start coordinates are missing.\n";
        return;
    }

    // tp = 1 => get every hour. http_build_query() URL-encodes each value.
    $url = $weather_url . '?' . http_build_query([
        'key' => $weather_api_key,
        'q' => $location_start_lat . ',' . $location_start_lng,
        'tp' => 1,
        'format' => 'json',
        'date' => $min_max_date['min_date'],
        'enddate' => $min_max_date['max_date'],
    ], '', '&');

    // Collect data, keyed by date.
    $response = get_weather_data_from_API($url);
    $response = format_weatheronline($response, $location_start_lat, $location_start_lng, $start_geometry);
    if (empty($response)) {
        return;
    }

    foreach ($date_list as $date) {
        // Reuse stored rows when the date is already in the weather table;
        // otherwise insert the hours returned by the API for that date.
        $weather_records_for_specific_date = get_weather_data_by_coor_and_date_in_weather_table($date, $location_start_lat, $location_start_lng);
        if (empty($weather_records_for_specific_date)) {
            if (empty($response[$date])) {
                // Without hourly rows the INSERT would have no VALUES at all.
                echo "[" . date("Y-m-d H:i:s") . "] No API data for DATE = {$date}, COOR($location_start_lat, $location_start_lng); skipped.\n";
                continue;
            }
            // One record per hour returned for the date.
            $insert_query = generate_insert_query($response[$date]);
            $rs = pg_query($con, $insert_query);
            if (!$rs) {
                echo "Cannot execute query: $insert_query\n";
                continue;
            }
            // RETURNING id, time yields the same row shape as the lookup above.
            $weather_records_for_specific_date = pg_fetch_all($rs) ?: [];
        }
        // Link hour to a trip (in trip_weather table).
        link_trip_and_weather($root_id, $root_type, $weather_records_for_specific_date, $date);
        echo "[" . date("Y-m-d H:i:s") . "] With DATE = {$date}, COOR($location_start_lat, $location_start_lng) and ROOT_ID = {$root_id}: Insert weather into weather table
and link hour to a trip (in trip_weather table) \n";
    }
}
/**
 * Fetch the id and time of every weather row stored for a date and coordinate
 * pair, logging a notice when rows already exist.
 *
 * Values are sent as query parameters rather than concatenated into the SQL text.
 *
 * @param string $travel_date        Date compared against weather.date.
 * @param float  $location_start_lat Compared for equality against weather.latitude.
 * @param float  $location_start_lng Compared for equality against weather.longitude.
 * @return array List of ['id' => ..., 'time' => ...]; empty when nothing matches
 *               or the query fails.
 */
function get_weather_data_by_coor_and_date_in_weather_table(string $travel_date, float $location_start_lat, float $location_start_lng): array
{
    global $con;

    $query = "
        SELECT
            id, time
        FROM
            weather
        WHERE
            date = \$1
            AND latitude = \$2
            AND longitude = \$3
    ";
    $rs = pg_query_params($con, $query, [$travel_date, $location_start_lat, $location_start_lng]);
    if (!$rs) {
        echo "Cannot execute query: $query\n";
        return [];
    }

    $result = [];
    while ($row = pg_fetch_object($rs)) {
        $result[] = [
            'id' => $row->id,
            'time' => $row->time
        ];
    }
    if (!empty($result)) {
        echo "[" . date("Y-m-d H:i:s") . "] Travel date = {$travel_date} and COOR({$location_start_lat}, {$location_start_lng}) are already existing in weather table.\n";
    }
    return $result;
}
/**
 * Link every trip/quote of a root on a given date to the weather row for its
 * travel hour.
 *
 * Trips are found through trip_price_comparison (data_source 1 = parsedemail_item,
 * 2 = quotes). Each travel timestamp is truncated to the hour and matched against
 * the 'time' of the supplied weather rows; matches are inserted into trip_weather,
 * and already-linked rows are left untouched (ON CONFLICT DO NOTHING).
 *
 * @param int    $root_id
 * @param int    $root_type
 * @param array  $weather_records_for_specific_date List of ['id' => ..., 'time' => 'YYYY-MM-DD HH:00:00'].
 * @param string $date Travel date, YYYY-MM-DD.
 */
function link_trip_and_weather(int $root_id, int $root_type, array $weather_records_for_specific_date, string $date)
{
    global $con;

    // NOTE(review): the quotes branch matches on q.completed while the dispatcher
    // groups quotes by q.travel_date - confirm which column is intended.
    $find_trip_query = "
        (SELECT
            tpc.data_source_id,
            tpc.data_source,
            pitm.travel_date
        FROM
            trip_price_comparison tpc
            INNER JOIN parsedemail_item pitm ON pitm.id = tpc.data_source_id
                AND tpc.data_source = 1
                AND root_id = \$1
                AND root_type = \$2
        WHERE
            pitm.travel_date IS NOT NULL
            AND to_char(pitm.travel_date, 'YYYY-MM-DD') = \$3)
        UNION (
        SELECT
            tpc.data_source_id,
            tpc.data_source,
            q.completed as travel_date
        FROM
            trip_price_comparison tpc
            INNER JOIN quotes q ON q.id = tpc.data_source_id
                AND tpc.data_source = 2
                AND root_id = \$1
                AND root_type = \$2
        WHERE
            q.completed IS NOT NULL
            AND to_char(q.completed, 'YYYY-MM-DD') = \$3)
    ";
    $rs = pg_query_params($con, $find_trip_query, [$root_id, $root_type, $date]);
    if (!$rs) {
        echo "Cannot execute query: $find_trip_query\n";
        // Fetching from a failed result is an error; there is nothing to link.
        return;
    }

    // Index weather ids by hour once; the first row wins for a duplicated hour,
    // matching what a linear search over the list would return.
    $weather_id_by_time = [];
    foreach ($weather_records_for_specific_date as $record) {
        if (!isset($weather_id_by_time[$record['time']])) {
            $weather_id_by_time[$record['time']] = $record['id'];
        }
    }

    $insert_query = "INSERT INTO trip_weather(data_source_id, data_source, weather_id)
        VALUES (\$1, \$2, \$3)
        ON CONFLICT (data_source_id,data_source) DO NOTHING";

    while ($row = pg_fetch_object($rs)) {
        // Truncate 'YYYY-MM-DD HH:II:SS' to 'YYYY-MM-DD HH:00:00' so it can be
        // compared with the hourly weather timestamps.
        $travel_hour = substr($row->travel_date, 0, 13) . ':00:00';
        if (!isset($weather_id_by_time[$travel_hour])) {
            continue;
        }
        $insert_rs = pg_query_params($con, $insert_query, [
            $row->data_source_id,
            $row->data_source,
            $weather_id_by_time[$travel_hour],
        ]);
        if (!$insert_rs) {
            echo "Cannot execute query: $insert_query\n";
        }
    }
}
/**
 * GET the given weather API URL and return the decoded JSON response.
 *
 * Any HTTP status other than 200 terminates the worker process after printing
 * the response body (429 is reported as the daily call limit being reached).
 *
 * @param string $url Fully built request URL.
 * @return mixed Result of json_decode() on the body (objects, not arrays); null
 *               when the body is not valid JSON.
 */
function get_weather_data_from_API($url)
{
    $curl = curl_init();
    curl_setopt_array($curl, [
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_URL => $url,
        // This entry was written as "CURLOPT_HTTPHEADER, [...]", which added two
        // positional elements instead of setting the header option.
        CURLOPT_HTTPHEADER => [
            'content-type: application/json',
        ],
    ]);

    // Send the request and read the status before releasing the handle, so the
    // handle is closed on the exit paths as well.
    $result = curl_exec($curl);
    $http_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    switch ($http_code) {
        case 200:
            break;
        case 429:
            echo "The API key has reached calls per day allowed limit. \n";
            echo $result;
            exit; // terminate worker
        default:
            echo "Something went wrong at call API with url: {$url}.\n";
            echo $result;
            exit; // terminate worker
    }

    return json_decode($result);
}
/**
 * Get the first active API account from weather_services.
 *
 * @return array Row with id, service_name, api_key and url; empty array when no
 *               active account exists or the query fails.
 */
function get_api_account()
{
    global $con;

    $query = "SELECT id, service_name, api_key, url
        FROM weather_services
        WHERE
            active = TRUE
        LIMIT 1";
    $rs = pg_query($con, $query);
    if (!$rs) {
        echo "Cannot execute query: $query\n";
        // Fetching from a failed result is an error; report "no account" instead.
        return [];
    }

    // Fetch the single row once; pg_fetch_assoc() returns false when there is none.
    $row = pg_fetch_assoc($rs);
    return $row ? $row : [];
}
/**
 * Flatten a decoded weather API response into per-date lists of hourly rows
 * shaped for generate_insert_query().
 *
 * @param mixed $data     Decoded response; expected to expose data->weather, a
 *                        list of days each with date, avgtempC, totalSnow_cm and hourly.
 * @param mixed $lat      Start latitude copied into every row.
 * @param mixed $long     Start longitude copied into every row.
 * @param mixed $geometry Start geometry copied into every row.
 * @return array ['YYYY-MM-DD' => list of rows]; empty when the response carries
 *               no weather list (e.g. an error payload or undecodable body).
 */
function format_weatheronline($data, $lat, $long, $geometry)
{
    $weather = $data->data->weather ?? null;
    if (!is_array($weather)) {
        return [];
    }

    $response_data = [];
    foreach ($weather as $weather_by_date) {
        $hourly_list = $weather_by_date->hourly ?? [];
        foreach ($hourly_list as $hourly) {
            // The response encodes time as hmm (1 o'clock = 100); convert it to
            // YYYY-MM-DD HH:00:00.
            $hour = intdiv((int) $hourly->time, 100);
            $parsed = DateTime::createFromFormat('Y-m-d G:i:s', $weather_by_date->date . " {$hour}:00:00");
            if ($parsed === false) {
                // Unparseable date/hour: skip the entry rather than fail on format().
                continue;
            }
            $response_data[$weather_by_date->date][] = [
                'time' => $parsed->format('Y-m-d H:i:s'),
                'tempC' => $hourly->tempC,
                'avgtempC' => $weather_by_date->avgtempC,
                'totalSnow_cm' => $weather_by_date->totalSnow_cm,
                'windspeedKmph' => $hourly->windspeedKmph,
                'weatherCode' => $hourly->weatherCode,
                'precipMM' => $hourly->precipMM,
                'humidity' => $hourly->humidity,
                'visibility' => $hourly->visibility,
                'pressure' => $hourly->pressure,
                'HeatIndexC' => $hourly->HeatIndexC,
                'WindChillC' => $hourly->WindChillC,
                'WindGustKmph' => $hourly->WindGustKmph,
                'FeelsLikeC' => $hourly->FeelsLikeC,
                'date' => $weather_by_date->date,
                'location_start_lat' => $lat,
                'location_start_lng' => $long,
                'start_geometry' => $geometry,
                // Columns this API does not fill; emitted as SQL NULL by the insert builder.
                'dewpoint' => 'null',
                'precipitation' => 'null',
                'precipitation_3' => 'null',
                'precipitation_6' => 'null',
                'winddirection' => 'null',
                'condition' => 'null',
            ];
        }
    }
    return $response_data;
}
/**
 * Build a multi-row INSERT for the weather table from formatted hourly rows.
 *
 * The rows originate from an external API response, so nothing is spliced into
 * the SQL raw: numeric columns are emitted only when is_numeric() accepts the
 * value (otherwise NULL, which also covers the 'null' placeholders), and text
 * columns are quoted with pg_escape_literal() on the global connection $con.
 *
 * @param array $data Non-empty list of rows as produced by format_weatheronline().
 * @return string INSERT statement ending in "RETURNING id, time".
 */
function generate_insert_query($data)
{
    global $con;

    // weather column => [row field, true when the value is quoted as text]
    $columns = [
        'temp_c' => ['tempC', false],
        'avg_temp_c' => ['avgtempC', false],
        'total_snow_cm' => ['totalSnow_cm', false],
        'wind_speed_kmph' => ['windspeedKmph', true],
        'weather_code' => ['weatherCode', true],
        'precip_mm' => ['precipMM', true],
        'humidity' => ['humidity', true],
        'visibility' => ['visibility', true],
        'pressure' => ['pressure', true],
        'heat_index_c' => ['HeatIndexC', true],
        'wind_chill_c' => ['WindChillC', true],
        'wind_gust_kmph' => ['WindGustKmph', true],
        'feels_like_c' => ['FeelsLikeC', true],
        'latitude' => ['location_start_lat', false],
        'longitude' => ['location_start_lng', false],
        'geometry' => ['start_geometry', true],
        'date' => ['date', true],
        'dewpoint' => ['dewpoint', false],
        'precipitation' => ['precipitation', false],
        'precipitation_3' => ['precipitation_3', false],
        'precipitation_6' => ['precipitation_6', false],
        'winddirection' => ['winddirection', false],
        'condition' => ['condition', false],
        'time' => ['time', true],
    ];

    $value = [];
    foreach ($data as $element) {
        $cells = [];
        foreach ($columns as $spec) {
            list($field, $is_text) = $spec;
            $raw = isset($element[$field]) ? $element[$field] : null;
            if ($raw === null) {
                $cells[] = 'NULL';
            } elseif ($is_text) {
                $cells[] = pg_escape_literal($con, (string) $raw);
            } else {
                $cells[] = is_numeric($raw) ? (string) $raw : 'NULL';
            }
        }
        $value[] = '(' . implode(', ', $cells) . ')';
    }

    $query = 'INSERT INTO weather(' . implode(', ', array_keys($columns)) . ') VALUES'
        . implode(',', $value) . ' RETURNING id, time';
    return $query;
}