first commit
This commit is contained in:
@@ -0,0 +1,355 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/statistics/PKPStatisticsHelper.php
|
||||
*
|
||||
* Copyright (c) 2013-2021 Simon Fraser University
|
||||
* Copyright (c) 2003-2021 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class PKPStatisticsHelper
|
||||
*
|
||||
* @ingroup statistics
|
||||
*
|
||||
* @brief Statistics helper class.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace PKP\statistics;
|
||||
|
||||
use APP\facades\Repo;
|
||||
use GeoIp2\Database\Reader;
|
||||
use InvalidArgumentException;
|
||||
use PKP\cache\CacheManager;
|
||||
use PKP\cache\FileCache;
|
||||
use PKP\context\Context;
|
||||
use PKP\file\PrivateFileManager;
|
||||
use PKP\site\Site;
|
||||
use Sokil\IsoCodes\IsoCodesFactory;
|
||||
|
||||
abstract class PKPStatisticsHelper
{
    // Dimensions:
    // 1) publication object dimension:
    public const STATISTICS_DIMENSION_CONTEXT_ID = 'context_id';
    public const STATISTICS_DIMENSION_SUBMISSION_ID = 'submission_id';
    public const STATISTICS_DIMENSION_ASSOC_TYPE = 'assoc_type';
    public const STATISTICS_DIMENSION_FILE_TYPE = 'file_type';
    public const STATISTICS_DIMENSION_SUBMISSION_FILE_ID = 'submission_file_id';
    public const STATISTICS_DIMENSION_REPRESENTATION_ID = 'representation_id';

    // 2) time dimension:
    public const STATISTICS_DIMENSION_YEAR = 'year';
    public const STATISTICS_DIMENSION_MONTH = 'month';
    public const STATISTICS_DIMENSION_DAY = 'day'; // used as API parameter for timelines
    public const STATISTICS_DIMENSION_DATE = 'date';

    // 3) geography dimension:
    public const STATISTICS_DIMENSION_COUNTRY = 'country';
    public const STATISTICS_DIMENSION_REGION = 'region';
    public const STATISTICS_DIMENSION_CITY = 'city';

    // Metrics:
    public const STATISTICS_METRIC = 'metric';
    public const STATISTICS_METRIC_UNIQUE = 'metric_unique';

    // Ordering:
    public const STATISTICS_ORDER_ASC = 'ASC';
    public const STATISTICS_ORDER_DESC = 'DESC';

    // File type to be used in publication object dimension.
    public const STATISTICS_FILE_TYPE_HTML = 1;
    public const STATISTICS_FILE_TYPE_PDF = 2;
    public const STATISTICS_FILE_TYPE_OTHER = 3;
    public const STATISTICS_FILE_TYPE_DOC = 4;

    // Set the earliest date used
    public const STATISTICS_EARLIEST_DATE = '2001-01-01';

    /**
     * These are rules defined by the COUNTER project.
     * See https://www.projectcounter.org/code-of-practice-five-sections/7-processing-rules-underlying-counter-reporting-data/#doubleclick
     */
    public const COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS = 30;

    // Geography settings
    public const STATISTICS_SETTING_COUNTRY = 'country';
    public const STATISTICS_SETTING_REGION = 'country+region';
    public const STATISTICS_SETTING_CITY = 'country+region+city';

    /** File cache of Geo data keyed by hashed IP; created lazily by getLocation(). */
    public FileCache $geoDataCache;

    /** File cache of institution IDs keyed by hashed IP and context ID; created lazily by getInstitutionIds(). */
    public FileCache $institutionDataCache;

    /**
     * Get the usage stats directory path.
     *
     * @return string The private files base path followed by '/usageStats'
     */
    public static function getUsageStatsDirPath(): string
    {
        $basePath = (new PrivateFileManager())->getBasePath();
        $resolvedPath = realpath($basePath);
        if ($resolvedPath === false) {
            // realpath() returns false when the path cannot be resolved (e.g. the
            // directory does not exist). Concatenating onto false would silently
            // yield '/usageStats' at the filesystem root, so fall back to the
            // configured, unresolved base path instead.
            $resolvedPath = rtrim((string) $basePath, '/\\');
        }
        return $resolvedPath . '/usageStats';
    }

    /**
     * Get the path to the salt file.
     */
    public static function getSaltFileName(): string
    {
        return self::getUsageStatsDirPath() . '/salt';
    }

    /**
     * Get the path to the Geo DB file.
     */
    public static function getGeoDBPath(): string
    {
        return self::getUsageStatsDirPath() . '/IPGeoDB.mmdb';
    }

    /**
     * Get document type based on the mimetype
     * The mimetypes considered here are subset of those used in PKPFileService::getDocumentType()
     *
     * @param string $mimetype Mimetype, compared verbatim against the known values
     *
     * @return int One of the StatisticsHelper::STATISTICS_FILE_TYPE_ constants;
     *  STATISTICS_FILE_TYPE_OTHER for any mimetype that is not listed
     */
    public static function getDocumentType(string $mimetype): int
    {
        switch ($mimetype) {
            case 'application/pdf':
            case 'application/x-pdf':
            case 'text/pdf':
            case 'text/x-pdf':
                return self::STATISTICS_FILE_TYPE_PDF;
            case 'application/msword':
            case 'application/word':
            case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
                return self::STATISTICS_FILE_TYPE_DOC;
            case 'text/html':
                return self::STATISTICS_FILE_TYPE_HTML;
            default:
                return self::STATISTICS_FILE_TYPE_OTHER;
        }
    }

    /**
     * Hash (SHA256) the given IP using the given SALT.
     *
     * NB: This implementation was taken from OA-S directly. See
     * http://sourceforge.net/p/openaccessstati/code-0/3/tree/trunk/logfile-parser/lib/logutils.php
     * We just do not implement the PHP4 part as OJS dropped PHP4 support.
     *
     * @return string Hexadecimal SHA256 digest of the IP immediately followed by the salt
     */
    public static function hashIp(string $ip, string $salt): string
    {
        return hash('sha256', $ip . $salt);
    }

    /**
     * Create a new salt, write it to the salt file and return it
     *
     * The salt is 16 bytes from random_bytes(), hex encoded (32 characters).
     * A failed write is logged and the salt is still returned.
     *
     * @param string $saltFileName Path of the file the salt is written to
     *
     * @return string The newly created salt
     */
    public static function createNewSalt(string $saltFileName): string
    {
        // random_bytes() replaces the former mcrypt / openssl / /dev/urandom chain,
        // whose last fallback produced an integer instead of a hex string.
        $newSalt = bin2hex(random_bytes(16));
        if (file_put_contents($saltFileName, $newSalt, LOCK_EX) === false) {
            error_log('There was a problem writing the salt file ' . $saltFileName . '.');
        }
        return $newSalt;
    }

    /**
     * Retrieve Geo data (country, region, city) using IP and based on the site i.e. context settings
     *
     * Nothing is looked up when the setting is 'disabled'. Region is only
     * returned for the region and city settings, city only for the city setting.
     *
     * @param bool $flushCache If true empty the Geo cache before the lookup
     *
     * @return array List of [country, region, city]; entries that are not enabled or not found are null
     */
    public function getGeoData(Site $site, Context $context, string $ip, string $hashedIp, bool $flushCache = false): array
    {
        $country = $region = $city = null;
        $enableGeoUsageStats = $context->getEnableGeoUsageStats($site);
        if ($enableGeoUsageStats === 'disabled') {
            return [$country, $region, $city];
        }

        $geoIPArray = $this->getLocation($ip, $hashedIp, $flushCache);
        $country = $geoIPArray['country'];
        $cityEnabled = $enableGeoUsageStats === self::STATISTICS_SETTING_CITY;
        if ($cityEnabled || $enableGeoUsageStats === self::STATISTICS_SETTING_REGION) {
            $region = $geoIPArray['region'];
        }
        if ($cityEnabled) {
            $city = $geoIPArray['city'];
        }
        return [$country, $region, $city];
    }

    /**
     * Get location based on the IP, use cache if exists.
     *
     * On a cache miss the Geo database is queried and the result, including
     * an all-null result when the lookup fails, is written to the cache.
     *
     * @param string $ip User IP
     * @param string $hashedIp Hashed user IP
     * @param bool $flush If true empty cache
     *
     * @return array Geo data for the given hashed IP
     *  [
     *    'country' => string|null Country ISO code,
     *    'region' => string|null Region ISO code
     *    'city' => string|null City name
     *  ]
     */
    public function getLocation(string $ip, string $hashedIp, bool $flush = false): array
    {
        if (!isset($this->geoDataCache)) {
            $geoCacheManager = CacheManager::getManager();
            /** @var FileCache */
            $this->geoDataCache = $geoCacheManager->getCache('geoIP', 'all', [$this, 'geoDataCacheMiss']);
        }

        if ($flush) {
            // Salt and thus hashed IPs changed, empty the cache.
            $this->geoDataCacheMiss($this->geoDataCache);
        }

        $cachedGeoData = $this->geoDataCache->getContents();
        if (array_key_exists($hashedIp, $cachedGeoData)) {
            return $cachedGeoData[$hashedIp];
        }

        $reader = $countryIsoCode = $regionIsoCode = $cityName = null;
        // static:: keeps the late binding the former $this-> call had.
        $geoDBPath = static::getGeoDBPath();
        try {
            $reader = new Reader($geoDBPath);
        } catch (\MaxMind\Db\Reader\InvalidDatabaseException | InvalidArgumentException $e) {
            error_log('There was a problem reading the Geo database at ' . $geoDBPath . '. Error: ' . $e->getMessage());
        }

        if (isset($reader)) {
            try {
                $geoIPRecord = $reader->city($ip);
                $countryIsoCode = $geoIPRecord->country->isoCode;
                // When found, up to three characters long subdivision portion of the ISO 3166-2 code is returned
                // s. https://github.com/maxmind/GeoIP2-php/blob/main/src/Record/Subdivision.php#L20
                $regionIsoCode = $geoIPRecord->mostSpecificSubdivision->isoCode;
                // DB-IP IP to City Lite database does not provide region Iso code but name,
                // thus try to get the region Iso code by the name,
                // but we need country for that
                if (!isset($regionIsoCode) && isset($countryIsoCode)) {
                    $regionName = $geoIPRecord->mostSpecificSubdivision->name;
                    if (isset($regionName)) {
                        $regionIsoCode = $this->findRegionIsoCodeByName($countryIsoCode, $regionName);
                    }
                }
                $cityName = $geoIPRecord->city->name;
            } catch (\BadMethodCallException $e) {
                error_log('There was a problem using city method on the Geo database at ' . $geoDBPath . '. Error: ' . $e->getMessage());
            } catch (\GeoIp2\Exception\AddressNotFoundException $e) {
                error_log('There was a problem finding IP in the Geo database at ' . $geoDBPath . '. Error: ' . $e->getMessage());
            } catch (\MaxMind\Db\Reader\InvalidDatabaseException $e) {
                error_log('There was a problem reading the Geo database at ' . $geoDBPath . '. Error: ' . $e->getMessage());
            }
        }

        $cachedGeoData[$hashedIp] = [
            'country' => $countryIsoCode,
            'region' => $regionIsoCode,
            'city' => $cityName,
        ];
        $this->geoDataCache->setEntireCache($cachedGeoData);
        return $cachedGeoData[$hashedIp];
    }

    /**
     * Find the subdivision part of the region code for a region name within a country.
     *
     * @return string|null The part after the first '-' of the matching region's code,
     *  or null when no region matches or the code contains no '-'
     */
    private function findRegionIsoCodeByName(string $countryIsoCode, string $regionName): ?string
    {
        $isoCodes = app(IsoCodesFactory::class);
        $allCountryRegions = $isoCodes->getSubdivisions()->getAllByCountryCode($countryIsoCode);
        foreach ($allCountryRegions as $countryRegion) {
            if ($countryRegion->getName() === $regionName) {
                // Guard against a code without a '-' separator instead of reading a missing index.
                return explode('-', $countryRegion->getCode())[1] ?? null;
            }
        }
        return null;
    }

    /**
     * Geo cache miss callback.
     *
     * Replaces the entire cache content with an empty array; it is also
     * called directly to flush the cache.
     */
    public function geoDataCacheMiss(FileCache $cache): array
    {
        $cache->setEntireCache([]);
        return [];
    }

    /**
     * Get institution IDs for a given context based on the IP, use cache if exists.
     *
     * @param int $contextId Context ID
     * @param string $ip User IP
     * @param string $hashedIp Hashed user IP
     * @param bool $flush If true empty cache
     *
     * @return array Institution IDs found for the given context and IP.
     *  The cache they are stored in has the form
     *  [
     *    hashedIP => contextId => institutionIds[]
     *  ]
     */
    public function getInstitutionIds(int $contextId, string $ip, string $hashedIp, bool $flush = false): array
    {
        if (!isset($this->institutionDataCache)) {
            $institutionCacheManager = CacheManager::getManager();
            /** @var FileCache */
            $this->institutionDataCache = $institutionCacheManager->getCache('institutionIP', 'all', [$this, 'institutionDataCacheMiss']);
        }

        if ($flush) {
            // Salt and thus hashed IPs changed, empty the cache.
            $this->institutionDataCacheMiss($this->institutionDataCache);
        }

        $cachedInstitutionData = $this->institutionDataCache->getContents();
        if (array_key_exists($hashedIp, $cachedInstitutionData) && array_key_exists($contextId, $cachedInstitutionData[$hashedIp])) {
            return $cachedInstitutionData[$hashedIp][$contextId];
        }

        $institutionIds = Repo::institution()->getCollector()
            ->filterByContextIds([$contextId])
            ->filterByIps([$ip])
            ->getIds()
            ->toArray();

        $cachedInstitutionData[$hashedIp][$contextId] = $institutionIds;
        $this->institutionDataCache->setEntireCache($cachedInstitutionData);
        return $cachedInstitutionData[$hashedIp][$contextId];
    }

    /**
     * Institution cache miss callback.
     *
     * Replaces the entire cache content with an empty array; it is also
     * called directly to flush the cache.
     */
    public function institutionDataCacheMiss(FileCache $cache): array
    {
        $cache->setEntireCache([]);
        return [];
    }
}
|
||||
|
||||
if (!PKP_STRICT_MODE) {
    // Outside strict mode the namespaced class is also reachable under its global name.
    class_alias('\PKP\statistics\PKPStatisticsHelper', '\PKPStatisticsHelper');

    // Each listed class constant is additionally defined as a global constant
    // of the same name, carrying the class constant's value.
    foreach ([
        'STATISTICS_DIMENSION_CONTEXT_ID',
        'STATISTICS_DIMENSION_SUBMISSION_ID',
        'STATISTICS_DIMENSION_REPRESENTATION_ID',
        'STATISTICS_DIMENSION_ASSOC_TYPE',
        'STATISTICS_DIMENSION_FILE_TYPE',
        'STATISTICS_DIMENSION_YEAR',
        'STATISTICS_DIMENSION_MONTH',
        'STATISTICS_DIMENSION_DAY',
        'STATISTICS_DIMENSION_DATE',
        'STATISTICS_DIMENSION_COUNTRY',
        'STATISTICS_DIMENSION_REGION',
        'STATISTICS_DIMENSION_CITY',
        'STATISTICS_METRIC',
        'STATISTICS_METRIC_UNIQUE',
        'STATISTICS_ORDER_ASC',
        'STATISTICS_ORDER_DESC',
        'STATISTICS_FILE_TYPE_HTML',
        'STATISTICS_FILE_TYPE_PDF',
        'STATISTICS_FILE_TYPE_OTHER',
        'STATISTICS_FILE_TYPE_DOC',
        'STATISTICS_EARLIEST_DATE',
        'COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS',
    ] as $constantName) {
        $classConstant = '\PKPStatisticsHelper::' . $constantName;
        define($constantName, constant($classConstant));
    }
}
|
||||
@@ -0,0 +1,209 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/statistics/PKPTemporaryItemInvestigationsDAO.php
|
||||
*
|
||||
* Copyright (c) 2022 Simon Fraser University
|
||||
* Copyright (c) 2022 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class PKPTemporaryItemInvestigationsDAO
|
||||
*
|
||||
* @ingroup statistics
|
||||
*
|
||||
* @brief Operations for retrieving and adding unique item (submission) investigations (abstract, primary and supp file views).
|
||||
*/
|
||||
|
||||
namespace PKP\statistics;
|
||||
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use PKP\config\Config;
|
||||
use PKP\db\DAORegistry;
|
||||
|
||||
class PKPTemporaryItemInvestigationsDAO
{
    /**
     * The name of the table.
     * This table contains all usage (clicks) for an item (submission),
     * considering abstract, primary and supp file views.
     */
    public string $table = 'usage_stats_unique_item_investigations_temporary_records';

    /**
     * Add the passed usage statistic record.
     *
     * The row consists of the columns from getInsertData() plus the line
     * number and the load id.
     */
    public function insert(object $entryData, int $lineNumber, string $loadId): void
    {
        $row = array_merge($this->getInsertData($entryData), [
            'line_number' => $lineNumber,
            'load_id' => $loadId,
        ]);

        DB::table($this->table)->insert($row);
    }

    /**
     * Get Laravel optimized array of data to insert into the table based on the log entry
     *
     * The user agent is cut to 255 bytes; empty country, region and city
     * values are stored as empty strings.
     */
    protected function getInsertData(object $entryData): array
    {
        $userAgent = substr($entryData->userAgent, 0, 255);
        $country = empty($entryData->country) ? '' : $entryData->country;
        $region = empty($entryData->region) ? '' : $entryData->region;
        $city = empty($entryData->city) ? '' : $entryData->city;

        return [
            'date' => $entryData->time,
            'ip' => $entryData->ip,
            'user_agent' => $userAgent,
            'context_id' => $entryData->contextId,
            'submission_id' => $entryData->submissionId,
            'representation_id' => $entryData->representationId,
            'submission_file_id' => $entryData->submissionFileId,
            'assoc_type' => $entryData->assocType,
            'file_type' => $entryData->fileType,
            'country' => $country,
            'region' => $region,
            'city' => $city,
        ];
    }

    /**
     * Delete all temporary records associated
     * with the passed load id.
     */
    public function deleteByLoadId(string $loadId): void
    {
        $recordsOfLoad = DB::table($this->table)->where('load_id', '=', $loadId);
        $recordsOfLoad->delete();
    }

    /**
     * Remove Unique Clicks
     * If multiple transactions represent the same item and occur in the same user-sessions, only one unique activity MUST be counted for that item.
     * Unique item is a submission.
     * A user session is defined by the combination of IP address + user agent + transaction date + hour of day.
     * Only the last unique activity will be retained (and thus counted), all the other will be removed.
     *
     * See https://www.projectcounter.org/code-of-practice-five-sections/7-processing-rules-underlying-counter-reporting-data/#counting
     */
    public function compileUniqueClicks(string $loadId): void
    {
        if ($this->usesPostgres()) {
            // A row is deleted when a row with a higher line number exists for the
            // same load, context, IP, user agent and submission, with an equal hour.
            $deleteSql = "
                DELETE FROM {$this->table} usui
                WHERE EXISTS (
                    SELECT * FROM (
                        SELECT 1 FROM {$this->table} usuit
                        WHERE usui.load_id = ? AND usuit.load_id = usui.load_id AND
                            usuit.context_id = usui.context_id AND
                            usuit.ip = usui.ip AND
                            usuit.user_agent = usui.user_agent AND
                            usuit.submission_id = usui.submission_id AND
                            EXTRACT(HOUR FROM usuit.date) = EXTRACT(HOUR FROM usui.date) AND
                            usui.line_number < usuit.line_number
                    ) AS tmp
                )
                ";
        } else {
            // Same matching through a self join; here the hour condition is
            // expressed with TIMESTAMPDIFF(HOUR, ...) = 0.
            $deleteSql = "
                DELETE FROM usui USING {$this->table} usui
                INNER JOIN {$this->table} usuit ON (
                    usuit.load_id = usui.load_id AND
                    usuit.context_id = usui.context_id AND
                    usuit.ip = usui.ip AND
                    usuit.user_agent = usui.user_agent AND
                    usuit.submission_id = usui.submission_id
                )
                WHERE usui.load_id = ? AND
                    TIMESTAMPDIFF(HOUR, usui.date, usuit.date) = 0 AND
                    usui.line_number < usuit.line_number
                ";
        }

        DB::statement($deleteSql, [$loadId]);
    }

    /**
     * Load unique geographical usage on the submission level
     */
    public function compileSubmissionGeoDailyMetrics(string $loadId): void
    {
        // The row count per load, context, submission, day, country, region and
        // city is inserted as metric_unique; metric itself is inserted as 0.
        $insertSql = "
            INSERT INTO metrics_submission_geo_daily (load_id, context_id, submission_id, date, country, region, city, metric, metric_unique)
            SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, country, region, city, 0 as metric, count(*) as metric_unique_tmp
                FROM {$this->table}
                WHERE load_id = ? AND submission_id IS NOT NULL AND (country <> '' OR region <> '' OR city <> '')
                GROUP BY load_id, context_id, submission_id, DATE(date), country, region, city) AS t
            ";

        // On an already existing row only metric_unique is overwritten.
        if ($this->usesPostgres()) {
            $onConflictSql = '
                ON CONFLICT ON CONSTRAINT msgd_uc_load_context_submission_c_r_c_date DO UPDATE
                SET metric_unique = excluded.metric_unique;
                ';
        } else {
            $onConflictSql = '
                ON DUPLICATE KEY UPDATE metric_unique = metric_unique_tmp;
                ';
        }

        DB::statement($insertSql . $onConflictSql, [$loadId]);
    }

    /**
     * Load unique COUNTER item (submission) investigations
     */
    public function compileCounterSubmissionDailyMetrics(string $loadId): void
    {
        // The row count per load, context, submission and day is inserted as
        // metric_investigations_unique; the other metric columns are inserted as 0.
        $insertSql = "
            INSERT INTO metrics_counter_submission_daily (load_id, context_id, submission_id, date, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
            SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, 0 as metric_investigations, count(*) as metric, 0 as metric_requests, 0 as metric_requests_unique
                FROM {$this->table}
                WHERE load_id = ? AND submission_id IS NOT NULL
                GROUP BY load_id, context_id, submission_id, DATE(date)) AS t
            ";

        // On an already existing row only metric_investigations_unique is overwritten.
        if ($this->usesPostgres()) {
            $onConflictSql = '
                ON CONFLICT ON CONSTRAINT msd_uc_load_id_context_id_submission_id_date DO UPDATE
                SET metric_investigations_unique = excluded.metric_investigations_unique;
                ';
        } else {
            $onConflictSql = '
                ON DUPLICATE KEY UPDATE metric_investigations_unique = metric;
                ';
        }

        DB::statement($insertSql . $onConflictSql, [$loadId]);
    }

    /**
     * Load unique institutional COUNTER item (submission) investigations
     *
     * The upsert is executed once per institution id returned for the load id.
     */
    public function compileCounterSubmissionInstitutionDailyMetrics(string $loadId): void
    {
        // Rows are matched with the institution temporary records by load id and line number.
        $insertSql = "
            INSERT INTO metrics_counter_submission_institution_daily (load_id, context_id, submission_id, date, institution_id, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
            SELECT * FROM (
                SELECT usui.load_id, usui.context_id, usui.submission_id, DATE(usui.date) as date, usi.institution_id, 0 as metric_investigations, count(*) as metric, 0 as metric_requests, 0 as metric_requests_unique
                FROM {$this->table} usui
                JOIN usage_stats_institution_temporary_records usi on (usi.load_id = usui.load_id AND usi.line_number = usui.line_number)
                WHERE usui.load_id = ? AND submission_id IS NOT NULL AND usi.institution_id = ?
                GROUP BY usui.load_id, usui.context_id, usui.submission_id, DATE(usui.date), usi.institution_id) AS t
            ";

        // On an already existing row only metric_investigations_unique is overwritten.
        if ($this->usesPostgres()) {
            $onConflictSql = '
                ON CONFLICT ON CONSTRAINT msid_uc_load_id_context_id_submission_id_institution_id_date DO UPDATE
                SET metric_investigations_unique = excluded.metric_investigations_unique;
                ';
        } else {
            $onConflictSql = '
                ON DUPLICATE KEY UPDATE metric_investigations_unique = metric;
                ';
        }
        $upsertSql = $insertSql . $onConflictSql;

        /** @var TemporaryInstitutionsDAO $temporaryInstitutionsDAO */
        $temporaryInstitutionsDAO = DAORegistry::getDAO('TemporaryInstitutionsDAO');
        foreach ($temporaryInstitutionsDAO->getInstitutionIdsByLoadId($loadId) as $institutionId) {
            DB::statement($upsertSql, [$loadId, (int) $institutionId]);
        }
    }

    /**
     * Whether the configured database driver name starts with 'postgres'.
     */
    private function usesPostgres(): bool
    {
        $driver = Config::getVar('database', 'driver');
        return substr($driver, 0, strlen('postgres')) === 'postgres';
    }
}
|
||||
@@ -0,0 +1,183 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/statistics/PKPTemporaryItemRequestsDAO.php
|
||||
*
|
||||
* Copyright (c) 2022 Simon Fraser University
|
||||
* Copyright (c) 2022 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class PKPTemporaryItemRequestsDAO
|
||||
*
|
||||
* @ingroup statistics
|
||||
*
|
||||
* @brief Operations for retrieving and adding unique item (submission) requests (primary files downloads).
|
||||
*/
|
||||
|
||||
namespace PKP\statistics;
|
||||
|
||||
use APP\core\Application;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use PKP\config\Config;
|
||||
use PKP\db\DAORegistry;
|
||||
|
||||
class PKPTemporaryItemRequestsDAO
{
    /**
     * The name of the table.
     * This table contains all primary files downloads.
     */
    public string $table = 'usage_stats_unique_item_requests_temporary_records';

    /**
     * Add the passed usage statistic record.
     *
     * The row consists of the columns from getInsertData() plus the line
     * number and the load id.
     */
    public function insert(object $entryData, int $lineNumber, string $loadId): void
    {
        $row = array_merge($this->getInsertData($entryData), [
            'line_number' => $lineNumber,
            'load_id' => $loadId,
        ]);

        DB::table($this->table)->insert($row);
    }

    /**
     * Get Laravel optimized array of data to insert into the table based on the log entry
     *
     * The user agent is cut to 255 bytes; empty country, region and city
     * values are stored as empty strings.
     */
    protected function getInsertData(object $entryData): array
    {
        $userAgent = substr($entryData->userAgent, 0, 255);
        $country = empty($entryData->country) ? '' : $entryData->country;
        $region = empty($entryData->region) ? '' : $entryData->region;
        $city = empty($entryData->city) ? '' : $entryData->city;

        return [
            'date' => $entryData->time,
            'ip' => $entryData->ip,
            'user_agent' => $userAgent,
            'context_id' => $entryData->contextId,
            'submission_id' => $entryData->submissionId,
            'representation_id' => $entryData->representationId,
            'submission_file_id' => $entryData->submissionFileId,
            'assoc_type' => $entryData->assocType,
            'file_type' => $entryData->fileType,
            'country' => $country,
            'region' => $region,
            'city' => $city,
        ];
    }

    /**
     * Delete all temporary records associated
     * with the passed load id.
     */
    public function deleteByLoadId(string $loadId): void
    {
        $recordsOfLoad = DB::table($this->table)->where('load_id', '=', $loadId);
        $recordsOfLoad->delete();
    }

    /**
     * Remove unique clicks
     * If multiple transactions represent the same item and occur in the same user-sessions, only one unique activity MUST be counted for that item.
     * Unique item is a submission.
     * A user session is defined by the combination of IP address + user agent + transaction date + hour of day.
     * Only the last unique activity will be retained (and thus counted), all the other will be removed.
     *
     * See https://www.projectcounter.org/code-of-practice-five-sections/7-processing-rules-underlying-counter-reporting-data/#counting
     */
    public function compileUniqueClicks(string $loadId): void
    {
        if ($this->usesPostgres()) {
            // A row is deleted when a row with a higher line number exists for the
            // same load, context, IP, user agent and submission, with an equal hour.
            $deleteSql = "
                DELETE FROM {$this->table} usur
                WHERE EXISTS (
                    SELECT * FROM (
                        SELECT 1 FROM {$this->table} usurt
                        WHERE usur.load_id = ? AND usurt.load_id = usur.load_id AND
                            usurt.context_id = usur.context_id AND
                            usurt.ip = usur.ip AND
                            usurt.user_agent = usur.user_agent AND
                            usurt.submission_id = usur.submission_id AND
                            EXTRACT(HOUR FROM usurt.date) = EXTRACT(HOUR FROM usur.date) AND
                            usur.line_number < usurt.line_number
                    ) AS tmp
                )
                ";
        } else {
            // Same matching through a self join; here the hour condition is
            // expressed with TIMESTAMPDIFF(HOUR, ...) = 0.
            $deleteSql = "
                DELETE FROM usur USING {$this->table} usur
                INNER JOIN {$this->table} usurt ON (
                    usurt.load_id = usur.load_id AND
                    usurt.context_id = usur.context_id AND
                    usurt.ip = usur.ip AND
                    usurt.user_agent = usur.user_agent AND
                    usurt.submission_id = usur.submission_id
                )
                WHERE usur.load_id = ? AND TIMESTAMPDIFF(HOUR, usur.date, usurt.date) = 0 AND usur.line_number < usurt.line_number
                ";
        }

        DB::statement($deleteSql, [$loadId]);
    }

    /**
     * Load unique COUNTER item (submission) requests (primary files downloads)
     */
    public function compileCounterSubmissionDailyMetrics(string $loadId): void
    {
        // The row count per load, context, submission and day is inserted as
        // metric_requests_unique; the other metric columns are inserted as 0.
        // assoc_type should always be Application::ASSOC_TYPE_SUBMISSION_FILE, but include the condition however
        $insertSql = "
            INSERT INTO metrics_counter_submission_daily (load_id, context_id, submission_id, date, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
            SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, 0 as metric_investigations, 0 as metric_investigations_unique, 0 as metric_requests, count(*) as metric
                FROM {$this->table}
                WHERE load_id = ? AND assoc_type = ?
                GROUP BY load_id, context_id, submission_id, DATE(date)) AS t
            ";

        // On an already existing row only metric_requests_unique is overwritten.
        if ($this->usesPostgres()) {
            $onConflictSql = '
                ON CONFLICT ON CONSTRAINT msd_uc_load_id_context_id_submission_id_date DO UPDATE
                SET metric_requests_unique = excluded.metric_requests_unique;
                ';
        } else {
            $onConflictSql = '
                ON DUPLICATE KEY UPDATE metric_requests_unique = metric;
                ';
        }

        DB::statement($insertSql . $onConflictSql, [$loadId, Application::ASSOC_TYPE_SUBMISSION_FILE]);
    }

    /**
     * Load unique institutional COUNTER item (submission) requests (primary files downloads)
     *
     * The upsert is executed once per institution id returned for the load id.
     */
    public function compileCounterSubmissionInstitutionDailyMetrics(string $loadId): void
    {
        // Rows are matched with the institution temporary records by load id and line number.
        // assoc_type should always be Application::ASSOC_TYPE_SUBMISSION_FILE, but include the condition however
        $insertSql = "
            INSERT INTO metrics_counter_submission_institution_daily (load_id, context_id, submission_id, date, institution_id, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
            SELECT * FROM (
                SELECT usur.load_id, usur.context_id, usur.submission_id, DATE(usur.date) as date, usi.institution_id, 0 as metric_investigations, 0 as metric_investigations_unique, 0 as metric_requests, count(*) as metric
                FROM {$this->table} usur
                JOIN usage_stats_institution_temporary_records usi on (usi.load_id = usur.load_id AND usi.line_number = usur.line_number)
                WHERE usur.load_id = ? AND usur.assoc_type = ? AND usi.institution_id = ?
                GROUP BY usur.load_id, usur.context_id, usur.submission_id, DATE(usur.date), usi.institution_id) AS t
            ";

        // On an already existing row only metric_requests_unique is overwritten.
        if ($this->usesPostgres()) {
            $onConflictSql = '
                ON CONFLICT ON CONSTRAINT msid_uc_load_id_context_id_submission_id_institution_id_date DO UPDATE
                SET metric_requests_unique = excluded.metric_requests_unique;
                ';
        } else {
            $onConflictSql = '
                ON DUPLICATE KEY UPDATE metric_requests_unique = metric;
                ';
        }
        $upsertSql = $insertSql . $onConflictSql;

        /** @var TemporaryInstitutionsDAO $temporaryInstitutionsDAO */
        $temporaryInstitutionsDAO = DAORegistry::getDAO('TemporaryInstitutionsDAO');
        foreach ($temporaryInstitutionsDAO->getInstitutionIdsByLoadId($loadId) as $institutionId) {
            DB::statement($upsertSql, [$loadId, Application::ASSOC_TYPE_SUBMISSION_FILE, (int) $institutionId]);
        }
    }

    /**
     * Whether the configured database driver name starts with 'postgres'.
     */
    private function usesPostgres(): bool
    {
        $driver = Config::getVar('database', 'driver');
        return substr($driver, 0, strlen('postgres')) === 'postgres';
    }
}
|
||||
@@ -0,0 +1,320 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/statistics/PKPTemporaryTotalsDAO.php
|
||||
*
|
||||
* Copyright (c) 2022 Simon Fraser University
|
||||
* Copyright (c) 2022 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class PKPTemporaryTotalsDAO
|
||||
*
|
||||
* @ingroup statistics
|
||||
*
|
||||
* @brief Operations for retrieving and adding total usage.
|
||||
*
|
||||
* It considers:
|
||||
* context index page views,
|
||||
* submission abstract, primary and supp file views,
|
||||
* geo submission usage,
|
||||
* COUNTER submission stats.
|
||||
*/
|
||||
|
||||
namespace PKP\statistics;
|
||||
|
||||
use APP\core\Application;
|
||||
use DateTimeImmutable;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use PKP\config\Config;
|
||||
use PKP\db\DAORegistry;
|
||||
|
||||
abstract class PKPTemporaryTotalsDAO
{
    /**
     * The name of the table. This table contains all usage events.
     */
    public string $table = 'usage_stats_total_temporary_records';

    /**
     * Add the passed usage statistic record.
     *
     * @param object $entryData Log entry; the properties that are read are listed in getInsertData()
     * @param int $lineNumber Value stored in the line_number column
     * @param string $loadId Value stored in the load_id column
     */
    public function insert(object $entryData, int $lineNumber, string $loadId): void
    {
        $insertData = $this->getInsertData($entryData);
        $insertData['line_number'] = $lineNumber;
        $insertData['load_id'] = $loadId;

        DB::table($this->table)->insert($insertData);
    }

    /**
     * Get Laravel optimized array of data to insert into the table based on the log entry
     *
     * @param object $entryData Log entry exposing time, ip, userAgent, canonicalUrl, contextId, submissionId,
     *  representationId, submissionFileId, assocType, fileType and (optionally) country, region, city
     *
     * @return array Column name => value map (without line_number and load_id, which insert() adds)
     */
    protected function getInsertData(object $entryData): array
    {
        return [
            'date' => $entryData->time,
            'ip' => $entryData->ip,
            // Truncate by characters rather than bytes, so that a multibyte UTF-8 character
            // is never cut in half (which would produce an invalid string).
            // NOTE(review): assumes the user_agent column is limited to 255 characters -- confirm against the schema.
            'user_agent' => mb_substr((string) $entryData->userAgent, 0, 255, 'UTF-8'),
            'canonical_url' => $entryData->canonicalUrl,
            'context_id' => $entryData->contextId,
            'submission_id' => $entryData->submissionId,
            'representation_id' => $entryData->representationId,
            'submission_file_id' => $entryData->submissionFileId,
            'assoc_type' => $entryData->assocType,
            'file_type' => $entryData->fileType,
            // Missing or empty geo data is stored as an empty string
            'country' => !empty($entryData->country) ? $entryData->country : '',
            'region' => !empty($entryData->region) ? $entryData->region : '',
            'city' => !empty($entryData->city) ? $entryData->city : '',
        ];
    }

    /**
     * Delete all temporary records associated
     * with the passed load id.
     */
    public function deleteByLoadId(string $loadId): void
    {
        DB::table($this->table)->where('load_id', '=', $loadId)->delete();
    }

    /**
     * Remove Double Clicks according to COUNTER guidelines
     * Remove the potential of over-counting which could occur when a user clicks the same link multiple times.
     * Double-clicks, i.e. two clicks in succession, on a link by the same user within a 30-second period MUST be counted as one action.
     * When two actions are made for the same URL within 30 seconds the first request MUST be removed and the second retained.
     * A user is identified by IP address combined with the browser's user-agent.
     *
     * See https://www.projectcounter.org/code-of-practice-five-sections/7-processing-rules-underlying-counter-reporting-data/#doubleclick
     *
     * A record is deleted when the same load contains another record with a higher line number and the same
     * context, IP, user agent and canonical URL, whose date is more than 0 and less than
     * $counterDoubleClickTimeFilter seconds later. Records with an identical timestamp (difference of 0 seconds)
     * are therefore not treated as double clicks.
     *
     * @param string $loadId Only records of this load are considered
     * @param int $counterDoubleClickTimeFilter Upper bound (exclusive) of the time difference, in seconds
     */
    public function removeDoubleClicks(string $loadId, int $counterDoubleClickTimeFilter): void
    {
        if ($this->usesPostgres()) {
            $sql = "
                DELETE FROM {$this->table} ust
                WHERE EXISTS (
                    SELECT * FROM (
                        SELECT 1 FROM {$this->table} ustt
                        WHERE ust.load_id = ? AND ustt.load_id = ust.load_id AND
                        ustt.context_id = ust.context_id AND
                        ustt.ip = ust.ip AND ustt.user_agent = ust.user_agent AND ustt.canonical_url = ust.canonical_url AND
                        EXTRACT(EPOCH FROM (ustt.date - ust.date)) < ? AND
                        EXTRACT(EPOCH FROM (ustt.date - ust.date)) > 0 AND
                        ust.line_number < ustt.line_number) AS tmp
                    )
                ";
        } else {
            $sql = "
                DELETE FROM ust USING {$this->table} ust
                INNER JOIN {$this->table} ustt ON (
                    ustt.load_id = ust.load_id AND
                    ustt.context_id = ust.context_id AND
                    ustt.ip = ust.ip AND
                    ustt.user_agent = ust.user_agent AND
                    ustt.canonical_url = ust.canonical_url
                )
                WHERE ust.load_id = ? AND
                    TIMESTAMPDIFF(SECOND, ust.date, ustt.date) < ? AND
                    TIMESTAMPDIFF(SECOND, ust.date, ustt.date) > 0 AND
                    ust.line_number < ustt.line_number
                ";
        }

        DB::statement($sql, [$loadId, $counterDoubleClickTimeFilter]);
    }

    /**
     * Load usage for context index pages
     *
     * Existing metrics_context rows of this load id (or of the date encoded in it) are removed first, then the
     * temporary records whose assoc type is the context assoc type are counted per load, context and day.
     */
    public function compileContextMetrics(string $loadId): void
    {
        $this->deleteMetricsByLoadIdOrDate('metrics_context', $loadId);

        $selectContextMetrics = DB::table($this->table)
            ->select(DB::raw('load_id, context_id, DATE(date) as date, count(*) as metric'))
            ->where('load_id', '=', $loadId)
            ->where('assoc_type', '=', Application::getContextAssocType())
            ->groupBy(DB::raw('load_id, context_id, DATE(date)'));

        DB::table('metrics_context')->insertUsing(['load_id', 'context_id', 'date', 'metric'], $selectContextMetrics);
    }

    /**
     * Load usage for submissions (abstract, primary and supp files)
     *
     * Existing metrics_submission rows of this load id (or of the date encoded in it) are removed first. Then
     * three aggregations are inserted, in this order: submission views, submission file views and
     * "COUNTER other" submission file views.
     */
    public function compileSubmissionMetrics(string $loadId): void
    {
        $this->deleteMetricsByLoadIdOrDate('metrics_submission', $loadId);

        $selectSubmissionMetrics = DB::table($this->table)
            ->select(DB::raw('load_id, context_id, submission_id, assoc_type, DATE(date) as date, count(*) as metric'))
            ->where('load_id', '=', $loadId)
            ->where('assoc_type', '=', Application::ASSOC_TYPE_SUBMISSION)
            ->groupBy(DB::raw('load_id, context_id, submission_id, assoc_type, DATE(date)'));
        DB::table('metrics_submission')->insertUsing(
            ['load_id', 'context_id', 'submission_id', 'assoc_type', 'date', 'metric'],
            $selectSubmissionMetrics
        );

        // Primary and supplementary files are aggregated identically; only the assoc type differs.
        $fileAssocTypes = [
            Application::ASSOC_TYPE_SUBMISSION_FILE,
            Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER,
        ];
        foreach ($fileAssocTypes as $fileAssocType) {
            $selectFileMetrics = DB::table($this->table)
                ->select(DB::raw('load_id, context_id, submission_id, representation_id, submission_file_id, file_type, assoc_type, DATE(date) as date, count(*) as metric'))
                ->where('load_id', '=', $loadId)
                ->where('assoc_type', '=', $fileAssocType)
                ->groupBy(DB::raw('load_id, context_id, submission_id, representation_id, submission_file_id, file_type, assoc_type, DATE(date)'));
            DB::table('metrics_submission')->insertUsing(
                ['load_id', 'context_id', 'submission_id', 'representation_id', 'submission_file_id', 'file_type', 'assoc_type', 'date', 'metric'],
                $selectFileMetrics
            );
        }
    }

    // For the DB tables that also contain the unique metrics, the deletion by load id is done in separate
    // functions, unlike the deletion for the tables above (metrics_context, metrics_issue and metrics_submission).
    // Only the total metrics are loaded in this class (see the compile... functions below); the unique metric
    // columns are initialized with 0 here and are expected to be filled in by other classes.

    /**
     * Delete the metrics_submission_geo_daily rows of the given load id or of the date encoded in it.
     */
    public function deleteSubmissionGeoDailyByLoadId(string $loadId): void
    {
        $this->deleteMetricsByLoadIdOrDate('metrics_submission_geo_daily', $loadId);
    }

    /**
     * Delete the metrics_counter_submission_daily rows of the given load id or of the date encoded in it.
     */
    public function deleteCounterSubmissionDailyByLoadId(string $loadId): void
    {
        $this->deleteMetricsByLoadIdOrDate('metrics_counter_submission_daily', $loadId);
    }

    /**
     * Delete the metrics_counter_submission_institution_daily rows of the given load id or of the date encoded in it.
     */
    public function deleteCounterSubmissionInstitutionDailyByLoadId(string $loadId): void
    {
        $this->deleteMetricsByLoadIdOrDate('metrics_counter_submission_institution_daily', $loadId);
    }

    /**
     * Load total geographical usage on the submission level
     *
     * Only records with a submission id and at least one non-empty geo column are counted. If a row for the
     * same unique key already exists, only its metric column is overwritten.
     */
    public function compileSubmissionGeoDailyMetrics(string $loadId): void
    {
        // construct metric upsert
        $metricUpsertSql = "
            INSERT INTO metrics_submission_geo_daily (load_id, context_id, submission_id, date, country, region, city, metric, metric_unique)
            SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, country, region, city, count(*) as metric_tmp, 0 as metric_unique
                FROM {$this->table}
                WHERE load_id = ? AND submission_id IS NOT NULL AND (country <> '' OR region <> '' OR city <> '')
                GROUP BY load_id, context_id, submission_id, DATE(date), country, region, city) AS t
            ";
        if ($this->usesPostgres()) {
            $metricUpsertSql .= '
                ON CONFLICT ON CONSTRAINT msgd_uc_load_context_submission_c_r_c_date DO UPDATE
                SET metric = excluded.metric;
                ';
        } else {
            $metricUpsertSql .= '
                ON DUPLICATE KEY UPDATE metric = metric_tmp;
                ';
        }
        DB::statement($metricUpsertSql, [$loadId]);
    }

    /**
     * Load total COUNTER submission usage (investigations and requests)
     *
     * Investigations count every record that has a submission id; requests count only the records whose assoc
     * type is Application::ASSOC_TYPE_SUBMISSION_FILE. Each upsert only overwrites its own metric column
     * when the row already exists.
     */
    public function compileCounterSubmissionDailyMetrics(string $loadId): void
    {
        $isPostgres = $this->usesPostgres();

        // construct metric_investigations upsert
        $metricInvestigationsUpsertSql = "
            INSERT INTO metrics_counter_submission_daily (load_id, context_id, submission_id, date, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
            SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, count(*) as metric, 0 as metric_investigations_unique, 0 as metric_requests, 0 as metric_requests_unique
                FROM {$this->table}
                WHERE load_id = ? AND submission_id IS NOT NULL
                GROUP BY load_id, context_id, submission_id, DATE(date)) AS t
            ";
        if ($isPostgres) {
            $metricInvestigationsUpsertSql .= '
                ON CONFLICT ON CONSTRAINT msd_uc_load_id_context_id_submission_id_date DO UPDATE
                SET metric_investigations = excluded.metric_investigations;
                ';
        } else {
            $metricInvestigationsUpsertSql .= '
                ON DUPLICATE KEY UPDATE metric_investigations = metric;
                ';
        }
        DB::statement($metricInvestigationsUpsertSql, [$loadId]);

        // construct metric_requests upsert
        $metricRequestsUpsertSql = "
            INSERT INTO metrics_counter_submission_daily (load_id, context_id, submission_id, date, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
            SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, 0 as metric_investigations, 0 as metric_investigations_unique, count(*) as metric, 0 as metric_requests_unique
                FROM {$this->table}
                WHERE load_id = ? AND assoc_type = ?
                GROUP BY load_id, context_id, submission_id, DATE(date)) AS t
            ";
        if ($isPostgres) {
            $metricRequestsUpsertSql .= '
                ON CONFLICT ON CONSTRAINT msd_uc_load_id_context_id_submission_id_date DO UPDATE
                SET metric_requests = excluded.metric_requests;
                ';
        } else {
            $metricRequestsUpsertSql .= '
                ON DUPLICATE KEY UPDATE metric_requests = metric;
                ';
        }
        DB::statement($metricRequestsUpsertSql, [$loadId, Application::ASSOC_TYPE_SUBMISSION_FILE]);
    }

    /**
     * Load total institutional COUNTER submission usage (investigations and requests)
     *
     * Temporary usage records are joined with the institution temporary records on load id and line number.
     * Both upserts are executed once for every institution id returned by
     * TemporaryInstitutionsDAO::getInstitutionIdsByLoadId() for this load.
     */
    public function compileCounterSubmissionInstitutionDailyMetrics(string $loadId): void
    {
        $isPostgres = $this->usesPostgres();

        // construct metric_investigations upsert
        $metricInvestigationsUpsertSql = "
            INSERT INTO metrics_counter_submission_institution_daily (load_id, context_id, submission_id, date, institution_id, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
            SELECT * FROM (
                SELECT ustt.load_id, ustt.context_id, ustt.submission_id, DATE(ustt.date) as date, usit.institution_id, count(*) as metric, 0 as metric_investigations_unique, 0 as metric_requests, 0 as metric_requests_unique
                FROM {$this->table} ustt
                JOIN usage_stats_institution_temporary_records usit on (usit.load_id = ustt.load_id AND usit.line_number = ustt.line_number)
                WHERE ustt.load_id = ? AND submission_id IS NOT NULL AND usit.institution_id = ?
                GROUP BY ustt.load_id, ustt.context_id, ustt.submission_id, DATE(ustt.date), usit.institution_id) AS t
            ";
        if ($isPostgres) {
            $metricInvestigationsUpsertSql .= '
                ON CONFLICT ON CONSTRAINT msid_uc_load_id_context_id_submission_id_institution_id_date DO UPDATE
                SET metric_investigations = excluded.metric_investigations;
                ';
        } else {
            $metricInvestigationsUpsertSql .= '
                ON DUPLICATE KEY UPDATE metric_investigations = metric;
                ';
        }

        // construct metric_requests upsert
        $metricRequestsUpsertSql = "
            INSERT INTO metrics_counter_submission_institution_daily (load_id, context_id, submission_id, date, institution_id, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
            SELECT * FROM (
                SELECT ustt.load_id, ustt.context_id, ustt.submission_id, DATE(ustt.date) as date, usit.institution_id, 0 as metric_investigations, 0 as metric_investigations_unique, count(*) as metric, 0 as metric_requests_unique
                FROM {$this->table} ustt
                JOIN usage_stats_institution_temporary_records usit on (usit.load_id = ustt.load_id AND usit.line_number = ustt.line_number)
                WHERE ustt.load_id = ? AND ustt.assoc_type = ? AND usit.institution_id = ?
                GROUP BY ustt.load_id, ustt.context_id, ustt.submission_id, DATE(ustt.date), usit.institution_id) AS t
            ";
        if ($isPostgres) {
            $metricRequestsUpsertSql .= '
                ON CONFLICT ON CONSTRAINT msid_uc_load_id_context_id_submission_id_institution_id_date DO UPDATE
                SET metric_requests = excluded.metric_requests;
                ';
        } else {
            $metricRequestsUpsertSql .= '
                ON DUPLICATE KEY UPDATE metric_requests = metric;
                ';
        }

        /** @var TemporaryInstitutionsDAO */
        $temporaryInstitutionsDAO = DAORegistry::getDAO('TemporaryInstitutionsDAO');
        $institutionIds = $temporaryInstitutionsDAO->getInstitutionIdsByLoadId($loadId);
        foreach ($institutionIds as $institutionId) {
            DB::statement($metricInvestigationsUpsertSql, [$loadId, (int) $institutionId]);
            DB::statement($metricRequestsUpsertSql, [$loadId, Application::ASSOC_TYPE_SUBMISSION_FILE, (int) $institutionId]);
        }
    }

    /**
     * Whether the configured database driver name starts with "postgres".
     */
    private function usesPostgres(): bool
    {
        return strpos((string) Config::getVar('database', 'driver'), 'postgres') === 0;
    }

    /**
     * Read the date encoded in the load id: the 8 characters, in Ymd format, that start 12 characters before its end.
     *
     * @return ?DateTimeImmutable The parsed date, or null if that part of the load id is not a Ymd date
     */
    private function getDateFromLoadId(string $loadId): ?DateTimeImmutable
    {
        // createFromFormat() returns false (not an exception) when the input does not match the format
        $date = DateTimeImmutable::createFromFormat('Ymd', substr($loadId, -12, 8));
        return $date instanceof DateTimeImmutable ? $date : null;
    }

    /**
     * Delete the rows of a metrics table that belong to the given load id or to the date encoded in it.
     *
     * If no date can be read from the load id, only the rows with a matching load id are deleted, instead of
     * handing an invalid value to the date comparison.
     *
     * @param string $metricsTable Name of the metrics table; must contain load_id and date columns
     */
    private function deleteMetricsByLoadIdOrDate(string $metricsTable, string $loadId): void
    {
        $query = DB::table($metricsTable)->where('load_id', '=', $loadId);
        $date = $this->getDateFromLoadId($loadId);
        if ($date !== null) {
            $query->orWhereDate('date', '=', $date);
        }
        $query->delete();
    }
}
|
||||
@@ -0,0 +1,62 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/statistics/TemporaryInstitutionsDAO.php
|
||||
*
|
||||
* Copyright (c) 2022 Simon Fraser University
|
||||
* Copyright (c) 2022 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class TemporaryInstitutionsDAO
|
||||
*
|
||||
* @ingroup statistics
|
||||
*
|
||||
* @brief Operations for retrieving and adding the normalized data for institutions usage stats temporary records.
|
||||
*/
|
||||
|
||||
namespace PKP\statistics;
|
||||
|
||||
use Illuminate\Support\Collection;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
|
||||
class TemporaryInstitutionsDAO
{
    /** Table that stores one row per (load id, line number, institution id) */
    public string $table = 'usage_stats_institution_temporary_records';

    /**
     * Store the institution ids that belong to one log line, one row per institution id.
     * Nothing is written when the list of institution ids is empty.
     *
     * @param array $institutionIds Institution ids to store for the line
     * @param int $lineNumber Value stored in the line_number column
     * @param string $loadId Value stored in the load_id column
     */
    public function insert(array $institutionIds, int $lineNumber, string $loadId): void
    {
        // Columns that are identical for every row of this log line
        $lineColumns = ['load_id' => $loadId, 'line_number' => $lineNumber];

        foreach ($institutionIds as $id) {
            $row = $lineColumns;
            $row['institution_id'] = $id;
            DB::table($this->table)->insert($row);
        }
    }

    /**
     * Remove every row that was stored for the passed load id.
     */
    public function deleteByLoadId(string $loadId): void
    {
        $rowsOfLoad = DB::table($this->table)->where('load_id', '=', $loadId);
        $rowsOfLoad->delete();
    }

    /**
     * Get the distinct institution ids that were stored for the passed load id.
     *
     * @return Collection The institution_id values
     */
    public function getInstitutionIdsByLoadId(string $loadId): Collection
    {
        return DB::table($this->table)
            ->where('load_id', '=', $loadId)
            ->select('institution_id')
            ->distinct()
            ->pluck('institution_id');
    }
}
|
||||
Reference in New Issue
Block a user