first commit

This commit is contained in:
CHIEFSOFT\ameye
2024-06-08 17:09:23 -04:00
commit df3a033196
17887 changed files with 8637778 additions and 0 deletions
@@ -0,0 +1,355 @@
<?php
/**
* @file classes/statistics/PKPStatisticsHelper.php
*
* Copyright (c) 2013-2021 Simon Fraser University
* Copyright (c) 2003-2021 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class PKPStatisticsHelper
*
* @ingroup statistics
*
* @brief Statistics helper class.
*
*/
namespace PKP\statistics;
use APP\facades\Repo;
use GeoIp2\Database\Reader;
use InvalidArgumentException;
use PKP\cache\CacheManager;
use PKP\cache\FileCache;
use PKP\context\Context;
use PKP\file\PrivateFileManager;
use PKP\site\Site;
use Sokil\IsoCodes\IsoCodesFactory;
abstract class PKPStatisticsHelper
{
// Dimensions:
// 1) publication object dimension:
public const STATISTICS_DIMENSION_CONTEXT_ID = 'context_id';
public const STATISTICS_DIMENSION_SUBMISSION_ID = 'submission_id';
public const STATISTICS_DIMENSION_ASSOC_TYPE = 'assoc_type';
public const STATISTICS_DIMENSION_FILE_TYPE = 'file_type';
public const STATISTICS_DIMENSION_SUBMISSION_FILE_ID = 'submission_file_id';
public const STATISTICS_DIMENSION_REPRESENTATION_ID = 'representation_id';
// 2) time dimension:
public const STATISTICS_DIMENSION_YEAR = 'year';
public const STATISTICS_DIMENSION_MONTH = 'month';
public const STATISTICS_DIMENSION_DAY = 'day'; // used as API parameter for timelines
public const STATISTICS_DIMENSION_DATE = 'date';
// 3) geography dimension:
public const STATISTICS_DIMENSION_COUNTRY = 'country';
public const STATISTICS_DIMENSION_REGION = 'region';
public const STATISTICS_DIMENSION_CITY = 'city';
// Metrics:
public const STATISTICS_METRIC = 'metric';
public const STATISTICS_METRIC_UNIQUE = 'metric_unique';
// Ordering:
public const STATISTICS_ORDER_ASC = 'ASC';
public const STATISTICS_ORDER_DESC = 'DESC';
// File type to be used in publication object dimension.
public const STATISTICS_FILE_TYPE_HTML = 1;
public const STATISTICS_FILE_TYPE_PDF = 2;
public const STATISTICS_FILE_TYPE_OTHER = 3;
public const STATISTICS_FILE_TYPE_DOC = 4;
// Set the earliest date used
public const STATISTICS_EARLIEST_DATE = '2001-01-01';
/** These are rules defined by the COUNTER project.
* See https://www.projectcounter.org/code-of-practice-five-sections/7-processing-rules-underlying-counter-reporting-data/#doubleclick
*/
public const COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS = 30;
// Geography settings
public const STATISTICS_SETTING_COUNTRY = 'country';
public const STATISTICS_SETTING_REGION = 'country+region';
public const STATISTICS_SETTING_CITY = 'country+region+city';
public FileCache $geoDataCache;
public FileCache $institutionDataCache;
/**
 * Build the path of the usage statistics directory.
 *
 * The directory is named "usageStats" and is located directly below the
 * resolved base path reported by the private file manager.
 */
public static function getUsageStatsDirPath(): string
{
    $privateFiles = new PrivateFileManager();
    $resolvedBasePath = realpath($privateFiles->getBasePath());
    return $resolvedBasePath . '/usageStats';
}
/**
 * Build the path of the salt file, which is named "salt" and is located
 * inside the usage statistics directory.
 */
public static function getSaltFileName(): string
{
    $usageStatsDir = self::getUsageStatsDirPath();
    return $usageStatsDir . '/salt';
}
/**
 * Build the path of the Geo database file, which is named "IPGeoDB.mmdb"
 * and is located inside the usage statistics directory.
 */
public static function getGeoDBPath(): string
{
    $usageStatsDir = self::getUsageStatsDirPath();
    return $usageStatsDir . '/IPGeoDB.mmdb';
}
/**
 * Map a mimetype to the statistics file type.
 * The mimetypes considered here are a subset of those used in PKPFileService::getDocumentType()
 *
 * @param string $mimetype Mimetype to classify
 *
 * @return int One of the self::STATISTICS_FILE_TYPE_ constants;
 *  STATISTICS_FILE_TYPE_OTHER for every mimetype that is not listed
 */
public static function getDocumentType(string $mimetype): int
{
    $fileTypeByMimetype = [
        // PDF
        'application/pdf' => self::STATISTICS_FILE_TYPE_PDF,
        'application/x-pdf' => self::STATISTICS_FILE_TYPE_PDF,
        'text/pdf' => self::STATISTICS_FILE_TYPE_PDF,
        'text/x-pdf' => self::STATISTICS_FILE_TYPE_PDF,
        // Word processing documents
        'application/msword' => self::STATISTICS_FILE_TYPE_DOC,
        'application/word' => self::STATISTICS_FILE_TYPE_DOC,
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => self::STATISTICS_FILE_TYPE_DOC,
        // HTML
        'text/html' => self::STATISTICS_FILE_TYPE_HTML,
    ];
    return $fileTypeByMimetype[$mimetype] ?? self::STATISTICS_FILE_TYPE_OTHER;
}
/**
 * Compute the SHA256 hash of the given IP followed by the given salt.
 *
 * NB: This implementation was taken from OA-S directly. See
 * http://sourceforge.net/p/openaccessstati/code-0/3/tree/trunk/logfile-parser/lib/logutils.php
 * The PHP4 part is not implemented, as OJS dropped PHP4 support.
 *
 * @param string $ip IP address to hash
 * @param string $salt Salt appended to the IP before hashing
 *
 * @return string Hexadecimal SHA256 digest
 */
public static function hashIp(string $ip, string $salt): string
{
    $saltedIp = $ip . $salt;
    return hash('sha256', $saltedIp);
}
/**
 * Create a new salt, write it to the salt file and return it.
 *
 * The salt consists of 16 bytes from PHP's cryptographically secure random
 * generator, hex-encoded to 32 characters. The file is written under an
 * exclusive lock; a failed write is logged and the salt is still returned.
 *
 * @param string $saltFileName Path of the file the salt is written to
 *
 * @return string The new salt (32 hexadecimal characters)
 *
 * @throws \Exception If random_bytes() cannot obtain random data
 */
public static function createNewSalt(string $saltFileName): string
{
    // random_bytes() replaces the former chain of fallbacks: mcrypt_create_iv() no longer
    // exists in current PHP versions, and the last fallback produced a plain integer
    // instead of a hex string.
    $newSalt = bin2hex(random_bytes(16));
    if (file_put_contents($saltFileName, $newSalt, LOCK_EX) === false) {
        error_log('There was a problem writing the salt file at ' . $saltFileName . '.');
    }
    return $newSalt;
}
/**
 * Retrieve Geo data (country, region, city) for an IP, limited to the level of
 * detail enabled by the site i.e. context settings.
 *
 * @param Site $site Site passed on to the context when reading its geo setting
 * @param Context $context Context whose geo usage stats setting is applied
 * @param string $ip User IP
 * @param string $hashedIp Hashed user IP
 * @param bool $flushCache If true, the geo cache is emptied before the lookup
 *
 * @return array [country, region, city]; values that are not enabled by the
 *  setting (or all of them, if geo usage stats are disabled) are null
 */
public function getGeoData(Site $site, Context $context, string $ip, string $hashedIp, bool $flushCache = false): array
{
    $geoSetting = $context->getEnableGeoUsageStats($site);
    if ($geoSetting == 'disabled') {
        return [null, null, null];
    }

    $location = $this->getLocation($ip, $hashedIp, $flushCache);
    // The city level includes the region level, the region level includes the country.
    $cityEnabled = $geoSetting == self::STATISTICS_SETTING_CITY;
    $regionEnabled = $cityEnabled || $geoSetting == self::STATISTICS_SETTING_REGION;

    return [
        $location['country'],
        $regionEnabled ? $location['region'] : null,
        $cityEnabled ? $location['city'] : null,
    ];
}
/**
 * Get the location of an IP, using the cache if an entry exists.
 *
 * Cache entries are keyed by the hashed IP. If there is no entry, the Geo
 * database is consulted and the result is written back to the cache. This
 * also happens when the database cannot be read or the IP is not found, in
 * which case the values not determined stay null.
 *
 * @param string $ip User IP
 * @param string $hashedIp Hashed user IP, used as the cache key
 * @param bool $flush If true empty cache
 *
 * @return array Geo data of the given IP
 *  [
 *      'country' => string|null Country ISO code,
 *      'region' => string|null Region ISO code
 *      'city' => string|null City name
 *  ]
 *
 */
public function getLocation(string $ip, string $hashedIp, bool $flush = false): array
{
    // Attach the file cache on first use.
    if (!isset($this->geoDataCache)) {
        $cacheManager = CacheManager::getManager();
        /** @var FileCache */
        $this->geoDataCache = $cacheManager->getCache('geoIP', 'all', [&$this, 'geoDataCacheMiss']);
    }
    if ($flush) {
        // Salt and thus hashed IPs changed, empty the cache.
        $this->geoDataCacheMiss($this->geoDataCache);
    }

    $geoDataByHashedIp = $this->geoDataCache->getContents();
    if (array_key_exists($hashedIp, $geoDataByHashedIp)) {
        return $geoDataByHashedIp[$hashedIp];
    }

    $country = null;
    $region = null;
    $city = null;

    $geoDb = null;
    try {
        $geoDb = new Reader($this->getGeoDBPath());
    } catch (\MaxMind\Db\Reader\InvalidDatabaseException | InvalidArgumentException $e) {
        error_log('There was a problem reading the Geo database at ' . $this->getGeoDBPath() . '. Error: ' . $e->getMessage());
    }

    if ($geoDb !== null) {
        try {
            $record = $geoDb->city($ip);
            $country = $record->country->isoCode;
            // When found, up to three characters long subdivision portion of the ISO 3166-2 code is returned
            // s. https://github.com/maxmind/GeoIP2-php/blob/main/src/Record/Subdivision.php#L20
            $region = $record->mostSpecificSubdivision->isoCode;
            // DB-IP IP to City Lite database does not provide region Iso code but name,
            // thus try to get the region Iso code by the name,
            // but we need country for that
            if ($region === null && $country !== null) {
                $regionName = $record->mostSpecificSubdivision->name;
                if ($regionName !== null) {
                    $subdivisions = app(IsoCodesFactory::class)->getSubdivisions()->getAllByCountryCode($country);
                    foreach ($subdivisions as $subdivision) {
                        if ($subdivision->getName() == $regionName) {
                            // The code has the form "<country>-<region>"; keep the region part.
                            $region = explode('-', $subdivision->getCode())[1];
                            break;
                        }
                    }
                }
            }
            $city = $record->city->name;
        } catch (\BadMethodCallException $e) {
            error_log('There was a problem using city method on the Geo database at ' . $this->getGeoDBPath() . '. Error: ' . $e->getMessage());
        } catch (\GeoIp2\Exception\AddressNotFoundException $e) {
            error_log('There was a problem finding IP in the Geo database at ' . $this->getGeoDBPath() . '. Error: ' . $e->getMessage());
        } catch (\MaxMind\Db\Reader\InvalidDatabaseException $e) {
            error_log('There was a problem reading the Geo database at ' . $this->getGeoDBPath() . '. Error: ' . $e->getMessage());
        }
    }

    // Remember the result, whatever could be determined, for the next request.
    $geoDataByHashedIp[$hashedIp] = [
        'country' => $country,
        'region' => $region,
        'city' => $city,
    ];
    $this->geoDataCache->setEntireCache($geoDataByHashedIp);
    return $geoDataByHashedIp[$hashedIp];
}
/**
 * Geo cache miss callback: replace the entire cache content with an empty
 * array and return that empty array.
 */
public function geoDataCacheMiss(FileCache $cache): array
{
    $emptyContents = [];
    $cache->setEntireCache($emptyContents);
    return $emptyContents;
}
/**
 * Get institution IDs for a given context based on the IP, use cache if exists.
 *
 * The cache is structured as hashedIP => contextId => institutionIds[].
 * If there is no entry for the hashed IP and context, the institutions are
 * looked up by context and IP and the result is written back to the cache.
 *
 * @param int $contextId Context ID
 * @param string $ip User IP
 * @param string $hashedIp Hashed user IP, used as the cache key
 * @param bool $flush If true empty cache
 *
 * @return array Institution IDs found for the given context and IP
 *
 */
public function getInstitutionIds(int $contextId, string $ip, string $hashedIp, bool $flush = false): array
{
    // Attach the file cache on first use.
    if (!isset($this->institutionDataCache)) {
        $cacheManager = CacheManager::getManager();
        /** @var FileCache */
        $this->institutionDataCache = $cacheManager->getCache('institutionIP', 'all', [&$this, 'institutionDataCacheMiss']);
    }
    if ($flush) {
        // Salt and thus hashed IPs changed, empty the cache.
        $this->institutionDataCacheMiss($this->institutionDataCache);
    }

    $institutionsByHashedIp = $this->institutionDataCache->getContents();
    if (array_key_exists($hashedIp, $institutionsByHashedIp)) {
        $institutionsByContext = $institutionsByHashedIp[$hashedIp];
        if (array_key_exists($contextId, $institutionsByContext)) {
            return $institutionsByContext[$contextId];
        }
    }

    $collector = Repo::institution()->getCollector();
    $collector = $collector->filterByContextIds([$contextId]);
    $collector = $collector->filterByIps([$ip]);
    $institutionsByHashedIp[$hashedIp][$contextId] = $collector->getIds()->toArray();

    $this->institutionDataCache->setEntireCache($institutionsByHashedIp);
    return $institutionsByHashedIp[$hashedIp][$contextId];
}
/**
 * Institution cache miss callback: replace the entire cache content with an
 * empty array and return that empty array.
 */
public function institutionDataCacheMiss(FileCache $cache): array
{
    $emptyContents = [];
    $cache->setEntireCache($emptyContents);
    return $emptyContents;
}
}
// Backwards compatibility: unless PKP_STRICT_MODE is set, make the class and the
// constants listed below available under their global, non-namespaced names.
if (!PKP_STRICT_MODE) {
// Register \PKPStatisticsHelper as an alias of the namespaced class.
class_alias('\PKP\statistics\PKPStatisticsHelper', '\PKPStatisticsHelper');
// Define one global constant per listed name, with the value of the class constant of the same name.
foreach ([
'STATISTICS_DIMENSION_CONTEXT_ID',
'STATISTICS_DIMENSION_SUBMISSION_ID',
'STATISTICS_DIMENSION_REPRESENTATION_ID',
'STATISTICS_DIMENSION_ASSOC_TYPE',
'STATISTICS_DIMENSION_FILE_TYPE',
'STATISTICS_DIMENSION_YEAR',
'STATISTICS_DIMENSION_MONTH',
'STATISTICS_DIMENSION_DAY',
'STATISTICS_DIMENSION_DATE',
'STATISTICS_DIMENSION_COUNTRY',
'STATISTICS_DIMENSION_REGION',
'STATISTICS_DIMENSION_CITY',
'STATISTICS_METRIC',
'STATISTICS_METRIC_UNIQUE',
'STATISTICS_ORDER_ASC',
'STATISTICS_ORDER_DESC',
'STATISTICS_FILE_TYPE_HTML',
'STATISTICS_FILE_TYPE_PDF',
'STATISTICS_FILE_TYPE_OTHER',
'STATISTICS_FILE_TYPE_DOC',
'STATISTICS_EARLIEST_DATE',
'COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS',
] as $constantName) {
// The value is read through the alias registered above.
define($constantName, constant('\PKPStatisticsHelper::' . $constantName));
}
}
@@ -0,0 +1,209 @@
<?php
/**
* @file classes/statistics/PKPTemporaryItemInvestigationsDAO.php
*
* Copyright (c) 2022 Simon Fraser University
* Copyright (c) 2022 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class PKPTemporaryItemInvestigationsDAO
*
* @ingroup statistics
*
* @brief Operations for retrieving and adding unique item (submission) investigations (abstract, primary and supp file views).
*/
namespace PKP\statistics;
use Illuminate\Support\Facades\DB;
use PKP\config\Config;
use PKP\db\DAORegistry;
class PKPTemporaryItemInvestigationsDAO
{
/**
* The name of the table.
* This table contains all usage (clicks) for an item (submission),
* considering abstract, primary and supp file views.
*/
public string $table = 'usage_stats_unique_item_investigations_temporary_records';
/**
 * Add the passed usage statistic record.
 *
 * @param object $entryData Log entry, converted to column values by getInsertData()
 * @param int $lineNumber Stored in the line_number column
 * @param string $loadId Stored in the load_id column
 */
public function insert(object $entryData, int $lineNumber, string $loadId): void
{
    $row = array_merge(
        $this->getInsertData($entryData),
        ['line_number' => $lineNumber, 'load_id' => $loadId]
    );
    DB::table($this->table)->insert($row);
}
/**
 * Get Laravel optimized array of data to insert into the table based on the log entry.
 *
 * The user agent is limited to 255 bytes and cut on a UTF-8 character boundary,
 * so that a multi-byte character is never split into an invalid byte sequence.
 * Empty country, region and city values are stored as empty strings.
 *
 * @param object $entryData Log entry; the properties read are time, ip, userAgent,
 *  contextId, submissionId, representationId, submissionFileId, assocType, fileType,
 *  country, region and city
 *
 * @return array Column name => value pairs
 */
protected function getInsertData(object $entryData): array
{
    return [
        'date' => $entryData->time,
        'ip' => $entryData->ip,
        // substr() counts bytes and may cut a multi-byte character in half;
        // mb_strcut() keeps the same byte limit but stops at a character boundary.
        'user_agent' => mb_strcut((string) $entryData->userAgent, 0, 255, 'UTF-8'),
        'context_id' => $entryData->contextId,
        'submission_id' => $entryData->submissionId,
        'representation_id' => $entryData->representationId,
        'submission_file_id' => $entryData->submissionFileId,
        'assoc_type' => $entryData->assocType,
        'file_type' => $entryData->fileType,
        'country' => !empty($entryData->country) ? $entryData->country : '',
        'region' => !empty($entryData->region) ? $entryData->region : '',
        'city' => !empty($entryData->city) ? $entryData->city : '',
    ];
}
/**
 * Delete all temporary records whose load_id equals the passed load id.
 */
public function deleteByLoadId(string $loadId): void
{
    $recordsOfLoad = DB::table($this->table)->where('load_id', '=', $loadId);
    $recordsOfLoad->delete();
}
/**
 * Remove Unique Clicks
 * If multiple transactions represent the same item and occur in the same user-sessions, only one unique activity MUST be counted for that item.
 * Unique item is a submission.
 * A user session is defined by the combination of IP address + user agent + transaction date + hour of day.
 * Only the last unique activity will be retained (and thus counted), all the others will be removed.
 *
 * A record of the given load is deleted if another record of the same load with the same
 * context, IP, user agent and submission, a matching time (see the driver specific
 * conditions below) and a higher line number exists.
 *
 * See https://www.projectcounter.org/code-of-practice-five-sections/7-processing-rules-underlying-counter-reporting-data/#counting
 *
 * @param string $loadId Load ID of the records to process
 */
public function compileUniqueClicks(string $loadId): void
{
// The configured database driver decides which SQL dialect is used.
if (substr(Config::getVar('database', 'driver'), 0, strlen('postgres')) === 'postgres') {
// PostgreSQL: two records match if the hour part of their timestamps is equal.
// NOTE(review): EXTRACT(HOUR ...) does not compare the day; presumably one load never spans several days — confirm.
DB::statement(
"
DELETE FROM {$this->table} usui
WHERE EXISTS (
SELECT * FROM (
SELECT 1 FROM {$this->table} usuit
WHERE usui.load_id = ? AND usuit.load_id = usui.load_id AND
usuit.context_id = usui.context_id AND
usuit.ip = usui.ip AND
usuit.user_agent = usui.user_agent AND
usuit.submission_id = usui.submission_id AND
EXTRACT(HOUR FROM usuit.date) = EXTRACT(HOUR FROM usui.date) AND
usui.line_number < usuit.line_number
) AS tmp
)
",
[$loadId]
);
} else {
// Other drivers (MySQL syntax): two records match if TIMESTAMPDIFF(HOUR, ...) between them is 0.
// NOTE(review): this looks like "less than one full hour apart", which is not the same condition
// as the equal hour of day test in the PostgreSQL branch — confirm that this is intended.
DB::statement(
"
DELETE FROM usui USING {$this->table} usui
INNER JOIN {$this->table} usuit ON (
usuit.load_id = usui.load_id AND
usuit.context_id = usui.context_id AND
usuit.ip = usui.ip AND
usuit.user_agent = usui.user_agent AND
usuit.submission_id = usui.submission_id
)
WHERE usui.load_id = ? AND
TIMESTAMPDIFF(HOUR, usui.date, usuit.date) = 0 AND
usui.line_number < usuit.line_number
",
[$loadId]
);
}
}
/**
 * Load unique geographical usage on the submission level.
 *
 * The records of the given load that have a submission ID and at least one non-empty
 * geo value are counted per load, context, submission, day, country, region and city.
 * The count is inserted into metrics_submission_geo_daily as metric_unique (metric is
 * inserted as 0); if the row already exists, only its metric_unique is updated.
 *
 * @param string $loadId Load ID of the records to process
 */
public function compileSubmissionGeoDailyMetrics(string $loadId): void
{
// construct metric_unique upsert
$metricUniqueUpsertSql = "
INSERT INTO metrics_submission_geo_daily (load_id, context_id, submission_id, date, country, region, city, metric, metric_unique)
SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, country, region, city, 0 as metric, count(*) as metric_unique_tmp
FROM {$this->table}
WHERE load_id = ? AND submission_id IS NOT NULL AND (country <> '' OR region <> '' OR city <> '')
GROUP BY load_id, context_id, submission_id, DATE(date), country, region, city) AS t
";
// The conflict handling clause differs per database driver.
if (substr(Config::getVar('database', 'driver'), 0, strlen('postgres')) === 'postgres') {
// PostgreSQL: "excluded" refers to the row that was proposed for insertion.
$metricUniqueUpsertSql .= '
ON CONFLICT ON CONSTRAINT msgd_uc_load_context_submission_c_r_c_date DO UPDATE
SET metric_unique = excluded.metric_unique;
';
} else {
// Other drivers (MySQL syntax): the update refers to the count by its alias in the SELECT above.
$metricUniqueUpsertSql .= '
ON DUPLICATE KEY UPDATE metric_unique = metric_unique_tmp;
';
}
DB::statement($metricUniqueUpsertSql, [$loadId]);
}
/**
 * Load unique COUNTER item (submission) investigations.
 *
 * The records of the given load that have a submission ID are counted per load, context,
 * submission and day. The count is inserted into metrics_counter_submission_daily as
 * metric_investigations_unique (the other metric columns are inserted as 0); if the row
 * already exists, only its metric_investigations_unique is updated.
 *
 * @param string $loadId Load ID of the records to process
 */
public function compileCounterSubmissionDailyMetrics(string $loadId): void
{
// construct metric_investigations_unique upsert
// The count is aliased "metric", but lands in metric_investigations_unique, as the columns are assigned by position.
$metricInvestigationsUniqueUpsertSql = "
INSERT INTO metrics_counter_submission_daily (load_id, context_id, submission_id, date, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, 0 as metric_investigations, count(*) as metric, 0 as metric_requests, 0 as metric_requests_unique
FROM {$this->table}
WHERE load_id = ? AND submission_id IS NOT NULL
GROUP BY load_id, context_id, submission_id, DATE(date)) AS t
";
// The conflict handling clause differs per database driver.
if (substr(Config::getVar('database', 'driver'), 0, strlen('postgres')) === 'postgres') {
// PostgreSQL: "excluded" refers to the row that was proposed for insertion.
$metricInvestigationsUniqueUpsertSql .= '
ON CONFLICT ON CONSTRAINT msd_uc_load_id_context_id_submission_id_date DO UPDATE
SET metric_investigations_unique = excluded.metric_investigations_unique;
';
} else {
// Other drivers (MySQL syntax): the update refers to the count by its alias "metric".
$metricInvestigationsUniqueUpsertSql .= '
ON DUPLICATE KEY UPDATE metric_investigations_unique = metric;
';
}
DB::statement($metricInvestigationsUniqueUpsertSql, [$loadId]);
}
/**
 * Load unique institutional COUNTER item (submission) investigations.
 *
 * For every institution ID recorded for the given load, the records of the load are joined
 * with usage_stats_institution_temporary_records by load ID and line number and counted per
 * load, context, submission, day and institution. The count is inserted into
 * metrics_counter_submission_institution_daily as metric_investigations_unique (the other
 * metric columns are inserted as 0); if the row already exists, only its
 * metric_investigations_unique is updated.
 *
 * @param string $loadId Load ID of the records to process
 */
public function compileCounterSubmissionInstitutionDailyMetrics(string $loadId): void
{
// construct metric_investigations_unique upsert
// The statement has two placeholders: the load ID and the institution ID.
$metricInvestigationsUniqueUpsertSql = "
INSERT INTO metrics_counter_submission_institution_daily (load_id, context_id, submission_id, date, institution_id, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
SELECT * FROM (
SELECT usui.load_id, usui.context_id, usui.submission_id, DATE(usui.date) as date, usi.institution_id, 0 as metric_investigations, count(*) as metric, 0 as metric_requests, 0 as metric_requests_unique
FROM {$this->table} usui
JOIN usage_stats_institution_temporary_records usi on (usi.load_id = usui.load_id AND usi.line_number = usui.line_number)
WHERE usui.load_id = ? AND submission_id IS NOT NULL AND usi.institution_id = ?
GROUP BY usui.load_id, usui.context_id, usui.submission_id, DATE(usui.date), usi.institution_id) AS t
";
// The conflict handling clause differs per database driver.
if (substr(Config::getVar('database', 'driver'), 0, strlen('postgres')) === 'postgres') {
// PostgreSQL: "excluded" refers to the row that was proposed for insertion.
$metricInvestigationsUniqueUpsertSql .= '
ON CONFLICT ON CONSTRAINT msid_uc_load_id_context_id_submission_id_institution_id_date DO UPDATE
SET metric_investigations_unique = excluded.metric_investigations_unique;
';
} else {
// Other drivers (MySQL syntax): the update refers to the count by its alias "metric".
$metricInvestigationsUniqueUpsertSql .= '
ON DUPLICATE KEY UPDATE metric_investigations_unique = metric;
';
}
/** @var TemporaryInstitutionsDAO */
$temporaryInstitutionsDAO = DAORegistry::getDAO('TemporaryInstitutionsDAO');
$institutionIds = $temporaryInstitutionsDAO->getInstitutionIdsByLoadId($loadId);
// Run the upsert once per institution found for this load.
foreach ($institutionIds as $institutionId) {
DB::statement($metricInvestigationsUniqueUpsertSql, [$loadId, (int) $institutionId]);
}
}
}
@@ -0,0 +1,183 @@
<?php
/**
* @file classes/statistics/PKPTemporaryItemRequestsDAO.php
*
* Copyright (c) 2022 Simon Fraser University
* Copyright (c) 2022 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class PKPTemporaryItemRequestsDAO
*
* @ingroup statistics
*
* @brief Operations for retrieving and adding unique item (submission) requests (primary files downloads).
*/
namespace PKP\statistics;
use APP\core\Application;
use Illuminate\Support\Facades\DB;
use PKP\config\Config;
use PKP\db\DAORegistry;
class PKPTemporaryItemRequestsDAO
{
/**
* The name of the table.
* This table contains all primary files downloads.
*/
public string $table = 'usage_stats_unique_item_requests_temporary_records';
/**
 * Add the passed usage statistic record.
 *
 * @param object $entryData Log entry, converted to column values by getInsertData()
 * @param int $lineNumber Stored in the line_number column
 * @param string $loadId Stored in the load_id column
 */
public function insert(object $entryData, int $lineNumber, string $loadId): void
{
    $row = array_merge(
        $this->getInsertData($entryData),
        ['line_number' => $lineNumber, 'load_id' => $loadId]
    );
    DB::table($this->table)->insert($row);
}
/**
 * Get Laravel optimized array of data to insert into the table based on the log entry.
 *
 * The user agent is limited to 255 bytes and cut on a UTF-8 character boundary,
 * so that a multi-byte character is never split into an invalid byte sequence.
 * Empty country, region and city values are stored as empty strings.
 *
 * @param object $entryData Log entry; the properties read are time, ip, userAgent,
 *  contextId, submissionId, representationId, submissionFileId, assocType, fileType,
 *  country, region and city
 *
 * @return array Column name => value pairs
 */
protected function getInsertData(object $entryData): array
{
    return [
        'date' => $entryData->time,
        'ip' => $entryData->ip,
        // substr() counts bytes and may cut a multi-byte character in half;
        // mb_strcut() keeps the same byte limit but stops at a character boundary.
        'user_agent' => mb_strcut((string) $entryData->userAgent, 0, 255, 'UTF-8'),
        'context_id' => $entryData->contextId,
        'submission_id' => $entryData->submissionId,
        'representation_id' => $entryData->representationId,
        'submission_file_id' => $entryData->submissionFileId,
        'assoc_type' => $entryData->assocType,
        'file_type' => $entryData->fileType,
        'country' => !empty($entryData->country) ? $entryData->country : '',
        'region' => !empty($entryData->region) ? $entryData->region : '',
        'city' => !empty($entryData->city) ? $entryData->city : '',
    ];
}
/**
 * Delete all temporary records whose load_id equals the passed load id.
 */
public function deleteByLoadId(string $loadId): void
{
    $recordsOfLoad = DB::table($this->table)->where('load_id', '=', $loadId);
    $recordsOfLoad->delete();
}
/**
 * Remove unique clicks
 * If multiple transactions represent the same item and occur in the same user-sessions, only one unique activity MUST be counted for that item.
 * Unique item is a submission.
 * A user session is defined by the combination of IP address + user agent + transaction date + hour of day.
 * Only the last unique activity will be retained (and thus counted), all the others will be removed.
 *
 * A record of the given load is deleted if another record of the same load with the same
 * context, IP, user agent and submission, a matching time (see the driver specific
 * conditions below) and a higher line number exists.
 *
 * See https://www.projectcounter.org/code-of-practice-five-sections/7-processing-rules-underlying-counter-reporting-data/#counting
 *
 * @param string $loadId Load ID of the records to process
 */
public function compileUniqueClicks(string $loadId): void
{
// The configured database driver decides which SQL dialect is used.
if (substr(Config::getVar('database', 'driver'), 0, strlen('postgres')) === 'postgres') {
// PostgreSQL: two records match if the hour part of their timestamps is equal.
// NOTE(review): EXTRACT(HOUR ...) does not compare the day; presumably one load never spans several days — confirm.
DB::statement(
"
DELETE FROM {$this->table} usur
WHERE EXISTS (
SELECT * FROM (
SELECT 1 FROM {$this->table} usurt
WHERE usur.load_id = ? AND usurt.load_id = usur.load_id AND
usurt.context_id = usur.context_id AND
usurt.ip = usur.ip AND
usurt.user_agent = usur.user_agent AND
usurt.submission_id = usur.submission_id AND
EXTRACT(HOUR FROM usurt.date) = EXTRACT(HOUR FROM usur.date) AND
usur.line_number < usurt.line_number
) AS tmp
)
",
[$loadId]
);
} else {
// Other drivers (MySQL syntax): two records match if TIMESTAMPDIFF(HOUR, ...) between them is 0.
// NOTE(review): this looks like "less than one full hour apart", which is not the same condition
// as the equal hour of day test in the PostgreSQL branch — confirm that this is intended.
DB::statement(
"
DELETE FROM usur USING {$this->table} usur
INNER JOIN {$this->table} usurt ON (
usurt.load_id = usur.load_id AND
usurt.context_id = usur.context_id AND
usurt.ip = usur.ip AND
usurt.user_agent = usur.user_agent AND
usurt.submission_id = usur.submission_id
)
WHERE usur.load_id = ? AND TIMESTAMPDIFF(HOUR, usur.date, usurt.date) = 0 AND usur.line_number < usurt.line_number
",
[$loadId]
);
}
}
/**
 * Load unique COUNTER item (submission) requests (primary files downloads).
 *
 * The records of the given load with the assoc type Application::ASSOC_TYPE_SUBMISSION_FILE
 * are counted per load, context, submission and day. The count is inserted into
 * metrics_counter_submission_daily as metric_requests_unique (the other metric columns are
 * inserted as 0); if the row already exists, only its metric_requests_unique is updated.
 *
 * @param string $loadId Load ID of the records to process
 */
public function compileCounterSubmissionDailyMetrics(string $loadId): void
{
// construct metric_requests_unique upsert
// assoc_type should always be Application::ASSOC_TYPE_SUBMISSION_FILE, but the condition is included nevertheless
// The count is aliased "metric", but lands in metric_requests_unique, as the columns are assigned by position.
$metricRequestsUniqueUpsertSql = "
INSERT INTO metrics_counter_submission_daily (load_id, context_id, submission_id, date, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, 0 as metric_investigations, 0 as metric_investigations_unique, 0 as metric_requests, count(*) as metric
FROM {$this->table}
WHERE load_id = ? AND assoc_type = ?
GROUP BY load_id, context_id, submission_id, DATE(date)) AS t
";
// The conflict handling clause differs per database driver.
if (substr(Config::getVar('database', 'driver'), 0, strlen('postgres')) === 'postgres') {
// PostgreSQL: "excluded" refers to the row that was proposed for insertion.
$metricRequestsUniqueUpsertSql .= '
ON CONFLICT ON CONSTRAINT msd_uc_load_id_context_id_submission_id_date DO UPDATE
SET metric_requests_unique = excluded.metric_requests_unique;
';
} else {
// Other drivers (MySQL syntax): the update refers to the count by its alias "metric".
$metricRequestsUniqueUpsertSql .= '
ON DUPLICATE KEY UPDATE metric_requests_unique = metric;
';
}
DB::statement($metricRequestsUniqueUpsertSql, [$loadId, Application::ASSOC_TYPE_SUBMISSION_FILE]);
}
/**
 * Load unique institutional COUNTER item (submission) requests (primary files downloads).
 *
 * For every institution ID recorded for the given load, the records of the load with the
 * assoc type Application::ASSOC_TYPE_SUBMISSION_FILE are joined with
 * usage_stats_institution_temporary_records by load ID and line number and counted per load,
 * context, submission, day and institution. The count is inserted into
 * metrics_counter_submission_institution_daily as metric_requests_unique (the other metric
 * columns are inserted as 0); if the row already exists, only its metric_requests_unique
 * is updated.
 *
 * @param string $loadId Load ID of the records to process
 */
public function compileCounterSubmissionInstitutionDailyMetrics(string $loadId): void
{
// construct metric_requests_unique upsert
// assoc_type should always be Application::ASSOC_TYPE_SUBMISSION_FILE, but the condition is included nevertheless
// The statement has three placeholders: the load ID, the assoc type and the institution ID.
$metricRequestsUniqueUpsertSql = "
INSERT INTO metrics_counter_submission_institution_daily (load_id, context_id, submission_id, date, institution_id, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
SELECT * FROM (
SELECT usur.load_id, usur.context_id, usur.submission_id, DATE(usur.date) as date, usi.institution_id, 0 as metric_investigations, 0 as metric_investigations_unique, 0 as metric_requests, count(*) as metric
FROM {$this->table} usur
JOIN usage_stats_institution_temporary_records usi on (usi.load_id = usur.load_id AND usi.line_number = usur.line_number)
WHERE usur.load_id = ? AND usur.assoc_type = ? AND usi.institution_id = ?
GROUP BY usur.load_id, usur.context_id, usur.submission_id, DATE(usur.date), usi.institution_id) AS t
";
// The conflict handling clause differs per database driver.
if (substr(Config::getVar('database', 'driver'), 0, strlen('postgres')) === 'postgres') {
// PostgreSQL: "excluded" refers to the row that was proposed for insertion.
$metricRequestsUniqueUpsertSql .= '
ON CONFLICT ON CONSTRAINT msid_uc_load_id_context_id_submission_id_institution_id_date DO UPDATE
SET metric_requests_unique = excluded.metric_requests_unique;
';
} else {
// Other drivers (MySQL syntax): the update refers to the count by its alias "metric".
$metricRequestsUniqueUpsertSql .= '
ON DUPLICATE KEY UPDATE metric_requests_unique = metric;
';
}
/** @var TemporaryInstitutionsDAO */
$temporaryInstitutionsDAO = DAORegistry::getDAO('TemporaryInstitutionsDAO');
$institutionIds = $temporaryInstitutionsDAO->getInstitutionIdsByLoadId($loadId);
// Run the upsert once per institution found for this load.
foreach ($institutionIds as $institutionId) {
DB::statement($metricRequestsUniqueUpsertSql, [$loadId, Application::ASSOC_TYPE_SUBMISSION_FILE, (int) $institutionId]);
}
}
}
@@ -0,0 +1,320 @@
<?php
/**
* @file classes/statistics/PKPTemporaryTotalsDAO.php
*
* Copyright (c) 2022 Simon Fraser University
* Copyright (c) 2022 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class PKPTemporaryTotalsDAO
*
* @ingroup statistics
*
* @brief Operations for retrieving and adding total usage.
*
* It considers:
* context index page views,
* submission abstract, primary and supp file views,
* geo submission usage,
* COUNTER submission stats.
*/
namespace PKP\statistics;
use APP\core\Application;
use DateTimeImmutable;
use Illuminate\Support\Facades\DB;
use PKP\config\Config;
use PKP\db\DAORegistry;
abstract class PKPTemporaryTotalsDAO
{
/**
* The name of the table. This table contains all usage events.
*/
public string $table = 'usage_stats_total_temporary_records';
/**
 * Add the passed usage statistic record.
 *
 * @param object $entryData Log entry, converted to column values by getInsertData()
 * @param int $lineNumber Stored in the line_number column
 * @param string $loadId Stored in the load_id column
 */
public function insert(object $entryData, int $lineNumber, string $loadId): void
{
    $row = array_merge(
        $this->getInsertData($entryData),
        ['line_number' => $lineNumber, 'load_id' => $loadId]
    );
    DB::table($this->table)->insert($row);
}
/**
 * Get Laravel optimized array of data to insert into the table based on the log entry.
 *
 * The user agent is limited to 255 bytes and cut on a UTF-8 character boundary,
 * so that a multi-byte character is never split into an invalid byte sequence.
 * Empty country, region and city values are stored as empty strings.
 *
 * @param object $entryData Log entry; the properties read are time, ip, userAgent,
 *  canonicalUrl, contextId, submissionId, representationId, submissionFileId, assocType,
 *  fileType, country, region and city
 *
 * @return array Column name => value pairs
 */
protected function getInsertData(object $entryData): array
{
    return [
        'date' => $entryData->time,
        'ip' => $entryData->ip,
        // substr() counts bytes and may cut a multi-byte character in half;
        // mb_strcut() keeps the same byte limit but stops at a character boundary.
        'user_agent' => mb_strcut((string) $entryData->userAgent, 0, 255, 'UTF-8'),
        'canonical_url' => $entryData->canonicalUrl,
        'context_id' => $entryData->contextId,
        'submission_id' => $entryData->submissionId,
        'representation_id' => $entryData->representationId,
        'submission_file_id' => $entryData->submissionFileId,
        'assoc_type' => $entryData->assocType,
        'file_type' => $entryData->fileType,
        'country' => !empty($entryData->country) ? $entryData->country : '',
        'region' => !empty($entryData->region) ? $entryData->region : '',
        'city' => !empty($entryData->city) ? $entryData->city : '',
    ];
}
/**
 * Delete all temporary records whose load_id equals the passed load id.
 */
public function deleteByLoadId(string $loadId): void
{
    $recordsOfLoad = DB::table($this->table)->where('load_id', '=', $loadId);
    $recordsOfLoad->delete();
}
/**
 * Remove Double Clicks according to COUNTER guidelines
 * Remove the potential of over-counting which could occur when a user clicks the same link multiple times.
 * Double-clicks, i.e. two clicks in succession, on a link by the same user within a 30-second period MUST be counted as one action.
 * When two actions are made for the same URL within 30 seconds the first request MUST be removed and the second retained.
 * A user is identified by IP address combined with the browser's user-agent.
 *
 * A record of the given load is deleted if another record of the same load with the same
 * context, IP, user agent and canonical URL exists that has a higher line number and is
 * more than 0 and less than $counterDoubleClickTimeFilter seconds later.
 *
 * See https://www.projectcounter.org/code-of-practice-five-sections/7-processing-rules-underlying-counter-reporting-data/#doubleclick
 *
 * @param string $loadId Load ID of the records to process
 * @param int $counterDoubleClickTimeFilter Length of the double click period in seconds
 */
public function removeDoubleClicks(string $loadId, int $counterDoubleClickTimeFilter): void
{
// The configured database driver decides which SQL dialect is used.
if (substr(Config::getVar('database', 'driver'), 0, strlen('postgres')) === 'postgres') {
// PostgreSQL: the time difference in seconds is computed with EXTRACT(EPOCH FROM ...).
DB::statement(
"
DELETE FROM {$this->table} ust
WHERE EXISTS (
SELECT * FROM (
SELECT 1 FROM {$this->table} ustt
WHERE ust.load_id = ? AND ustt.load_id = ust.load_id AND
ustt.context_id = ust.context_id AND
ustt.ip = ust.ip AND ustt.user_agent = ust.user_agent AND ustt.canonical_url = ust.canonical_url AND
EXTRACT(EPOCH FROM (ustt.date - ust.date)) < ? AND
EXTRACT(EPOCH FROM (ustt.date - ust.date)) > 0 AND
ust.line_number < ustt.line_number) AS tmp
)
",
[$loadId, $counterDoubleClickTimeFilter]
);
} else {
// Other drivers (MySQL syntax): the time difference in seconds is computed with TIMESTAMPDIFF(SECOND, ...).
DB::statement(
"
DELETE FROM ust USING {$this->table} ust
INNER JOIN {$this->table} ustt ON (
ustt.load_id = ust.load_id AND
ustt.context_id = ust.context_id AND
ustt.ip = ust.ip AND
ustt.user_agent = ust.user_agent AND
ustt.canonical_url = ust.canonical_url
)
WHERE ust.load_id = ? AND
TIMESTAMPDIFF(SECOND, ust.date, ustt.date) < ? AND
TIMESTAMPDIFF(SECOND, ust.date, ustt.date) > 0 AND
ust.line_number < ustt.line_number
",
[$loadId, $counterDoubleClickTimeFilter]
);
}
}
/**
 * Load usage for context index pages.
 *
 * Existing metrics_context rows of the load or of the load's date are removed first.
 * Then the records of the load with the context assoc type are counted per load,
 * context and day, and inserted into metrics_context.
 *
 * @param string $loadId Load ID; the 8 characters starting 12 characters before its end
 *  are read as the date in the format Ymd (presumably the ID ends with e.g. "20240131.log" — confirm)
 */
public function compileContextMetrics(string $loadId): void
{
    $dateToken = substr($loadId, -12, 8);
    $logDate = DateTimeImmutable::createFromFormat('Ymd', $dateToken);
    DB::table('metrics_context')
        ->where('load_id', '=', $loadId)
        ->orWhereDate('date', '=', $logDate)
        ->delete();

    $targetColumns = ['load_id', 'context_id', 'date', 'metric'];
    $contextUsage = DB::table($this->table)
        ->select(DB::raw('load_id, context_id, DATE(date) as date, count(*) as metric'))
        ->where('load_id', '=', $loadId)
        ->where('assoc_type', '=', Application::getContextAssocType())
        ->groupBy(DB::raw('load_id, context_id, DATE(date)'));
    DB::table('metrics_context')->insertUsing($targetColumns, $contextUsage);
}
/**
 * Load usage for submissions (abstract, primary and supp files).
 *
 * Existing metrics_submission rows of the load or of the load's date are removed first.
 * Then the records of the load are counted and inserted into metrics_submission in three
 * steps, one per assoc type: ASSOC_TYPE_SUBMISSION, ASSOC_TYPE_SUBMISSION_FILE and
 * ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER. The two file steps are additionally grouped
 * by representation, submission file and file type.
 *
 * @param string $loadId Load ID; the 8 characters starting 12 characters before its end
 *  are read as the date in the format Ymd (presumably the ID ends with e.g. "20240131.log" — confirm)
 */
public function compileSubmissionMetrics(string $loadId): void
{
    $dateToken = substr($loadId, -12, 8);
    $logDate = DateTimeImmutable::createFromFormat('Ymd', $dateToken);
    DB::table('metrics_submission')
        ->where('load_id', '=', $loadId)
        ->orWhereDate('date', '=', $logDate)
        ->delete();

    // Usage recorded for the submission itself
    $submissionColumns = ['load_id', 'context_id', 'submission_id', 'assoc_type', 'date', 'metric'];
    $submissionUsage = DB::table($this->table)
        ->select(DB::raw('load_id, context_id, submission_id, assoc_type, DATE(date) as date, count(*) as metric'))
        ->where('load_id', '=', $loadId)
        ->where('assoc_type', '=', Application::ASSOC_TYPE_SUBMISSION)
        ->groupBy(DB::raw('load_id, context_id, submission_id, assoc_type, DATE(date)'));
    DB::table('metrics_submission')->insertUsing($submissionColumns, $submissionUsage);

    // Usage recorded for files: the same query for both file assoc types, in this order
    $fileColumns = ['load_id', 'context_id', 'submission_id', 'representation_id', 'submission_file_id', 'file_type', 'assoc_type', 'date', 'metric'];
    $fileAssocTypes = [
        Application::ASSOC_TYPE_SUBMISSION_FILE,
        Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER,
    ];
    foreach ($fileAssocTypes as $fileAssocType) {
        $fileUsage = DB::table($this->table)
            ->select(DB::raw('load_id, context_id, submission_id, representation_id, submission_file_id, file_type, assoc_type, DATE(date) as date, count(*) as metric'))
            ->where('load_id', '=', $loadId)
            ->where('assoc_type', '=', $fileAssocType)
            ->groupBy(DB::raw('load_id, context_id, submission_id, representation_id, submission_file_id, file_type, assoc_type, DATE(date)'));
        DB::table('metrics_submission')->insertUsing($fileColumns, $fileUsage);
    }
}
// For the DB tables that also contain the unique metrics, the deletion by loadId lives in separate functions,
// unlike the deletion for the tables above (metrics_context, metrics_issue and metrics_submission).
// The total metrics are loaded here (see the compile... functions below); the unique metrics are loaded in the UsageStatsUnique... classes.
/**
 * Delete the rows of metrics_submission_geo_daily that either carry the given load id
 * or whose date equals the day parsed out of that load id.
 *
 * @param string $loadId Load id; the 8 characters starting 12 characters before its end are parsed as a Ymd date
 */
public function deleteSubmissionGeoDailyByLoadId(string $loadId): void
{
    $dayStamp = substr($loadId, -12, 8);
    // NOTE(review): createFromFormat() yields false when $dayStamp is not a valid Ymd value;
    // the result is handed to the query unchecked -- confirm callers always pass well-formed load ids.
    $loadDay = DateTimeImmutable::createFromFormat('Ymd', $dayStamp);

    $obsoleteRows = DB::table('metrics_submission_geo_daily')
        ->where('load_id', '=', $loadId)
        ->orWhereDate('date', '=', $loadDay);
    $obsoleteRows->delete();
}
/**
 * Delete the rows of metrics_counter_submission_daily that either carry the given load id
 * or whose date equals the day parsed out of that load id.
 *
 * @param string $loadId Load id; the 8 characters starting 12 characters before its end are parsed as a Ymd date
 */
public function deleteCounterSubmissionDailyByLoadId(string $loadId): void
{
    $dayStamp = substr($loadId, -12, 8);
    // NOTE(review): createFromFormat() yields false when $dayStamp is not a valid Ymd value;
    // the result is handed to the query unchecked -- confirm callers always pass well-formed load ids.
    $loadDay = DateTimeImmutable::createFromFormat('Ymd', $dayStamp);

    $obsoleteRows = DB::table('metrics_counter_submission_daily')
        ->where('load_id', '=', $loadId)
        ->orWhereDate('date', '=', $loadDay);
    $obsoleteRows->delete();
}
/**
 * Delete the rows of metrics_counter_submission_institution_daily that either carry the given load id
 * or whose date equals the day parsed out of that load id.
 *
 * @param string $loadId Load id; the 8 characters starting 12 characters before its end are parsed as a Ymd date
 */
public function deleteCounterSubmissionInstitutionDailyByLoadId(string $loadId): void
{
    $dayStamp = substr($loadId, -12, 8);
    // NOTE(review): createFromFormat() yields false when $dayStamp is not a valid Ymd value;
    // the result is handed to the query unchecked -- confirm callers always pass well-formed load ids.
    $loadDay = DateTimeImmutable::createFromFormat('Ymd', $dayStamp);

    $obsoleteRows = DB::table('metrics_counter_submission_institution_daily')
        ->where('load_id', '=', $loadId)
        ->orWhereDate('date', '=', $loadDay);
    $obsoleteRows->delete();
}
/**
 * Load total geographical usage on the submission level.
 *
 * Counts the rows of this DAO's table for the given load id, grouped by load id, context,
 * submission, day, country, region and city, and upserts the counts into
 * metrics_submission_geo_daily. Only rows with a submission id and at least one non-empty
 * geographical field are considered. New rows get metric_unique = 0; on a key conflict only
 * the metric column is overwritten.
 *
 * @param string $loadId Load id whose temporary records are aggregated
 */
public function compileSubmissionGeoDailyMetrics(string $loadId): void
{
    // Aggregating INSERT ... SELECT shared by both database drivers.
    $aggregateInsertSql = "
INSERT INTO metrics_submission_geo_daily (load_id, context_id, submission_id, date, country, region, city, metric, metric_unique)
SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, country, region, city, count(*) as metric_tmp, 0 as metric_unique
FROM {$this->table}
WHERE load_id = ? AND submission_id IS NOT NULL AND (country <> '' OR region <> '' OR city <> '')
GROUP BY load_id, context_id, submission_id, DATE(date), country, region, city) AS t
";

    // The upsert tail differs per driver: PostgreSQL names the unique constraint,
    // every other driver gets the ON DUPLICATE KEY form.
    $postgresConflictClause = '
ON CONFLICT ON CONSTRAINT msgd_uc_load_context_submission_c_r_c_date DO UPDATE
SET metric = excluded.metric;
';
    $defaultConflictClause = '
ON DUPLICATE KEY UPDATE metric = metric_tmp;
';

    $usesPostgres = strpos(Config::getVar('database', 'driver'), 'postgres') === 0;
    $conflictClause = $usesPostgres ? $postgresConflictClause : $defaultConflictClause;

    DB::statement($aggregateInsertSql . $conflictClause, [$loadId]);
}
/**
 * Load total COUNTER submission usage (investigations and requests).
 *
 * Runs two upserts into metrics_counter_submission_daily, both grouped by load id, context,
 * submission and day:
 *  - investigations: counts every row of this DAO's table that has a submission id;
 *  - requests: counts the rows whose assoc_type is Application::ASSOC_TYPE_SUBMISSION_FILE.
 * Each upsert writes 0 into the columns it does not own when inserting and, on a key
 * conflict, overwrites only its own total column. The unique columns are never updated here.
 *
 * @param string $loadId Load id whose temporary records are aggregated
 */
public function compileCounterSubmissionDailyMetrics(string $loadId): void
{
    $usesPostgres = strpos(Config::getVar('database', 'driver'), 'postgres') === 0;

    // Investigations: aggregate + driver-specific conflict handling.
    $investigationsSql = "
INSERT INTO metrics_counter_submission_daily (load_id, context_id, submission_id, date, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, count(*) as metric, 0 as metric_investigations_unique, 0 as metric_requests, 0 as metric_requests_unique
FROM {$this->table}
WHERE load_id = ? AND submission_id IS NOT NULL
GROUP BY load_id, context_id, submission_id, DATE(date)) AS t
";
    $investigationsSql .= $usesPostgres
        ? '
ON CONFLICT ON CONSTRAINT msd_uc_load_id_context_id_submission_id_date DO UPDATE
SET metric_investigations = excluded.metric_investigations;
'
        : '
ON DUPLICATE KEY UPDATE metric_investigations = metric;
';

    // Requests: same shape, but the count lands in the metric_requests position.
    $requestsSql = "
INSERT INTO metrics_counter_submission_daily (load_id, context_id, submission_id, date, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
SELECT * FROM (SELECT load_id, context_id, submission_id, DATE(date) as date, 0 as metric_investigations, 0 as metric_investigations_unique, count(*) as metric, 0 as metric_requests_unique
FROM {$this->table}
WHERE load_id = ? AND assoc_type = ?
GROUP BY load_id, context_id, submission_id, DATE(date)) AS t
";
    $requestsSql .= $usesPostgres
        ? '
ON CONFLICT ON CONSTRAINT msd_uc_load_id_context_id_submission_id_date DO UPDATE
SET metric_requests = excluded.metric_requests;
'
        : '
ON DUPLICATE KEY UPDATE metric_requests = metric;
';

    // Execute in the original order: investigations first, then requests.
    $upserts = [
        [$investigationsSql, [$loadId]],
        [$requestsSql, [$loadId, Application::ASSOC_TYPE_SUBMISSION_FILE]],
    ];
    foreach ($upserts as [$sql, $bindings]) {
        DB::statement($sql, $bindings);
    }
}
/**
 * Load total institutional COUNTER submission usage (investigations and requests).
 *
 * Joins this DAO's table with usage_stats_institution_temporary_records on load id and line
 * number and, for every institution id the TemporaryInstitutionsDAO reports for the load,
 * runs two upserts into metrics_counter_submission_institution_daily:
 *  - investigations: counts the joined rows that have a submission id;
 *  - requests: counts the joined rows whose assoc_type is Application::ASSOC_TYPE_SUBMISSION_FILE.
 * On a key conflict each upsert overwrites only its own total column.
 *
 * @param string $loadId Load id whose temporary records are aggregated
 */
public function compileCounterSubmissionInstitutionDailyMetrics(string $loadId): void
{
    $usesPostgres = strpos(Config::getVar('database', 'driver'), 'postgres') === 0;

    // Investigations upsert (bindings: load id, institution id).
    $investigationsSql = "
INSERT INTO metrics_counter_submission_institution_daily (load_id, context_id, submission_id, date, institution_id, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
SELECT * FROM (
SELECT ustt.load_id, ustt.context_id, ustt.submission_id, DATE(ustt.date) as date, usit.institution_id, count(*) as metric, 0 as metric_investigations_unique, 0 as metric_requests, 0 as metric_requests_unique
FROM {$this->table} ustt
JOIN usage_stats_institution_temporary_records usit on (usit.load_id = ustt.load_id AND usit.line_number = ustt.line_number)
WHERE ustt.load_id = ? AND submission_id IS NOT NULL AND usit.institution_id = ?
GROUP BY ustt.load_id, ustt.context_id, ustt.submission_id, DATE(ustt.date), usit.institution_id) AS t
";
    $investigationsSql .= $usesPostgres
        ? '
ON CONFLICT ON CONSTRAINT msid_uc_load_id_context_id_submission_id_institution_id_date DO UPDATE
SET metric_investigations = excluded.metric_investigations;
'
        : '
ON DUPLICATE KEY UPDATE metric_investigations = metric;
';

    // Requests upsert (bindings: load id, assoc type, institution id).
    $requestsSql = "
INSERT INTO metrics_counter_submission_institution_daily (load_id, context_id, submission_id, date, institution_id, metric_investigations, metric_investigations_unique, metric_requests, metric_requests_unique)
SELECT * FROM (
SELECT ustt.load_id, ustt.context_id, ustt.submission_id, DATE(ustt.date) as date, usit.institution_id, 0 as metric_investigations, 0 as metric_investigations_unique, count(*) as metric, 0 as metric_requests_unique
FROM {$this->table} ustt
JOIN usage_stats_institution_temporary_records usit on (usit.load_id = ustt.load_id AND usit.line_number = ustt.line_number)
WHERE ustt.load_id = ? AND ustt.assoc_type = ? AND usit.institution_id = ?
GROUP BY ustt.load_id, ustt.context_id, ustt.submission_id, DATE(ustt.date), usit.institution_id) AS t
";
    $requestsSql .= $usesPostgres
        ? '
ON CONFLICT ON CONSTRAINT msid_uc_load_id_context_id_submission_id_institution_id_date DO UPDATE
SET metric_requests = excluded.metric_requests;
'
        : '
ON DUPLICATE KEY UPDATE metric_requests = metric;
';

    /** @var TemporaryInstitutionsDAO $institutionsDao */
    $institutionsDao = DAORegistry::getDAO('TemporaryInstitutionsDAO');

    // One pair of statements per institution seen in this load; investigations always run before requests.
    foreach ($institutionsDao->getInstitutionIdsByLoadId($loadId) as $rawInstitutionId) {
        $institutionId = (int) $rawInstitutionId;
        DB::statement($investigationsSql, [$loadId, $institutionId]);
        DB::statement($requestsSql, [$loadId, Application::ASSOC_TYPE_SUBMISSION_FILE, $institutionId]);
    }
}
}
@@ -0,0 +1,62 @@
<?php
/**
* @file classes/statistics/TemporaryInstitutionsDAO.php
*
* Copyright (c) 2022 Simon Fraser University
* Copyright (c) 2022 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class TemporaryInstitutionsDAO
*
* @ingroup statistics
*
* @brief Operations for retrieving and adding the normalized data for institutions usage stats temporary records.
*/
namespace PKP\statistics;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\DB;
class TemporaryInstitutionsDAO
{
    /** The name of the table */
    public string $table = 'usage_stats_institution_temporary_records';

    /**
     * Insert the institution ids to normalize the data in temporary tables.
     *
     * One row (load_id, line_number, institution_id) is written per given institution id.
     * All rows are sent to the database in a single multi-row insert instead of one
     * statement per id; an empty list results in no query at all.
     *
     * @param array $institutionIds Institution ids to store; each becomes the institution_id of one row
     * @param int $lineNumber Value stored in the line_number column of every row
     * @param string $loadId Value stored in the load_id column of every row
     */
    public function insert(array $institutionIds, int $lineNumber, string $loadId): void
    {
        if ($institutionIds === []) {
            return;
        }

        $rows = [];
        foreach ($institutionIds as $institutionId) {
            $rows[] = [
                'load_id' => $loadId,
                'line_number' => $lineNumber,
                'institution_id' => $institutionId,
            ];
        }

        // The query builder turns a list of rows into one INSERT with several value tuples.
        DB::table($this->table)->insert($rows);
    }

    /**
     * Delete all records associated
     * with the passed load id.
     *
     * @param string $loadId Load id whose rows are removed from the table
     */
    public function deleteByLoadId(string $loadId): void
    {
        DB::table($this->table)->where('load_id', '=', $loadId)->delete();
    }

    /**
     * Retrieve all distinct institution IDs for the given load id.
     *
     * @param string $loadId Load id to look up
     *
     * @return Collection The distinct institution_id values stored for that load id
     */
    public function getInstitutionIdsByLoadId(string $loadId): Collection
    {
        return DB::table($this->table)
            ->select('institution_id')
            ->distinct()
            ->where('load_id', '=', $loadId)
            ->pluck('institution_id');
    }
}