356 lines
13 KiB
PHP
356 lines
13 KiB
PHP
<?php
|
|
|
|
/**
|
|
* @file classes/statistics/PKPStatisticsHelper.php
|
|
*
|
|
* Copyright (c) 2013-2021 Simon Fraser University
|
|
* Copyright (c) 2003-2021 John Willinsky
|
|
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
|
*
|
|
* @class PKPStatisticsHelper
|
|
*
|
|
* @ingroup statistics
|
|
*
|
|
* @brief Statistics helper class.
|
|
*
|
|
*/
|
|
|
|
namespace PKP\statistics;
|
|
|
|
use APP\facades\Repo;
|
|
use GeoIp2\Database\Reader;
|
|
use InvalidArgumentException;
|
|
use PKP\cache\CacheManager;
|
|
use PKP\cache\FileCache;
|
|
use PKP\context\Context;
|
|
use PKP\file\PrivateFileManager;
|
|
use PKP\site\Site;
|
|
use Sokil\IsoCodes\IsoCodesFactory;
|
|
|
|
abstract class PKPStatisticsHelper
|
|
{
|
|
// Dimensions:
|
|
// 1) publication object dimension:
|
|
public const STATISTICS_DIMENSION_CONTEXT_ID = 'context_id';
|
|
public const STATISTICS_DIMENSION_SUBMISSION_ID = 'submission_id';
|
|
public const STATISTICS_DIMENSION_ASSOC_TYPE = 'assoc_type';
|
|
public const STATISTICS_DIMENSION_FILE_TYPE = 'file_type';
|
|
public const STATISTICS_DIMENSION_SUBMISSION_FILE_ID = 'submission_file_id';
|
|
public const STATISTICS_DIMENSION_REPRESENTATION_ID = 'representation_id';
|
|
|
|
// 2) time dimension:
|
|
public const STATISTICS_DIMENSION_YEAR = 'year';
|
|
public const STATISTICS_DIMENSION_MONTH = 'month';
|
|
public const STATISTICS_DIMENSION_DAY = 'day'; // used as API parameter for timelines
|
|
public const STATISTICS_DIMENSION_DATE = 'date';
|
|
|
|
// 3) geography dimension:
|
|
public const STATISTICS_DIMENSION_COUNTRY = 'country';
|
|
public const STATISTICS_DIMENSION_REGION = 'region';
|
|
public const STATISTICS_DIMENSION_CITY = 'city';
|
|
|
|
// Metrics:
|
|
public const STATISTICS_METRIC = 'metric';
|
|
public const STATISTICS_METRIC_UNIQUE = 'metric_unique';
|
|
|
|
// Ordering:
|
|
public const STATISTICS_ORDER_ASC = 'ASC';
|
|
public const STATISTICS_ORDER_DESC = 'DESC';
|
|
|
|
// File type to be used in publication object dimension.
|
|
public const STATISTICS_FILE_TYPE_HTML = 1;
|
|
public const STATISTICS_FILE_TYPE_PDF = 2;
|
|
public const STATISTICS_FILE_TYPE_OTHER = 3;
|
|
public const STATISTICS_FILE_TYPE_DOC = 4;
|
|
|
|
// Set the earliest date used
|
|
public const STATISTICS_EARLIEST_DATE = '2001-01-01';
|
|
|
|
/** These are rules defined by the COUNTER project.
|
|
* See https://www.projectcounter.org/code-of-practice-five-sections/7-processing-rules-underlying-counter-reporting-data/#doubleclick
|
|
*/
|
|
public const COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS = 30;
|
|
|
|
// Geography settings
|
|
public const STATISTICS_SETTING_COUNTRY = 'country';
|
|
public const STATISTICS_SETTING_REGION = 'country+region';
|
|
public const STATISTICS_SETTING_CITY = 'country+region+city';
|
|
|
|
public FileCache $geoDataCache;
|
|
public FileCache $institutionDataCache;
|
|
|
|
/**
 * Build the path of the directory that holds the usage stats files.
 *
 * The base path reported by PrivateFileManager is resolved with realpath()
 * and the "usageStats" sub-directory is appended to it.
 */
public static function getUsageStatsDirPath(): string
{
    $privateFileManager = new PrivateFileManager();
    $resolvedBasePath = realpath($privateFileManager->getBasePath());
    return $resolvedBasePath . '/usageStats';
}
|
|
|
|
/**
 * Build the path of the salt file, which lives directly inside the usage stats directory.
 */
public static function getSaltFileName(): string
{
    $usageStatsDir = self::getUsageStatsDirPath();
    return $usageStatsDir . '/salt';
}
|
|
|
|
/**
 * Build the path of the Geo database file, which lives directly inside the usage stats directory.
 */
public static function getGeoDBPath(): string
{
    $usageStatsDir = self::getUsageStatsDirPath();
    return $usageStatsDir . '/IPGeoDB.mmdb';
}
|
|
|
|
/**
 * Map a mimetype to the file type used in the publication object dimension.
 * The mimetypes considered here are a subset of those used in PKPFileService::getDocumentType()
 *
 * @return int One of the StatisticsHelper::STATISTICS_FILE_TYPE_ constants;
 *  STATISTICS_FILE_TYPE_OTHER for every mimetype that is not listed
 */
public static function getDocumentType(string $mimetype): int
{
    // Every known mimetype, keyed to the file type it is counted as.
    $fileTypesByMimetype = [
        'application/pdf' => self::STATISTICS_FILE_TYPE_PDF,
        'application/x-pdf' => self::STATISTICS_FILE_TYPE_PDF,
        'text/pdf' => self::STATISTICS_FILE_TYPE_PDF,
        'text/x-pdf' => self::STATISTICS_FILE_TYPE_PDF,
        'application/msword' => self::STATISTICS_FILE_TYPE_DOC,
        'application/word' => self::STATISTICS_FILE_TYPE_DOC,
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => self::STATISTICS_FILE_TYPE_DOC,
        'text/html' => self::STATISTICS_FILE_TYPE_HTML,
    ];

    return $fileTypesByMimetype[$mimetype] ?? self::STATISTICS_FILE_TYPE_OTHER;
}
|
|
|
|
/**
 * Compute the SHA256 hash of the given IP with the given salt appended to it.
 *
 * NB: This implementation was taken from OA-S directly. See
 * http://sourceforge.net/p/openaccessstati/code-0/3/tree/trunk/logfile-parser/lib/logutils.php
 * The PHP4 part is not implemented as OJS dropped PHP4 support.
 *
 * @return string The hash as lowercase hexadecimal digits
 */
public static function hashIp(string $ip, string $salt): string
{
    $saltedIp = $ip . $salt;
    return hash('sha256', $saltedIp);
}
|
|
|
|
/**
 * Create a new salt, write it to the salt file and return it.
 *
 * The salt is 16 bytes taken from random_bytes(), hex-encoded into a
 * 32 character string.
 *
 * @param string $saltFileName Path of the file the salt is written to
 *
 * @throws \Exception If random_bytes() cannot find an appropriate source of randomness
 *
 * @return string The newly created salt
 */
public static function createNewSalt(string $saltFileName): string
{
    $newSalt = bin2hex(random_bytes(16));

    // The salt is still returned when it could not be persisted, but the
    // failure is logged: IPs hashed with it cannot be matched again later.
    if (file_put_contents($saltFileName, $newSalt, LOCK_EX) === false) {
        error_log('There was a problem writing the salt file ' . $saltFileName . '.');
    }

    return $newSalt;
}
|
|
|
|
/**
 * Retrieve the Geo data (country, region, city) for an IP, limited to the
 * level of detail allowed by the site i.e. context settings.
 *
 * @return array [country, region, city]; an element is null when the
 *  settings do not cover it
 */
public function getGeoData(Site $site, Context $context, string $ip, string $hashedIp, bool $flushCache = false): array
{
    $geoSetting = $context->getEnableGeoUsageStats($site);
    if ($geoSetting == 'disabled') {
        return [null, null, null];
    }

    $location = $this->getLocation($ip, $hashedIp, $flushCache);

    // The city level includes the region level, which includes the country level.
    $cityEnabled = $geoSetting == self::STATISTICS_SETTING_CITY;
    $regionEnabled = $cityEnabled || $geoSetting == self::STATISTICS_SETTING_REGION;

    return [
        $location['country'],
        $regionEnabled ? $location['region'] : null,
        $cityEnabled ? $location['city'] : null,
    ];
}
|
|
|
|
/**
 * Get location based on the IP, use cache if exists.
 *
 * On a cache miss the Geo database is queried and the result is stored in
 * the cache under the hashed IP. This also happens when the database could
 * not be read or the IP was not found, in which case all values are null.
 *
 * @param string $ip User IP
 * @param string $hashedIp Hashed user IP, used as the cache key
 * @param bool $flush If true empty cache
 *
 * @return array Geo data of the given IP
 *  [
 *      'country' => string|null Country ISO code,
 *      'region' => string|null Region ISO code,
 *      'city' => string|null City name
 *  ]
 */
public function getLocation(string $ip, string $hashedIp, bool $flush = false): array
{
    if (!isset($this->geoDataCache)) {
        $geoCacheManager = CacheManager::getManager();
        $this->geoDataCache = $geoCacheManager->getCache('geoIP', 'all', [$this, 'geoDataCacheMiss']);
    }

    if ($flush) {
        // Salt and thus hashed IPs changed, empty the cache.
        $this->geoDataCacheMiss($this->geoDataCache);
    }

    $cachedGeoData = $this->geoDataCache->getContents();
    if (array_key_exists($hashedIp, $cachedGeoData)) {
        return $cachedGeoData[$hashedIp];
    }

    $geoDBPath = $this->getGeoDBPath();
    $reader = $countryIsoCode = $regionIsoCode = $cityName = null;
    try {
        $reader = new Reader($geoDBPath);
    } catch (\MaxMind\Db\Reader\InvalidDatabaseException | InvalidArgumentException $e) {
        error_log('There was a problem reading the Geo database at ' . $geoDBPath . '. Error: ' . $e->getMessage());
    }

    if (isset($reader)) {
        try {
            $geoIPRecord = $reader->city($ip);
            $countryIsoCode = $geoIPRecord->country->isoCode;
            // When found, up to three characters long subdivision portion of the ISO 3166-2 code is returned
            // s. https://github.com/maxmind/GeoIP2-php/blob/main/src/Record/Subdivision.php#L20
            $regionIsoCode = $geoIPRecord->mostSpecificSubdivision->isoCode;
            // DB-IP IP to City Lite database does not provide region Iso code but name,
            // thus try to get the region Iso code by the name,
            // but we need country for that
            if (!isset($regionIsoCode) && isset($countryIsoCode)) {
                $regionName = $geoIPRecord->mostSpecificSubdivision->name;
                if (isset($regionName)) {
                    $isoCodes = app(IsoCodesFactory::class);
                    $allCountryRegions = $isoCodes->getSubdivisions()->getAllByCountryCode($countryIsoCode);
                    foreach ($allCountryRegions as $countryRegion) {
                        if ($countryRegion->getName() === $regionName) {
                            // The subdivision code is "<country>-<region>"; keep the region part only.
                            // A code without a separator leaves the region unset instead of raising a warning.
                            $regionIsoCode = explode('-', $countryRegion->getCode())[1] ?? null;
                            break;
                        }
                    }
                }
            }
            $cityName = $geoIPRecord->city->name;
        } catch (\BadMethodCallException $e) {
            error_log('There was a problem using city method on the Geo database at ' . $geoDBPath . '. Error: ' . $e->getMessage());
        } catch (\GeoIp2\Exception\AddressNotFoundException $e) {
            error_log('There was a problem finding IP in the Geo database at ' . $geoDBPath . '. Error: ' . $e->getMessage());
        } catch (\MaxMind\Db\Reader\InvalidDatabaseException $e) {
            error_log('There was a problem reading the Geo database at ' . $geoDBPath . '. Error: ' . $e->getMessage());
        }
    }

    $cachedGeoData[$hashedIp] = [
        'country' => $countryIsoCode,
        'region' => $regionIsoCode,
        'city' => $cityName,
    ];
    $this->geoDataCache->setEntireCache($cachedGeoData);
    return $cachedGeoData[$hashedIp];
}
|
|
|
|
/**
 * Callback used when the Geo cache is missed; also called directly to flush the cache.
 * Replaces the entire cache content with an empty array.
 *
 * @return array The (empty) cache content
 */
public function geoDataCacheMiss(FileCache $cache): array
{
    $emptyContents = [];
    $cache->setEntireCache($emptyContents);
    return $emptyContents;
}
|
|
|
|
/**
 * Get institution IDs for a given context based on the IP, use cache if exists.
 *
 * The cache is organized as hashedIp => contextId => institutionIds[];
 * only the entry for the given hashed IP and context is returned.
 *
 * @param int $contextId Context ID
 * @param string $ip User IP
 * @param string $hashedIp Hashed user IP, used as the cache key
 * @param bool $flush If true empty cache
 *
 * @return array IDs of the institutions of the given context that match the IP
 */
public function getInstitutionIds(int $contextId, string $ip, string $hashedIp, bool $flush = false): array
{
    if (!isset($this->institutionDataCache)) {
        $institutionCacheManager = CacheManager::getManager();
        $this->institutionDataCache = $institutionCacheManager->getCache('institutionIP', 'all', [$this, 'institutionDataCacheMiss']);
    }

    if ($flush) {
        // Salt and thus hashed IPs changed, empty the cache.
        $this->institutionDataCacheMiss($this->institutionDataCache);
    }

    $cachedInstitutionData = $this->institutionDataCache->getContents();
    if (array_key_exists($hashedIp, $cachedInstitutionData) && array_key_exists($contextId, $cachedInstitutionData[$hashedIp])) {
        return $cachedInstitutionData[$hashedIp][$contextId];
    }

    $institutionIds = Repo::institution()->getCollector()
        ->filterByContextIds([$contextId])
        ->filterByIps([$ip])
        ->getIds()
        ->toArray();

    $cachedInstitutionData[$hashedIp][$contextId] = $institutionIds;
    $this->institutionDataCache->setEntireCache($cachedInstitutionData);
    return $institutionIds;
}
|
|
|
|
/**
 * Callback used when the institution cache is missed; also called directly to flush the cache.
 * Replaces the entire cache content with an empty array.
 *
 * @return array The (empty) cache content
 */
public function institutionDataCacheMiss(FileCache $cache): array
{
    $emptyContents = [];
    $cache->setEntireCache($emptyContents);
    return $emptyContents;
}
|
|
}
|
|
|
|
// Backwards compatibility: unless strict mode is on, expose the class under
// its global name and mirror the listed class constants as global constants.
if (!PKP_STRICT_MODE) {
    class_alias('\PKP\statistics\PKPStatisticsHelper', '\PKPStatisticsHelper');
    foreach ([
        'STATISTICS_DIMENSION_CONTEXT_ID',
        'STATISTICS_DIMENSION_SUBMISSION_ID',
        'STATISTICS_DIMENSION_REPRESENTATION_ID',
        'STATISTICS_DIMENSION_ASSOC_TYPE',
        'STATISTICS_DIMENSION_FILE_TYPE',
        'STATISTICS_DIMENSION_YEAR',
        'STATISTICS_DIMENSION_MONTH',
        'STATISTICS_DIMENSION_DAY',
        'STATISTICS_DIMENSION_DATE',
        'STATISTICS_DIMENSION_COUNTRY',
        'STATISTICS_DIMENSION_REGION',
        'STATISTICS_DIMENSION_CITY',
        'STATISTICS_METRIC',
        'STATISTICS_METRIC_UNIQUE',
        'STATISTICS_ORDER_ASC',
        'STATISTICS_ORDER_DESC',
        'STATISTICS_FILE_TYPE_HTML',
        'STATISTICS_FILE_TYPE_PDF',
        'STATISTICS_FILE_TYPE_OTHER',
        'STATISTICS_FILE_TYPE_DOC',
        'STATISTICS_EARLIEST_DATE',
        'COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS',
    ] as $constantName) {
        // Skip names that already exist as a global constant: define() would
        // only raise a warning and leave the existing value in place anyway.
        if (!defined($constantName)) {
            define($constantName, constant('\PKPStatisticsHelper::' . $constantName));
        }
    }
}
|