first commit

This commit is contained in:
CHIEFSOFT\ameye
2024-06-08 17:09:23 -04:00
commit df3a033196
17887 changed files with 8637778 additions and 0 deletions
@@ -0,0 +1,75 @@
<?php
/**
* @file jobs/statistics/ArchiveUsageStatsLogFile.php
*
* Copyright (c) 2024 Simon Fraser University
* Copyright (c) 2024 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class ArchiveUsageStatsLogFile
*
* @ingroup jobs
*
* @brief Archive usage stats log file.
*/
namespace PKP\jobs\statistics;
use APP\statistics\StatisticsHelper;
use Exception;
use PKP\file\FileManager;
use PKP\job\exceptions\JobException;
use PKP\jobs\BaseJob;
use PKP\site\Site;
use PKP\task\FileLoader;
class ArchiveUsageStatsLogFile extends BaseJob
{
    /**
     * The load ID = usage stats log file name
     */
    protected string $loadId;

    /**
     * The site whose 'compressStatsLogs' setting is read when the job runs.
     */
    protected Site $site;

    /**
     * Create a new job instance.
     *
     * @param string $loadId Usage stats log file name
     * @param Site $site Site providing the 'compressStatsLogs' setting
     */
    public function __construct(string $loadId, Site $site)
    {
        parent::__construct();
        $this->loadId = $loadId;
        $this->site = $site;
    }

    /**
     * Execute the job: move the log file from the dispatch folder to the
     * archive folder and, if the site setting asks for it, gzip the archived copy.
     *
     * @throws JobException If the file cannot be moved to the archive folder.
     */
    public function handle(): void
    {
        $logFileName = $this->loadId;
        $archiveFilePath = implode('/', [
            StatisticsHelper::getUsageStatsDirPath(),
            FileLoader::FILE_LOADER_PATH_ARCHIVE,
            $logFileName,
        ]);
        $dispatchFilePath = implode('/', [
            StatisticsHelper::getUsageStatsDirPath(),
            FileLoader::FILE_LOADER_PATH_DISPATCH,
            $logFileName,
        ]);

        // Move the processed file from dispatch to archive.
        $moved = rename($dispatchFilePath, $archiveFilePath);
        if (!$moved) {
            throw new JobException(__(
                'admin.job.archiveLogFile.error',
                [
                    'file' => $logFileName,
                    'dispatchFilePath' => $dispatchFilePath,
                    'archiveFilePath' => $archiveFilePath
                ]
            ));
        }

        if (!$this->site->getData('compressStatsLogs')) {
            return;
        }

        // Compression is best effort: a failure is logged, the job still succeeds.
        try {
            $fileManager = new FileManager();
            $fileManager->gzCompressFile($archiveFilePath);
        } catch (Exception $compressionError) {
            error_log($compressionError);
        }
    }
}
@@ -0,0 +1,47 @@
<?php
/**
* @file jobs/statistics/CompileContextMetrics.php
*
* Copyright (c) 2024 Simon Fraser University
* Copyright (c) 2024 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class CompileContextMetrics
*
* @ingroup jobs
*
* @brief Compile context metrics.
*/
namespace PKP\jobs\statistics;
use APP\statistics\TemporaryTotalsDAO;
use PKP\db\DAORegistry;
use PKP\jobs\BaseJob;
class CompileContextMetrics extends BaseJob
{
    /**
     * The load ID = usage stats log file name
     */
    protected string $loadId;

    /**
     * Create a new job instance.
     *
     * @param string $loadId Usage stats log file name
     */
    public function __construct(string $loadId)
    {
        parent::__construct();
        $this->loadId = $loadId;
    }

    /**
     * Execute the job: ask the temporary totals DAO to compile the context
     * metrics for this job's load ID.
     */
    public function handle(): void
    {
        /** @var TemporaryTotalsDAO $totalsDao */
        $totalsDao = DAORegistry::getDAO('TemporaryTotalsDAO');
        $totalsDao->compileContextMetrics($this->loadId);
    }
}
@@ -0,0 +1,66 @@
<?php
/**
* @file jobs/statistics/CompileMonthlyMetrics.php
*
* Copyright (c) 2022 Simon Fraser University
* Copyright (c) 2022 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class CompileMonthlyMetrics
*
* @ingroup jobs
*
* @brief Compile and store monthly usage stats from the daily records.
*/
namespace PKP\jobs\statistics;
use APP\core\Services;
use PKP\jobs\BaseJob;
use PKP\site\Site;
class CompileMonthlyMetrics extends BaseJob
{
    /**
     * The month the usage metrics should be aggregated by, in format YYYYMM.
     */
    protected string $month;

    /**
     * The site whose 'keepDailyUsageStats' setting is read when the job runs.
     */
    protected Site $site;

    /**
     * Create a new job instance.
     *
     * @param string $month In format YYYYMM
     * @param Site $site Site providing the 'keepDailyUsageStats' setting
     */
    public function __construct(string $month, Site $site)
    {
        parent::__construct();
        $this->month = $month;
        $this->site = $site;
    }

    /**
     * Execute the job.
     *
     * For the geo and the COUNTER (sushi) statistics services in turn: drop the
     * monthly metrics already stored for the month, rebuild them, and then
     * delete the daily records of that month unless the site keeps daily usage
     * stats or the month is the current or the previous calendar month.
     */
    public function handle(): void
    {
        $currentMonth = date('Ym');
        // A bare 'last month' keeps the day of month and overflows on days that
        // the previous month does not have (e.g. on March 31st it resolves to
        // early March), which would remove the protection of the real previous
        // month. Anchoring on the first day avoids the overflow.
        $lastMonth = date('Ym', strtotime('first day of last month'));

        foreach (['geoStats', 'sushiStats'] as $serviceName) {
            $statsService = Services::get($serviceName);
            $statsService->deleteMonthlyMetrics($this->month);
            $statsService->addMonthlyMetrics($this->month);
            if (
                !$this->site->getData('keepDailyUsageStats')
                && $this->month != $currentMonth
                && $this->month != $lastMonth
            ) {
                $statsService->deleteDailyMetrics($this->month);
            }
        }
    }
}
@@ -0,0 +1,47 @@
<?php
/**
* @file jobs/statistics/CompileSubmissionMetrics.php
*
* Copyright (c) 2024 Simon Fraser University
* Copyright (c) 2024 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class CompileSubmissionMetrics
*
* @ingroup jobs
*
* @brief Compile submission metrics.
*/
namespace PKP\jobs\statistics;
use APP\statistics\TemporaryTotalsDAO;
use PKP\db\DAORegistry;
use PKP\jobs\BaseJob;
class CompileSubmissionMetrics extends BaseJob
{
    /**
     * The load ID = usage stats log file name
     */
    protected string $loadId;

    /**
     * Create a new job instance.
     *
     * @param string $loadId Usage stats log file name
     */
    public function __construct(string $loadId)
    {
        parent::__construct();
        $this->loadId = $loadId;
    }

    /**
     * Execute the job: ask the temporary totals DAO to compile the submission
     * metrics for this job's load ID.
     */
    public function handle(): void
    {
        /** @var TemporaryTotalsDAO $totalsDao */
        $totalsDao = DAORegistry::getDAO('TemporaryTotalsDAO');
        $totalsDao->compileSubmissionMetrics($this->loadId);
    }
}
@@ -0,0 +1,185 @@
<?php
/**
* @file jobs/statistics/PKPProcessUsageStatsLogFile.php
*
* Copyright (c) 2024 Simon Fraser University
* Copyright (c) 2024 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class PKPProcessUsageStatsLogFile
*
* @ingroup jobs
*
* @brief Parse a usage stats log file, validate its entries and insert them into the usage stats temporary tables.
*/
namespace PKP\jobs\statistics;
use APP\statistics\StatisticsHelper;
use DateTime;
use Exception;
use PKP\core\Core;
use PKP\job\exceptions\JobException;
use PKP\jobs\BaseJob;
use PKP\task\FileLoader;
use SplFileObject;
abstract class PKPProcessUsageStatsLogFile extends BaseJob
{
    /**
     * Create a new job instance.
     *
     * @param string $loadId Usage stats log file name
     */
    public function __construct(protected string $loadId)
    {
        parent::__construct();
    }

    /**
     * Delete entries in usage stats temporary tables by loadId
     */
    abstract protected function deleteByLoadId(): void;

    /**
     * Get valid assoc types that an usage event can contain
     */
    abstract protected function getValidAssocTypes(): array;

    /**
     * Insert usage stats log entry into temporary tables
     */
    abstract protected function insertTemporaryUsageStatsData(object $entry, int $lineNumber): void;

    /**
     * Execute the job: locate the log file in the dispatch folder and process it.
     *
     * @throws JobException If the log file does not exist or cannot be opened.
     */
    public function handle(): void
    {
        $filename = $this->loadId;
        $dispatchFilePath = StatisticsHelper::getUsageStatsDirPath() . '/' . FileLoader::FILE_LOADER_PATH_DISPATCH . '/' . $filename;
        if (!file_exists($dispatchFilePath)) {
            throw new JobException(__(
                'admin.job.processLogFile.fileNotFound',
                ['file' => $dispatchFilePath]
            ));
        }
        $this->process($dispatchFilePath);
    }

    /**
     * Parse log file line by line and add the lines into the usage stats temporary DB tables.
     *
     * Lines that are blank, start with '#', are not a JSON object, fail
     * validation or come from a bot user agent are skipped; malformed and
     * invalid lines are additionally written to the error log.
     *
     * @throws JobException If the log file cannot be opened.
     */
    protected function process(string $dispatchFilePath): void
    {
        try {
            // SplFileObject signals failure by throwing, it never returns a falsy value.
            $splFileObject = new SplFileObject($dispatchFilePath, 'r');
        } catch (\RuntimeException | \LogicException $e) {
            // reject file -- move the file from dispatch to reject folder
            $filename = $this->loadId;
            $rejectFilePath = StatisticsHelper::getUsageStatsDirPath() . '/' . FileLoader::FILE_LOADER_PATH_REJECT . '/' . $filename;
            if (!rename($dispatchFilePath, $rejectFilePath)) {
                $message = __('admin.job.compileMetrics.returnToStaging.error', ['file' => $filename,
                    'dispatchFilePath' => $dispatchFilePath, 'rejectFilePath' => $rejectFilePath]);
                error_log($message);
            }
            throw new JobException(__('admin.job.processLogFile.openFileFailed', ['file' => $dispatchFilePath]));
        }

        // Make sure we don't have any temporary records associated
        // with the current load ID in database.
        $this->deleteByLoadId();

        $lineNumber = 0;
        while (!$splFileObject->eof()) {
            $lineNumber++;
            // fgets() keeps the line terminator, so trim before testing for blank lines.
            $line = trim((string) $splFileObject->fgets());
            if ($line === '' || $line[0] === '#') {
                continue;
            } // Spacing or comment lines. This actually should not occur in the new format.

            $entryData = json_decode($line);
            if (!is_object($entryData)) {
                // This line is not in the right format: invalid JSON, or valid
                // JSON that is not an object (scalar or array).
                $message = __(
                    'admin.job.processLogFile.wrongLoglineFormat',
                    ['file' => $this->loadId, 'lineNumber' => $lineNumber]
                );
                error_log($message);
                continue;
            }

            try {
                $this->validateLogEntry($entryData);
            } catch (Exception $e) {
                $message = __(
                    'admin.job.processLogFile.invalidLogEntry',
                    ['file' => $this->loadId, 'lineNumber' => $lineNumber, 'error' => $e->getMessage()]
                );
                error_log($message);
                continue;
            }

            // Avoid bots.
            if (Core::isUserAgentBot($entryData->userAgent)) {
                continue;
            }

            $this->insertTemporaryUsageStatsData($entryData, $lineNumber);
        }

        //explicitly assign null, so that the file can be deleted
        $splFileObject = null;
    }

    /**
     * Validate the usage stats log entry
     *
     * Missing or wrongly typed required properties (time, contextId,
     * assocType, institutionIds) are reported as invalid instead of raising
     * PHP warnings or type errors.
     *
     * @throws Exception If any property of the entry is missing or invalid.
     */
    protected function validateLogEntry(object $entry): void
    {
        $time = $entry->time ?? null;
        if (!is_string($time) || !$this->validateDate($time)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.time'));
        }
        // check hashed IP ?
        // check canonicalUrl ?
        if (!is_int($entry->contextId ?? null)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.contextId'));
        }
        if (!empty($entry->submissionId) && !is_int($entry->submissionId)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.submissionId'));
        }
        $validAssocTypes = $this->getValidAssocTypes();
        if (!isset($entry->assocType) || !in_array($entry->assocType, $validAssocTypes)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.assocType'));
        }
        $validFileTypes = [
            StatisticsHelper::STATISTICS_FILE_TYPE_PDF,
            StatisticsHelper::STATISTICS_FILE_TYPE_DOC,
            StatisticsHelper::STATISTICS_FILE_TYPE_HTML,
            StatisticsHelper::STATISTICS_FILE_TYPE_OTHER,
        ];
        if (!empty($entry->fileType) && !in_array($entry->fileType, $validFileTypes)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.fileType'));
        }
        // The ctype_* functions interpret small integers as ASCII codes, so
        // only string values are allowed to reach them.
        if (
            !empty($entry->country)
            && (!is_string($entry->country) || !ctype_alpha($entry->country) || strlen($entry->country) !== 2)
        ) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.country'));
        }
        if (
            !empty($entry->region)
            && (!is_string($entry->region) || !ctype_alnum($entry->region) || strlen($entry->region) > 3)
        ) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.region'));
        }
        if (!is_array($entry->institutionIds ?? null)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.institutionIds'));
        }
    }

    /**
     * Validate date, check if the date is a valid date and in requested format
     *
     * The parsed date is formatted back and compared with the input, so
     * values that only parse through overflow (e.g. a 31st day in a 30 day
     * month) are rejected.
     */
    protected function validateDate(string $datetime, string $format = 'Y-m-d H:i:s'): bool
    {
        $d = DateTime::createFromFormat($format, $datetime);
        return $d && $d->format($format) === $datetime;
    }
}
@@ -0,0 +1,48 @@
<?php
/**
* @file jobs/statistics/RemoveDoubleClicks.php
*
* Copyright (c) 2024 Simon Fraser University
* Copyright (c) 2024 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class RemoveDoubleClicks
*
* @ingroup jobs
*
* @brief Remove Double Clicks according to COUNTER guidelines.
*/
namespace PKP\jobs\statistics;
use APP\statistics\StatisticsHelper;
use APP\statistics\TemporaryTotalsDAO;
use PKP\db\DAORegistry;
use PKP\jobs\BaseJob;
class RemoveDoubleClicks extends BaseJob
{
    /**
     * The load ID = usage stats log file name
     */
    protected string $loadId;

    /**
     * Create a new job instance.
     *
     * @param string $loadId Usage stats log file name
     */
    public function __construct(string $loadId)
    {
        parent::__construct();
        $this->loadId = $loadId;
    }

    /**
     * Execute the job: ask the temporary totals DAO to remove double clicks
     * for this job's load ID, using the COUNTER double click time filter
     * (in seconds) defined by the statistics helper.
     */
    public function handle(): void
    {
        /** @var TemporaryTotalsDAO $totalsDao */
        $totalsDao = DAORegistry::getDAO('TemporaryTotalsDAO');
        $doubleClickWindow = StatisticsHelper::COUNTER_DOUBLE_CLICK_TIME_FILTER_SECONDS;
        $totalsDao->removeDoubleClicks($this->loadId, $doubleClickWindow);
    }
}