1085 lines
48 KiB
PHP
1085 lines
48 KiB
PHP
<?php
|
|
|
|
/**
|
|
* @file lib/pkp/classes/cliTool/traits/ConvertLogFile.php
|
|
*
|
|
* Copyright (c) 2022 Simon Fraser University
|
|
* Copyright (c) 2022 John Willinsky
|
|
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
|
*
|
|
* @class ConvertLogFile
|
|
*
|
|
* @brief Trait to convert usage stats log file (used in releases < 3.4) into the new format.
|
|
*
|
|
* Special cases from the release 2.x:
|
|
* HTML and remote galley:
|
|
* article/view/articleId/galleyId.
|
|
*
|
|
* Supp File:
|
|
* article/downloadSuppFile/articleId/galleyId
|
|
*/
|
|
|
|
namespace PKP\cliTool\traits;
|
|
|
|
use APP\core\Application;
|
|
use APP\facades\Repo;
|
|
use APP\statistics\StatisticsHelper;
|
|
use DateTime;
|
|
use Exception;
|
|
use PKP\core\Core;
|
|
use PKP\core\Registry;
|
|
use PKP\db\DAORegistry;
|
|
use PKP\file\FileManager;
|
|
use PKP\submission\Genre;
|
|
|
|
// Make sure the STDERR stream constant exists (it is only predefined by the CLI SAPI),
// because the trait writes all its diagnostics to it.
defined('STDERR') || define('STDERR', fopen('php://stderr', 'w'));
|
|
|
|
trait ConvertLogFile
|
|
{
|
|
/** List of contexts by their paths */
|
|
public array $contextsByPath;
|
|
|
|
/**
 * Trait "constructor": load all contexts and index them by their URL path.
 *
 * Fills $this->contextsByPath with path => context pairs taken from
 * the application's context DAO.
 */
public function __constructTrait()
{
    $allContexts = Application::getContextDAO()->getAll();
    $this->contextsByPath = [];
    /** @var \PKP\context\Context $currentContext */
    while (($currentContext = $allContexts->next())) {
        $path = $currentContext->getPath();
        $this->contextsByPath[$path] = $currentContext;
    }
}
|
|
|
|
/**
|
|
* Get the folder the log file is in.
|
|
*/
|
|
abstract public function getLogFileDir(): string;
|
|
|
|
/**
|
|
* Get regular expression to parse log file entries
|
|
*/
|
|
abstract public function getParseRegex(): string;
|
|
|
|
/**
|
|
* Get the datetime format used in the log file
|
|
*/
|
|
abstract public function getPhpDateTimeFormat(): string;
|
|
|
|
/**
|
|
* Whether the URL parameters are used instead of CGI PATH_INFO.
|
|
* This will determine how URLs are parsed.
|
|
*/
|
|
abstract public function isPathInfoDisabled(): bool;
|
|
|
|
/**
|
|
* Whether this is an apache access log file
|
|
*/
|
|
abstract public function isApacheAccessLogFile(): bool;
|
|
|
|
/**
 * Convert log file into the new format.
 *
 * Each usable line of the old file is parsed and written as one JSON encoded
 * line to a temporary '<file name>_new.log' file. Empty lines, comment lines
 * (starting with '#'), invalid entries, internal apache requests ('*'),
 * requests with a return code other than 200/304 and URLs that cannot be
 * mapped to a context and an assoc type are skipped.
 *
 * If at least one entry could be written, the old log file is renamed
 * ('_old' is added at the end of the file name) and the new file takes over
 * the original name. A '.gz' file is decompressed before the conversion and
 * both resulting files are compressed again afterwards.
 *
 * On failure an error is written to STDERR and the script exits with:
 * 1 = decompression failed, 2 = log file not readable, 3 = new file not writable,
 * 4 = renaming the old file failed, 5 = renaming the new file failed,
 * 6 = compression failed, 7 = no entry could be converted.
 *
 * @param string $fileName Name of the log file inside the folder returned by getLogFileDir()
 */
public function convert(string $fileName): void
{
    $filePath = $this->getLogFileDir() . '/' . $fileName;

    $pathParts = pathinfo($filePath);
    // pathinfo() only returns the 'extension' key if the file name has an extension.
    $extension = $pathParts['extension'] ?? '';

    $newFilePath = $this->getLogFileDir() . '/' . $pathParts['filename'] . '_new.log';
    if ($extension === 'gz') {
        $fileMgr = new FileManager();
        try {
            $filePath = $fileMgr->gzDecompressFile($filePath);
        } catch (Exception $e) {
            fwrite(STDERR, $e->getMessage() . PHP_EOL);
            exit(1);
        }
    }

    $fhandle = fopen($filePath, 'r');
    if (!$fhandle) {
        fwrite(STDERR, "Error: Can not open file {$filePath}." . PHP_EOL);
        exit(2);
    }

    $fnewHandle = fopen($newFilePath, 'a+b');
    if (!$fnewHandle) {
        fwrite(STDERR, "Error: Can not open file {$newFilePath}." . PHP_EOL);
        exit(3);
    }

    // Read the salt for IP hashing here and not for each line
    $saltFileName = StatisticsHelper::getSaltFileName();
    // Create salt file and salt for the first time
    if (!file_exists($saltFileName)) {
        $salt = StatisticsHelper::createNewSalt($saltFileName);
        // Salt changed, flush the cache
        $flushCache = true;
    } else {
        $salt = trim(file_get_contents($saltFileName));
        $flushCache = false;
    }

    // Only successful requests are converted.
    $successfulReturnCodes = [200, 304];

    $lineNumber = 0;
    $isSuccessful = false;
    while (!feof($fhandle)) {
        $newEntry = [];
        $lineNumber++;
        $rawLine = fgets($fhandle);
        if ($rawLine === false) {
            // Nothing more can be read (end of file or read error). Stop here
            // instead of relying on feof() alone, which could loop forever.
            break;
        }
        $line = trim($rawLine);
        if (empty($line) || substr($line, 0, 1) === '#') {
            continue;
        } // Spacing or comment lines.

        $entryData = $this->getDataFromLogEntry($line);

        if (!$this->isLogEntryValid($entryData)) {
            fwrite(STDERR, "Invalid log entry at line {$lineNumber}." . PHP_EOL);
            continue;
        }

        // Avoid internal apache requests.
        if ($entryData['url'] == '*') {
            continue;
        }

        // Avoid non successful requests.
        // The return code comes from the regex as a string, hence the loose comparison.
        if (!in_array($entryData['returnCode'], $successfulReturnCodes)) {
            continue;
        }

        $newEntry['time'] = $entryData['date'];

        $ip = $entryData['ip'];
        $ipNotHashed = filter_var($ip, FILTER_VALIDATE_IP);
        if ($ipNotHashed) {
            // valid IP address i.e. the IP is not hashed
            $hashedIp = StatisticsHelper::hashIp($ip, $salt);
            $newEntry['ip'] = $hashedIp;
        } else {
            // check if it is a string(64) i.e. sha256 ?
            $newEntry['ip'] = $ip;
        }

        $newEntry['userAgent'] = $entryData['userAgent'];
        $newEntry['canonicalUrl'] = $entryData['url'];

        [
            'workingAssocType' => $assocType,
            'contextPaths' => $contextPaths,
            'page' => $page,
            'operation' => $op,
            'args' => $args
        ] = $this->getUrlMatches($entryData['url'], $lineNumber);

        if (!($assocType && $contextPaths && $page && $op)) {
            // The URL is not one of the expected ones.
            continue;
        }

        $foundContextPath = current($contextPaths);
        if (!array_key_exists($foundContextPath, $this->contextsByPath)) {
            fwrite(STDERR, "Context with the path {$foundContextPath} does not exist." . PHP_EOL);
            continue;
        }
        $context = $this->contextsByPath[$foundContextPath];
        $newEntry['contextId'] = $context->getId();

        // setAssoc() only sets 'assocType' if the URL arguments could be resolved.
        $this->setAssoc($assocType, $op, $args, $newEntry);
        if (!array_key_exists('assocType', $newEntry)) {
            if (!$this->isApacheAccessLogFile()) {
                fwrite(STDERR, "The URL {$entryData['url']} in the line number {$lineNumber} was not considered." . PHP_EOL);
            }
            continue;
        }

        // Geo data (can only be determined if the IP is not hashed)
        $country = $region = $city = null;
        if ($ipNotHashed) {
            $statisticsHelper = new StatisticsHelper();
            $site = Application::get()->getRequest()->getSite();
            [$country, $region, $city] = $statisticsHelper->getGeoData($site, $context, $ip, $hashedIp, $flushCache);
        }
        $newEntry['country'] = $country;
        $newEntry['region'] = $region;
        $newEntry['city'] = $city;

        // institutions IDs
        // $site and $statisticsHelper are set above whenever $ipNotHashed is truthy.
        $institutionIds = [];
        if ($ipNotHashed && $context->isInstitutionStatsEnabled($site)) {
            $institutionIds = $statisticsHelper->getInstitutionIds($context->getId(), $ip, $hashedIp, $flushCache);
        }
        $newEntry['institutionIds'] = $institutionIds;

        $newEntry['version'] = Registry::get('appVersion');

        // write to a new file
        $newLogEntry = json_encode($newEntry) . PHP_EOL;
        fwrite($fnewHandle, $newLogEntry);
        $isSuccessful = true;
    }
    fclose($fhandle);
    fclose($fnewHandle);

    if (!$isSuccessful) {
        fwrite(STDERR, "Error: File {$filePath} could not be successfully converted." . PHP_EOL);
        exit(7);
    }

    $renameToOldFilePath = $this->getLogFileDir() . '/' . $pathParts['filename'] . '_old.log';
    if (!rename($filePath, $renameToOldFilePath)) {
        fwrite(STDERR, "Error: Could not rename the file {$filePath} to {$renameToOldFilePath}." . PHP_EOL);
        exit(4);
    }
    if (!$this->isApacheAccessLogFile() && !Application::isUnderMaintenance()) {
        // This is not important information for the apache log file conversion --
        // the file is in a temporary folder that will be removed.
        echo "The original file is renamed to {$renameToOldFilePath}.\n";
    }

    if (!rename($newFilePath, $filePath)) {
        fwrite(STDERR, "Error: Could not rename the new file {$newFilePath} to {$filePath}." . PHP_EOL);
        exit(5);
    }
    if (!Application::isUnderMaintenance()) {
        echo "File {$filePath} is converted.";
    }

    if ($extension === 'gz') {
        // The original file was compressed, so compress both resulting files again.
        try {
            $renameToOldFilePath = $fileMgr->gzCompressFile($renameToOldFilePath);
            $filePath = $fileMgr->gzCompressFile($filePath);
        } catch (Exception $e) {
            fwrite(STDERR, $e->getMessage() . PHP_EOL);
            exit(6);
        }
    }
}
|
|
|
|
/**
 * Get data from the passed log entry.
 *
 * The entry is matched against the regular expression returned by getParseRegex().
 * Both named groups (ip, date, url, returnCode, userAgent) and positional
 * groups (1 to 5, in that order) are supported.
 *
 * @param string $entry One line of the log file
 *
 * @return array Array with the keys 'ip', 'date' (formatted as Y-m-d H:i:s),
 *  'url' (URL decoded), 'returnCode' and 'userAgent', or an empty array if the
 *  entry does not match the regular expression or its date cannot be parsed
 *  with the format returned by getPhpDateTimeFormat().
 */
protected function getDataFromLogEntry(string $entry): array
{
    if (!preg_match($this->getParseRegex(), $entry, $m)) {
        return [];
    }

    // If the regex uses named groups, the matches contain string keys.
    $associative = count(array_filter(array_keys($m), 'is_string')) > 0;

    $time = $associative ? $m['date'] : $m[2];
    $dateTime = DateTime::createFromFormat($this->getPhpDateTimeFormat(), $time);
    if ($dateTime === false) {
        // createFromFormat() returns false if the date does not fit the format.
        // Report the entry as not parsable instead of failing on ->format().
        return [];
    }

    return [
        'ip' => $associative ? $m['ip'] : $m[1],
        'date' => $dateTime->format('Y-m-d H:i:s'),
        // Apache log file URL can be relative, is this OK to be so in the new format or what to do in that case?
        'url' => urldecode($associative ? $m['url'] : $m[3]),
        'returnCode' => $associative ? $m['returnCode'] : $m[4],
        'userAgent' => $associative ? $m['userAgent'] : $m[5],
    ];
}
|
|
|
|
/**
 * Validate a log entry.
 *
 * An empty entry is invalid. Otherwise the entry is only rejected if its
 * date is not numeric and compares as lower than or equal to 0.
 * This check may not make much sense, but it is kept as it was used till now.
 *
 * @param array $entry Entry data as returned by getDataFromLogEntry()
 */
protected function isLogEntryValid(array $entry): bool
{
    if (!$entry) {
        return false;
    }
    $entryDate = $entry['date'];
    return is_numeric($entryDate) || !($entryDate <= 0);
}
|
|
|
|
/**
 * Get assoc type, context paths, page, operation and args from the passed url,
 * if the page and operation match any that's defined in getExpectedPageAndOp().
 *
 * @param string $url URL taken from the log entry
 * @param int $lineNumber Line number of the entry, only used for the error messages
 *
 * @return array Array with the keys 'workingAssocType', 'contextPaths', 'page',
 *  'operation' and 'args'. All values are null if nothing matches.
 */
protected function getUrlMatches(string $url, int $lineNumber): array
{
    $nothingMatched = [
        'workingAssocType' => null,
        'contextPaths' => null,
        'page' => null,
        'operation' => null,
        'args' => null
    ];

    $pageAndOpsByAssocType = $this->getExpectedPageAndOp();

    // Apache and usage stats plugin log files come with complete or partial base url,
    // remove it so we can retrieve path, page, operation and args.
    $url = Core::removeBaseUrl($url);
    if (!$url) {
        // Could not remove the base URL, can't go on.
        fwrite(STDERR, "The line number {$lineNumber} contains an url that the system can't remove the base URL from." . PHP_EOL);
        return $nothingMatched;
    }

    $contextPaths = $this->getContextPaths($url, !$this->isPathInfoDisabled());
    $page = Core::getPage($url, !$this->isPathInfoDisabled());
    $operation = Core::getOp($url, !$this->isPathInfoDisabled());
    $args = Core::getArgs($url, !$this->isPathInfoDisabled());

    // In apache access log files there could be all kind of URLs, e.g.
    // /favicon.ico, /plugins/..., /lib/pkp/...
    // Those do not start with a known context path, so stop here silently.
    if ($this->isApacheAccessLogFile()) {
        $knownContextPaths = array_intersect($contextPaths, array_keys($this->contextsByPath));
        if (empty($knownContextPaths)) {
            return $nothingMatched;
        }
    }

    // See bug #8698#.
    if (is_array($contextPaths) && !$page && $operation == 'index') {
        $page = 'index';
    }

    if (empty($contextPaths) || !$page || !$operation) {
        fwrite(STDERR, 'Either context paths, page or operation could not be parsed from the URL correctly.' . PHP_EOL);
        return $nothingMatched;
    }

    $requestedPageAndOp = $page . '/' . $operation;
    foreach ($pageAndOpsByAssocType as $candidateAssocType => $candidatePageAndOps) {
        // The first assoc type that lists the page/operation wins.
        if (in_array($requestedPageAndOp, $candidatePageAndOps)) {
            return [
                'workingAssocType' => $candidateAssocType,
                'contextPaths' => $contextPaths,
                'page' => $page,
                'operation' => $operation,
                'args' => $args
            ];
        }
    }

    if (!$this->isApacheAccessLogFile()) {
        fwrite(STDERR, "No matching page and operation found on line number {$lineNumber}." . PHP_EOL);
    }
    return $nothingMatched;
}
|
|
|
|
/**
 * Get the expected "page/operation" combinations of the running application.
 * They are grouped by the object type constant (assoc type) that
 * they give access to.
 *
 * @return array Assoc type => list of "page/operation" strings
 *
 * @throws Exception If the application name is neither ojs2, omp nor ops
 */
protected function getExpectedPageAndOp(): array
{
    $contextAssocType = Application::getContextAssocType();
    $appName = Application::get()->getName();

    // Per application: the object specific combinations and the additional
    // entry that is appended to the context (index page) combinations.
    [$objectPageAndOp, $extraContextEntry] = match ($appName) {
        'ojs2' => [
            [
                Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER => ['article/downloadSuppFile'],
                Application::ASSOC_TYPE_SUBMISSION_FILE => ['article/download'],
                Application::ASSOC_TYPE_SUBMISSION => ['article/view'],
                Application::ASSOC_TYPE_ISSUE => ['issue/view'],
                Application::ASSOC_TYPE_ISSUE_GALLEY => ['issue/download'],
            ],
            'index',
        ],
        // Before 3.4 OMP did not have chapter assoc type i.e. chapter landing page
        // so no need to consider it here
        'omp' => [
            [
                Application::ASSOC_TYPE_SUBMISSION_FILE => ['catalog/download'],
                Application::ASSOC_TYPE_MONOGRAPH => ['catalog/book'],
                Application::ASSOC_TYPE_SERIES => ['catalog/series'],
            ],
            'catalog/index',
        ],
        'ops' => [
            [
                Application::ASSOC_TYPE_SUBMISSION_FILE => ['preprint/download'],
                Application::ASSOC_TYPE_SUBMISSION => ['preprint/view'],
            ],
            'index',
        ],
        default => throw new Exception('Unrecognized application name.'),
    };

    // Array union: the context entry comes first and is never overwritten.
    $expected = [$contextAssocType => ['index/index']] + $objectPageAndOp;
    $expected[$contextAssocType][] = $extraContextEntry;
    return $expected;
}
|
|
|
|
/**
 * Get the context paths contained in the passed url information.
 *
 * With path info the first path segment is used, otherwise the query
 * parameter named after the application's context is read. A missing or empty
 * path is replaced with 'index' and every path is cleaned with Core::cleanFileVar().
 *
 * @param string $urlInfo URL without the base URL
 * @param bool $isPathInfo Whether the URL uses path info (true) or query parameters (false)
 *
 * @return array List with one context path
 */
protected static function getContextPaths(string $urlInfo, bool $isPathInfo): array
{
    $paths = [];
    $contextNames = [Application::get()->getContextName()]; // Was $application->getContextList();
    $depth = 1; // Was $application->getContextDepth();

    if (!$isPathInfo) {
        // Retrieve context from url query string
        foreach ($contextNames as $position => $name) {
            parse_str((string) parse_url($urlInfo, PHP_URL_QUERY), $queryVars);
            $paths[$position] = $queryVars[$name] ?? null;
        }
    } else {
        // Split the path info into its constituents. The limit keeps everything
        // that is not context related together in $paths[$depth] ...
        $paths = explode('/', trim((string) $urlInfo, '/'), $depth + 1);
        // ... which is then removed (if present)
        unset($paths[$depth]);
    }

    // Canonicalize and clean context paths
    for ($position = 0; $position < $depth; $position++) {
        $rawPath = empty($paths[$position]) ? 'index' : $paths[$position];
        $paths[$position] = Core::cleanFileVar($rawPath);
    }
    return $paths;
}
|
|
|
|
/**
 * Get the page contained in the passed url information.
 * It expects that urls were built using the system.
 *
 * @param string $urlInfo URL without the base URL
 * @param bool $isPathInfo Whether the URL uses path info (true) or query parameters (false)
 *
 * @return string The cleaned page, based on an empty string if the URL contains none
 */
protected static function getPage(string $urlInfo, bool $isPathInfo): string
{
    $rawPage = self::getUrlComponents($urlInfo, $isPathInfo, 0, 'page') ?? '';
    return Core::cleanFileVar($rawPage);
}
|
|
|
|
/**
 * Get the operation contained in the passed url information.
 * It expects that urls were built using the system.
 *
 * @param string $urlInfo URL without the base URL
 * @param bool $isPathInfo Whether the URL uses path info (true) or query parameters (false)
 *
 * @return string The cleaned operation, based on 'index' if the URL contains none
 */
protected static function getOp(string $urlInfo, bool $isPathInfo): string
{
    $rawOperation = self::getUrlComponents($urlInfo, $isPathInfo, 1, 'op');
    if (empty($rawOperation)) {
        $rawOperation = 'index';
    }
    return Core::cleanFileVar($rawOperation);
}
|
|
|
|
/**
 * Get the arguments contained in the passed url information
 * (not GET/POST arguments, only arguments appended to the URL
 * separated by "/", respectively the "path" variable).
 * It expects that urls were built using the system.
 *
 * @param string $urlInfo URL without the base URL
 * @param bool $isPathInfo Whether the URL uses path info (true) or query parameters (false)
 */
protected static function getArgs(string $urlInfo, bool $isPathInfo): array
{
    $pathArguments = self::getUrlComponents($urlInfo, $isPathInfo, 2, 'path');
    return $pathArguments;
}
|
|
|
|
/**
 * Get url components (page, operation and args)
 * based on the passed offset.
 *
 * With path info the URL is split at "/" and the segment at position
 * (context depth + offset) is used. Otherwise the query parameter $varName is read.
 *
 * @param string $urlInfo URL without the base URL
 * @param bool $isPathInfo Whether the URL uses path info (true) or query parameters (false)
 * @param int $offset Position of the component after the context path (0 = page, 1 = operation, 2 = args)
 * @param string $varName Query parameter name of the component; 'path' marks the array component (args)
 *
 * @return mixed For 'path' always an array (all segments from the offset on, or the
 *  query values; empty if there are none). For every other component a string,
 *  or null if the component is missing or is not a single value.
 */
protected static function getUrlComponents(string $urlInfo, bool $isPathInfo, int $offset, string $varName = ''): mixed
{
    $contextDepth = 1; // Was $application->getContextDepth();
    $isArrayComponent = $varName === 'path';
    $component = null;

    if ($isPathInfo) {
        $vars = explode('/', trim($urlInfo, '/'));
        $position = $contextDepth + $offset;
        if (count($vars) > $position) {
            $component = $isArrayComponent ? array_slice($vars, $position) : $vars[$position];
        }
    } else {
        parse_str((string) parse_url($urlInfo, PHP_URL_QUERY), $userVarsFromUrl);
        $component = $userVarsFromUrl[$varName] ?? null;
    }

    if ($isArrayComponent) {
        if (empty($component)) {
            return [];
        }
        return is_array($component) ? $component : [$component];
    }

    // parse_str() creates arrays for parameters like "page[]=x". Such a value
    // is not a valid single component, so treat it as missing.
    return is_string($component) ? $component : null;
}
|
|
|
|
/**
 * Set assoc type and IDs in the new log entry by delegating to the
 * method specific to the running application (OJS, OMP or OPS).
 *
 * @param int $assocType Assoc type the URL's page and operation belong to
 * @param string $op Operation of the URL (not used here)
 * @param array $args Arguments of the URL
 * @param array $newEntry New log entry that is extended by the called method
 *
 * @throws Exception If the application name is neither ojs2, omp nor ops
 */
protected function setAssoc(int $assocType, string $op, array $args, array &$newEntry): void
{
    match (Application::get()->getName()) {
        'ojs2' => $this->setOJSAssoc($assocType, $args, $newEntry),
        'omp' => $this->setOMPAssoc($assocType, $args, $newEntry),
        'ops' => $this->setOPSAssoc($assocType, $args, $newEntry),
        default => throw new Exception('Unrecognized application name!'),
    };
}
|
|
|
|
/**
 * Set assoc type and IDs from the passed assoc type and URL
 * arguments specific to OJS.
 *
 * On success the keys 'assocType', 'submissionId', 'representationId',
 * 'submissionFileId', 'fileType', 'issueId' and 'issueGalleyId' are set in
 * $newEntry. If the arguments are missing or do not point to existing objects,
 * a message is written to STDERR and $newEntry is left without 'assocType',
 * which tells the caller to skip the log entry.
 *
 * @param int $assocType Assoc type the URL's page and operation belong to
 * @param array $args Arguments of the URL
 * @param array $newEntry New log entry; $newEntry['contextId'] must already be set
 */
protected function setOJSAssoc(int $assocType, array $args, array &$newEntry): void
{
    switch ($assocType) {
        case Application::getContextAssocType():
            // $newEntry['contextId'] has already been set
            $newEntry['assocType'] = $assocType;
            $newEntry['submissionId'] = null;
            $newEntry['representationId'] = null;
            $newEntry['submissionFileId'] = null;
            $newEntry['fileType'] = null;
            $newEntry['issueId'] = null;
            $newEntry['issueGalleyId'] = null;
            break;

        case Application::ASSOC_TYPE_SUBMISSION:
            if (!isset($args[0])) {
                fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                break;
            }
            $submissionId = (int) $args[0];
            if (!Repo::submission()->exists($submissionId, $newEntry['contextId'])) {
                fwrite(STDERR, "Submission with the ID {$submissionId} does not exist in the journal with the ID {$newEntry['contextId']}." . PHP_EOL);
                break;
            }
            // If it is an older submission version, the arguments must be:
            // $submissionId/version/$publicationId.
            if (in_array('version', $args, true)) {
                if (($args[1] ?? null) !== 'version' || !isset($args[2])) {
                    fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>.' . PHP_EOL);
                    break;
                }
                $publicationId = (int) $args[2];
                if (!Repo::publication()->exists($publicationId, $submissionId)) {
                    fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                    break;
                }
            } elseif (count($args) == 2) {
                // Consider usage stats log files from OJS releases 2.x:
                // The URL article/view/{$articleId}/{$galleyId} was used for assoc type galley (HTML and remote galleys).
                $representationId = (int) $args[1];
                $galley = Repo::galley()->get($representationId);
                if (!$galley) {
                    // Same handling as for the download URLs: skip the entry
                    // instead of failing on a galley that does not exist.
                    fwrite(STDERR, "Galley with the ID {$representationId} does not exist." . PHP_EOL);
                    break;
                }
                $submissionFileId = $galley->getData('submissionFileId');
                if (!$submissionFileId) {
                    fwrite(STDERR, 'This is a remote galley from release 2.x.' . PHP_EOL);
                    break;
                }
                $submissionFile = Repo::submissionFile()->get($submissionFileId, $submissionId);
                if (!$submissionFile) {
                    fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                    break;
                }
                // This should be then the HTML full text file
                $newEntry['assocType'] = Application::ASSOC_TYPE_SUBMISSION_FILE;
                $newEntry['submissionId'] = $submissionId;
                $newEntry['representationId'] = $representationId;
                $newEntry['submissionFileId'] = $submissionFileId;
                $newEntry['fileType'] = StatisticsHelper::getDocumentType($submissionFile->getData('mimetype'));
                $newEntry['issueId'] = null;
                $newEntry['issueGalleyId'] = null;
                break;
            }
            // Abstract / landing page of the submission
            $newEntry['submissionId'] = $submissionId;
            $newEntry['assocType'] = $assocType;
            $newEntry['representationId'] = null;
            $newEntry['submissionFileId'] = null;
            $newEntry['fileType'] = null;
            $newEntry['issueId'] = null;
            $newEntry['issueGalleyId'] = null;
            break;

        case Application::ASSOC_TYPE_SUBMISSION_FILE:
            if (!isset($args[0])) {
                fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                break;
            }
            if (!isset($args[1])) {
                fwrite(STDERR, 'Missing galley ID URL parameter.' . PHP_EOL);
                break;
            }

            $submissionId = (int) $args[0];
            $submissionExists = Repo::submission()->exists($submissionId, $newEntry['contextId']);
            if (!$submissionExists) {
                fwrite(STDERR, "Submission with the ID {$submissionId} does not exist in the journal with the ID {$newEntry['contextId']}." . PHP_EOL);
                break;
            }

            // If it is an older submission version, the arguments must be:
            // $submissionId/version/$publicationId/$representationId/$submissionFileId.
            // Consider also release 2.x where log files can contain URL
            // download/$submissionId/$representationId i.e. without $submissionFileId argument.
            $publicationId = $submissionFileId = null; // do not necessarily exist
            if (in_array('version', $args, true)) {
                // This is a newer log file and it should contain submissionId in this case
                if ($args[1] !== 'version' || !isset($args[2]) || !isset($args[3]) || !isset($args[4])) {
                    fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>/<galleyId>/<fileId>.' . PHP_EOL);
                    break;
                }
                $publicationId = (int) $args[2];
                $representationId = (int) $args[3];
                $submissionFileId = (int) $args[4];
                if (!Repo::publication()->exists($publicationId, $submissionId)) {
                    fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                    break;
                }
            } else {
                $representationId = (int) $args[1];
                if (isset($args[2])) {
                    $submissionFileId = (int) $args[2];
                }
            }

            $galley = Repo::galley()->get($representationId, $publicationId);
            if (!$galley) {
                fwrite(STDERR, "Galley with the ID {$representationId} does not exist." . PHP_EOL);
                break;
            }
            if (!$submissionFileId) { // Log files from releases 2.x
                $submissionFileId = $galley->getData('submissionFileId');
            }
            $submissionFile = Repo::submissionFile()->get($submissionFileId, $submissionId);
            if (!$submissionFile) {
                fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                break;
            }
            if ($galley->getData('submissionFileId') != $submissionFileId) {
                // This check is relevant if representation and submission file ID are provided as arguments
                fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not belong to the galley with the ID {$representationId}." . PHP_EOL);
                break;
            }

            // is this a full text or supp file
            /** @var \PKP\submission\GenreDAO */
            $genreDao = DAORegistry::getDAO('GenreDAO');
            $genre = $genreDao->getById($submissionFile->getData('genreId'));
            if ($genre->getCategory() != Genre::GENRE_CATEGORY_DOCUMENT || $genre->getSupplementary() || $genre->getDependent()) {
                $newEntry['assocType'] = Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER;
            } else {
                $newEntry['assocType'] = $assocType;
            }
            $newEntry['submissionId'] = $submissionId;
            $newEntry['representationId'] = $representationId;
            $newEntry['submissionFileId'] = $submissionFileId;
            $newEntry['fileType'] = StatisticsHelper::getDocumentType($submissionFile->getData('mimetype'));
            $newEntry['issueId'] = null;
            $newEntry['issueGalleyId'] = null;
            break;

        case Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER:
            // This is the URL article/downloadSuppFile/articleId/suppFileId from a 2.x usage stats log file
            if (!isset($args[0])) {
                fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                break;
            }
            if (!isset($args[1])) {
                fwrite(STDERR, 'Missing supp file ID URL parameter.' . PHP_EOL);
                break;
            }
            $submissionId = (int) $args[0];
            $submission = Repo::submission()->get($submissionId, $newEntry['contextId']);
            if (!$submission) {
                fwrite(STDERR, "Submission with the ID {$submissionId} does not exist in the journal with the ID {$newEntry['contextId']}." . PHP_EOL);
                break;
            }
            // Search all galleys of all publications for the file
            // whose 'old-supp-id' is the supp file ID from the URL.
            $publications = $submission->getData('publications');
            foreach ($publications as $publication) {
                foreach ($publication->getData('galleys') as $publicationGalley) {
                    $submissionFileId = $publicationGalley->getData('submissionFileId');
                    if ($submissionFileId) {
                        $submissionFile = Repo::submissionFile()->get($submissionFileId, $submissionId);
                        if ($submissionFile && $submissionFile->getData('old-supp-id') == $args[1]) {
                            // File found
                            $newEntry['assocType'] = $assocType;
                            $newEntry['submissionId'] = $submissionId;
                            $newEntry['representationId'] = $publicationGalley->getId();
                            $newEntry['submissionFileId'] = $submissionFileId;
                            $newEntry['fileType'] = StatisticsHelper::getDocumentType($submissionFile->getData('mimetype'));
                            $newEntry['issueId'] = null;
                            $newEntry['issueGalleyId'] = null;
                            // Leave both loops and the switch
                            break 3;
                        }
                    }
                }
            }
            fwrite(STDERR, 'Supp file could not be found.' . PHP_EOL);
            break;

        case Application::ASSOC_TYPE_ISSUE:
            if (!isset($args[0])) {
                fwrite(STDERR, 'Missing issue ID URL parameter.' . PHP_EOL);
                break;
            }
            $issueId = (int) $args[0];
            if (!Repo::issue()->exists($issueId, $newEntry['contextId'])) {
                fwrite(STDERR, "Issue with the ID {$issueId} does not exist in the journal with the ID {$newEntry['contextId']}." . PHP_EOL);
                break;
            }
            $newEntry['submissionId'] = null;
            $newEntry['representationId'] = null;
            $newEntry['submissionFileId'] = null;
            $newEntry['fileType'] = null;
            $newEntry['issueGalleyId'] = null;
            $newEntry['issueId'] = $issueId;
            $newEntry['assocType'] = $assocType;
            break;

        case Application::ASSOC_TYPE_ISSUE_GALLEY:
            if (!isset($args[0])) {
                fwrite(STDERR, 'Missing issue ID URL parameter.' . PHP_EOL);
                break;
            }
            if (!isset($args[1])) {
                fwrite(STDERR, 'Missing issue galley ID URL parameter.' . PHP_EOL);
                break;
            }
            /** @var \APP\issue\IssueGalleyDAO */
            $issueGalleyDao = DAORegistry::getDAO('IssueGalleyDAO');
            $issueId = (int) $args[0];
            if (!Repo::issue()->exists($issueId, $newEntry['contextId'])) {
                fwrite(STDERR, "Issue with the ID {$issueId} does not exist in the journal with the ID {$newEntry['contextId']}." . PHP_EOL);
                break;
            }
            $issueGalley = $issueGalleyDao->getByBestId($args[1], $issueId);
            if (!$issueGalley) {
                fwrite(STDERR, "Issue galley with the URL path or ID {$args[1]} does not exist in the issue with the ID {$issueId}." . PHP_EOL);
                break;
            }
            $newEntry['submissionId'] = null;
            $newEntry['representationId'] = null;
            $newEntry['submissionFileId'] = null;
            $newEntry['fileType'] = null;
            $newEntry['issueId'] = $issueId;
            $newEntry['issueGalleyId'] = $issueGalley->getId();
            $newEntry['assocType'] = $assocType;
            break;
    }
}
|
|
|
|
/**
|
|
* Set assoc type and IDs from the passed page, operation and
|
|
* arguments specific to OMP.
|
|
*/
|
|
protected function setOMPAssoc(int $assocType, array $args, array &$newEntry): void
|
|
{
|
|
switch ($assocType) {
|
|
case Application::getContextAssocType():
|
|
// $newEntry['contextId'] has already been set
|
|
$newEntry['assocType'] = $assocType;
|
|
$newEntry['submissionId'] = null;
|
|
$newEntry['representationId'] = null;
|
|
$newEntry['submissionFileId'] = null;
|
|
$newEntry['fileType'] = null;
|
|
$newEntry['chapterId'] = null;
|
|
$newEntry['seriesId'] = null;
|
|
break;
|
|
|
|
case Application::ASSOC_TYPE_SUBMISSION:
|
|
if (!isset($args[0])) {
|
|
fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
|
|
break;
|
|
}
|
|
$submissionId = (int) $args[0];
|
|
if (!Repo::submission()->exists($submissionId, $newEntry['contextId'])) {
|
|
fwrite(STDERR, "Submission with the ID {$submissionId} does not exist in the press with the ID {$newEntry['contextId']}." . PHP_EOL);
|
|
break;
|
|
}
|
|
// If it is an older submission version, the arguments must be:
|
|
// $submissionId/version/$publicationId.
|
|
if (in_array('version', $args)) {
|
|
if ($args[1] !== 'version' || !isset($args[2])) {
|
|
fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>.' . PHP_EOL);
|
|
break;
|
|
}
|
|
$publicationId = (int) $args[2];
|
|
if (!Repo::publication()->exists($publicationId, $submissionId)) {
|
|
fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
|
|
break;
|
|
}
|
|
}
|
|
$newEntry['submissionId'] = $submissionId;
|
|
$newEntry['assocType'] = $assocType;
|
|
$newEntry['representationId'] = null;
|
|
$newEntry['submissionFileId'] = null;
|
|
$newEntry['fileType'] = null;
|
|
$newEntry['chapterId'] = null;
|
|
$newEntry['seriesId'] = null;
|
|
break;
|
|
|
|
case Application::ASSOC_TYPE_SUBMISSION_FILE:
|
|
if (!isset($args[0])) {
|
|
fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
|
|
break;
|
|
}
|
|
if (!isset($args[1])) {
|
|
fwrite(STDERR, 'Missing publication format ID URL parameter.' . PHP_EOL);
|
|
break;
|
|
}
|
|
if (!isset($args[2])) {
|
|
fwrite(STDERR, 'Missing file or publication ID URL parameter.' . PHP_EOL);
|
|
break;
|
|
}
|
|
|
|
$submissionId = (int) $args[0];
|
|
$submissionExists = Repo::submission()->exists($submissionId, $newEntry['contextId']);
|
|
if (!$submissionExists) {
|
|
fwrite(STDERR, "Submission with the ID {$submissionId} does not exist in the press with the ID {$newEntry['contextId']}." . PHP_EOL);
|
|
break;
|
|
}
|
|
|
|
// If it is an older submission version, the arguments must be:
|
|
// $submissionId/version/$publicationId/$representationId/$submissionFileId.
|
|
$publicationId = null;
|
|
if (in_array('version', $args)) {
|
|
// This is a newer log file and it should contain submissionId in this case
|
|
if ($args[1] !== 'version' || !isset($args[2]) || !isset($args[3]) || !isset($args[4])) {
|
|
fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>/<publicationFormatId>/<fileId>.' . PHP_EOL);
|
|
break;
|
|
}
|
|
$publicationId = (int) $args[2];
|
|
$representationId = (int) $args[3];
|
|
$submissionFileId = (int) $args[4];
|
|
if (!Repo::publication()->exists($publicationId, $submissionId)) {
|
|
fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
|
|
break;
|
|
}
|
|
} else {
|
|
$representationId = (int) $args[1];
|
|
$submissionFileId = (int) $args[2];
|
|
}
|
|
|
|
$publicationFormatDao = DAORegistry::getDAO('PublicationFormatDAO'); /** @var PublicationFormatDAO $publicationFormatDao */
|
|
$publicationFormat = $publicationFormatDao->getById($representationId, $publicationId);
|
|
if (!$publicationFormat) {
|
|
fwrite(STDERR, "Publication format with the ID {$representationId} does not exist." . PHP_EOL);
|
|
break;
|
|
}
|
|
|
|
$submissionFile = Repo::submissionFile()->get($submissionFileId, $submissionId);
|
|
if (!$submissionFile) {
|
|
fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
|
|
break;
|
|
}
|
|
if ($submissionFile->getData('assocType') != Application::ASSOC_TYPE_PUBLICATION_FORMAT) {
|
|
fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not belong to a publication format." . PHP_EOL);
|
|
break;
|
|
}
|
|
if ($representationId != $submissionFile->getData('assocId')) {
|
|
fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not belong to the publication format with the ID {$representationId}." . PHP_EOL);
|
|
break;
|
|
}
|
|
|
|
// is this a full text or supp file
|
|
/** @var \PKP\submission\GenreDAO */
|
|
$genreDao = DAORegistry::getDAO('GenreDAO');
|
|
$genre = $genreDao->getById($submissionFile->getData('genreId'));
|
|
if ($genre->getCategory() != Genre::GENRE_CATEGORY_DOCUMENT || $genre->getSupplementary() || $genre->getDependent()) {
|
|
$newEntry['assocType'] = Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER;
|
|
} else {
|
|
$newEntry['assocType'] = $assocType;
|
|
}
|
|
$newEntry['submissionId'] = $submissionId;
|
|
$newEntry['representationId'] = $representationId;
|
|
$newEntry['submissionFileId'] = $submissionFileId;
|
|
$newEntry['fileType'] = StatisticsHelper::getDocumentType($submissionFile->getData('mimetype'));
|
|
$newEntry['chapterId'] = $submissionFile->getData('chapterId');
|
|
$newEntry['seriesId'] = null;
|
|
break;
|
|
|
|
case Application::ASSOC_TYPE_SERIES:
|
|
if (!isset($args[0])) {
|
|
fwrite(STDERR, 'Missing series path URL parameter.' . PHP_EOL);
|
|
break;
|
|
}
|
|
$seriesPath = $args[0];
|
|
$series = Repo::section()->getByPath($seriesPath, $newEntry['contextId']);
|
|
if (!$series) {
|
|
fwrite(STDERR, "Series with the path {$seriesPath} does not exist in the press with the ID {$newEntry['contextId']}." . PHP_EOL);
|
|
break;
|
|
}
|
|
$newEntry['submissionId'] = null;
|
|
$newEntry['representationId'] = null;
|
|
$newEntry['submissionFileId'] = null;
|
|
$newEntry['fileType'] = null;
|
|
$newEntry['chapterId'] = null;
|
|
$newEntry['seriesId'] = $series->getId();
|
|
$newEntry['assocType'] = $assocType;
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
/**
 * Set assoc type and IDs from the passed page, operation and
 * arguments specific to OPS.
 *
 * On success the keys assocType, submissionId, representationId,
 * submissionFileId and fileType of $newEntry are set. If a required
 * argument is missing or refers to an object that cannot be found,
 * a message is written to STDERR and $newEntry is left unchanged.
 *
 * Accepted argument layouts:
 *  - submission:      <submissionId>
 *                     <submissionId>/version/<publicationId>
 *  - submission file: <submissionId>/<galleyId>/<fileId>
 *                     <submissionId>/version/<publicationId>/<galleyId>/<fileId>
 *
 * @param int $assocType The context assoc type, Application::ASSOC_TYPE_SUBMISSION
 *  or Application::ASSOC_TYPE_SUBMISSION_FILE; any other value is ignored
 * @param array $args URL arguments of the logged request
 * @param array $newEntry Converted log entry, modified by reference;
 *  $newEntry['contextId'] is read here and is expected to be set already
 */
protected function setOPSAssoc(int $assocType, array $args, array &$newEntry): void
{
    switch ($assocType) {
        case Application::getContextAssocType():
            // $newEntry['contextId'] has already been set
            $newEntry['assocType'] = $assocType;
            $newEntry['submissionId'] = null;
            $newEntry['representationId'] = null;
            $newEntry['submissionFileId'] = null;
            $newEntry['fileType'] = null;
            break;

        case Application::ASSOC_TYPE_SUBMISSION:
            if (!isset($args[0])) {
                fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                break;
            }
            $submissionId = (int) $args[0];
            if (!Repo::submission()->exists($submissionId, $newEntry['contextId'])) {
                fwrite(STDERR, "Submission with the ID {$submissionId} does not exist in the server with the ID {$newEntry['contextId']}." . PHP_EOL);
                break;
            }
            // If it is an older submission version, the arguments must be:
            // $submissionId/version/$publicationId.
            if (in_array('version', $args, true)) {
                // Guard the read of $args[1] so a malformed URL cannot raise an undefined key warning
                if (($args[1] ?? null) !== 'version' || !isset($args[2])) {
                    fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>.' . PHP_EOL);
                    break;
                }
                $publicationId = (int) $args[2];
                if (!Repo::publication()->exists($publicationId, $submissionId)) {
                    fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                    break;
                }
            }
            $newEntry['submissionId'] = $submissionId;
            $newEntry['assocType'] = $assocType;
            $newEntry['representationId'] = null;
            $newEntry['submissionFileId'] = null;
            $newEntry['fileType'] = null;
            break;

        case Application::ASSOC_TYPE_SUBMISSION_FILE:
            if (!isset($args[0])) {
                fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                break;
            }
            if (!isset($args[1])) {
                fwrite(STDERR, 'Missing galley ID URL parameter.' . PHP_EOL);
                break;
            }
            if (!isset($args[2])) {
                fwrite(STDERR, 'Missing file or publication ID URL parameter.' . PHP_EOL);
                break;
            }

            $submissionId = (int) $args[0];
            $submissionExists = Repo::submission()->exists($submissionId, $newEntry['contextId']);
            if (!$submissionExists) {
                fwrite(STDERR, "Submission with the ID {$submissionId} does not exist in the server with the ID {$newEntry['contextId']}." . PHP_EOL);
                break;
            }

            // If it is an older submission version, the arguments must be:
            // $submissionId/version/$publicationId/$representationId/$submissionFileId.
            // Without a version, $publicationId stays null and the galley is looked up by its ID only.
            $publicationId = null;
            if (in_array('version', $args, true)) {
                // This is a newer log file and it should contain submissionId in this case
                if ($args[1] !== 'version' || !isset($args[2]) || !isset($args[3]) || !isset($args[4])) {
                    fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>/<galleyId>/<fileId>.' . PHP_EOL);
                    break;
                }
                $publicationId = (int) $args[2];
                $representationId = (int) $args[3];
                $submissionFileId = (int) $args[4];
                if (!Repo::publication()->exists($publicationId, $submissionId)) {
                    fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                    break;
                }
            } else {
                $representationId = (int) $args[1];
                $submissionFileId = (int) $args[2];
            }

            $galley = Repo::galley()->get($representationId, $publicationId);
            if (!$galley) {
                fwrite(STDERR, "Galley with the ID {$representationId} does not exist." . PHP_EOL);
                break;
            }
            $submissionFile = Repo::submissionFile()->get($submissionFileId, $submissionId);
            if (!$submissionFile) {
                fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                break;
            }
            if ($galley->getData('submissionFileId') != $submissionFileId) {
                fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not belong to the galley with the ID {$representationId}." . PHP_EOL);
                break;
            }

            // is this a full text or supp file
            /** @var \PKP\submission\GenreDAO $genreDao */
            $genreDao = DAORegistry::getDAO('GenreDAO');
            $genreId = $submissionFile->getData('genreId');
            $genre = $genreId === null ? null : $genreDao->getById($genreId);
            if (!$genre) {
                // Without a genre the file cannot be classified; skip the entry
                // like every other failed lookup instead of dereferencing null.
                fwrite(STDERR, "Genre of the submission file with the ID {$submissionFileId} does not exist." . PHP_EOL);
                break;
            }
            if ($genre->getCategory() != Genre::GENRE_CATEGORY_DOCUMENT || $genre->getSupplementary() || $genre->getDependent()) {
                // Anything that is not a primary document is counted as "other"
                $newEntry['assocType'] = Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER;
            } else {
                $newEntry['assocType'] = $assocType;
            }
            $newEntry['submissionId'] = $submissionId;
            $newEntry['representationId'] = $representationId;
            $newEntry['submissionFileId'] = $submissionFileId;
            $newEntry['fileType'] = StatisticsHelper::getDocumentType($submissionFile->getData('mimetype'));
            break;
    }
}
|
|
}
|