first commit
This commit is contained in:
@@ -0,0 +1,443 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/task/FileLoader.php
|
||||
*
|
||||
* Copyright (c) 2014-2021 Simon Fraser University
|
||||
* Copyright (c) 2003-2021 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class FileLoader
|
||||
*
|
||||
* @ingroup classes_task
|
||||
*
|
||||
* @brief Base scheduled task class to reliably handle files processing.
|
||||
*/
|
||||
|
||||
namespace PKP\task;
|
||||
|
||||
use Exception;
|
||||
use PKP\config\Config;
|
||||
use PKP\db\DAORegistry;
|
||||
use PKP\file\FileManager;
|
||||
use PKP\scheduledTask\ScheduledTask;
|
||||
use PKP\scheduledTask\ScheduledTaskHelper;
|
||||
use PKP\site\Site;
|
||||
use PKP\site\SiteDAO;
|
||||
|
||||
abstract class FileLoader extends ScheduledTask
|
||||
{
|
||||
// Values processFile() may return to tell executeActions() where the
// claimed file has to go next.
public const FILE_LOADER_RETURN_TO_STAGING = 1;
public const FILE_LOADER_RETURN_TO_DISPATCH = 2;

// Message type identifiers.
public const FILE_LOADER_ERROR_MESSAGE_TYPE = 'common.error';
public const FILE_LOADER_WARNING_MESSAGE_TYPE = 'common.warning';

// Names of the working folders created below the base path.
public const FILE_LOADER_PATH_STAGING = 'stage';
public const FILE_LOADER_PATH_PROCESSING = 'processing';
public const FILE_LOADER_PATH_REJECT = 'reject';
public const FILE_LOADER_PATH_ARCHIVE = 'archive';
public const FILE_LOADER_PATH_DISPATCH = 'dispatch';

/** Name of the file currently claimed by claimNextFile(). */
private string $_claimedFilename;

/** Base directory path of the folder structure. */
private string $_basePath;

/** Path of the staging folder. */
private string $_stagePath;

/** Path of the processing folder. */
private string $_processingPath;

/** Path of the archive folder. */
private string $_archivePath;

/** Path of the dispatch folder. */
private string $_dispatchPath;

/** Path of the reject folder. */
private string $_rejectPath;

/** Contact email of the site. */
private string $_adminEmail;

/** Contact name of the site. */
private string $_adminName;

/** Names of the files sent back to staging during this run; they are not claimed again. */
private array $_stagedBackFiles = [];

/** Whether archived files get compressed. */
private bool $_compressArchives = false;

/** When not empty, only the file names listed here are claimed. */
private array $_onlyConsiderFiles = [];
|
||||
|
||||
/**
 * Constructor.
 *
 * Canonicalizes the base path passed as first script argument, derives the
 * working folder paths from it and reads the site contact email and name.
 *
 * @param array $args Script arguments; $args[0] is the base path of the
 *  file loader folder structure.
 */
public function __construct(array $args)
{
    parent::__construct($args);

    // Canonicalize the base path. Only the parent folder is resolved: we
    // assume that it already exists, while the base folder itself is
    // appended as given.
    $requestedBasePath = rtrim($args[0], '/');
    $resolvedParent = realpath(dirname($requestedBasePath));

    if ($resolvedParent === false) {
        // The parent folder cannot be resolved. $_basePath is a
        // non-nullable string, so assigning null here would throw a
        // TypeError; an empty string marks the base path as invalid
        // instead and is rejected by checkFolderStructure().
        // The folder paths are left unset on purpose.
        $this->_basePath = '';
    } else {
        $basePath = $resolvedParent . '/' . basename($requestedBasePath);
        $this->_basePath = $basePath;

        // Configure paths.
        $this->_stagePath = "{$basePath}/" . self::FILE_LOADER_PATH_STAGING;
        $this->_archivePath = "{$basePath}/" . self::FILE_LOADER_PATH_ARCHIVE;
        $this->_rejectPath = "{$basePath}/" . self::FILE_LOADER_PATH_REJECT;
        $this->_processingPath = "{$basePath}/" . self::FILE_LOADER_PATH_PROCESSING;
        $this->_dispatchPath = "{$basePath}/" . self::FILE_LOADER_PATH_DISPATCH;
    }

    // Set admin email and name.
    $siteDao = DAORegistry::getDAO('SiteDAO'); /** @var SiteDAO $siteDao */
    $site = $siteDao->getSite(); /** @var Site $site */
    $this->_adminEmail = $site->getLocalizedContactEmail();
    $this->_adminName = $site->getLocalizedContactName();
}
|
||||
|
||||
|
||||
//
|
||||
// Getters and setters.
|
||||
//
|
||||
/**
 * Get the path of the staging folder.
 *
 * @return string The staging folder path.
 */
public function getStagePath(): string
{
    return $this->_stagePath;
}
|
||||
|
||||
/**
 * Get the path of the processing folder.
 *
 * @return string The processing folder path.
 */
public function getProcessingPath(): string
{
    return $this->_processingPath;
}
|
||||
|
||||
/**
 * Get the path of the reject folder.
 *
 * @return string The reject folder path.
 */
public function getRejectPath(): string
{
    return $this->_rejectPath;
}
|
||||
|
||||
/**
 * Get the path of the archive folder.
 *
 * @return string The archive folder path.
 */
public function getArchivePath(): string
{
    return $this->_archivePath;
}
|
||||
|
||||
/**
 * Get the path of the dispatch folder.
 *
 * @return string The dispatch folder path.
 */
public function getDispatchPath(): string
{
    return $this->_dispatchPath;
}
|
||||
|
||||
/**
 * Tell whether archived files get compressed.
 *
 * @return bool True if archived files are compressed.
 */
public function getCompressArchives(): bool
{
    return $this->_compressArchives;
}
|
||||
|
||||
/**
 * Choose whether archived files get compressed.
 *
 * @param bool $compressArchives True to compress files after archiving them.
 */
public function setCompressArchives(bool $compressArchives): void
{
    $this->_compressArchives = $compressArchives;
}
|
||||
|
||||
/**
 * Get the list of file names the loader is restricted to.
 *
 * @return array The file names; empty when there is no restriction.
 */
public function getOnlyConsiderFiles(): array
{
    return $this->_onlyConsiderFiles;
}
|
||||
|
||||
/**
 * Restrict the loader to the given file names.
 *
 * @param array $onlyConsiderFiles File names to consider; an empty list
 *  removes the restriction.
 */
public function setOnlyConsiderFiles(array $onlyConsiderFiles): void
{
    $this->_onlyConsiderFiles = $onlyConsiderFiles;
}
|
||||
|
||||
//
|
||||
// Public methods
|
||||
//
|
||||
/**
 * A public helper function that can be used to ensure
 * that the file structure has actually been installed.
 *
 * The base path must be the configured files directory or a location
 * below it, and every working folder must be a readable directory.
 * Each failure is written to the execution log as an error.
 *
 * @param bool $install Set this parameter to true to
 *  install the folder structure if it is missing.
 *
 * @return bool True if the folder structure exists,
 *  otherwise false.
 */
public function checkFolderStructure(bool $install = false): bool
{
    // Make sure that the base path is inside the private files dir.
    // The files dir has appropriate write permissions and is assumed
    // to be protected against information leak and symlink attacks.
    $filesDirSetting = (string) Config::getVar('files', 'files_dir');
    // An empty setting is treated like an unresolvable one instead of
    // letting realpath() resolve it to the current working directory.
    $filesDir = $filesDirSetting === '' ? false : realpath($filesDirSetting);

    // `??` also covers a base path that is unset or null.
    $basePath = $this->_basePath ?? '';

    $isInsideFilesDir = false;
    if ($filesDir !== false && $basePath !== '') {
        // Compare against the files dir followed by a separator so that a
        // sibling folder merely sharing the prefix (e.g. "files_other"
        // next to "files") is not accepted. Both separators are tested
        // because the base path is assembled with "/" while realpath()
        // uses the platform separator.
        $prefix = rtrim($filesDir, '/' . DIRECTORY_SEPARATOR);
        $isInsideFilesDir = $basePath === $filesDir
            || str_starts_with($basePath, $prefix . '/')
            || str_starts_with($basePath, $prefix . DIRECTORY_SEPARATOR);
    }

    if (!$isInsideFilesDir) {
        $this->addExecutionLogEntry(
            __('admin.fileLoader.wrongBasePathLocation', ['path' => $basePath]),
            ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR
        );
        return false;
    }

    // Check folder presence and readability.
    $pathsToCheck = [
        $this->_stagePath,
        $this->_archivePath,
        $this->_rejectPath,
        $this->_processingPath,
        $this->_dispatchPath,
    ];
    $fileManager = null;
    foreach ($pathsToCheck as $path) {
        if (is_dir($path) && is_readable($path)) {
            continue;
        }

        if ($install) {
            // Try installing the folder if it is missing.
            $fileManager ??= new FileManager();
            $fileManager->mkdirtree($path);
        }

        // Try again.
        if (!(is_dir($path) && is_readable($path))) {
            // Give up...
            $this->addExecutionLogEntry(
                __('admin.fileLoader.pathNotAccessible', ['path' => $path]),
                ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR
            );
            return false;
        }
    }

    return true;
}
|
||||
|
||||
|
||||
//
|
||||
// Protected methods.
|
||||
//
|
||||
/**
 * @copydoc ScheduledTask::executeActions()
 *
 * Claims the staged files one by one, hands each of them to processFile()
 * and moves it to the folder matching the outcome.
 *
 * @return bool False if the folder structure check failed, a file could
 *  not be claimed, processing threw an exception or a file was sent back
 *  to staging; otherwise true.
 */
protected function executeActions(): bool
{
    if (!$this->checkFolderStructure()) {
        return false;
    }

    $hasErrors = false;
    while (true) {
        $claimedPath = $this->claimNextFile();
        if ($claimedPath === null) {
            // Nothing left to claim.
            break;
        }
        if ($claimedPath === false) {
            // Problem claiming the file.
            $hasErrors = true;
            break;
        }

        try {
            $outcome = $this->processFile($claimedPath);
        } catch (Exception $exception) {
            $hasErrors = true;
            $this->rejectFile();
            $this->addExecutionLogEntry(
                $exception->getMessage(),
                ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR
            );
            continue;
        }

        if ($outcome === self::FILE_LOADER_RETURN_TO_STAGING) {
            // Send the file back to staging.
            $hasErrors = true;
            $this->stageFile();
            // Remember the file so that it is not claimed again, which
            // would make this loop run forever.
            $this->_stagedBackFiles[] = $this->_claimedFilename;
            continue;
        }

        if ($outcome === self::FILE_LOADER_RETURN_TO_DISPATCH) {
            // Move the file to dispatch folder, where a dispatched job will find it.
            $this->dispatchFile();
            $this->addExecutionLogEntry(
                __('admin.fileLoader.fileDispatched', ['filename' => $claimedPath]),
                ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_NOTICE
            );
            continue;
        }

        // Every other outcome archives the file; only a strict true is
        // additionally reported as processed.
        $this->archiveFile();
        if ($outcome === true) {
            $this->addExecutionLogEntry(
                __('admin.fileLoader.fileProcessed', ['filename' => $claimedPath]),
                ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_NOTICE
            );
        }
    }

    return !$hasErrors;
}
|
||||
|
||||
/**
 * Process the passed file.
 *
 * @param string $filePath Path of the claimed file inside the processing folder.
 *
 * @throws \Exception An exception makes executeActions() move the file to
 *  the reject folder and log the exception message.
 *
 * @return bool|int self::FILE_LOADER_RETURN_TO_STAGING to send the file
 *  back to staging, self::FILE_LOADER_RETURN_TO_DISPATCH to move it to the
 *  dispatch folder; any other value archives the file, and true also logs
 *  it as processed.
 *
 * @see FileLoader::executeActions() to understand the expected return values.
 */
abstract protected function processFile(string $filePath): bool|int;
|
||||
|
||||
/**
 * Move a file from one folder of the structure to another one.
 *
 * A failed move is written to the execution log and then passed to
 * fatalError().
 *
 * @param string $sourceDir Folder currently holding the file.
 * @param string $destDir Folder the file is moved into.
 * @param string $filename Name of the file, without any folder part.
 *
 * @return string The destination path of the moved file.
 */
protected function moveFile(string $sourceDir, string $destDir, string $filename): string
{
    $fromPath = $sourceDir . '/' . $filename;
    $toPath = $destDir . '/' . $filename;

    if (rename($fromPath, $toPath)) {
        return $toPath;
    }

    $message = __('admin.fileLoader.moveFileFailed', [
        'filename' => $filename,
        'currentFilePath' => $fromPath,
        'destinationPath' => $toPath,
    ]);
    $this->addExecutionLogEntry($message, ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR);

    // Script should always stop if it can't manipulate files inside
    // its own directory system.
    fatalError($message);

    return $toPath;
}
|
||||
|
||||
//
|
||||
// Private helper methods.
|
||||
//
|
||||
/**
 * Claim the first eligible file that's inside the staging folder.
 *
 * The file is moved to the processing folder and, when its extension is
 * "gz", decompressed there. Files already sent back to staging and files
 * excluded by the "only consider" list are skipped.
 *
 * @return string|false|null The path of the claimed file, null if there
 *  is no file left to claim, or false if the claim was not successful.
 */
private function claimNextFile(): string|false|null
{
    $stageDir = opendir($this->_stagePath);
    if ($stageDir === false) {
        // Without a directory handle nothing can be claimed.
        $this->addExecutionLogEntry(
            __('admin.fileLoader.pathNotAccessible', ['path' => $this->_stagePath]),
            ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR
        );
        return false;
    }

    $filename = null;
    try {
        // Compare against false explicitly: an entry named "0" is falsy
        // and would otherwise end the listing early.
        while (($entry = readdir($stageDir)) !== false) {
            if ($entry === '.' || $entry === '..') {
                continue;
            }
            if (in_array($entry, $this->_stagedBackFiles, true)) {
                continue;
            }
            // NOTE(review): kept as a loose comparison because the list is
            // supplied by callers; confirm it only holds strings before
            // making it strict.
            if (!empty($this->_onlyConsiderFiles) && !in_array($entry, $this->_onlyConsiderFiles)) {
                continue;
            }
            $filename = $entry;
            break;
        }
    } finally {
        // Release the directory handle on every path.
        closedir($stageDir);
    }

    if ($filename === null) {
        return null;
    }

    $processingFilePath = $this->moveFile($this->_stagePath, $this->_processingPath, $filename);

    if (pathinfo($processingFilePath, PATHINFO_EXTENSION) === 'gz') {
        $fileMgr = new FileManager();
        try {
            // Presumably returns the path of the decompressed file; the
            // claimed file name is taken from that path.
            $processingFilePath = $fileMgr->gzDecompressFile($processingFilePath);
            $filename = pathinfo($processingFilePath, PATHINFO_BASENAME);
        } catch (Exception $e) {
            // Put the compressed file back and report the failed claim.
            $this->moveFile($this->_processingPath, $this->_stagePath, $filename);
            $this->addExecutionLogEntry($e->getMessage(), ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR);
            return false;
        }
    }

    $this->_claimedFilename = $filename;

    return $processingFilePath;
}
|
||||
|
||||
/**
 * Move the currently claimed file from the processing folder to the
 * reject folder.
 */
private function rejectFile(): void
{
    $this->moveFile(
        $this->_processingPath,
        $this->_rejectPath,
        $this->_claimedFilename
    );
}
|
||||
|
||||
/**
 * Move the currently claimed file from the processing folder to the
 * dispatch folder.
 */
protected function dispatchFile(): void
{
    $this->moveFile(
        $this->_processingPath,
        $this->_dispatchPath,
        $this->_claimedFilename
    );
}
|
||||
|
||||
/**
 * Move the currently claimed file from the processing folder to the
 * archive folder and compress it there when compression is enabled.
 *
 * A failed compression is only written to the execution log.
 */
private function archiveFile(): void
{
    $this->moveFile($this->_processingPath, $this->_archivePath, $this->_claimedFilename);

    if (!$this->getCompressArchives()) {
        return;
    }

    try {
        $fileMgr = new FileManager();
        $archivedFilePath = $this->_archivePath . '/' . $this->_claimedFilename;
        $fileMgr->gzCompressFile($archivedFilePath);
    } catch (Exception $exception) {
        $this->addExecutionLogEntry(
            $exception->getMessage(),
            ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR
        );
    }
}
|
||||
|
||||
/**
 * Move the currently claimed file from the processing folder back to the
 * staging folder.
 */
private function stageFile(): void
{
    $this->moveFile(
        $this->_processingPath,
        $this->_stagePath,
        $this->_claimedFilename
    );
}
|
||||
}
|
||||
|
||||
// Outside of strict mode, keep the class reachable under its global name
// and expose the listed class constants as global constants.
if (!PKP_STRICT_MODE) {
    class_alias('\PKP\task\FileLoader', '\FileLoader');

    foreach (
        [
            'FILE_LOADER_RETURN_TO_STAGING',
            'FILE_LOADER_ERROR_MESSAGE_TYPE',
            'FILE_LOADER_WARNING_MESSAGE_TYPE',
            'FILE_LOADER_PATH_STAGING',
            'FILE_LOADER_PATH_PROCESSING',
            'FILE_LOADER_PATH_REJECT',
            'FILE_LOADER_PATH_ARCHIVE',
        ] as $constantName
    ) {
        // Each global constant takes the value of the class constant of the same name.
        define($constantName, constant('\FileLoader::' . $constantName));
    }
}
|
||||
Reference in New Issue
Block a user