first commit
This commit is contained in:
@@ -0,0 +1,417 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/search/ArticleSearch.php
|
||||
*
|
||||
* Copyright (c) 2014-2021 Simon Fraser University
|
||||
* Copyright (c) 2003-2021 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class ArticleSearch
|
||||
*
|
||||
* @ingroup search
|
||||
*
|
||||
* @see ArticleSearchDAO
|
||||
*
|
||||
* @brief Class for retrieving article search results.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace APP\search;
|
||||
|
||||
use APP\core\Application;
|
||||
use APP\core\Request;
|
||||
use APP\core\Services;
|
||||
use APP\facades\Repo;
|
||||
use APP\issue\IssueAction;
|
||||
use PKP\db\DAORegistry;
|
||||
use PKP\facades\Locale;
|
||||
use PKP\plugins\Hook;
|
||||
use PKP\search\SubmissionSearch;
|
||||
use PKP\submission\PKPSubmission;
|
||||
|
||||
class ArticleSearch extends SubmissionSearch
|
||||
{
|
||||
/**
 * Order a raw result set and flatten it to a plain list of submission IDs.
 *
 * See SubmissionSearch::getSparseArray()
 *
 * @param array $unorderedResults Raw results keyed by submission ID. Each entry
 *  is read for 'count' and 'journal_id' and, depending on $orderBy, for
 *  'issuePublicationDate' / 'publicationDate'.
 * @param string $orderBy One of 'authors', 'title', 'journalTitle',
 *  'issuePublicationDate', 'publicationDate', 'popularityAll',
 *  'popularityMonth'; any other value orders by the computed score.
 * @param string $orderDir 'asc' (case-insensitive) sorts ascending, anything
 *  else sorts descending.
 * @param array $exclude Submission IDs to leave out of the result.
 *
 * @return array Submission IDs in their final order.
 */
public function getSparseArray($unorderedResults, $orderBy, $orderDir, $exclude)
{
    // Calculate a well-ordered (unique) score.
    // The array is written through its key rather than through a by-reference
    // loop variable: a leftover reference would alias the last element and the
    // by-value loop further down (which reuses $data) would then overwrite the
    // last result with the data of the previous one.
    $resultCount = count($unorderedResults);
    $i = 0;
    $contextIds = [];
    foreach ($unorderedResults as $submissionId => $data) {
        $unorderedResults[$submissionId]['score'] = ($resultCount * $data['count']) + $i++;
        $contextIds[] = $data['journal_id'];
    }

    // If we got a primary sort order then apply it and use score as secondary
    // order only.
    // NB: We apply order after merging and before paging/formatting. Applying
    // order before merging would require us to retrieve dependent objects for
    // results being purged later. Doing everything in a closed SQL is not
    // possible (e.g. for authors). Applying sort order after paging and
    // formatting is not possible as we have to order the whole list before
    // slicing it. So this seems to be the most appropriate place, although we
    // may have to retrieve some objects again when formatting results.
    $orderedResults = [];
    $contextDao = Application::getContextDAO();
    $contextTitles = [];
    if ($orderBy == 'popularityAll' || $orderBy == 'popularityMonth') {
        // Retrieve a metrics report for all submissions.
        $filter = [
            'submissionIds' => array_keys($unorderedResults),
            'contextIds' => $contextIds,
            'assocTypes' => [Application::ASSOC_TYPE_SUBMISSION, Application::ASSOC_TYPE_SUBMISSION_FILE]
        ];
        if ($orderBy == 'popularityMonth') {
            // Restrict the report to the window [one month ago, today].
            $oneMonthAgo = date('Ymd', strtotime('-1 month'));
            $today = date('Ymd');
            $filter['dateStart'] = $oneMonthAgo;
            $filter['dateEnd'] = $today;
        }
        $rawReport = Services::get('publicationStats')->getTotals($filter);
        foreach ($rawReport as $row) {
            $unorderedResults[$row->submission_id]['metric'] = $row->metric;
        }
    }

    $i = 0; // Used to prevent ties from clobbering each other
    $authorUserGroups = Repo::userGroup()->getCollector()->filterByRoleIds([\PKP\security\Role::ROLE_ID_AUTHOR])->getMany();
    foreach ($unorderedResults as $submissionId => $data) {
        // Exclude unwanted IDs. The comparison is deliberately loose, as in
        // the original code, so numeric strings and integers still match.
        if (in_array($submissionId, $exclude)) {
            continue;
        }

        switch ($orderBy) {
            case 'authors':
                $submission = Repo::submission()->get($submissionId);
                $publication = $submission->getCurrentPublication();
                // Same guard as the 'title' branch: a submission without a
                // current publication sorts under the empty key.
                $orderKey = empty($publication) ? '' : $publication->getAuthorString($authorUserGroups);
                break;

            case 'title':
                $submission = Repo::submission()->get($submissionId);
                $orderKey = '';
                if (!empty($submission->getCurrentPublication())) {
                    $orderKey = $submission->getCurrentPublication()->getLocalizedData('title');
                }
                break;

            case 'journalTitle':
                // Context names are looked up once per journal.
                if (!isset($contextTitles[$data['journal_id']])) {
                    $context = $contextDao->getById($data['journal_id']);
                    $contextTitles[$data['journal_id']] = $context->getLocalizedName();
                }
                $orderKey = $contextTitles[$data['journal_id']];
                break;

            case 'issuePublicationDate':
            case 'publicationDate':
                $orderKey = $data[$orderBy];
                break;

            case 'popularityAll':
            case 'popularityMonth':
                // Submissions missing from the metrics report count as zero.
                $orderKey = ($data['metric'] ?? 0);
                break;

            default: // order by score.
                $orderKey = $data['score'];
        }
        if (!isset($orderedResults[$orderKey])) {
            $orderedResults[$orderKey] = [];
        }
        $orderedResults[$orderKey][$data['score'] + $i++] = $submissionId;
    }

    // Order the results by primary order.
    $ascending = strtolower($orderDir) == 'asc';
    if ($ascending) {
        ksort($orderedResults);
    } else {
        krsort($orderedResults);
    }

    // Order the result by secondary order and flatten it.
    $finalOrder = [];
    foreach ($orderedResults as $orderKey => $submissionIds) {
        if (count($submissionIds) == 1) {
            $finalOrder[] = array_pop($submissionIds);
        } else {
            if ($ascending) {
                ksort($submissionIds);
            } else {
                krsort($submissionIds);
            }
            $finalOrder = array_merge($finalOrder, array_values($submissionIds));
        }
    }
    return $finalOrder;
}
|
||||
|
||||
/**
 * Collect every search filter carried by the request.
 *
 * @param Request $request
 *
 * @return array All search filters (empty and active). Besides the plain
 *  request variables this contains 'fromDate', 'toDate', 'searchJournal'
 *  (the resolved context, if any) and 'siteSearch'.
 */
public function getSearchFilters($request)
{
    // Plain filters are copied one-to-one from the request variables.
    $filterNames = [
        'query',
        'searchJournal',
        'abstract',
        'authors',
        'title',
        'galleyFullText',
        'discipline',
        'subject',
        'type',
        'coverage',
        'indexTerms',
    ];
    $searchFilters = [];
    foreach ($filterNames as $filterName) {
        $searchFilters[$filterName] = $request->getUserVar($filterName);
    }

    // A simplified query (from the navigation block plugin) carries its
    // target filter in a drop-down value.
    $simpleQuery = $request->getUserVar('simpleQuery');
    if (!empty($simpleQuery)) {
        $targetFilter = $request->getUserVar('searchField');
        if (array_key_exists($targetFilter, $searchFilters)) {
            $searchFilters[$targetFilter] = $simpleQuery;
        }
    }

    // Publishing dates.
    $fromDate = $request->getUserDateVar('dateFrom', 1, 1);
    $searchFilters['fromDate'] = $fromDate === null ? null : date('Y-m-d H:i:s', $fromDate);
    $toDate = $request->getUserDateVar('dateTo', 32, 12, null, 23, 59, 59);
    $searchFilters['toDate'] = $toDate === null ? null : date('Y-m-d H:i:s', $toDate);

    // Resolve the context. Without a request context this is a site-wide
    // search, which may still be narrowed to a journal by ID or by title.
    $context = $request->getContext();
    $siteSearch = !$context;
    if ($siteSearch) {
        $contextDao = Application::getContextDAO();
        if (!empty($searchFilters['searchJournal'])) {
            $context = $contextDao->getById($searchFilters['searchJournal']);
        } elseif (array_key_exists('journalTitle', $request->getUserVars())) {
            $contexts = $contextDao->getAll(true);
            // Stop at the first context with a name matching the requested
            // title; $context is whatever next() returned last otherwise.
            while ($context = $contexts->next()) {
                $contextNames = (array) $context->getName(null);
                if (in_array($request->getUserVar('journalTitle'), $contextNames)) {
                    break;
                }
            }
        }
    }
    $searchFilters['searchJournal'] = $context;
    $searchFilters['siteSearch'] = $siteSearch;

    return $searchFilters;
}
|
||||
|
||||
/**
 * Build the keywords array from a set of search filters.
 *
 * @param array $searchFilters Search filters as returned from
 *  ArticleSearch::getSearchFilters()
 *
 * @return array Keyword array as required by SubmissionSearch::retrieveResults().
 *  The general query is stored under the empty-string key, every other
 *  non-empty filter under its key from the index field map.
 */
public function getKeywordsFromSearchFilters($searchFilters)
{
    $fieldsByBitmap = $this->getIndexFieldMap();
    $fieldsByBitmap[SubmissionSearch::SUBMISSION_SEARCH_INDEX_TERMS] = 'indexTerms';

    // The free-text query is kept whenever it is set, even if it is empty.
    $keywords = isset($searchFilters['query']) ? ['' => $searchFilters['query']] : [];

    foreach ($fieldsByBitmap as $bitmap => $fieldName) {
        // Unset and empty filters contribute nothing.
        if (empty($searchFilters[$fieldName])) {
            continue;
        }
        $keywords[$bitmap] = $searchFilters[$fieldName];
    }

    return $keywords;
}
|
||||
|
||||
/**
 * See SubmissionSearch::formatResults()
 *
 * Results whose issue cannot be loaded or is not published are dropped.
 *
 * @param array $results Submission IDs
 * @param \PKP\user\User $user optional (if availability information is desired)
 *
 * @return array One entry per retained result with the keys 'article',
 *  'publishedSubmission', 'issue', 'journal', 'issueAvailable' and 'section'.
 */
public function formatResults($results, $user = null)
{
    $contextDao = Application::getContextDAO();

    // Per-call caches so every object is looked up only once.
    $submissionCache = [];
    $issueCache = [];
    $issueAvailabilityCache = [];
    $contextCache = [];
    $sectionCache = [];

    $returner = [];
    foreach ($results as $articleId) {
        // Get the submission, storing in cache if necessary. 'article' and
        // 'publishedSubmission' are the same object.
        if (!isset($submissionCache[$articleId])) {
            $submissionCache[$articleId] = Repo::submission()->get($articleId);
        }
        $article = $submissionCache[$articleId];
        if (!$article) {
            continue;
        }

        // Get the section, storing in cache if necessary.
        $sectionId = $article->getSectionId();
        if (!isset($sectionCache[$sectionId])) {
            $sectionCache[$sectionId] = Repo::section()->get($sectionId, $article->getData('contextId'));
        }

        // Get the context, storing in cache if necessary.
        $contextId = $article->getData('contextId');
        if (!isset($contextCache[$contextId])) {
            $contextCache[$contextId] = $contextDao->getById($contextId);
        }

        // Get the issue, storing in cache if necessary. Availability is
        // computed together with the issue, on its first occurrence.
        $issueId = $article->getCurrentPublication()->getData('issueId');
        if ($issueId && !isset($issueCache[$issueId])) {
            $issue = Repo::issue()->get($issueId);
            $issueCache[$issueId] = $issue;
            $issueAction = new IssueAction();
            $journal = $contextCache[$contextId];
            $issueAvailabilityCache[$issueId] = !$issueAction->subscriptionRequired($issue, $journal)
                || $issueAction->subscribedUser($user, $journal, $issueId, $articleId)
                || $issueAction->subscribedDomain(Application::get()->getRequest(), $journal, $issueId, $articleId);
        }

        // Only display articles from published issues.
        if (!isset($issueCache[$issueId]) || !$issueCache[$issueId]->getPublished()) {
            continue;
        }

        // Store the retrieved objects in the result array.
        $returner[] = [
            'article' => $article,
            'publishedSubmission' => $article,
            'issue' => $issueCache[$issueId],
            'journal' => $contextCache[$contextId],
            'issueAvailable' => $issueAvailabilityCache[$issueId],
            'section' => $sectionCache[$sectionId]
        ];
    }
    return $returner;
}
|
||||
|
||||
/**
 * Identify similarity terms for a given submission.
 *
 * The 'ArticleSearch::getSimilarityTerms' hook is called first. Only when it
 * returns false are the keywords of the submission's current publication
 * used, and only for published submissions.
 *
 * @param int $submissionId
 *
 * @return null|array An array of string keywords or null
 *  if some kind of error occurred (e.g. the submission does not exist).
 */
public function getSimilarityTerms($submissionId)
{
    // Check whether a search plugin provides terms for a similarity search.
    $searchTerms = [];
    $result = Hook::call('ArticleSearch::getSimilarityTerms', [$submissionId, &$searchTerms]);
    if ($result !== false) {
        return $searchTerms;
    }

    // If no plugin implements the hook then use the subject keywords
    // of the submission for a similarity search.
    $article = Repo::submission()->get($submissionId);
    if (!$article) {
        // Unknown submission: report the error instead of dereferencing null.
        return null;
    }
    if ($article->getData('status') !== PKPSubmission::STATUS_PUBLISHED) {
        return $searchTerms;
    }
    $publication = $article->getCurrentPublication();
    if (!$publication) {
        return $searchTerms;
    }

    // Retrieve keywords (if any).
    $submissionSubjectDao = DAORegistry::getDAO('SubmissionKeywordDAO'); /** @var \PKP\submission\SubmissionKeywordDAO $submissionSubjectDao */
    $allSearchTerms = array_filter($submissionSubjectDao->getKeywords($publication->getId(), [Locale::getLocale(), $article->getLocale(), Locale::getPrimaryLocale()]));

    // Concatenate the per-locale lists. The array union operator (+) must not
    // be used here: the lists are numerically indexed, so a union would drop
    // every term of a later locale whose index already exists.
    $keywordTerms = [];
    foreach ($allSearchTerms as $localeSearchTerms) {
        $keywordTerms = array_merge($keywordTerms, (array) $localeSearchTerms);
    }
    $keywordTerms = array_values(array_unique($keywordTerms));

    return array_merge($searchTerms, $keywordTerms);
}
|
||||
|
||||
/**
 * Map search index type constants to search filter names.
 *
 * @return array Filter names keyed by SubmissionSearch::SUBMISSION_SEARCH_* constant.
 */
public function getIndexFieldMap()
{
    $fieldMap = [];
    $fieldMap[SubmissionSearch::SUBMISSION_SEARCH_AUTHOR] = 'authors';
    $fieldMap[SubmissionSearch::SUBMISSION_SEARCH_TITLE] = 'title';
    $fieldMap[SubmissionSearch::SUBMISSION_SEARCH_ABSTRACT] = 'abstract';
    $fieldMap[SubmissionSearch::SUBMISSION_SEARCH_GALLEY_FILE] = 'galleyFullText';
    $fieldMap[SubmissionSearch::SUBMISSION_SEARCH_DISCIPLINE] = 'discipline';
    $fieldMap[SubmissionSearch::SUBMISSION_SEARCH_SUBJECT] = 'subject';
    $fieldMap[SubmissionSearch::SUBMISSION_SEARCH_KEYWORD] = 'keyword';
    $fieldMap[SubmissionSearch::SUBMISSION_SEARCH_TYPE] = 'type';
    $fieldMap[SubmissionSearch::SUBMISSION_SEARCH_COVERAGE] = 'coverage';

    return $fieldMap;
}
|
||||
|
||||
/**
 * See SubmissionSearch::getResultSetOrderingOptions()
 *
 * @param Request $request
 *
 * @return array Translated labels keyed by ordering option.
 */
public function getResultSetOrderingOptions($request)
{
    // Options that are always offered, including both popularity orderings.
    $resultSetOrderingOptions = [
        'score' => __('search.results.orderBy.relevance'),
        'authors' => __('search.results.orderBy.author'),
        'issuePublicationDate' => __('search.results.orderBy.issue'),
        'publicationDate' => __('search.results.orderBy.date'),
        'title' => __('search.results.orderBy.article'),
        'popularityAll' => __('search.results.orderBy.popularityAll'),
        'popularityMonth' => __('search.results.orderBy.popularityMonth'),
    ];

    // Ordering by journal title is only offered when the request has no
    // context, i.e. for a site-wide search.
    $context = $request->getContext();
    if (!$context) {
        $resultSetOrderingOptions['journalTitle'] = __('search.results.orderBy.journal');
    }

    // Let plugins mangle the search ordering options.
    Hook::call('SubmissionSearch::getResultSetOrderingOptions', [$context, &$resultSetOrderingOptions]);

    return $resultSetOrderingOptions;
}
|
||||
|
||||
/**
 * See SubmissionSearch::getDefaultOrderDir()
 *
 * @param string $orderBy Ordering option
 *
 * @return string 'desc' for score, date and popularity orderings, 'asc' otherwise.
 */
public function getDefaultOrderDir($orderBy)
{
    $descendingByDefault = ['score', 'publicationDate', 'issuePublicationDate', 'popularityAll', 'popularityMonth'];

    return in_array($orderBy, $descendingByDefault) ? 'desc' : 'asc';
}
|
||||
|
||||
/**
 * See SubmissionSearch::getSearchDao()
 *
 * @return mixed The DAO registered under the name 'ArticleSearchDAO'.
 */
protected function getSearchDao()
{
    $searchDao = DAORegistry::getDAO('ArticleSearchDAO');

    return $searchDao;
}
|
||||
}
|
||||
|
||||
// Unless strict mode is enabled, also expose the class under its
// un-namespaced global name.
if (!PKP_STRICT_MODE) {
    \class_alias('\APP\search\ArticleSearch', '\ArticleSearch');
}
|
||||
@@ -0,0 +1,127 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/search/ArticleSearchDAO.php
|
||||
*
|
||||
* Copyright (c) 2014-2021 Simon Fraser University
|
||||
* Copyright (c) 2003-2021 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class ArticleSearchDAO
|
||||
*
|
||||
* @ingroup search
|
||||
*
|
||||
* @see ArticleSearch
|
||||
*
|
||||
* @brief DAO class for article search index.
|
||||
*/
|
||||
|
||||
namespace APP\search;
|
||||
|
||||
use APP\journal\Journal;
|
||||
use PKP\search\SubmissionSearchDAO;
|
||||
use PKP\submission\PKPSubmission;
|
||||
|
||||
class ArticleSearchDAO extends SubmissionSearchDAO
|
||||
{
|
||||
/**
 * Retrieve the top results for a phrase.
 *
 * Every keyword of the phrase must occur in the same search object at
 * consecutive positions. A keyword containing '%' is matched with LIKE,
 * any other keyword by equality.
 *
 * @param Journal $journal Restrict results to this journal; empty for all journals
 * @param array $phrase Keywords making up the phrase
 * @param int|null $publishedFrom Optional start date
 * @param int|null $publishedTo Optional end date
 * @param int|null $type Application::ASSOC_TYPE_...
 * @param int $limit Maximum number of rows to return
 *
 * @return array of results (associative arrays) keyed by submission ID, each
 *  with 'count', 'journal_id', 'issuePublicationDate' and 'publicationDate'.
 */
public function getPhraseResults($journal, $phrase, $publishedFrom = null, $publishedTo = null, $type = null, $limit = 500)
{
    if (empty($phrase)) {
        return [];
    }

    // Re-index the phrase so that keyword positions are 0..n-1 regardless of
    // the keys the caller used; the table aliases and the position arithmetic
    // below depend on that.
    $phrase = array_values((array) $phrase);

    $sqlFrom = '';
    $sqlWhere = '';
    $params = [];

    foreach ($phrase as $i => $keyword) {
        if ($i > 0) {
            $sqlFrom .= ', ';
            $sqlWhere .= ' AND ';
        }
        $sqlFrom .= 'submission_search_object_keywords o' . $i . ' NATURAL JOIN submission_search_keyword_list k' . $i;
        if (strstr($keyword, '%') === false) {
            $sqlWhere .= 'k' . $i . '.keyword_text = ?';
        } else {
            $sqlWhere .= 'k' . $i . '.keyword_text LIKE ?';
        }
        if ($i > 0) {
            // Tie keyword $i to the first keyword: same object, $i positions later.
            $sqlWhere .= ' AND o0.object_id = o' . $i . '.object_id AND o0.pos+' . $i . ' = o' . $i . '.pos';
        }

        $params[] = $keyword;
    }

    if (!empty($type)) {
        $sqlWhere .= ' AND (o.type & ?) != 0';
        $params[] = $type;
    }

    // NOTE(review): the date bounds are concatenated into the statement;
    // presumably datetimeToDB() returns a safely quoted literal -- confirm.
    if (!empty($publishedFrom)) {
        $sqlWhere .= ' AND p.date_published >= ' . $this->datetimeToDB($publishedFrom);
    }

    if (!empty($publishedTo)) {
        $sqlWhere .= ' AND p.date_published <= ' . $this->datetimeToDB($publishedTo);
    }

    if (!empty($journal)) {
        $sqlWhere .= ' AND i.journal_id = ?';
        $params[] = $journal->getId();
    }

    // The limit is concatenated into the statement, so force it to an integer
    // to make sure nothing but a number can end up in the SQL.
    $limit = (int) $limit;

    $result = $this->retrieve(
        'SELECT
            o.submission_id,
            MAX(s.context_id) AS journal_id,
            MAX(i.date_published) AS i_pub,
            MAX(p.date_published) AS s_pub,
            COUNT(*) AS count
        FROM
            submissions s
            JOIN publications p ON (p.publication_id = s.current_publication_id)
            JOIN publication_settings ps ON (ps.publication_id = p.publication_id AND ps.setting_name=\'issueId\' AND ps.locale=\'\')
            JOIN issues i ON (CAST(i.issue_id AS CHAR(20)) = ps.setting_value AND i.journal_id = s.context_id)
            JOIN submission_search_objects o ON (s.submission_id = o.submission_id)
            JOIN journals j ON j.journal_id = s.context_id
            LEFT JOIN journal_settings js ON j.journal_id = js.journal_id AND js.setting_name = \'publishingMode\'
            NATURAL JOIN ' . $sqlFrom . '
        WHERE
            (js.setting_value <> \'' . Journal::PUBLISHING_MODE_NONE . '\' OR
            js.setting_value IS NULL) AND j.enabled = 1 AND
            s.status = ' . PKPSubmission::STATUS_PUBLISHED . ' AND
            i.published = 1 AND ' . $sqlWhere . '
        GROUP BY o.submission_id
        ORDER BY count DESC
        LIMIT ' . $limit,
        $params
    );

    $returner = [];
    foreach ($result as $row) {
        $returner[$row->submission_id] = [
            'count' => $row->count,
            'journal_id' => $row->journal_id,
            'issuePublicationDate' => $this->datetimeFromDB($row->i_pub),
            'publicationDate' => $this->datetimeFromDB($row->s_pub)
        ];
    }
    return $returner;
}
|
||||
}
|
||||
|
||||
// Unless strict mode is enabled, also expose the class under its
// un-namespaced global name.
if (!PKP_STRICT_MODE) {
    \class_alias('\APP\search\ArticleSearchDAO', '\ArticleSearchDAO');
}
|
||||
@@ -0,0 +1,392 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/search/ArticleSearchIndex.php
|
||||
*
|
||||
* Copyright (c) 2014-2021 Simon Fraser University
|
||||
* Copyright (c) 2003-2021 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class ArticleSearchIndex
|
||||
*
|
||||
* @ingroup search
|
||||
*
|
||||
* @brief Class to maintain the article search index.
|
||||
*/
|
||||
|
||||
namespace APP\search;
|
||||
|
||||
use APP\facades\Repo;
|
||||
use APP\journal\Journal;
|
||||
use APP\journal\JournalDAO;
|
||||
use APP\submission\Submission;
|
||||
use Exception;
|
||||
use PKP\config\Config;
|
||||
use PKP\core\PKPApplication;
|
||||
use PKP\db\DAORegistry;
|
||||
use PKP\jobs\submissions\UpdateSubmissionSearchJob;
|
||||
use PKP\plugins\Hook;
|
||||
use PKP\search\SearchFileParser;
|
||||
use PKP\search\SubmissionSearch;
|
||||
use PKP\search\SubmissionSearchIndex;
|
||||
use PKP\submissionFile\SubmissionFile;
|
||||
use Throwable;
|
||||
|
||||
class ArticleSearchIndex extends SubmissionSearchIndex
|
||||
{
|
||||
private const MINIMUM_DATA_LENGTH = 80 * 1024;
|
||||
|
||||
/**
 * Re-index the metadata of a submission's current publication.
 *
 * Nothing is indexed here when the 'ArticleSearchIndex::articleMetadataChanged'
 * hook aborts, i.e. when a search plug-in takes over.
 *
 * @copydoc SubmissionSearchIndex::submissionMetadataChanged()
 *
 * @param Submission $submission
 */
public function submissionMetadataChanged($submission)
{
    // Check whether a search plug-in jumps in.
    if (Hook::ABORT === Hook::call('ArticleSearchIndex::articleMetadataChanged', [$submission])) {
        return;
    }

    $publication = $submission->getCurrentPublication();

    // Build author keywords: names, affiliation and biography in all locales.
    $authorText = [];
    foreach ($publication->getData('authors') ?? [] as $author) {
        $authorText = array_merge(
            $authorText,
            array_values((array) $author->getData('givenName')),
            array_values((array) $author->getData('familyName')),
            array_values((array) $author->getData('preferredPublicName')),
            array_values(array_map('strip_tags', (array) $author->getData('affiliation'))),
            array_values(array_map('strip_tags', (array) $author->getData('biography')))
        );
    }

    // Update search index
    $submissionId = $submission->getId();
    $this->_updateTextIndex($submissionId, SubmissionSearch::SUBMISSION_SEARCH_AUTHOR, $authorText);
    $this->_updateTextIndex($submissionId, SubmissionSearch::SUBMISSION_SEARCH_TITLE, $publication->getFullTitles());
    $this->_updateTextIndex($submissionId, SubmissionSearch::SUBMISSION_SEARCH_ABSTRACT, $publication->getData('abstract'));

    // Controlled vocabularies may be unset. The cast has to happen before
    // flattening, because the helper iterates over its argument.
    $this->_updateTextIndex($submissionId, SubmissionSearch::SUBMISSION_SEARCH_SUBJECT, $this->_flattenLocalizedArray((array) $publication->getData('subjects')));
    $this->_updateTextIndex($submissionId, SubmissionSearch::SUBMISSION_SEARCH_KEYWORD, $this->_flattenLocalizedArray((array) $publication->getData('keywords')));
    $this->_updateTextIndex($submissionId, SubmissionSearch::SUBMISSION_SEARCH_DISCIPLINE, $this->_flattenLocalizedArray((array) $publication->getData('disciplines')));
    $this->_updateTextIndex($submissionId, SubmissionSearch::SUBMISSION_SEARCH_TYPE, (array) $publication->getData('type'));
    $this->_updateTextIndex($submissionId, SubmissionSearch::SUBMISSION_SEARCH_COVERAGE, (array) $publication->getData('coverage'));
    // FIXME Index sponsors too?
}
|
||||
|
||||
/**
 * Deprecated alias of submissionMetadataChanged().
 *
 * Raises a notice when the 'deprecation_warnings' debug setting is on, then
 * delegates.
 *
 * @copydoc SubmissionSearchIndex::submissionMetadataChanged()
 */
public function articleMetadataChanged($article)
{
    $warnOnDeprecation = Config::getVar('debug', 'deprecation_warnings');
    if ($warnOnDeprecation) {
        trigger_error('Deprecated call to articleMetadataChanged. Use submissionMetadataChanged instead.');
    }

    $this->submissionMetadataChanged($article);
}
|
||||
|
||||
/**
 * Delete keywords from the search index.
 *
 * @param int $articleId
 * @param int $type optional
 * @param int $assocId optional
 *
 * @return mixed Whatever ArticleSearchDAO::deleteSubmissionKeywords() returns.
 */
public function deleteTextIndex($articleId, $type = null, $assocId = null)
{
    /** @var ArticleSearchDAO $articleSearchDao */
    $articleSearchDao = DAORegistry::getDAO('ArticleSearchDAO');
    $deleted = $articleSearchDao->deleteSubmissionKeywords($articleId, $type, $assocId);

    return $deleted;
}
|
||||
|
||||
/**
 * Signal to the indexing back-end that an article file changed.
 *
 * Unless a plug-in aborts the 'ArticleSearchIndex::submissionFileChanged'
 * hook, the file is parsed and its text is indexed in buffered portions.
 *
 * @param int $articleId
 * @param int $type
 * @param SubmissionFile $submissionFile
 *
 * @throws Exception When parsing or indexing the file fails; the original
 *  error is attached as the previous exception.
 */
public function submissionFileChanged($articleId, $type, $submissionFile)
{
    // Check whether a search plug-in jumps in.
    $hookResult = Hook::call('ArticleSearchIndex::submissionFileChanged', [$articleId, $type, $submissionFile->getId()]);
    if (Hook::ABORT === $hookResult) {
        return;
    }

    // If no search plug-in is activated then fall back to the default database search implementation.
    $parser = SearchFileParser::fromFile($submissionFile);
    if (!$parser) {
        error_log("Skipped indexation: No suitable parser for the submission file \"{$submissionFile->getData('path')}\"");
        return;
    }

    try {
        $parser->open();
        try {
            /** @var ArticleSearchDAO $articleSearchDao */
            $articleSearchDao = DAORegistry::getDAO('ArticleSearchDAO');
            $objectId = $articleSearchDao->insertObject($articleId, $type, $submissionFile->getId());
            do {
                // Accumulate chunks until the parser is exhausted or the
                // buffer reaches the minimum size worth indexing.
                $buffer = '';
                while (($chunk = $parser->read()) !== false) {
                    $buffer .= $chunk;
                    if (strlen($buffer) >= static::MINIMUM_DATA_LENGTH) {
                        break;
                    }
                }
                if ($buffer !== '') {
                    $this->_indexObjectKeywords($objectId, $buffer);
                }
                // A false chunk means the parser has nothing left to read.
            } while ($chunk !== false);
        } finally {
            // The parser is closed whether or not indexing succeeded.
            $parser->close();
        }
    } catch (Throwable $e) {
        throw new Exception("Indexation failed for the file: \"{$submissionFile->getData('path')}\"", 0, $e);
    }
}
|
||||
|
||||
/**
 * Remove the indexed galley file contents of a submission.
 *
 * @param Submission $submission
 */
public function clearSubmissionFiles($submission)
{
    /** @var ArticleSearchDAO $articleSearchDao */
    $articleSearchDao = DAORegistry::getDAO('ArticleSearchDAO');
    $submissionId = $submission->getId();
    $articleSearchDao->deleteSubmissionKeywords($submissionId, SubmissionSearch::SUBMISSION_SEARCH_GALLEY_FILE);
}
|
||||
|
||||
/**
 * Signal to the indexing back-end that all files (supplementary
 * and galley) assigned to an article changed and must be re-indexed.
 *
 * Proof-stage files are indexed as galley files and their dependent files as
 * supplementary files. A failure on one file does not stop the others; all
 * failures are reported together at the end.
 *
 * @param Submission $article
 *
 * @throws Exception When at least one file could not be indexed.
 */
public function submissionFilesChanged($article)
{
    // If a search plug-in is activated then skip the default database search implementation.
    if (Hook::ABORT === Hook::call('ArticleSearchIndex::submissionFilesChanged', [$article])) {
        return;
    }

    $submissionId = $article->getId();
    $proofFiles = Repo::submissionFile()
        ->getCollector()
        ->filterBySubmissionIds([$submissionId])
        ->filterByFileStages([SubmissionFile::SUBMISSION_FILE_PROOF])
        ->getMany();

    $failures = [];
    foreach ($proofFiles as $proofFile) {
        try {
            $this->submissionFileChanged($submissionId, SubmissionSearch::SUBMISSION_SEARCH_GALLEY_FILE, $proofFile);
        } catch (Throwable $e) {
            $failures[] = $e;
        }

        // Files that depend on this proof file.
        $dependentFiles = Repo::submissionFile()
            ->getCollector()
            ->filterByAssoc(PKPApplication::ASSOC_TYPE_SUBMISSION_FILE, [$proofFile->getId()])
            ->filterBySubmissionIds([$submissionId])
            ->filterByFileStages([SubmissionFile::SUBMISSION_FILE_DEPENDENT])
            ->includeDependentFiles()
            ->getMany();

        foreach ($dependentFiles as $dependentFile) {
            try {
                $this->submissionFileChanged($submissionId, SubmissionSearch::SUBMISSION_SEARCH_SUPPLEMENTARY_FILE, $dependentFile);
            } catch (Throwable $e) {
                $failures[] = $e;
            }
        }
    }

    if ($failures !== []) {
        // implode() converts every collected Throwable to its string form.
        $errorMessage = implode("\n\n", $failures);
        throw new Exception("The following errors happened while indexing the submission ID {$article->getId()}:\n{$errorMessage}");
    }
}
|
||||
|
||||
/**
 * Signal to the indexing back-end that a file was deleted.
 *
 * Unless a plug-in aborts the 'ArticleSearchIndex::submissionFileDeleted'
 * hook, the matching keywords are removed from the database index.
 *
 * @param int $articleId
 * @param int $type optional
 * @param int $assocId optional
 *
 * @return mixed Nothing when a plug-in aborted the hook, otherwise whatever
 *  ArticleSearchDAO::deleteSubmissionKeywords() returns.
 */
public function submissionFileDeleted($articleId, $type = null, $assocId = null)
{
    // If a search plug-in is activated then skip the default database search implementation.
    $hookResult = Hook::call('ArticleSearchIndex::submissionFileDeleted', [$articleId, $type, $assocId]);
    if (Hook::ABORT === $hookResult) {
        return;
    }

    /** @var ArticleSearchDAO $articleSearchDao */
    $articleSearchDao = DAORegistry::getDAO('ArticleSearchDAO');

    return $articleSearchDao->deleteSubmissionKeywords($articleId, $type, $assocId);
}
|
||||
|
||||
/**
 * Signal to the indexing back-end that an article was deleted.
 *
 * Only the 'ArticleSearchIndex::articleDeleted' hook is triggered; the
 * default indexing back-end does nothing itself.
 *
 * @param int $articleId
 */
public function articleDeleted($articleId)
{
    // Trigger a hook to let the indexing back-end know that
    // an article was deleted.
    Hook::call('ArticleSearchIndex::articleDeleted', [$articleId]);

    // The default indexing back-end does nothing when an
    // article is deleted (FIXME?).
}
|
||||
|
||||
/**
 * Announce that a batch of submission changes is complete.
 *
 * @copydoc SubmissionSearchIndex::submissionChangesFinished()
 */
public function submissionChangesFinished()
{
    // Trigger a hook to let the indexing back-end know that
    // the index may be updated.
    Hook::call('ArticleSearchIndex::articleChangesFinished');

    // The default indexing back-end works completely synchronously
    // and will therefore not do anything here.
}
|
||||
|
||||
/**
 * Deprecated alias of submissionChangesFinished().
 *
 * Raises a notice when the 'deprecation_warnings' debug setting is on, then
 * delegates.
 *
 * @copydoc SubmissionSearchIndex::submissionChangesFinished()
 */
public function articleChangesFinished()
{
    $warnOnDeprecation = Config::getVar('debug', 'deprecation_warnings');
    if ($warnOnDeprecation) {
        trigger_error('Deprecated call to articleChangesFinished. Use submissionChangesFinished instead.');
    }

    $this->submissionChangesFinished();
}
|
||||
|
||||
/**
 * Rebuild the search index for one or all journals.
 *
 * Unless a plug-in aborts the 'ArticleSearchIndex::rebuildIndex' hook, the
 * whole index is cleared and an UpdateSubmissionSearchJob is dispatched for
 * every submission of every journal.
 *
 * @param bool $log Whether to display status information
 *  to stdout.
 * @param Journal $journal If given the user wishes to
 *  re-index only one journal. Not all search implementations
 *  may be able to do so. Most notably: The default SQL
 *  implementation does not support journal-specific re-indexing
 *  as index data is not partitioned by journal.
 * @param array $switches Optional index administration switches.
 */
public function rebuildIndex($log = false, $journal = null, $switches = [])
{
    // If a search plug-in is activated then skip the default database search implementation.
    if (Hook::ABORT === Hook::call('ArticleSearchIndex::rebuildIndex', [$log, $journal, $switches])) {
        return;
    }

    // Check that no journal was given as we do
    // not support journal-specific re-indexing.
    // The check uses the imported class rather than the global name 'Journal':
    // that name is only an alias which need not exist in strict mode, in which
    // case the guard would be skipped and the whole index cleared.
    if ($journal instanceof Journal) {
        exit(__('search.cli.rebuildIndex.indexingByJournalNotSupported') . "\n");
    }

    // Clear index
    if ($log) {
        echo __('search.cli.rebuildIndex.clearingIndex') . ' ... ';
    }
    $searchDao = DAORegistry::getDAO('ArticleSearchDAO'); /** @var ArticleSearchDAO $searchDao */
    $searchDao->clearIndex();
    if ($log) {
        echo __('search.cli.rebuildIndex.done') . "\n";
    }

    // Build index
    $journalDao = DAORegistry::getDAO('JournalDAO'); /** @var JournalDAO $journalDao */

    $journals = $journalDao->getAll()->toIterator();
    foreach ($journals as $currentJournal) {
        $numIndexed = 0;

        if ($log) {
            echo __('search.cli.rebuildIndex.indexing', ['journalName' => $currentJournal->getLocalizedName()]) . ' ... ';
        }

        $submissions = Repo::submission()
            ->getCollector()
            ->filterByContextIds([$currentJournal->getId()])
            ->getMany();

        // Indexing itself is handed to a job per submission; the counter
        // reports how many jobs were dispatched.
        foreach ($submissions as $submission) {
            dispatch(new UpdateSubmissionSearchJob($submission->getId()));
            ++$numIndexed;
        }

        if ($log) {
            echo __('search.cli.rebuildIndex.result', ['numIndexed' => $numIndexed]) . "\n";
        }
    }
}
|
||||
|
||||
//
|
||||
// Private helper methods
|
||||
//
|
||||
/**
 * Index a block of text for an object.
 *
 * The text is reduced to keywords with filterKeywords() before it is stored.
 *
 * @param int $objectId
 * @param string|array $text
 */
protected function _indexObjectKeywords($objectId, $text)
{
    /** @var ArticleSearchDAO $articleSearchDao */
    $articleSearchDao = DAORegistry::getDAO('ArticleSearchDAO');
    $articleSearchDao->insertObjectKeywords($objectId, $this->filterKeywords($text));
}
|
||||
|
||||
/**
 * Add a block of text to the search index.
 *
 * A search object is inserted for the article/type/assoc combination and the
 * text is indexed under the object ID that insertion returns.
 *
 * @param int $articleId
 * @param int $type
 * @param string|array $text
 * @param int $assocId optional
 */
protected function _updateTextIndex($articleId, $type, $text, $assocId = null)
{
    /** @var ArticleSearchDAO $articleSearchDao */
    $articleSearchDao = DAORegistry::getDAO('ArticleSearchDAO');
    $searchObjectId = $articleSearchDao->insertObject($articleId, $type, $assocId);
    $this->_indexObjectKeywords($searchObjectId, $text);
}
|
||||
|
||||
/**
 * Flattens array of localized fields to a single, non-associative array of items
 *
 * Missing data is tolerated: a null or empty argument yields an empty array
 * and a locale entry that is not an array is treated as a list of its own
 * (null contributing nothing).
 *
 * @param array|null $arrayWithLocales Array of localized fields
 *
 * @return array
 */
protected function _flattenLocalizedArray($arrayWithLocales)
{
    $flattenedArray = [];
    // The casts keep foreach and array_merge from failing on null values,
    // which is what unset metadata looks like.
    foreach ((array) $arrayWithLocales as $localeArray) {
        $flattenedArray = array_merge($flattenedArray, (array) $localeArray);
    }
    return $flattenedArray;
}
|
||||
}
|
||||
|
||||
// Unless strict mode is enabled, also expose the class under its
// un-namespaced global name.
if (!PKP_STRICT_MODE) {
    \class_alias('\APP\search\ArticleSearchIndex', '\ArticleSearchIndex');
}
|
||||
Reference in New Issue
Block a user