434 lines
15 KiB
PHP
434 lines
15 KiB
PHP
<?php
|
|
|
|
/**
 * @file classes/search/SubmissionSearch.php
 *
 * Copyright (c) 2014-2021 Simon Fraser University
 * Copyright (c) 2003-2021 John Willinsky
 * Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
 *
 * @class SubmissionSearch
 *
 * @ingroup search
 *
 * @see SubmissionSearchDAO
 *
 * @brief Class for retrieving search results.
 *
 * FIXME: NEAR; precedence w/o parens?; stemming; weighted counting
 */
|
|
|
|
namespace PKP\search;
|
|
|
|
use APP\core\Application;
|
|
use APP\core\Request;
|
|
use PKP\config\Config;
|
|
use PKP\context\Context;
|
|
use PKP\core\PKPString;
|
|
use PKP\core\VirtualArrayIterator;
|
|
use PKP\db\DAO;
|
|
use PKP\plugins\Hook;
|
|
use PKP\user\User;
|
|
|
|
abstract class SubmissionSearch
{
    // Search types.
    // NOTE(review): SUBMISSION_SEARCH_INDEX_TERMS (120) equals
    // DISCIPLINE + SUBJECT + TYPE + COVERAGE (8 + 16 + 32 + 64), whereas
    // SUBMISSION_SEARCH_KEYWORD (17) does not follow the power-of-two
    // pattern of the other values -- confirm this is intended.
    public const SUBMISSION_SEARCH_AUTHOR = 1;
    public const SUBMISSION_SEARCH_TITLE = 2;
    public const SUBMISSION_SEARCH_ABSTRACT = 4;
    public const SUBMISSION_SEARCH_DISCIPLINE = 8;
    public const SUBMISSION_SEARCH_SUBJECT = 16;
    public const SUBMISSION_SEARCH_KEYWORD = 17;
    public const SUBMISSION_SEARCH_TYPE = 32;
    public const SUBMISSION_SEARCH_COVERAGE = 64;
    public const SUBMISSION_SEARCH_GALLEY_FILE = 128;
    public const SUBMISSION_SEARCH_SUPPLEMENTARY_FILE = 256;
    public const SUBMISSION_SEARCH_INDEX_TERMS = 120;

    // Page size used by retrieveResults() when no valid range is supplied.
    public const SUBMISSION_SEARCH_DEFAULT_RESULT_LIMIT = 20;

    /**
     * Constructor
     */
    public function __construct()
    {
    }

    /**
     * Parses a search query string.
     * Supports +/-, AND/OR, parens
     *
     * @param string $query
     *
     * @return array of the form ('+' => <required>, '' => <optional>, '-' => excluded)
     */
    public function _parseQuery($query)
    {
        // Capture group 1 is an optional leading sign (+ or -); capture
        // group 2 is a quoted phrase, a single parenthesis or a bare word.
        $count = PKPString::regexp_match_all('/(\+|\-|)("[^"]+"|\(|\)|[^\s\)]+)/', $query, $matches);
        $pos = 0;
        return $this->_parseQueryInternal($matches[1], $matches[2], $pos, $count);
    }

    /**
     * Query parsing helper routine.
     * Returned structure is based on that used by the Search::QueryParser Perl module.
     *
     * @param array $signTokens Sign ('+', '-' or '') captured for each token
     * @param array $tokens Tokens (words, quoted phrases, parentheses)
     * @param int $pos Current index into $tokens; passed by reference and
     *  advanced as tokens are consumed (also by nested calls)
     * @param int $total Number of tokens
     *
     * @return array of the form ('+' => <required>, '' => <optional>, '-' => <excluded>).
     *  Each entry is either the result of filterKeywords() for a token or,
     *  for a parenthesised group, a nested array of this same form.
     */
    public function _parseQueryInternal($signTokens, $tokens, &$pos, $total)
    {
        $return = ['+' => [], '' => [], '-' => []];
        $postBool = $preBool = '';

        // No sign is pending before the first token; an empty sign is
        // turned into '+' (required) at the top of the loop.
        $sign = '';

        $submissionSearchIndex = Application::getSubmissionSearchIndex();

        // Operators are localised, so compare against the lower-cased translations.
        $notOperator = PKPString::strtolower(__('search.operator.not'));
        $andOperator = PKPString::strtolower(__('search.operator.and'));
        $orOperator = PKPString::strtolower(__('search.operator.or'));

        while ($pos < $total) {
            // An explicit +/- on the token wins; otherwise keep a pending
            // sign (e.g. '-' set by a preceding NOT) or default to required.
            if (!empty($signTokens[$pos])) {
                $sign = $signTokens[$pos];
            } elseif (empty($sign)) {
                $sign = '+';
            }

            $token = PKPString::strtolower($tokens[$pos++]);
            switch ($token) {
                case $notOperator:
                    // Applies to the next term.
                    $sign = '-';
                    break;
                case ')':
                    // End of the current group: hand the collected terms back to the caller.
                    return $return;
                case '(':
                    // Parse the group recursively, then treat its result like a single term.
                    $token = $this->_parseQueryInternal($signTokens, $tokens, $pos, $total);
                    // no break
                default:
                    // Look ahead for a boolean operator following this term and consume it.
                    $postBool = '';
                    if ($pos < $total) {
                        $peek = PKPString::strtolower($tokens[$pos]);
                        if ($peek == $orOperator) {
                            $postBool = 'or';
                            $pos++;
                        } elseif ($peek == $andOperator) {
                            $postBool = 'and';
                            $pos++;
                        }
                    }

                    // The operator after the term takes precedence over the one before it.
                    $bool = empty($postBool) ? $preBool : $postBool;
                    $preBool = $postBool;

                    // Terms joined by OR are optional, whatever sign they carried.
                    if ($bool == 'or') {
                        $sign = '';
                    }

                    if (is_array($token)) {
                        $k = $token;
                    } else {
                        $k = $submissionSearchIndex->filterKeywords($token, true);
                    }
                    if (!empty($k)) {
                        $return[$sign][] = $k;
                    }

                    // The sign has been used up; the next term starts fresh.
                    $sign = '';
                    break;
            }
        }

        return $return;
    }

    /**
     * Takes an unordered list of search result data, flattens it, orders it
     * and excludes unwanted results.
     *
     * @param Context $context The context to search
     * @param array $keywords Parsed queries (see _parseQuery()) indexed by search type
     * @param string $publishedFrom Search-from date
     * @param string $publishedTo Search-to date
     *
     * @return array An ordered and flattened list of article IDs.
     */
    public function _getMergedArray($context, &$keywords, $publishedFrom, $publishedTo)
    {
        $resultsPerKeyword = Config::getVar('search', 'results_per_keyword', 100);

        // Regroup the per-type queries by sign so that all of them can be
        // merged in a single pass by _getMergedKeywordResults().
        $mergedKeywords = ['+' => [], '' => [], '-' => []];
        foreach ($keywords as $type => $keyword) {
            if (!empty($keyword['+'])) {
                $mergedKeywords['+'][] = ['type' => $type, '+' => $keyword['+'], '' => [], '-' => []];
            }
            if (!empty($keyword[''])) {
                $mergedKeywords[''][] = ['type' => $type, '+' => [], '' => $keyword[''], '-' => []];
            }
            if (!empty($keyword['-'])) {
                // Excluded phrases go into the optional slot of the nested
                // entry: a submission matching any of them is removed by
                // the outer '-' handling.
                $mergedKeywords['-'][] = ['type' => $type, '+' => [], '' => $keyword['-'], '-' => []];
            }
        }

        return $this->_getMergedKeywordResults($context, $mergedKeywords, null, $publishedFrom, $publishedTo, $resultsPerKeyword);
    }

    /**
     * Recursive helper for _getMergedArray.
     *
     * Combines the results of the required ('+'), optional ('') and
     * excluded ('-') phrases of one keyword structure: required phrases
     * are intersected, optional phrases add to the match count (and add
     * new submissions only when there are no required phrases), and
     * excluded phrases remove submissions.
     *
     * @param Context $context The context to search
     * @param array $keyword Structure with '+', '' and '-' entries and an optional 'type' entry
     * @param int|null $type Search type, used unless $keyword['type'] is set
     * @param string $publishedFrom Search-from date
     * @param string $publishedTo Search-to date
     * @param int $resultsPerKeyword Passed through to the search DAO
     *
     * @return array Result data indexed by submission ID; each entry holds at least a 'count'
     */
    public function _getMergedKeywordResults($context, &$keyword, $type, $publishedFrom, $publishedTo, $resultsPerKeyword)
    {
        $mergedResults = null;

        if (isset($keyword['type'])) {
            $type = $keyword['type'];
        }

        // Required phrases: keep only submissions matched by every phrase.
        foreach ($keyword['+'] as $phrase) {
            $results = $this->_getMergedPhraseResults($context, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword);
            if ($mergedResults === null) {
                $mergedResults = $results;
            } else {
                foreach ($mergedResults as $submissionId => $data) {
                    if (isset($results[$submissionId])) {
                        $mergedResults[$submissionId]['count'] += $results[$submissionId]['count'];
                    } else {
                        unset($mergedResults[$submissionId]);
                    }
                }
            }

            // An empty intersection can never grow again, so do not spend
            // further lookups on the remaining required phrases.
            if ($mergedResults === []) {
                break;
            }
        }

        // Normalise "no required phrases" (null) or an empty result to an empty array.
        if ($mergedResults == null) {
            $mergedResults = [];
        }

        // Skip the rest when required phrases were given but matched nothing.
        if (!empty($mergedResults) || empty($keyword['+'])) {
            // Optional phrases.
            foreach ($keyword[''] as $phrase) {
                $results = $this->_getMergedPhraseResults($context, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword);
                foreach ($results as $submissionId => $data) {
                    if (isset($mergedResults[$submissionId])) {
                        $mergedResults[$submissionId]['count'] += $data['count'];
                    } elseif (empty($keyword['+'])) {
                        $mergedResults[$submissionId] = $data;
                    }
                }
            }

            // Excluded phrases.
            foreach ($keyword['-'] as $phrase) {
                // Nothing left to exclude: no need to look up further phrases.
                if (empty($mergedResults)) {
                    break;
                }
                $results = $this->_getMergedPhraseResults($context, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword);
                foreach (array_keys($results) as $submissionId) {
                    unset($mergedResults[$submissionId]);
                }
            }
        }

        return $mergedResults;
    }

    /**
     * Recursive helper for _getMergedArray.
     *
     * Resolves a single phrase: a nested keyword structure (recognised by
     * its '+' entry) is merged recursively, anything else is looked up
     * through the search DAO.
     *
     * @param Context $context The context to search
     * @param array $phrase Phrase or nested keyword structure
     * @param int|null $type Search type
     * @param string $publishedFrom Search-from date
     * @param string $publishedTo Search-to date
     * @param int $resultsPerKeyword Passed through to the search DAO
     *
     * @return array Result data indexed by submission ID
     */
    protected function _getMergedPhraseResults($context, &$phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword)
    {
        if (isset($phrase['+'])) {
            return $this->_getMergedKeywordResults($context, $phrase, $type, $publishedFrom, $publishedTo, $resultsPerKeyword);
        }

        return $this->getSearchDao()->getPhraseResults(
            $context,
            $phrase,
            $publishedFrom,
            $publishedTo,
            $type,
            $resultsPerKeyword
        );
    }

    /**
     * Return an array of search results matching the supplied
     * keyword IDs in decreasing order of match quality.
     * Keywords are supplied in an array of the following format:
     *  $keywords[SUBMISSION_SEARCH_AUTHOR] = array('John', 'Doe');
     *  $keywords[SUBMISSION_SEARCH_...] = array(...);
     *  $keywords[null] = array('Matches', 'All', 'Fields');
     *
     * @param Request $request
     * @param Context $context The context to search
     * @param array $keywords List of keywords
     * @param string $error a reference to a variable that will
     *  contain an error message if the search service produces
     *  an error.
     * @param string $publishedFrom Search-from date
     * @param string $publishedTo Search-to date
     * @param ?\PKP\db\DBResultRange $rangeInfo Information on the range of results to return
     * @param array $exclude An array of article IDs to exclude from the result.
     *
     * @return VirtualArrayIterator An iterator with one entry per retrieved
     *  article containing the article, published submission, issue, context, etc.
     */
    public function retrieveResults($request, $context, $keywords, &$error, $publishedFrom = null, $publishedTo = null, $rangeInfo = null, $exclude = [])
    {
        // Pagination
        if ($rangeInfo && $rangeInfo->isValid()) {
            $page = $rangeInfo->getPage();
            $itemsPerPage = $rangeInfo->getCount();
        } else {
            $page = 1;
            $itemsPerPage = self::SUBMISSION_SEARCH_DEFAULT_RESULT_LIMIT;
        }

        // Result set ordering.
        [$orderBy, $orderDir] = $this->getResultSetOrdering($request);

        // Check whether a search plug-in jumps in to provide ranked search results.
        $totalResults = null;
        $results = null;
        $hookResult = Hook::call(
            'SubmissionSearch::retrieveResults',
            [&$context, &$keywords, $publishedFrom, $publishedTo, $orderBy, $orderDir, $exclude, $page, $itemsPerPage, &$totalResults, &$error, &$results]
        );

        // If no search plug-in is activated then fall back to the
        // default database search implementation.
        if ($hookResult === false) {
            // Parse the query.
            foreach ($keywords as $searchType => $query) {
                $keywords[$searchType] = $this->_parseQuery($query);
            }

            // Fetch all the results from all the keywords into one array
            // (mergedResults), where mergedResults[submission_id]
            // = sum of all the occurrences for all keywords associated with
            // that article ID.
            $mergedResults = $this->_getMergedArray($context, $keywords, $publishedFrom, $publishedTo);

            // Convert mergedResults into an array (frequencyIndicator =>
            // $submissionId).
            // The frequencyIndicator is a synthetically-generated number,
            // where higher is better, indicating the quality of the match.
            // It is generated here in such a manner that matches with
            // identical frequency do not collide.
            $results = $this->getSparseArray($mergedResults, $orderBy, $orderDir, $exclude);
            $totalResults = count($results);

            // Use only the results for the specified page. array_slice()
            // yields an empty array when the offset lies past the end and
            // a shorter one for a partial last page, so no explicit length
            // arithmetic is needed.
            $offset = $itemsPerPage * ($page - 1);
            $results = array_slice($results, $offset, $itemsPerPage);
        }

        // Take the range of results and retrieve the Article, Journal,
        // and associated objects.
        $results = $this->formatResults($results, $request->getUser());

        // Return the appropriate iterator.
        return new VirtualArrayIterator($results, $totalResults, $page, $itemsPerPage);
    }

    /**
     * Return the available options for the result
     * set ordering direction.
     *
     * @return array Localised labels indexed by direction ('asc', 'desc')
     */
    public function getResultSetOrderingDirectionOptions()
    {
        return [
            'asc' => __('search.results.orderDir.asc'),
            'desc' => __('search.results.orderDir.desc')
        ];
    }

    /**
     * Return the currently selected result
     * set ordering option (default: descending relevance).
     *
     * @param Request $request
     *
     * @return array An array with the order field as the
     *  first entry and the order direction as the second
     *  entry.
     */
    public function getResultSetOrdering($request)
    {
        // Order field: fall back to 'score' unless the request names one of the offered options.
        $orderBy = $request->getUserVar('orderBy');
        $orderByOptions = $this->getResultSetOrderingOptions($request);
        if (is_null($orderBy) || !in_array($orderBy, array_keys($orderByOptions))) {
            $orderBy = 'score';
        }

        // Ordering direction: fall back to the default for the chosen field.
        $orderDir = $request->getUserVar('orderDir');
        $orderDirOptions = $this->getResultSetOrderingDirectionOptions();
        if (is_null($orderDir) || !in_array($orderDir, array_keys($orderDirOptions))) {
            $orderDir = $this->getDefaultOrderDir($orderBy);
        }

        return [$orderBy, $orderDir];
    }

    //
    // Methods to be implemented by subclasses.
    //
    /**
     * See implementation of retrieveResults for a description of this
     * function.
     *
     * Note that this function is also called externally to fetch
     * results for the title index, and possibly elsewhere.
     *
     * @param array $results
     * @param User $user optional (if availability information is desired)
     *
     * @return array
     */
    abstract public function formatResults($results, $user = null);

    /**
     * Return the available options for result set ordering.
     *
     * @param Request $request
     *
     * @return array Options indexed by order field name (the keys are what
     *  getResultSetOrdering() validates the request against)
     */
    abstract public function getResultSetOrderingOptions($request);

    /**
     * See implementation of retrieveResults for a description of this
     * function.
     *
     * @param array $unorderedResults Merged results as returned by _getMergedArray()
     * @param string $orderBy Order field
     * @param string $orderDir Order direction
     * @param array $exclude Submission IDs to exclude
     *
     * @return array
     */
    abstract protected function getSparseArray($unorderedResults, $orderBy, $orderDir, $exclude);

    /**
     * Return the default order direction.
     *
     * @param string $orderBy
     *
     * @return string
     */
    abstract protected function getDefaultOrderDir($orderBy);

    /**
     * Return the search DAO
     *
     * @return DAO
     */
    abstract protected function getSearchDao();
}
|
|
|
|
// Backwards compatibility: unless strict mode is enabled, make the class
// available under its un-namespaced name and re-publish each class
// constant as a global constant of the same name.
if (!PKP_STRICT_MODE) {
    class_alias('\PKP\search\SubmissionSearch', '\SubmissionSearch');

    foreach (
        [
            // Search types
            'SUBMISSION_SEARCH_AUTHOR', 'SUBMISSION_SEARCH_TITLE', 'SUBMISSION_SEARCH_ABSTRACT',
            'SUBMISSION_SEARCH_DISCIPLINE', 'SUBMISSION_SEARCH_SUBJECT', 'SUBMISSION_SEARCH_KEYWORD',
            'SUBMISSION_SEARCH_TYPE', 'SUBMISSION_SEARCH_COVERAGE', 'SUBMISSION_SEARCH_GALLEY_FILE',
            'SUBMISSION_SEARCH_SUPPLEMENTARY_FILE', 'SUBMISSION_SEARCH_INDEX_TERMS',
            // Default page size
            'SUBMISSION_SEARCH_DEFAULT_RESULT_LIMIT',
        ] as $constantName
    ) {
        // Resolved through the alias registered above.
        define($constantName, constant('\SubmissionSearch::' . $constantName));
    }
}
|