180 lines
6.1 KiB
PHP
180 lines
6.1 KiB
PHP
<?php
|
|
|
|
/**
 * @file classes/search/SubmissionSearchDAO.php
 *
 * Copyright (c) 2014-2021 Simon Fraser University
 * Copyright (c) 2003-2021 John Willinsky
 * Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
 *
 * @class SubmissionSearchDAO
 *
 * @ingroup search
 *
 * @see SubmissionSearch
 *
 * @brief DAO class for submission search index.
 */
|
|
|
|
namespace PKP\search;
|
|
|
|
use Illuminate\Database\Query\Builder;
|
|
use Illuminate\Support\Collection;
|
|
use Illuminate\Support\Facades\DB;
|
|
use PKP\core\PKPString;
|
|
|
|
class SubmissionSearchDAO extends \PKP\db\DAO
{
    /**
     * Delete the search objects indexed for a submission, optionally narrowed by type and/or associated ID.
     *
     * NOTE(review): only "submission_search_objects" is touched here; the keyword occurrences are
     * presumably removed through a cascading foreign key — confirm against the schema.
     *
     * @param int $submissionId
     * @param int $type optional, when null every object type is deleted
     * @param int $assocId optional, when null the deletion is not restricted by associated ID
     */
    public function deleteSubmissionKeywords($submissionId, $type = null, $assocId = null)
    {
        DB::table('submission_search_objects')
            ->where('submission_id', '=', $submissionId)
            ->when($type !== null, fn (Builder $query) => $query->where('type', '=', $type))
            ->when($assocId !== null, fn (Builder $query) => $query->where('assoc_id', '=', $assocId))
            ->delete();
    }

    /**
     * Add a submission object to the index (if it already exists, its indexed keywords are cleared).
     *
     * @param int $submissionId
     * @param int $type
     * @param ?int $assocId When null, the lookup for an existing object is not restricted by associated ID
     *
     * @return int the object ID
     */
    public function insertObject($submissionId, $type, $assocId)
    {
        $objectId = DB::table('submission_search_objects')
            ->where('submission_id', '=', $submissionId)
            ->where('type', '=', $type)
            ->when($assocId !== null, fn (Builder $query) => $query->where('assoc_id', '=', $assocId))
            ->value('object_id');

        if ($objectId) {
            // The object is already indexed: clear the old keywords and reuse its ID
            DB::table('submission_search_object_keywords')
                ->where('object_id', '=', $objectId)
                ->delete();

            return $objectId;
        }

        return DB::table('submission_search_objects')->insertGetId([
            'submission_id' => $submissionId,
            'type' => $type,
            'assoc_id' => $assocId,
        ], 'object_id');
    }

    /**
     * Index the occurrences of a list of keywords in an object.
     *
     * Keywords longer than SubmissionSearchIndex::SEARCH_KEYWORD_MAX_LENGTH are discarded.
     * Unknown keywords are added to the keyword list, then one row per keyword occurrence is
     * appended to the object, continuing from the object's current highest position.
     *
     * @param int $objectId ID of the search object receiving the keywords
     * @param string[] $keywords Keywords in the order they should be positioned
     */
    public function insertObjectKeywords(int $objectId, array $keywords): void
    {
        /** @var array<string,?int> Keyword => keyword ID cache, kept across calls */
        static $keywordMap = [];

        // Discard long keywords
        $keywords = collect($keywords)
            ->filter(fn (string $keyword) => PKPString::strlen($keyword) <= SubmissionSearchIndex::SEARCH_KEYWORD_MAX_LENGTH);

        // Quit if there are no keywords left
        if ($keywords->isEmpty()) {
            return;
        }

        $keywords
            // Skip keywords that were already mapped
            ->diff(array_keys($keywordMap))
            // Process at most 1000 keywords per query
            ->chunk(1000)
            // each() is used instead of map() as the callback exists only for its side effects
            ->each(function (Collection $chunk) use (&$keywordMap): void {
                $missingKeywords = collect();
                // Update the map with the existing IDs. Due to the database collation, very similar keywords might end up with the same ID
                foreach ($this->getKeywordIdMap($chunk) as $keyword => $id) {
                    if ($id) {
                        $keywordMap[$keyword] = $id;
                    } else {
                        // PHP converts numeric array keys (e.g. "2021") to integers, cast them back
                        // to ensure the keyword is inserted and bound as text
                        $missingKeywords->push((string) $keyword);
                    }
                }

                // Every keyword of this chunk already exists, there's nothing to insert
                if ($missingKeywords->isEmpty()) {
                    return;
                }

                // Batch insert keywords that don't exist using the "ignore" feature to deal with collation issues (e.g. attempt to insert "a" and "ã" at the same time might fail)
                // This isn't executed first just to avoid "burning" IDs due to existing keywords
                DB::table('submission_search_keyword_list')->insertOrIgnore(
                    $missingKeywords
                        ->map(fn (string $keyword) => ['keyword_text' => $keyword])
                        ->toArray()
                );

                // Grab the map with the new IDs
                foreach ($this->getKeywordIdMap($missingKeywords) as $keyword => $id) {
                    $keywordMap[$keyword] = $id;
                }
            });

        // Get the current position (-1 when the object has no keywords yet, so the first one gets position 0)
        $position = DB::table('submission_search_object_keywords')
            ->where('object_id', $objectId)
            ->max('pos') ?? -1;

        $keywords
            // Skip missed keywords (probably not needed, present for correctness)
            ->filter(fn (string $keyword) => isset($keywordMap[$keyword]))
            // Convert to batch insert format
            ->map(function (string $keyword) use (&$position, $objectId, $keywordMap) {
                return [
                    'object_id' => $objectId,
                    'keyword_id' => $keywordMap[$keyword],
                    'pos' => ++$position,
                ];
            })
            // Chunk by 1000
            ->chunk(1000)
            // Batch insert, the closure returns nothing in order to never interrupt the iteration
            ->each(function (Collection $rows): void {
                DB::table('submission_search_object_keywords')->insert($rows->toArray());
            });
    }

    /**
     * Clear the search index (removes every search object and every keyword of the keyword list).
     */
    public function clearIndex()
    {
        DB::table('submission_search_objects')->delete();
        DB::table('submission_search_keyword_list')->delete();
    }

    /**
     * Retrieves a keyword => ID map for the given keywords
     *
     * @param Collection<int,string> $keywords
     *
     * @return Collection<string,?int> The ID is null for keywords which aren't part of the keyword list yet
     */
    private function getKeywordIdMap(Collection $keywords): Collection
    {
        if ($keywords->isEmpty()) {
            return collect();
        }

        // Generates a temporary keyword table (sequence of "SELECT ? AS keyword UNION ALL SELECT ?..."), with one placeholder per keyword
        $temporaryTable = '(SELECT ? AS keyword' . str_repeat(' UNION ALL SELECT ?', $keywords->count() - 1) . ') AS tmp';

        // The left join keeps the keywords without a match in the result
        return DB::table(DB::raw($temporaryTable))
            ->setBindings($keywords->toArray(), 'from')
            ->leftJoin('submission_search_keyword_list AS sskl', 'sskl.keyword_text', '=', 'tmp.keyword')
            ->pluck('sskl.keyword_id', 'tmp.keyword');
    }
}
|
|
|
|
// Outside of strict mode, keep the class reachable through its legacy global (non-namespaced) name
if (!PKP_STRICT_MODE) {
    class_alias(
        '\PKP\search\SubmissionSearchDAO',
        '\SubmissionSearchDAO'
    );
}
|