first commit
This commit is contained in:
@@ -0,0 +1,179 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/search/SubmissionSearchDAO.php
|
||||
*
|
||||
* Copyright (c) 2014-2021 Simon Fraser University
|
||||
* Copyright (c) 2003-2021 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class SubmissionSearchDAO
|
||||
*
|
||||
* @ingroup search
|
||||
*
|
||||
* @see SubmissionSearch
|
||||
*
|
||||
* @brief DAO class for submission search index.
|
||||
*/
|
||||
|
||||
namespace PKP\search;
|
||||
|
||||
use Illuminate\Database\Query\Builder;
|
||||
use Illuminate\Support\Collection;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use PKP\core\PKPString;
|
||||
|
||||
class SubmissionSearchDAO extends \PKP\db\DAO
|
||||
{
|
||||
/**
|
||||
* Delete all keywords for a submission.
|
||||
*
|
||||
* @param int $submissionId
|
||||
* @param int $type optional
|
||||
* @param int $assocId optional
|
||||
*/
|
||||
public function deleteSubmissionKeywords($submissionId, $type = null, $assocId = null)
|
||||
{
|
||||
DB::table('submission_search_objects')
|
||||
->where('submission_id', '=', $submissionId)
|
||||
->when(isset($type), fn (Builder $query) => $query->where('type', '=', $type))
|
||||
->when(isset($assocId), fn (Builder $query) => $query->where('assoc_id', '=', $assocId))
|
||||
->delete();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a submission object to the index (if already exists, indexed keywords are cleared).
|
||||
*
|
||||
* @param int $submissionId
|
||||
* @param int $type
|
||||
* @param ?int $assocId
|
||||
*
|
||||
* @return int the object ID
|
||||
*/
|
||||
public function insertObject($submissionId, $type, $assocId)
|
||||
{
|
||||
$objectId = DB::table('submission_search_objects')
|
||||
->where('submission_id', '=', $submissionId)
|
||||
->where('type', '=', $type)
|
||||
->when($assocId !== null, fn (Builder $query) => $query->where('assoc_id', '=', $assocId))
|
||||
->value('object_id');
|
||||
|
||||
if ($objectId) {
|
||||
// Clear the old keywords
|
||||
DB::table('submission_search_object_keywords')
|
||||
->where('object_id', '=', $objectId)
|
||||
->delete();
|
||||
return $objectId;
|
||||
}
|
||||
|
||||
return DB::table('submission_search_objects')->insertGetId([
|
||||
'submission_id' => $submissionId,
|
||||
'type' => $type,
|
||||
'assoc_id' => $assocId
|
||||
], 'object_id');
|
||||
}
|
||||
|
||||
/**
|
||||
* Index an occurrence of a keyword in an object.
|
||||
*/
|
||||
public function insertObjectKeywords(int $objectId, array $keywords): void
|
||||
{
|
||||
/** @var array<string,?int> */
|
||||
static $keywordMap = [];
|
||||
|
||||
// Discard long keywords
|
||||
$keywords = collect($keywords)
|
||||
->filter(fn (string $keyword) => PKPString::strlen($keyword) <= SubmissionSearchIndex::SEARCH_KEYWORD_MAX_LENGTH);
|
||||
|
||||
// Quit if there's no keywords
|
||||
if (!$keywords->count()) {
|
||||
return;
|
||||
}
|
||||
|
||||
$chunkedUnmappedKeywords = $keywords
|
||||
// Skip mapped keywords
|
||||
->diff(array_keys($keywordMap))
|
||||
// Chunk by 1000
|
||||
->chunk(1000);
|
||||
|
||||
$chunkedUnmappedKeywords->map(function (Collection $keywords) use (&$keywordMap) {
|
||||
$missingKeywords = collect();
|
||||
// Update the map with the existing IDs. Due to the database collation, very similar keywords might end up with the same ID
|
||||
foreach ($this->getKeywordIdMap($keywords) as $keyword => $id) {
|
||||
if ($id) {
|
||||
$keywordMap[$keyword] = $id;
|
||||
} else {
|
||||
$missingKeywords->push($keyword);
|
||||
}
|
||||
}
|
||||
|
||||
// Batch insert keywords that don't exist using the "ignore" feature to deal with collation issues (e.g. attempt to insert "a" and "ã" at the same time might fail)
|
||||
// This isn't executed first just to avoid "burning" IDs due to existing keywords
|
||||
DB::table('submission_search_keyword_list')->insertOrIgnore(
|
||||
$missingKeywords
|
||||
->map(fn (string $keyword) => ['keyword_text' => $keyword])
|
||||
->toArray()
|
||||
);
|
||||
|
||||
// Grab the the map with the new IDs
|
||||
foreach ($this->getKeywordIdMap($missingKeywords) as $keyword => $id) {
|
||||
$keywordMap[$keyword] = $id;
|
||||
}
|
||||
});
|
||||
|
||||
// Get the current position
|
||||
$position = DB::table('submission_search_object_keywords')
|
||||
->where('object_id', $objectId)
|
||||
->max('pos') ?? -1;
|
||||
|
||||
$keywords
|
||||
// Skip missed keywords (probably not needed, present for correctness)
|
||||
->filter(fn (string $keyword) => isset($keywordMap[$keyword]))
|
||||
// Convert to batch insert format
|
||||
->map(function (string $keyword) use (&$position, $objectId, $keywordMap) {
|
||||
return [
|
||||
'object_id' => $objectId,
|
||||
'keyword_id' => $keywordMap[$keyword],
|
||||
'pos' => ++$position
|
||||
];
|
||||
})
|
||||
// Chunk by 1000
|
||||
->chunk(1000)
|
||||
// Batch insert
|
||||
->map(fn (Collection $data) => DB::table('submission_search_object_keywords')->insert($data->toArray()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the search index.
|
||||
*/
|
||||
public function clearIndex()
|
||||
{
|
||||
DB::table('submission_search_objects')->delete();
|
||||
DB::table('submission_search_keyword_list')->delete();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a keyword => ID map for the given keywords
|
||||
*
|
||||
* @param Collection<int,string>
|
||||
* @return Collection<string,int>
|
||||
*/
|
||||
private function getKeywordIdMap(Collection $keywords): Collection
|
||||
{
|
||||
if (!$keywords->count()) {
|
||||
return collect();
|
||||
}
|
||||
|
||||
// Generates a temporary keyword table (sequence of "SELECT ? AS keyword UNION ALL SELECT ?...")
|
||||
return DB::table(
|
||||
DB::raw('(SELECT ? AS keyword' . str_repeat(' UNION ALL SELECT ?', $keywords->count() - 1) . ') AS tmp')
|
||||
)
|
||||
->setBindings($keywords->toArray(), 'from')
|
||||
->leftJoin('submission_search_keyword_list AS sskl', 'sskl.keyword_text', '=', 'tmp.keyword')
|
||||
->pluck('sskl.keyword_id', 'tmp.keyword');
|
||||
}
|
||||
}
|
||||
|
||||
if (!PKP_STRICT_MODE) {
|
||||
class_alias('\PKP\search\SubmissionSearchDAO', '\SubmissionSearchDAO');
|
||||
}
|
||||
Reference in New Issue
Block a user