fix: make background scan job compatible with sharding

Signed-off-by: Robin Appelman <robin@icewind.nl>
This commit is contained in:
Robin Appelman 2024-07-25 18:46:47 +02:00 committed by Louis
parent 1b6d76aa1b
commit 550072342b

View file

@ -33,11 +33,11 @@ class ScanFiles extends TimedJob {
public const USERS_PER_SESSION = 500;
public function __construct(
IConfig $config,
IConfig $config,
IEventDispatcher $dispatcher,
LoggerInterface $logger,
IDBConnection $connection,
ITimeFactory $time
LoggerInterface $logger,
IDBConnection $connection,
ITimeFactory $time
) {
parent::__construct($time);
// Run once per 10 minutes
@ -70,15 +70,61 @@ class ScanFiles extends TimedJob {
* @return string|false
*/
private function getUserToScan() {
	// Both strategies look for a filecache row whose size is below 0 and whose
	// parent is above -1, and resolve the owning user through the mounts table.
	if ($this->connection->getShardDefinition('filecache')) {
		// for sharded filecache, the "LIMIT" from the normal query doesn't work

		// first we try it with a "LEFT JOIN" on mounts, this is fast, but might return a storage that isn't mounted.
		// we also ask for up to 10 results from different storages to increase the odds of finding a result that is mounted
		$query = $this->connection->getQueryBuilder();
		$query->select('m.user_id')
			->from('filecache', 'f')
			->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
			->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
			->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
			->setMaxResults(10)
			->groupBy('f.storage')
			->runAcrossAllShards();

		$result = $query->executeQuery();
		while ($res = $result->fetch()) {
			// rows coming from the left join may carry no user; skip those
			if ($res['user_id']) {
				// release the cursor before leaving the loop early
				$result->closeCursor();
				return $res['user_id'];
			}
		}
		$result->closeCursor();

		// as a fallback, we try a slower approach where we find all mounted storages first
		// this is essentially doing the inner join manually
		$storages = $this->getAllMountedStorages();
		if ($storages === []) {
			// nothing is mounted, so no row can match; avoid building an empty "IN ()" filter
			return false;
		}

		$query = $this->connection->getQueryBuilder();
		$query->select('m.user_id')
			->from('filecache', 'f')
			->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
			->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
			->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
			->andWhere($query->expr()->in('f.storage', $query->createNamedParameter($storages, IQueryBuilder::PARAM_INT_ARRAY)))
			->setMaxResults(1)
			->runAcrossAllShards();

		return $query->executeQuery()->fetchOne();
	}

	// non-sharded setup: a single inner join with a limit of one row is enough
	$query = $this->connection->getQueryBuilder();
	$query->select('m.user_id')
		->from('filecache', 'f')
		->innerJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
		->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
		->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
		->setMaxResults(1)
		->runAcrossAllShards();

	return $query->executeQuery()->fetchOne();
}
/**
 * Read the distinct storage_id values present in the mounts table.
 *
 * @return array flat list holding one storage_id value per distinct row
 */
private function getAllMountedStorages(): array {
	$builder = $this->connection->getQueryBuilder();
	$builder->selectDistinct('storage_id');
	$builder->from('mounts');

	// FETCH_COLUMN flattens each row to its single selected column
	$result = $builder->executeQuery();
	return $result->fetchAll(\PDO::FETCH_COLUMN);
}
/**