Merge pull request #37674 from nextcloud/feature/speech-to-text

feat(SpeechToText): Add SpeechToText OCP provider API
This commit is contained in:
Joas Schilling 2023-04-19 16:29:44 +02:00 committed by GitHub
commit fd473f89e8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 578 additions and 0 deletions

View file

@ -573,6 +573,11 @@ return array(
'OCP\\Share_Backend' => $baseDir . '/lib/public/Share_Backend.php',
'OCP\\Share_Backend_Collection' => $baseDir . '/lib/public/Share_Backend_Collection.php',
'OCP\\Share_Backend_File_Dependent' => $baseDir . '/lib/public/Share_Backend_File_Dependent.php',
'OCP\\SpeechToText\\Events\\AbstractTranscriptionEvent' => $baseDir . '/lib/public/SpeechToText/Events/AbstractTranscriptionEvent.php',
'OCP\\SpeechToText\\Events\\TranscriptionFailedEvent' => $baseDir . '/lib/public/SpeechToText/Events/TranscriptionFailedEvent.php',
'OCP\\SpeechToText\\Events\\TranscriptionSuccessfulEvent' => $baseDir . '/lib/public/SpeechToText/Events/TranscriptionSuccessfulEvent.php',
'OCP\\SpeechToText\\ISpeechToTextManager' => $baseDir . '/lib/public/SpeechToText/ISpeechToTextManager.php',
'OCP\\SpeechToText\\ISpeechToTextProvider' => $baseDir . '/lib/public/SpeechToText/ISpeechToTextProvider.php',
'OCP\\Support\\CrashReport\\ICollectBreadcrumbs' => $baseDir . '/lib/public/Support/CrashReport/ICollectBreadcrumbs.php',
'OCP\\Support\\CrashReport\\IMessageReporter' => $baseDir . '/lib/public/Support/CrashReport/IMessageReporter.php',
'OCP\\Support\\CrashReport\\IRegistry' => $baseDir . '/lib/public/Support/CrashReport/IRegistry.php',
@ -1584,6 +1589,8 @@ return array(
'OC\\Share\\Constants' => $baseDir . '/lib/private/Share/Constants.php',
'OC\\Share\\Helper' => $baseDir . '/lib/private/Share/Helper.php',
'OC\\Share\\Share' => $baseDir . '/lib/private/Share/Share.php',
'OC\\SpeechToText\\SpeechToTextManager' => $baseDir . '/lib/private/SpeechToText/SpeechToTextManager.php',
'OC\\SpeechToText\\TranscriptionJob' => $baseDir . '/lib/private/SpeechToText/TranscriptionJob.php',
'OC\\StreamImage' => $baseDir . '/lib/private/StreamImage.php',
'OC\\Streamer' => $baseDir . '/lib/private/Streamer.php',
'OC\\SubAdmin' => $baseDir . '/lib/private/SubAdmin.php',

View file

@ -606,6 +606,11 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2
'OCP\\Share_Backend' => __DIR__ . '/../../..' . '/lib/public/Share_Backend.php',
'OCP\\Share_Backend_Collection' => __DIR__ . '/../../..' . '/lib/public/Share_Backend_Collection.php',
'OCP\\Share_Backend_File_Dependent' => __DIR__ . '/../../..' . '/lib/public/Share_Backend_File_Dependent.php',
'OCP\\SpeechToText\\Events\\AbstractTranscriptionEvent' => __DIR__ . '/../../..' . '/lib/public/SpeechToText/Events/AbstractTranscriptionEvent.php',
'OCP\\SpeechToText\\Events\\TranscriptionFailedEvent' => __DIR__ . '/../../..' . '/lib/public/SpeechToText/Events/TranscriptionFailedEvent.php',
'OCP\\SpeechToText\\Events\\TranscriptionSuccessfulEvent' => __DIR__ . '/../../..' . '/lib/public/SpeechToText/Events/TranscriptionSuccessfulEvent.php',
'OCP\\SpeechToText\\ISpeechToTextManager' => __DIR__ . '/../../..' . '/lib/public/SpeechToText/ISpeechToTextManager.php',
'OCP\\SpeechToText\\ISpeechToTextProvider' => __DIR__ . '/../../..' . '/lib/public/SpeechToText/ISpeechToTextProvider.php',
'OCP\\Support\\CrashReport\\ICollectBreadcrumbs' => __DIR__ . '/../../..' . '/lib/public/Support/CrashReport/ICollectBreadcrumbs.php',
'OCP\\Support\\CrashReport\\IMessageReporter' => __DIR__ . '/../../..' . '/lib/public/Support/CrashReport/IMessageReporter.php',
'OCP\\Support\\CrashReport\\IRegistry' => __DIR__ . '/../../..' . '/lib/public/Support/CrashReport/IRegistry.php',
@ -1617,6 +1622,8 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2
'OC\\Share\\Constants' => __DIR__ . '/../../..' . '/lib/private/Share/Constants.php',
'OC\\Share\\Helper' => __DIR__ . '/../../..' . '/lib/private/Share/Helper.php',
'OC\\Share\\Share' => __DIR__ . '/../../..' . '/lib/private/Share/Share.php',
'OC\\SpeechToText\\SpeechToTextManager' => __DIR__ . '/../../..' . '/lib/private/SpeechToText/SpeechToTextManager.php',
'OC\\SpeechToText\\TranscriptionJob' => __DIR__ . '/../../..' . '/lib/private/SpeechToText/TranscriptionJob.php',
'OC\\StreamImage' => __DIR__ . '/../../..' . '/lib/private/StreamImage.php',
'OC\\Streamer' => __DIR__ . '/../../..' . '/lib/private/Streamer.php',
'OC\\SubAdmin' => __DIR__ . '/../../..' . '/lib/private/SubAdmin.php',

View file

@ -33,6 +33,7 @@ use Closure;
use OCP\Calendar\Resource\IBackend as IResourceBackend;
use OCP\Calendar\Room\IBackend as IRoomBackend;
use OCP\Collaboration\Reference\IReferenceProvider;
use OCP\SpeechToText\ISpeechToTextProvider;
use OCP\Talk\ITalkBackend;
use OCP\Translation\ITranslationProvider;
use RuntimeException;
@ -111,6 +112,9 @@ class RegistrationContext {
/** @var ServiceRegistration<IHandler>[] */
private $wellKnownHandlers = [];
/** @var ServiceRegistration<ISpeechToTextProvider>[] */
private $speechToTextProviders = [];
/** @var ServiceRegistration<ICustomTemplateProvider>[] */
private $templateProviders = [];
@ -252,6 +256,13 @@ class RegistrationContext {
);
}
/**
 * Forward the given SpeechToText provider class to the wrapped registration
 * context, together with the id of the app this wrapper was created for.
 */
public function registerSpeechToTextProvider(string $providerClass): void {
	$this->context->registerSpeechToTextProvider($this->appId, $providerClass);
}
public function registerTemplateProvider(string $providerClass): void {
$this->context->registerTemplateProvider(
$this->appId,
@ -414,6 +425,10 @@ class RegistrationContext {
$this->wellKnownHandlers[] = new ServiceRegistration($appId, $class);
}
/**
 * Remember a SpeechToText provider class registered by the given app.
 */
public function registerSpeechToTextProvider(string $appId, string $class): void {
	$registration = new ServiceRegistration($appId, $class);
	$this->speechToTextProviders[] = $registration;
}
public function registerTemplateProvider(string $appId, string $class): void {
$this->templateProviders[] = new ServiceRegistration($appId, $class);
}
@ -685,6 +700,13 @@ class RegistrationContext {
return $this->wellKnownHandlers;
}
/**
* Get all SpeechToText provider registrations that have been recorded so far.
* The entries are registrations (app id + class name), not provider instances.
*
* @return ServiceRegistration<ISpeechToTextProvider>[]
*/
public function getSpeechToTextProviders(): array {
return $this->speechToTextProviders;
}
/**
* @return ServiceRegistration<ICustomTemplateProvider>[]
*/

View file

@ -148,6 +148,7 @@ use OC\Security\VerificationToken\VerificationToken;
use OC\Session\CryptoWrapper;
use OC\Share20\ProviderFactory;
use OC\Share20\ShareHelper;
use OC\SpeechToText\SpeechToTextManager;
use OC\SystemTag\ManagerFactory as SystemTagManagerFactory;
use OC\Tagging\TagMapper;
use OC\Talk\Broker;
@ -246,6 +247,7 @@ use OCP\Security\ISecureRandom;
use OCP\Security\ITrustedDomainHelper;
use OCP\Security\VerificationToken\IVerificationToken;
use OCP\Share\IShareHelper;
use OCP\SpeechToText\ISpeechToTextManager;
use OCP\SystemTag\ISystemTagManager;
use OCP\SystemTag\ISystemTagObjectMapper;
use OCP\Talk\IBroker;
@ -1457,6 +1459,8 @@ class Server extends ServerContainer implements IServerContainer {
$this->registerAlias(ITranslationManager::class, TranslationManager::class);
$this->registerAlias(ISpeechToTextManager::class, SpeechToTextManager::class);
$this->connectDispatcher();
}

View file

@ -0,0 +1,124 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2023 Julius Härtl <jus@bitgrid.net>
* @copyright Copyright (c) 2023 Marcel Klehr <mklehr@gmx.net>
*
* @author Julius Härtl <jus@bitgrid.net>
* @author Marcel Klehr <mklehr@gmx.net>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OC\SpeechToText;
use InvalidArgumentException;
use OC\AppFramework\Bootstrap\Coordinator;
use OCP\BackgroundJob\IJobList;
use OCP\Files\File;
use OCP\Files\InvalidPathException;
use OCP\Files\NotFoundException;
use OCP\IServerContainer;
use OCP\PreConditionNotMetException;
use OCP\SpeechToText\ISpeechToTextManager;
use OCP\SpeechToText\ISpeechToTextProvider;
use Psr\Container\ContainerExceptionInterface;
use Psr\Container\NotFoundExceptionInterface;
use Psr\Log\LoggerInterface;
use RuntimeException;
use Throwable;
/**
 * Manager that resolves the registered SpeechToText providers and uses them
 * to transcribe files, either directly or through a queued background job.
 */
class SpeechToTextManager implements ISpeechToTextManager {
	/** @var ?ISpeechToTextProvider[] Provider instances keyed by class name, null until first resolved */
	private ?array $providers = null;

	public function __construct(
		private IServerContainer $serverContainer,
		private Coordinator $coordinator,
		private LoggerInterface $logger,
		private IJobList $jobList,
	) {
	}

	/**
	 * Resolve all registered providers from the server container.
	 *
	 * The result is cached on this instance after the first successful pass;
	 * providers that failed to load are logged and left out.
	 *
	 * @return ISpeechToTextProvider[] Provider instances keyed by class name
	 */
	public function getProviders(): array {
		$context = $this->coordinator->getRegistrationContext();
		if ($context === null) {
			return [];
		}

		if ($this->providers !== null) {
			return $this->providers;
		}

		$this->providers = [];

		foreach ($context->getSpeechToTextProviders() as $providerServiceRegistration) {
			$class = $providerServiceRegistration->getService();
			try {
				$this->providers[$class] = $this->serverContainer->get($class);
			} catch (Throwable $e) {
				// Throwable already covers the PSR container exceptions; a single
				// broken provider must not prevent the others from being used.
				$this->logger->error('Failed to load SpeechToText provider ' . $class, [
					'exception' => $e,
				]);
			}
		}

		return $this->providers;
	}

	/**
	 * @return bool Whether at least one provider class has been registered
	 */
	public function hasProviders(): bool {
		$context = $this->coordinator->getRegistrationContext();
		if ($context === null) {
			return false;
		}
		return !empty($context->getSpeechToTextProviders());
	}

	/**
	 * Queue a TranscriptionJob for the given file.
	 *
	 * @param File $file The media file to transcribe
	 * @param ?string $userId Passed through to the job arguments
	 * @param string $appId Passed through to the job arguments
	 * @throws PreConditionNotMetException If no provider has been registered
	 * @throws InvalidArgumentException If the file id or the file owner cannot be determined
	 */
	public function scheduleFileTranscription(File $file, ?string $userId, string $appId): void {
		if (!$this->hasProviders()) {
			throw new PreConditionNotMetException('No SpeechToText providers have been registered');
		}
		try {
			$fileId = $file->getId();
			// NOTE(review): getOwner() is assumed to be nullable (OCP\Files\Node contract);
			// without this guard an ownerless file would end in a fatal "member function on null".
			$owner = $file->getOwner();
			if ($owner === null) {
				throw new InvalidArgumentException('Invalid file provided for file transcription: the file has no owner');
			}
			$this->jobList->add(TranscriptionJob::class, [
				'fileId' => $fileId,
				'owner' => $owner->getUID(),
				'userId' => $userId,
				'appId' => $appId,
			]);
		} catch (NotFoundException|InvalidPathException $e) {
			// Keep the original exception attached so the root cause stays visible
			throw new InvalidArgumentException('Invalid file provided for file transcription: ' . $e->getMessage(), 0, $e);
		}
	}

	/**
	 * Transcribe the file with the first provider that succeeds.
	 *
	 * Providers are tried in the order returned by getProviders(); a failing
	 * provider is logged and the next one is tried.
	 *
	 * @param File $file The media file to transcribe
	 * @return string The transcript returned by the provider
	 * @throws PreConditionNotMetException If no provider has been registered
	 * @throws RuntimeException If no provider could transcribe the file; the last
	 *                          provider failure (if any) is attached as previous exception
	 */
	public function transcribeFile(File $file): string {
		if (!$this->hasProviders()) {
			throw new PreConditionNotMetException('No SpeechToText providers have been registered');
		}

		$lastFailure = null;
		foreach ($this->getProviders() as $provider) {
			try {
				return $provider->transcribeFile($file);
			} catch (Throwable $e) {
				$this->logger->info('SpeechToText transcription using provider ' . $provider->getName() . ' failed', ['exception' => $e]);
				$lastFailure = $e;
			}
		}

		throw new RuntimeException('Could not transcribe file', 0, $lastFailure);
	}
}

View file

@ -0,0 +1,104 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2023 Marcel Klehr <mklehr@gmx.net>
*
* @author Marcel Klehr <mklehr@gmx.net>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OC\SpeechToText;
use OC\User\NoUserException;
use OCP\AppFramework\Utility\ITimeFactory;
use OCP\BackgroundJob\QueuedJob;
use OCP\EventDispatcher\IEventDispatcher;
use OCP\Files\File;
use OCP\Files\IRootFolder;
use OCP\Files\NotFoundException;
use OCP\Files\NotPermittedException;
use OCP\PreConditionNotMetException;
use OCP\SpeechToText\Events\TranscriptionFailedEvent;
use OCP\SpeechToText\Events\TranscriptionSuccessfulEvent;
use OCP\SpeechToText\ISpeechToTextManager;
use Psr\Log\LoggerInterface;
/**
 * Queued background job that transcribes one file and announces the outcome
 * through a TranscriptionSuccessfulEvent or a TranscriptionFailedEvent.
 */
class TranscriptionJob extends QueuedJob {
	public function __construct(
		ITimeFactory $timeFactory,
		private ISpeechToTextManager $speechToTextManager,
		private IEventDispatcher $eventDispatcher,
		private IRootFolder $rootFolder,
		private LoggerInterface $logger,
	) {
		parent::__construct($timeFactory);
	}

	/**
	 * @inheritDoc
	 *
	 * Reads the keys 'fileId', 'owner', 'userId' and 'appId' from $argument,
	 * looks the file up in the owner's user folder and transcribes it.
	 */
	protected function run($argument) {
		$fileId = $argument['fileId'];
		$owner = $argument['owner'];
		$userId = $argument['userId'];
		$appId = $argument['appId'];
		// Only ever holds a File (or null), so the failure event in the catch
		// block can never be handed `false` or a non-File node.
		$file = null;
		try {
			\OC_Util::setupFS($owner);
			$userFolder = $this->rootFolder->getUserFolder($owner);
			// current() yields false when getById() returned an empty list
			$node = current($userFolder->getById($fileId));
			if (!($node instanceof File)) {
				$this->logger->warning('Transcription of file ' . $fileId . ' failed. The file could not be found');
				$this->eventDispatcher->dispatchTyped(
					new TranscriptionFailedEvent(
						$fileId,
						null,
						'File not found',
						$userId,
						$appId,
					)
				);
				return;
			}
			$file = $node;
			$result = $this->speechToTextManager->transcribeFile($file);
			$this->eventDispatcher->dispatchTyped(
				new TranscriptionSuccessfulEvent(
					$fileId,
					$file,
					$result,
					$userId,
					$appId,
				)
			);
		} catch (PreConditionNotMetException|\RuntimeException|\InvalidArgumentException|NotFoundException|NotPermittedException|NoUserException $e) {
			$this->logger->warning('Transcription of file ' . $fileId . ' failed', ['exception' => $e]);
			$this->eventDispatcher->dispatchTyped(
				new TranscriptionFailedEvent(
					$fileId,
					$file,
					$e->getMessage(),
					$userId,
					$appId,
				)
			);
		}
	}
}

View file

@ -39,6 +39,7 @@ use OCP\Files\Template\ICustomTemplateProvider;
use OCP\IContainer;
use OCP\Notification\INotifier;
use OCP\Preview\IProviderV2;
use OCP\SpeechToText\ISpeechToTextProvider;
use OCP\Translation\ITranslationProvider;
/**
@ -208,6 +209,16 @@ interface IRegistrationContext {
*/
public function registerWellKnownHandler(string $class): void;
/**
* Register a custom SpeechToText provider class that can provide transcription
* of audio through the OCP\SpeechToText APIs
*
* @param string $providerClass
* @psalm-param class-string<ISpeechToTextProvider> $providerClass
* @since 27.0.0
*/
public function registerSpeechToTextProvider(string $providerClass): void;
/**
* Register a custom template provider class that is able to inject custom templates
* in addition to the user defined ones

View file

@ -0,0 +1,74 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2023 Marcel Klehr <mklehr@gmx.net>
*
* @author Marcel Klehr <mklehr@gmx.net>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
namespace OCP\SpeechToText\Events;
use OCP\EventDispatcher\Event;
use OCP\Files\File;
/**
* Common base class of the transcription events. It carries the data that lets
* a listener match the event to the transcription request it belongs to.
*
* @since 27.0.0
*/
abstract class AbstractTranscriptionEvent extends Event {
/**
* @param int $fileIdId The id of the file the transcription was requested for
* @param ?File $file The file the transcription was requested for, or null if it is not available
* @param ?string $userId The user id that was passed along with the request, if any
* @param string $appId The app id that was passed along with the request
*
* NOTE(review): `$fileIdId` looks like a typo for `$fileId`. Renaming it would change
* the parameter name seen by callers using named arguments, so it is left as is.
*
* @since 27.0.0
*/
public function __construct(
private int $fileIdId,
private ?File $file,
private ?string $userId,
private string $appId,
) {
parent::__construct();
}
/**
* @return int The id of the file the transcription was requested for
* @since 27.0.0
*/
public function getFileId(): int {
return $this->fileIdId;
}
/**
* @return ?File The file the transcription was requested for, or null if it is not available
* @since 27.0.0
*/
public function getFile(): ?File {
return $this->file;
}
/**
* @return ?string The user id that was passed along with the request, if any
* @since 27.0.0
*/
public function getUserId(): ?string {
return $this->userId;
}
/**
* @return string The app id that was passed along with the request
* @since 27.0.0
*/
public function getAppId(): string {
return $this->appId;
}
}

View file

@ -0,0 +1,56 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2023 Marcel Klehr <mklehr@gmx.net>
*
* @author Marcel Klehr <mklehr@gmx.net>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OCP\SpeechToText\Events;
use OCP\Files\File;
/**
* This Event is emitted if a transcription of a media file using a Speech-To-Text provider failed
* @since 27.0.0
*/
class TranscriptionFailedEvent extends AbstractTranscriptionEvent {
/**
* @param int $fileId The id of the file that should have been transcribed
* @param ?File $file The file that should have been transcribed, or null if it is not available
* @param string $errorMessage A message describing why the transcription failed
* @param ?string $userId The user id that was passed along with the request, if any
* @param string $appId The app id that was passed along with the request
* @since 27.0.0
*/
public function __construct(
int $fileId,
?File $file,
private string $errorMessage,
?string $userId,
string $appId,
) {
parent::__construct($fileId, $file, $userId, $appId);
}
/**
* @since 27.0.0
* @return string The error message
*/
public function getErrorMessage(): string {
return $this->errorMessage;
}
}

View file

@ -0,0 +1,56 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2023 Marcel Klehr <mklehr@gmx.net>
*
* @author Marcel Klehr <mklehr@gmx.net>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OCP\SpeechToText\Events;
use OCP\Files\File;
/**
* This Event is emitted when a transcription of a media file happened successfully
* @since 27.0.0
*/
class TranscriptionSuccessfulEvent extends AbstractTranscriptionEvent {
/**
* @param int $fileId The id of the file that was transcribed
* @param ?File $file The file that was transcribed
* @param string $transcript The transcript of the media file
* @param ?string $userId The user id that was passed along with the request, if any
* @param string $appId The app id that was passed along with the request
* @since 27.0.0
*/
public function __construct(
int $fileId,
?File $file,
private string $transcript,
?string $userId,
string $appId,
) {
parent::__construct($fileId, $file, $userId, $appId);
}
/**
* @since 27.0.0
* @return string The transcript of the media file
*/
public function getTranscript(): string {
return $this->transcript;
}
}

View file

@ -0,0 +1,67 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2023 Marcel Klehr <mklehr@gmx.net>
*
* @author Marcel Klehr <mklehr@gmx.net>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OCP\SpeechToText;
use InvalidArgumentException;
use OCP\Files\File;
use OCP\PreConditionNotMetException;
use RuntimeException;
/**
* Entry point for apps that want media files to be transcribed by one of the
* registered SpeechToText providers.
*
* @since 27.0.0
*/
interface ISpeechToTextManager {
/**
* Check whether a transcription can be requested at all.
*
* @return bool Whether at least one SpeechToText provider has been registered
* @since 27.0.0
*/
public function hasProviders(): bool;
/**
* Will schedule a transcription process in the background. The outcome is announced with either
* a \OCP\SpeechToText\Events\TranscriptionSuccessfulEvent or a \OCP\SpeechToText\Events\TranscriptionFailedEvent.
* The passed $userId and $appId are made available on these events, so you can use them to
* re-identify the transcription result as belonging to your transcription request.
*
* @param File $file The media file to transcribe
* @param ?string $userId The user that triggered this request (only for convenience, will be available on the transcription events)
* @param string $appId The app that triggered this request (only for convenience, will be available on the transcription events)
* @throws PreConditionNotMetException If no provider was registered but this method was still called
* @throws InvalidArgumentException If the file could not be found or is not of a supported type
* @since 27.0.0
*/
public function scheduleFileTranscription(File $file, ?string $userId, string $appId): void;
/**
* Transcribe the passed media file right away and return the result.
*
* @param File $file The media file to transcribe
* @return string The transcription of the passed media file
* @throws PreConditionNotMetException If no provider was registered but this method was still called
* @throws InvalidArgumentException If the file could not be found or is not of a supported type
* @throws RuntimeException If the transcription failed for other reasons
* @since 27.0.0
*/
public function transcribeFile(File $file): string;
}

View file

@ -0,0 +1,46 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2022 Marcel Klehr <mklehr@gmx.net>
*
* @author Marcel Klehr <mklehr@gmx.net>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OCP\SpeechToText;
use OCP\Files\File;
use RuntimeException;
/**
* Contract for a backend that is able to turn the audio of a media file into text.
*
* @since 27.0.0
*/
interface ISpeechToTextProvider {
/**
* @return string The name of this provider
* @since 27.0.0
*/
public function getName(): string;
/**
* @param File $file The media file to transcribe
* @return string The transcript of the passed media file
* @since 27.0.0
* @throws RuntimeException If the text could not be transcribed
*/
public function transcribeFile(File $file): string;
}