feat(TaskProcessing): add agentic audio-to-audio task type

Signed-off-by: Julien Veyssier <julien-nc@posteo.net>
This commit is contained in:
Julien Veyssier 2025-07-07 15:28:15 +02:00
parent 58a37108da
commit 2da3f450fa
No known key found for this signature in database
GPG key ID: 4141FEE162030638
5 changed files with 124 additions and 3 deletions

View file

@ -844,6 +844,7 @@ return array(
'OCP\\TaskProcessing\\Task' => $baseDir . '/lib/public/TaskProcessing/Task.php',
'OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php',
'OCP\\TaskProcessing\\TaskTypes\\AudioToText' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/AudioToText.php',
'OCP\\TaskProcessing\\TaskTypes\\ContextAgentAudioInteraction' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php',
'OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/ContextAgentInteraction.php',
'OCP\\TaskProcessing\\TaskTypes\\ContextWrite' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/ContextWrite.php',
'OCP\\TaskProcessing\\TaskTypes\\GenerateEmoji' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/GenerateEmoji.php',

View file

@ -885,6 +885,7 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2
'OCP\\TaskProcessing\\Task' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/Task.php',
'OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php',
'OCP\\TaskProcessing\\TaskTypes\\AudioToText' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/AudioToText.php',
'OCP\\TaskProcessing\\TaskTypes\\ContextAgentAudioInteraction' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php',
'OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/ContextAgentInteraction.php',
'OCP\\TaskProcessing\\TaskTypes\\ContextWrite' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/ContextWrite.php',
'OCP\\TaskProcessing\\TaskTypes\\GenerateEmoji' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/GenerateEmoji.php',

View file

@ -590,6 +590,7 @@ class Manager implements IManager {
\OCP\TaskProcessing\TaskTypes\TextToTextProofread::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\TextToTextProofread::class),
\OCP\TaskProcessing\TaskTypes\TextToSpeech::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\TextToSpeech::class),
\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::class),
\OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::class),
];
foreach ($context->getTaskProcessingTaskTypes() as $providerServiceRegistration) {

View file

@ -16,7 +16,7 @@ use OCP\TaskProcessing\ITaskType;
use OCP\TaskProcessing\ShapeDescriptor;
/**
* This is the task processing task type for text chat
* This is the task processing task type for audio chat
* @since 32.0.0
*/
class AudioToAudioChat implements ITaskType {
@ -75,12 +75,12 @@ class AudioToAudioChat implements ITaskType {
),
'input' => new ShapeDescriptor(
$this->l->t('Chat voice message'),
$this->l->t('Describe a task that you want the assistant to do or ask a question'),
$this->l->t('Describe a task that you want the assistant to do or ask a question.'),
EShapeType::Audio
),
'history' => new ShapeDescriptor(
$this->l->t('Chat history'),
$this->l->t('The history of chat messages before the current message, starting with a message by the user'),
$this->l->t('The history of chat messages before the current message, starting with a message by the user.'),
EShapeType::ListOfTexts
)
];

View file

@ -0,0 +1,118 @@
<?php
declare(strict_types=1);
/**
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
namespace OCP\TaskProcessing\TaskTypes;
use OCP\IL10N;
use OCP\L10N\IFactory;
use OCP\TaskProcessing\EShapeType;
use OCP\TaskProcessing\ITaskType;
use OCP\TaskProcessing\ShapeDescriptor;
/**
 * Task processing task type for audio (voice) interaction with the Context Agent.
 *
 * The declared input is a voice message accompanied by a confirmation flag and
 * a conversation token; the declared output is a voice response together with
 * transcripts of both audio messages, a new conversation token and the actions
 * requested by the agent.
 * @since 32.0.0
 */
class ContextAgentAudioInteraction implements ITaskType {
	public const ID = 'core:contextagent:audio-interaction';

	/** Translator obtained for the 'lib' app, used for all user-facing labels */
	private IL10N $l;

	/**
	 * @param IFactory $l10nFactory Factory from which the 'lib' translator is fetched
	 * @since 32.0.0
	 */
	public function __construct(IFactory $l10nFactory) {
		$this->l = $l10nFactory->get('lib');
	}

	/**
	 * Returns the fixed display name of this task type.
	 *
	 * @inheritDoc
	 * @since 32.0.0
	 */
	public function getName(): string {
		// Intentionally a plain literal: this name is not passed through the translator
		return 'ContextAgent audio';
	}

	/**
	 * Returns the translated one-line description of this task type.
	 *
	 * @inheritDoc
	 * @since 32.0.0
	 */
	public function getDescription(): string {
		$description = $this->l->t('Chat by voice with an agent');
		return $description;
	}

	/**
	 * Returns the identifier of this task type, i.e. the value of self::ID.
	 *
	 * @return string
	 * @since 32.0.0
	 */
	public function getId(): string {
		return self::ID;
	}

	/**
	 * Describes the input slots of this task type, keyed by slot name:
	 * 'input' (audio), 'confirmation' (number) and 'conversation_token' (text).
	 *
	 * @return ShapeDescriptor[]
	 * @since 32.0.0
	 */
	public function getInputShape(): array {
		$l = $this->l;
		$shape = [];

		$shape['input'] = new ShapeDescriptor(
			$l->t('Chat voice message'),
			$l->t('Describe a task that you want the agent to do or ask a question.'),
			EShapeType::Audio,
		);
		$shape['confirmation'] = new ShapeDescriptor(
			$l->t('Confirmation'),
			$l->t('Whether to confirm previously requested actions: 0 for denial and 1 for confirmation.'),
			EShapeType::Number,
		);
		$shape['conversation_token'] = new ShapeDescriptor(
			$l->t('Conversation token'),
			$l->t('A token representing the conversation.'),
			EShapeType::Text,
		);

		return $shape;
	}

	/**
	 * Describes the output slots of this task type, keyed by slot name:
	 * 'input_transcript' (text), 'output' (audio), 'output_transcript' (text),
	 * 'conversation_token' (text) and 'actions' (text).
	 *
	 * @return ShapeDescriptor[]
	 * @since 32.0.0
	 */
	public function getOutputShape(): array {
		$l = $this->l;
		$shape = [];

		$shape['input_transcript'] = new ShapeDescriptor(
			$l->t('Input transcript'),
			$l->t('Transcription of the audio input'),
			EShapeType::Text,
		);
		$shape['output'] = new ShapeDescriptor(
			$l->t('Response voice message'),
			$l->t('The generated voice response as part of the conversation'),
			EShapeType::Audio,
		);
		$shape['output_transcript'] = new ShapeDescriptor(
			$l->t('Output transcript'),
			$l->t('Transcription of the audio output'),
			EShapeType::Text,
		);
		$shape['conversation_token'] = new ShapeDescriptor(
			$l->t('The new conversation token'),
			$l->t('Send this along with the next interaction.'),
			EShapeType::Text,
		);
		$shape['actions'] = new ShapeDescriptor(
			$l->t('Requested actions by the agent'),
			$l->t('Actions that the agent would like to carry out in JSON format.'),
			EShapeType::Text,
		);

		return $shape;
	}
}