feat(TaskProcessing): add agency audio-to-audio task type

Signed-off-by: Julien Veyssier <julien-nc@posteo.net>
2026-04-29 18:11:41 -04:00 · 2025-07-07 15:28:15 +02:00 · 2025-07-07 15:28:15 +02:00 · 2da3f450fa
commit 2da3f450fa
parent 58a37108da
5 changed files with 124 additions and 3 deletions
--- a/lib/composer/composer/autoload_classmap.php
+++ b/lib/composer/composer/autoload_classmap.php
@ -844,6 +844,7 @@ return array(
    'OCP\\TaskProcessing\\Task' => $baseDir . '/lib/public/TaskProcessing/Task.php',
    'OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php',
    'OCP\\TaskProcessing\\TaskTypes\\AudioToText' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/AudioToText.php',
+    'OCP\\TaskProcessing\\TaskTypes\\ContextAgentAudioInteraction' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php',
    'OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/ContextAgentInteraction.php',
    'OCP\\TaskProcessing\\TaskTypes\\ContextWrite' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/ContextWrite.php',
    'OCP\\TaskProcessing\\TaskTypes\\GenerateEmoji' => $baseDir . '/lib/public/TaskProcessing/TaskTypes/GenerateEmoji.php',
--- a/lib/composer/composer/autoload_static.php
+++ b/lib/composer/composer/autoload_static.php
@ -885,6 +885,7 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2
        'OCP\\TaskProcessing\\Task' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/Task.php',
        'OCP\\TaskProcessing\\TaskTypes\\AudioToAudioChat' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php',
        'OCP\\TaskProcessing\\TaskTypes\\AudioToText' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/AudioToText.php',
+        'OCP\\TaskProcessing\\TaskTypes\\ContextAgentAudioInteraction' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php',
        'OCP\\TaskProcessing\\TaskTypes\\ContextAgentInteraction' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/ContextAgentInteraction.php',
        'OCP\\TaskProcessing\\TaskTypes\\ContextWrite' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/ContextWrite.php',
        'OCP\\TaskProcessing\\TaskTypes\\GenerateEmoji' => __DIR__ . '/../../..' . '/lib/public/TaskProcessing/TaskTypes/GenerateEmoji.php',
--- a/lib/private/TaskProcessing/Manager.php
+++ b/lib/private/TaskProcessing/Manager.php
@ -590,6 +590,7 @@ class Manager implements IManager {
 			\OCP\TaskProcessing\TaskTypes\TextToTextProofread::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\TextToTextProofread::class),
 			\OCP\TaskProcessing\TaskTypes\TextToSpeech::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\TextToSpeech::class),
 			\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\AudioToAudioChat::class),
+			\OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::ID => \OCP\Server::get(\OCP\TaskProcessing\TaskTypes\ContextAgentAudioInteraction::class),
 		];

 		foreach ($context->getTaskProcessingTaskTypes() as $providerServiceRegistration) {
--- a/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php
+++ b/lib/public/TaskProcessing/TaskTypes/AudioToAudioChat.php
@ -16,7 +16,7 @@ use OCP\TaskProcessing\ITaskType;
 use OCP\TaskProcessing\ShapeDescriptor;

 /**
- * This is the task processing task type for text chat
+ * This is the task processing task type for audio chat
 * @since 32.0.0
 */
 class AudioToAudioChat implements ITaskType {
@ -75,12 +75,12 @@ class AudioToAudioChat implements ITaskType {
 			),
 			'input' => new ShapeDescriptor(
 				$this->l->t('Chat voice message'),
-				$this->l->t('Describe a task that you want the assistant to do or ask a question'),
+				$this->l->t('Describe a task that you want the assistant to do or ask a question.'),
 				EShapeType::Audio
 			),
 			'history' => new ShapeDescriptor(
 				$this->l->t('Chat history'),
-				$this->l->t('The history of chat messages before the current message, starting with a message by the user'),
+				$this->l->t('The history of chat messages before the current message, starting with a message by the user.'),
 				EShapeType::ListOfTexts
 			)
 		];
--- a/lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php
+++ b/lib/public/TaskProcessing/TaskTypes/ContextAgentAudioInteraction.php
@ -0,0 +1,118 @@
+<?php
+
+declare(strict_types=1);
+
+/**
+ * SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+namespace OCP\TaskProcessing\TaskTypes;
+
+use OCP\IL10N;
+use OCP\L10N\IFactory;
+use OCP\TaskProcessing\EShapeType;
+use OCP\TaskProcessing\ITaskType;
+use OCP\TaskProcessing\ShapeDescriptor;
+
+/**
+ * This is the task processing task type for voice (audio) interaction with the Context Agent
+ * @since 32.0.0
+ */
+class ContextAgentAudioInteraction implements ITaskType {
+	public const ID = 'core:contextagent:audio-interaction';
+
+	private IL10N $l;
+
+	/**
+	 * @param IFactory $l10nFactory Factory used to obtain the 'lib' translations for the shape labels
+	 * @since 32.0.0
+	 */
+	public function __construct(
+		IFactory $l10nFactory,
+	) {
+		$this->l = $l10nFactory->get('lib');
+	}
+
+	/**
+	 * @inheritDoc
+	 * @since 32.0.0
+	 */
+	public function getName(): string {
+		return 'ContextAgent audio'; // Not translated: returned as a literal instead of going through $this->l->t()
+	}
+
+	/**
+	 * @inheritDoc
+	 * @since 32.0.0
+	 */
+	public function getDescription(): string {
+		return $this->l->t('Chat by voice with an agent');
+	}
+
+	/**
+	 * @return string The task type identifier (self::ID)
+	 * @since 32.0.0
+	 */
+	public function getId(): string {
+		return self::ID;
+	}
+
+	/**
+	 * @return ShapeDescriptor[] Input slots keyed by name: input (audio), confirmation (number), conversation_token (text)
+	 * @since 32.0.0
+	 */
+	public function getInputShape(): array {
+		return [
+			'input' => new ShapeDescriptor(
+				$this->l->t('Chat voice message'),
+				$this->l->t('Describe a task that you want the agent to do or ask a question.'),
+				EShapeType::Audio
+			),
+			'confirmation' => new ShapeDescriptor(
+				$this->l->t('Confirmation'),
+				$this->l->t('Whether to confirm previously requested actions: 0 for denial and 1 for confirmation.'),
+				EShapeType::Number
+			),
+			'conversation_token' => new ShapeDescriptor(
+				$this->l->t('Conversation token'),
+				$this->l->t('A token representing the conversation.'),
+				EShapeType::Text
+			),
+		];
+	}
+
+	/**
+	 * @return ShapeDescriptor[] Output slots keyed by name: input_transcript, output (audio), output_transcript, conversation_token, actions
+	 * @since 32.0.0
+	 */
+	public function getOutputShape(): array {
+		return [
+			'input_transcript' => new ShapeDescriptor(
+				$this->l->t('Input transcript'),
+				$this->l->t('Transcription of the audio input'),
+				EShapeType::Text,
+			),
+			'output' => new ShapeDescriptor(
+				$this->l->t('Response voice message'),
+				$this->l->t('The generated voice response as part of the conversation'),
+				EShapeType::Audio
+			),
+			'output_transcript' => new ShapeDescriptor(
+				$this->l->t('Output transcript'),
+				$this->l->t('Transcription of the audio output'),
+				EShapeType::Text,
+			),
+			'conversation_token' => new ShapeDescriptor(
+				$this->l->t('The new conversation token'),
+				$this->l->t('Send this along with the next interaction.'),
+				EShapeType::Text
+			),
+			'actions' => new ShapeDescriptor(
+				$this->l->t('Requested actions by the agent'),
+				$this->l->t('Actions that the agent would like to carry out in JSON format.'),
+				EShapeType::Text
+			),
+		];
+	}
+}