mymuseum-visitapp/lib/Services/Glasses/glasses_orchestrator.dart

503 lines
19 KiB
Dart

import 'dart:async';
import 'dart:io';
import 'package:flutter/foundation.dart';
import 'package:mymuseum_visitapp/Helpers/translationHelper.dart';
import 'package:just_audio/just_audio.dart';
import 'package:mobile_scanner/mobile_scanner.dart';
import 'package:mymuseum_visitapp/Models/visitContext.dart';
import 'package:mymuseum_visitapp/Services/Glasses/engines/llm_client.dart';
import 'package:mymuseum_visitapp/Services/Glasses/engines/stt_engine.dart';
import 'package:mymuseum_visitapp/Services/Glasses/engines/tts_engine.dart';
import 'package:mymuseum_visitapp/Services/Glasses/engines/wake_word_engine.dart';
import 'package:mymuseum_visitapp/Services/meta_glasses_service.dart';
/// Currently active orchestrator instance, globally accessible.
/// Initialized in main.dart once the glasses are connected.
GlassesOrchestrator? activeOrchestrator;
/// Orchestrates the complete hands-free pipeline:
/// WakeWord → STT → dispatch → LLM or QR scan → TTS
///
/// All dependencies are injected through abstract interfaces so that
/// each link of the chain can be swapped independently.
class GlassesOrchestrator {
/// Visit context; read in this class for the language, the active
/// configuration and the known section IDs.
final VisitAppContext visitAppContext;
/// Wake-word detector, stopped during a conversation and restarted after it.
final WakeWordEngine wakeWordEngine;
/// Speech-to-text engine used to capture spoken commands.
final SttEngine sttEngine;
/// Text-to-speech engine used to speak replies.
final TtsEngine ttsEngine;
/// Client used to send questions to the LLM.
final LlmClient llmClient;
// Set to true by start() and back to false by stop().
bool _running = false;
// True while a wake-word-triggered conversation is being handled.
bool _inConversation = false;
/// Last captured transcription — observable from the UI.
final ValueNotifier<String> lastTranscription = ValueNotifier('');
/// Whether the orchestrator is currently waiting for a spoken command.
final ValueNotifier<bool> isListeningForCommand = ValueNotifier(false);
/// Last text sent to the TTS — used to spot asterisks and other artifacts.
final ValueNotifier<String> lastTtsText = ValueNotifier('');
// Visit photos (V2 — stored for an end-of-visit summary)
final List<String> visitPhotos = [];
// Feedback sounds — short files in assets/sounds/
// wake_detected.mp3 : ~0.3s — "I'm listening"
// thinking.mp3 : ~0.5s — "I'm thinking"
// done.mp3 : ~0.3s — "answer ready" (optional)
static const String _wakeSound = 'assets/sounds/wake_detected.mp3';
static const String _thinkingSound = 'assets/sounds/thinking.mp3';
static const String _doneSound = 'assets/sounds/done.mp3';
// NOTE(review): nothing in this class disposes these two players — confirm
// whether a dispose path is needed.
final AudioPlayer _soundPlayer = AudioPlayer(); // one-shot sounds
final AudioPlayer _thinkingPlayer = AudioPlayer(); // thinking loop
// QR anti-spam: section ID → epoch milliseconds of the last explanation,
// and the minimum delay (ms) before the same section is explained again.
final Map<String, int> _lastQrTime = {};
static const int _qrCooldownMs = 10000;
// Recognized QR URL shapes; capture group 2 is used as the configuration ID
// and capture group 3 as the section ID.
static final RegExp _urlPattern1 =
RegExp(r'https://web\.mymuseum\.be/([^/]+)/([^/]+)/([^/\s]+)');
static final RegExp _urlPattern2 =
RegExp(r'https://web\.myinfomate\.be/([^/]+)/([^/]+)/([^/\s]+)');
/// Creates an orchestrator from its injected dependencies.
///
/// Nothing is started here; call [start] to begin wake-word detection.
GlassesOrchestrator({
required this.visitAppContext,
required this.wakeWordEngine,
required this.sttEngine,
required this.ttsEngine,
required this.llmClient,
});
/// Starts wake-word detection.
///
/// Does nothing when the orchestrator is already running. If the wake-word
/// engine fails to start, the running flag is cleared again so that a later
/// call can retry, and the error is rethrown to the caller.
Future<void> start() async {
  if (_running) return;
  // Set before awaiting so that a concurrent call is ignored.
  _running = true;
  try {
    await wakeWordEngine.start(
      onDetected: _onWakeWord,
      onDetectedWithCommand: _onWakeWordWithCommand,
    );
  } catch (_) {
    // Without this reset a failed start would make every later start() a
    // silent no-op.
    _running = false;
    rethrow;
  }
  debugPrint('[GlassesOrchestrator] Started');
}
/// Stops the whole pipeline: wake word, pending STT, TTS and thinking sound.
///
/// The running flag is cleared before anything is awaited, so that a
/// conversation finishing in the meantime does not re-arm the wake word.
Future<void> stop() async {
  // Clear first: the wake-word handlers check this flag in their `finally`
  // before restarting the engine.
  _running = false;
  await wakeWordEngine.stop();
  await sttEngine.cancel();
  await ttsEngine.stop();
  // A request to the LLM may still be in flight with its loop playing.
  await _stopThinkingLoop();
  debugPrint('[GlassesOrchestrator] Stopped');
}
/// Whether the orchestrator has been started and not stopped since.
bool get isRunning => _running;

/// Whether a wake-word-triggered conversation is in progress.
bool get isInConversation => _inConversation;

/// Whether the orchestrator is running and not currently in a conversation.
bool get isListening => isRunning && !isInConversation;
/// Restarts wake-word listening with the orchestrator's own callbacks
/// (the ones that play the detection sound). Meant to be called from the
/// lifecycle observer.
///
/// Does nothing unless the orchestrator is running and not in a conversation.
Future<void> restartWakeWord() async {
  final idle = _running && !_inConversation;
  if (idle) {
    await wakeWordEngine.start(
      onDetected: _onWakeWord,
      onDetectedWithCommand: _onWakeWordWithCommand,
    );
  }
}
/// Manually runs one listen-and-dispatch cycle (e.g. debug button, tests).
Future<void> triggerConversation() {
  return _handleConversation();
}
/// Dispatches [command] directly (used by the lifecycle observer).
Future<void> dispatchCommand(String command) {
  return _dispatch(command);
}
/// Runs a QR scan on the existing image at [imagePath] and, when a QR code
/// is decoded, explains the matching section.
Future<void> triggerQrScan(String imagePath) async {
  final decoded = await _tryDecodeQr(imagePath);
  if (decoded == null) return;
  await explainSection(decoded.sectionId, configurationId: decoded.configId);
}
// ── Wake word ──────────────────────────────────────────────────────────────
/// Called when the wake word is detected without an inline command.
///
/// Runs a separate STT cycle to capture the command.
void _onWakeWord() {
  unawaited(_runWakeCycle(''));
}

/// Called when the wake word AND the command are in the same utterance.
///
/// "visite qu'est-ce que c'est" → command = "qu'est-ce que c'est".
/// An empty [inlineCommand] falls back to a separate STT cycle.
void _onWakeWordWithCommand(String inlineCommand) {
  unawaited(_runWakeCycle(inlineCommand));
}

/// Runs one wake-word-triggered conversation, then re-arms the wake word.
///
/// Ignored while a conversation is already in progress. Every step that can
/// fail runs inside the `try`, so [_inConversation] is always cleared —
/// otherwise one failure while pausing the engine or playing the sound
/// would make all later wake words be ignored.
Future<void> _runWakeCycle(String inlineCommand) async {
  if (_inConversation) return;
  _inConversation = true;
  try {
    // Sends ACTION_PAUSE right away — AudioRecord stops while the sound plays.
    await wakeWordEngine.stop();
    await _stopThinkingLoop();
    // ~300ms — more than enough for AudioRecord to be released.
    await _playWakeSound();
    if (inlineCommand.isNotEmpty) {
      debugPrint('[GlassesOrchestrator] Inline command: "$inlineCommand"');
      await _dispatch(inlineCommand);
    } else {
      await _handleConversation();
    }
  } catch (e, st) {
    // This runs as a fire-and-forget callback: nobody awaits it, so the
    // failure has to be reported here.
    debugPrint('[GlassesOrchestrator] Conversation failed: $e\n$st');
  } finally {
    _inConversation = false;
    if (_running) {
      try {
        await wakeWordEngine.start(
          onDetected: _onWakeWord,
          onDetectedWithCommand: _onWakeWordWithCommand,
        );
      } catch (e) {
        debugPrint('[GlassesOrchestrator] Wake word restart failed: $e');
      }
    }
  }
}
/// Plays the one-shot feedback sound [asset] and completes when it ends.
///
/// Best effort: any failure is logged and swallowed so that a missing or
/// unplayable sound never interrupts the voice pipeline.
Future<void> _playSound(String asset) async {
  try {
    // just_audio keeps `playing` true after a track has completed, and
    // play() returns immediately while `playing` is true. Reset the player
    // first so the await below lasts until the end of playback on every
    // call, not only on the first one.
    await _soundPlayer.stop();
    await _soundPlayer.setAsset(asset);
    // play() completes when playback finishes.
    await _soundPlayer.play();
  } catch (e) {
    debugPrint('[GlassesOrchestrator] Could not play sound $asset: $e');
  }
}
/// Plays the "I'm listening" feedback sound.
Future<void> _playWakeSound() {
  return _playSound(_wakeSound);
}

/// Plays the "answer ready" feedback sound.
Future<void> _playDoneSound() {
  return _playSound(_doneSound);
}
/// Starts the looping "thinking" sound in the background.
///
/// Returns as soon as playback has been requested. Best effort: loading or
/// playback failures are logged and never thrown.
Future<void> _startThinkingLoop() async {
  try {
    await _thinkingPlayer.setAsset(_thinkingSound);
    await _thinkingPlayer.setLoopMode(LoopMode.one);
    // Deliberately not awaited — the loop keeps playing in the background.
    // The surrounding try/catch cannot see a failure of a future that is not
    // awaited, so it is handled on the future itself.
    unawaited(
      _thinkingPlayer.play().catchError((Object e) {
        debugPrint('[GlassesOrchestrator] Thinking loop playback failed: $e');
      }),
    );
  } catch (_) {
    debugPrint('[GlassesOrchestrator] Thinking sound not found');
  }
}
/// Stops the background "thinking" sound.
Future<void> _stopThinkingLoop() => _thinkingPlayer.stop();
// ── Conversation vocale ────────────────────────────────────────────────────
/// Captures one spoken command through the STT engine and dispatches it.
///
/// [isListeningForCommand] is true only while the STT call is pending, and
/// is reset even when that call throws. An empty transcription ends the
/// cycle without dispatching anything.
Future<void> _handleConversation() async {
  final langCode = _toLangCode(visitAppContext.language ?? 'FR');
  String command;
  isListeningForCommand.value = true;
  try {
    command = await sttEngine.transcribeOnce(languageCode: langCode);
  } finally {
    // Otherwise an STT failure would leave the UI showing "listening".
    isListeningForCommand.value = false;
  }
  debugPrint('[GlassesOrchestrator] Command: "$command"');
  if (command.isEmpty) return;
  lastTranscription.value = command;
  await _dispatch(command);
}
/// Routes [command] to the matching handler: stop, QR scan, photo capture,
/// repeat, or a free-form question answered by the LLM.
///
/// Blank commands are ignored. After a repeat, or after an LLM exchange that
/// did not fail, and when [continueConversation] is true, a follow-up
/// question is listened for without requiring the wake word again. Stop,
/// QR-scan and photo commands, as well as LLM/TTS errors, end the exchange
/// without a follow-up.
Future<void> _dispatch(String command, {bool continueConversation = true}) async {
  // Nothing was said (e.g. a blank string passed to dispatchCommand): do not
  // send an empty prompt to the LLM.
  if (command.trim().isEmpty) return;
  final lang = visitAppContext.language ?? 'FR';
  final langCode = _toLangCode(lang);
  if (_isStopCommand(command)) {
    debugPrint('[GlassesOrchestrator] Annulé par l\'utilisateur: "$command"');
    await _stopThinkingLoop();
    // No sound on cancellation — avoids one more audio-focus switch, which
    // triggers the persistent MIUI muting.
    return;
  }
  if (_isQrScanCommand(command)) {
    await _handleQrScan();
    return;
  }
  if (_isPhotoCommand(command)) {
    await _handlePhotoCapture();
    return;
  }
  if (_isRepeatCommand(command)) {
    await ttsEngine.replay();
  } else {
    // Free-form question → thinking loop → LLM → done sound → TTS
    try {
      await _startThinkingLoop();
      final reply = await llmClient.chat(
        command,
        configurationId: visitAppContext.configuration?.id,
        languageCode: lang,
      );
      await _stopThinkingLoop();
      if (reply.isNotEmpty) {
        lastTtsText.value = reply;
        await _playDoneSound();
        await ttsEngine.speak(reply, languageCode: langCode);
      }
    } catch (e) {
      await _stopThinkingLoop();
      debugPrint('[GlassesOrchestrator] LLM error: $e');
      return; // no follow-up after an error
    }
  }
  // Conversation mode: listen for the next question right away, without
  // asking for the wake word again.
  if (continueConversation) {
    await _listenForFollowUp();
  }
}
/// Listens for a follow-up question after the spoken reply.
///
/// The STT call is given a 5-second timeout. An empty result or a stop
/// phrase ends the conversation (back to wake-word mode); anything else is
/// dispatched as a new command. [isListeningForCommand] is reset even when
/// the STT call throws.
Future<void> _listenForFollowUp() async {
  final langCode = _toLangCode(visitAppContext.language ?? 'FR');
  String followUp;
  isListeningForCommand.value = true;
  try {
    followUp = await sttEngine.transcribeOnce(
      languageCode: langCode,
      timeout: const Duration(seconds: 5),
    );
  } finally {
    // Otherwise an STT failure would leave the UI showing "listening".
    isListeningForCommand.value = false;
  }
  if (followUp.isNotEmpty) lastTranscription.value = followUp;
  if (followUp.isEmpty || _isStopCommand(followUp)) {
    debugPrint('[GlassesOrchestrator] Conversation ended (silence or stop)');
    return;
  }
  debugPrint('[GlassesOrchestrator] Follow-up: "$followUp"');
  await _dispatch(followUp, continueConversation: true);
}
// ── QR scan from a captured photo ─────────────────────────────────────────
/// Requests one photo from the glasses and tries to decode a QR code in it.
///
/// A decoded QR code triggers [explainSection]; otherwise a spoken message
/// reports that the camera was unavailable or that no QR code was found.
/// The photo file is deleted once it has been analysed.
Future<void> _handleQrScan() async {
  debugPrint('[QrScan] Starting — requesting photo capture...');
  final photoPath = await _capturePhoto(logTag: 'QrScan');
  if (photoPath == null) {
    debugPrint('[QrScan] No photo — camera unavailable or error');
    await _speakTranslated('voice.cameraUnavailable');
    return;
  }
  debugPrint('[QrScan] Photo received at $photoPath — decoding QR...');
  final qr = await _tryDecodeQr(photoPath);
  // Best-effort cleanup: a leftover file must not abort the scan.
  try {
    await File(photoPath).delete();
  } catch (e) {
    debugPrint('[QrScan] Could not delete photo $photoPath: $e');
  }
  if (qr != null) {
    debugPrint('[QrScan] QR found — sectionId=${qr.sectionId} configId=${qr.configId}');
    await explainSection(qr.sectionId, configurationId: qr.configId);
  } else {
    debugPrint('[QrScan] No QR code found in photo');
    await _speakTranslated('voice.noQrFound');
  }
}

// ── Photo capture: visit memories ─────────────────────────────────────────
/// Captures a photo and tries to decode a QR code from it.
///
/// A decoded QR code triggers [explainSection]. Without one, the photo path
/// is appended to [visitPhotos] (V2: end-of-visit summary) and a short spoken
/// confirmation is given. A failed or timed-out capture is announced instead.
Future<void> _handlePhotoCapture() async {
  final photoPath = await _capturePhoto();
  if (photoPath == null) {
    debugPrint('[GlassesOrchestrator] Photo capture failed or timeout');
    await _speakTranslated('voice.photoFailed');
    return;
  }
  final qr = await _tryDecodeQr(photoPath);
  if (qr != null) {
    await explainSection(qr.sectionId, configurationId: qr.configId);
    return;
  }
  // No QR code — keep the photo for the visit.
  visitPhotos.add(photoPath);
  debugPrint('[GlassesOrchestrator] Photo saved for visit (no QR): $photoPath');
  // V2: end-of-visit summary, artwork identification, etc.
  // For now: simple spoken feedback.
  await _speakTranslated('voice.photoCaptured');
}

/// Requests a photo from the glasses and waits up to 10 seconds for its path.
///
/// Returns null on timeout or when the service reports an empty path. The
/// previously registered photo callback is still invoked with the path, and
/// is restored afterwards even when the capture request throws. When
/// [logTag] is given, progress is logged under that tag.
Future<String?> _capturePhoto({String? logTag}) async {
  final service = MetaGlassesService.instance;
  final completer = Completer<String?>();
  final prevCallback = service.onPhotoCaptured;
  service.onPhotoCaptured = (path) {
    if (logTag != null) {
      debugPrint('[$logTag] Photo callback received — path="${path.isEmpty ? "EMPTY/ERROR" : path}"');
    }
    if (!completer.isCompleted) completer.complete(path.isEmpty ? null : path);
    prevCallback?.call(path);
  };
  final timeout = Timer(const Duration(seconds: 10), () {
    if (completer.isCompleted) return;
    if (logTag != null) {
      debugPrint('[$logTag] Timeout — no photo received after 10s');
    }
    completer.complete(null);
  });
  try {
    await service.requestPhotoCapture();
    if (logTag != null) {
      debugPrint('[$logTag] requestPhotoCapture() returned — waiting for callback...');
    }
    return await completer.future;
  } finally {
    // Always undo the temporary hook, otherwise a failed request would leave
    // this closure installed on the shared service.
    timeout.cancel();
    service.onPhotoCaptured = prevCallback;
  }
}

/// Speaks the translation registered under [key] in the visitor's language
/// and records it in [lastTtsText].
Future<void> _speakTranslated(String key) async {
  final langCode = _toLangCode(visitAppContext.language ?? 'FR');
  lastTtsText.value = TranslationHelper.getFromLocale(key, visitAppContext);
  await ttsEngine.speak(lastTtsText.value, languageCode: langCode);
}
/// Tries to decode a QR code from the image at [imagePath].
///
/// Returns the section ID and optional configuration ID extracted from the
/// first barcode accepted by [_extractQrIds]. Returns null when no usable
/// barcode arrives within 2 seconds after the analysis call returns, or when
/// decoding throws. The barcode subscription and the scanner controller are
/// released in every case.
Future<({String sectionId, String? configId})?> _tryDecodeQr(String imagePath) async {
  debugPrint('[QrScan] analyzeImage: $imagePath');
  final controller = MobileScannerController();
  final completer = Completer<({String sectionId, String? configId})?>();
  StreamSubscription<Object?>? sub;
  ({String sectionId, String? configId})? result;
  try {
    // Subscribe before analysing: results are read from the barcode stream.
    sub = controller.barcodes.listen((capture) {
      debugPrint('[QrScan] barcodes detected: ${capture.barcodes.length}');
      for (final barcode in capture.barcodes) {
        final raw = barcode.rawValue;
        debugPrint('[QrScan] raw value: "$raw"');
        if (raw == null) continue;
        final ids = _extractQrIds(raw);
        debugPrint('[QrScan] extracted: sectionId=${ids?.sectionId} configId=${ids?.configId}');
        if (ids != null && !completer.isCompleted) completer.complete(ids);
      }
    });
    await controller.analyzeImage(imagePath);
    // Grace period for a stream event that arrives after the call returns.
    result = await completer.future.timeout(
      const Duration(seconds: 2),
      onTimeout: () {
        debugPrint('[QrScan] analyzeImage timeout — no barcode found');
        return null;
      },
    );
  } catch (e) {
    debugPrint('[QrScan] decode error: $e');
  } finally {
    await sub?.cancel();
    controller.dispose();
  }
  debugPrint('[QrScan] result: ${result != null ? "found sectionId=${result.sectionId}" : "null"}');
  return result;
}
/// Extracts (sectionId, configId) from the raw QR payload [raw].
///
/// A payload matching one of the known URL patterns yields the IDs found in
/// the URL. Any other payload is treated as a bare section ID, with
/// surrounding whitespace removed: it is checked against the loaded section
/// IDs when they are available, and accepted without a configId otherwise.
/// Returns null for a blank payload or an unknown bare ID.
({String sectionId, String? configId})? _extractQrIds(String raw) {
  for (final pattern in [_urlPattern1, _urlPattern2]) {
    final match = pattern.firstMatch(raw);
    if (match != null) {
      return (sectionId: match.group(3)!, configId: match.group(2));
    }
  }
  final id = raw.trim();
  // A blank payload can never designate a section.
  if (id.isEmpty) return null;
  // Bare ID — validate it against the configuration when one is loaded.
  final knownIds = visitAppContext.sectionIds;
  if (knownIds != null) {
    return knownIds.contains(id)
        ? (sectionId: id, configId: visitAppContext.configuration?.id)
        : null;
  }
  // No configuration loaded — accept the bare ID without a configId.
  return (sectionId: id, configId: null);
}
/// Asks the LLM to present the section [sectionId] and speaks the answer.
///
/// Calls for the same section within the cooldown window are ignored
/// (anti-spam for repeated scans). [configurationId], when given, takes
/// priority over the active configuration. Errors are logged, not thrown; a
/// failed attempt lifts the cooldown so the visitor can retry immediately.
Future<void> explainSection(String sectionId, {String? configurationId}) async {
  final now = DateTime.now().millisecondsSinceEpoch;
  final last = _lastQrTime[sectionId];
  if (last != null && now - last < _qrCooldownMs) return;
  _lastQrTime[sectionId] = now;
  // Priority: explicit configId (extracted from the QR URL) > active
  // configuration > null (instance mode)
  final cfgId = configurationId ?? visitAppContext.configuration?.id;
  final lang = visitAppContext.language ?? 'FR';
  try {
    final reply = await llmClient.chat(
      'Le visiteur vient de scanner le QR code de la section "$sectionId". '
      'Appelle GetSectionDetail avec cet ID, puis présente le contenu de façon engageante en 2-3 phrases. '
      'Utilise les informations réelles du champ Contenu — cite des détails concrets, pas de généralités.',
      configurationId: cfgId,
      languageCode: lang,
    );
    if (reply.isNotEmpty) {
      lastTtsText.value = reply;
      await ttsEngine.speak(reply, languageCode: _toLangCode(lang));
    }
  } catch (e) {
    // Nothing was presented: do not make the visitor wait out the cooldown.
    // Only clear the timestamp this call wrote, not one set by a later call.
    if (_lastQrTime[sectionId] == now) _lastQrTime.remove(sectionId);
    debugPrint('[GlassesOrchestrator] explainSection error: $e');
  }
}
// ── Helpers ────────────────────────────────────────────────────────────────
/// Whether [text] asks to cancel or end the conversation.
///
/// Matching is done on the lower-cased, trimmed text: exact stop phrases,
/// phrases containing a stop fragment, and any utterance shorter than 10
/// characters that contains "non" or "rien".
bool _isStopCommand(String text) {
  const exactPhrases = {
    'non',
    'rien',
    'non rien',
    'rien merci',
    'non merci',
    'laisse tomber',
    'annule',
    'annuler',
  };
  const fragments = [
    'stop',
    'arrête',
    'au revoir',
    'c\'est bon',
    'ok merci',
    'laisse tomber',
  ];
  final normalized = text.toLowerCase().trim();
  if (exactPhrases.contains(normalized)) return true;
  if (fragments.any(normalized.contains)) return true;
  final isShort = normalized.length < 10;
  return isShort && (normalized.contains('non') || normalized.contains('rien'));
}
/// Whether [text] contains one of the QR-scan keywords (case-insensitive).
bool _isQrScanCommand(String text) {
  const keywords = ['scan', 'qr', 'code', 'regarde'];
  final lowered = text.toLowerCase();
  return keywords.any(lowered.contains);
}
/// Whether [text] contains one of the photo keywords (case-insensitive).
bool _isPhotoCommand(String text) {
  const keywords = ['photo', 'prends', 'capture'];
  final lowered = text.toLowerCase();
  return keywords.any(lowered.contains);
}
/// Whether [text] contains one of the repeat keywords (case-insensitive).
bool _isRepeatCommand(String text) {
  const keywords = ['répète', 'repete', 'encore'];
  final lowered = text.toLowerCase();
  return keywords.any(lowered.contains);
}
/// Maps an application language code (case-insensitive) to the locale tag
/// passed to the speech engines. Unknown codes fall back to French.
String _toLangCode(String lang) => switch (lang.toUpperCase()) {
      'FR' => 'fr-FR',
      'NL' => 'nl-NL',
      'EN' => 'en-US',
      'DE' => 'de-DE',
      _ => 'fr-FR',
    };
}