165 lines
5.4 KiB
Dart

import 'dart:convert';
import 'dart:io';
import 'dart:typed_data';
import 'package:flutter/foundation.dart';
import 'package:http/http.dart' as http;
import 'package:just_audio/just_audio.dart';
import 'package:path_provider/path_provider.dart';
import 'package:mymuseum_visitapp/Services/Glasses/engines/tts_engine.dart';
/// Text-to-speech engine backed by the Gemini 2.5 Flash TTS preview model.
///
/// The text is posted to the `generateContent` REST endpoint, the PCM audio
/// returned inline is wrapped into a WAV file in the temporary directory, and
/// that file is played through an [AudioPlayer]. The speaking style is steered
/// by [voicePrompt], which is sent as the system instruction.
///
/// Available voices include: Algieba, Iapetus, Aoede, Charon, Fenrir, Kore,
/// Leda, Orus, Puck, Schedar, Sulafat, Umbriel...
///
/// Example:
///
/// ```dart
/// GeminiTtsEngine(
///   apiKey: kGeminiApiKey,
///   voiceName: 'Algieba',
///   voicePrompt: 'Museum guide voice, warm tone, steady pace, '
///       'like the narrator of a cultural documentary.',
/// )
/// ```
class GeminiTtsEngine implements TtsEngine {
  /// Creates an engine that authenticates with [apiKey], speaks with the
  /// prebuilt voice [voiceName] and follows the style given by [voicePrompt].
  GeminiTtsEngine({
    required this.apiKey,
    this.voiceName = 'Algieba',
    this.voicePrompt = 'Voix de guide de musée, ton chaleureux et bienveillant, '
        'rythme posé et clair, comme un narrateur de documentaire culturel.',
  });

  static const String _model = 'gemini-2.5-flash-preview-tts';
  static const String _baseUrl =
      'https://generativelanguage.googleapis.com/v1beta';

  /// Sample rate used when the response does not advertise one.
  static const int _defaultSampleRate = 24000;

  /// Captures the `rate=<hz>` parameter of an audio MIME type.
  static final RegExp _ratePattern = RegExp(r'rate=(\d+)');

  /// API key sent with every synthesis request.
  final String apiKey;

  /// Name of the prebuilt voice requested from the model.
  final String voiceName;

  /// Style instruction sent as the system prompt of every request.
  final String voicePrompt;

  final AudioPlayer _player = AudioPlayer();
  bool _speaking = false;

  /// Path of the WAV file currently loaded in [_player], if any.
  String? _lastTempPath;

  /// Incremented by every [speak], [stop] and [dispose] call so that an
  /// in-flight [speak] can detect that it has been superseded.
  int _generation = 0;

  /// Whether a [speak] request is currently being synthesized or played.
  @override
  bool get isSpeaking => _speaking;

  /// Synthesizes [text] and plays it, completing once playback has ended.
  ///
  /// Does nothing when [text] is empty. [languageCode] is appended to the
  /// system instruction. Errors are logged with [debugPrint] and never
  /// rethrown. A call to [stop], [dispose] or a newer [speak] issued while
  /// this one is still in progress cancels it before it starts playing.
  @override
  Future<void> speak(String text, {String languageCode = 'fr-FR'}) async {
    if (text.isEmpty) return;
    final generation = ++_generation;
    _speaking = true;
    String? wavPath;
    try {
      wavPath = await _synthesize(text, languageCode);
      if (wavPath == null) return;
      // Superseded while waiting for the network: the `finally` clause below
      // removes the file that was just written.
      if (generation != _generation) return;

      // NOTE(review): per the just_audio documentation `playing` stays true
      // after a track completes and `play()` returns immediately when it is
      // already true, which would reset `_speaking` too early on every
      // utterance after the first. Pausing first makes `play()` below issue
      // a fresh request. Confirm against the pinned just_audio version.
      if (_player.playing) await _player.pause();
      await _player.setFilePath(wavPath);

      // The player now reads the new file, so the previous one can go.
      final previousPath = _lastTempPath;
      _lastTempPath = wavPath;
      await _deleteTempFile(previousPath);

      if (generation != _generation) return;
      await _player.play();
    } catch (e) {
      debugPrint('[GeminiTtsEngine] speak error: $e');
    } finally {
      // Covers both a superseded request and a failed load: a file that never
      // became the current one would otherwise be leaked.
      if (wavPath != null && wavPath != _lastTempPath) {
        await _deleteTempFile(wavPath);
      }
      // Only the most recent request owns the flag.
      if (generation == _generation) _speaking = false;
    }
  }

  /// Stops playback and cancels any [speak] request still being synthesized.
  @override
  Future<void> stop() async {
    _generation++;
    _speaking = false;
    await _player.stop();
  }

  /// Replays the last synthesized utterance from the beginning.
  ///
  /// Does nothing when no utterance is available. Does not update
  /// [isSpeaking].
  @override
  Future<void> replay() async {
    if (_lastTempPath == null) return;
    await _player.seek(Duration.zero);
    await _player.play();
  }

  /// Requests speech for [text] and returns the path of the resulting WAV
  /// file, or `null` when the request fails or the response has no audio.
  ///
  /// Throws if the request times out (15 seconds) or if the response body is
  /// not valid JSON / base64; [speak] catches and logs those errors.
  Future<String?> _synthesize(String text, String languageCode) async {
    final body = jsonEncode({
      'contents': [
        {
          'parts': [
            {'text': text}
          ]
        }
      ],
      'systemInstruction': {
        'parts': [
          {'text': '$voicePrompt\nLangue : $languageCode.'}
        ]
      },
      'generationConfig': {
        'responseModalities': ['AUDIO'],
        'speechConfig': {
          'voiceConfig': {
            'prebuiltVoiceConfig': {'voiceName': voiceName}
          }
        }
      }
    });

    final response = await http
        .post(
          Uri.parse('$_baseUrl/models/$_model:generateContent'),
          headers: {
            'Content-Type': 'application/json',
            // Sent as a header rather than a `?key=` query parameter so the
            // key does not end up in logged or cached URLs.
            'x-goog-api-key': apiKey,
          },
          body: body,
        )
        .timeout(const Duration(seconds: 15));

    if (response.statusCode != 200) {
      debugPrint(
          '[GeminiTtsEngine] HTTP ${response.statusCode}: ${response.body}');
      return null;
    }

    final inlineData = _findInlineData(jsonDecode(response.body));
    if (inlineData == null) return null;
    final pcmBase64 = inlineData['data'];
    if (pcmBase64 is! String || pcmBase64.isEmpty) return null;
    final mimeType = inlineData['mimeType'];

    return _pcmToWav(
      base64Decode(pcmBase64),
      sampleRate: _sampleRateFrom(mimeType is String ? mimeType : null),
    );
  }

  /// Returns the first `inlineData` object found in the parts of the first
  /// candidate of [decoded], or `null` when the response has another shape.
  static Map<String, dynamic>? _findInlineData(Object? decoded) {
    if (decoded is! Map<String, dynamic>) return null;
    final candidates = decoded['candidates'];
    if (candidates is! List || candidates.isEmpty) return null;
    final candidate = candidates.first;
    if (candidate is! Map<String, dynamic>) return null;
    final content = candidate['content'];
    if (content is! Map<String, dynamic>) return null;
    final parts = content['parts'];
    if (parts is! List) return null;
    for (final part in parts) {
      if (part is! Map<String, dynamic>) continue;
      final inlineData = part['inlineData'];
      if (inlineData is Map<String, dynamic>) return inlineData;
    }
    return null;
  }

  /// Reads the `rate=` parameter of [mimeType], falling back to
  /// [_defaultSampleRate] when it is missing or not a positive integer.
  static int _sampleRateFrom(String? mimeType) {
    if (mimeType == null) return _defaultSampleRate;
    final digits = _ratePattern.firstMatch(mimeType)?.group(1);
    final rate = digits == null ? null : int.tryParse(digits);
    return (rate == null || rate <= 0) ? _defaultSampleRate : rate;
  }

  /// Wraps raw mono 16-bit PCM into a WAV file playable by just_audio.
  ///
  /// Writes a 44-byte RIFF/WAVE header followed by [pcm] to a new file in the
  /// temporary directory and returns its path.
  Future<String> _pcmToWav(Uint8List pcm, {int sampleRate = 24000}) async {
    const channels = 1;
    const bitsPerSample = 16;
    const headerSize = 44;
    const blockAlign = channels * bitsPerSample ~/ 8;
    final byteRate = sampleRate * blockAlign;
    final dataSize = pcm.length;

    final wav = Uint8List(headerSize + dataSize);
    // Writes a four-character chunk identifier at [offset].
    void tag(int offset, String fourCc) =>
        wav.setAll(offset, ascii.encode(fourCc));

    // The header view shares memory with `wav`, so no extra copy is needed.
    final header = ByteData.sublistView(wav, 0, headerSize);

    // RIFF chunk
    tag(0, 'RIFF');
    header.setUint32(4, 36 + dataSize, Endian.little); // bytes after this field
    tag(8, 'WAVE');

    // fmt chunk
    tag(12, 'fmt ');
    header
      ..setUint32(16, 16, Endian.little) // fmt chunk size
      ..setUint16(20, 1, Endian.little) // audio format 1 = PCM
      ..setUint16(22, channels, Endian.little)
      ..setUint32(24, sampleRate, Endian.little)
      ..setUint32(28, byteRate, Endian.little)
      ..setUint16(32, blockAlign, Endian.little)
      ..setUint16(34, bitsPerSample, Endian.little);

    // data chunk
    tag(36, 'data');
    header.setUint32(40, dataSize, Endian.little);
    wav.setAll(headerSize, pcm);

    final dir = await getTemporaryDirectory();
    final path =
        '${dir.path}/gemini_tts_${DateTime.now().millisecondsSinceEpoch}.wav';
    await File(path).writeAsBytes(wav, flush: true);
    return path;
  }

  /// Deletes the temporary file at [path], if any.
  ///
  /// Cleanup is best-effort: file-system failures are logged, never thrown.
  Future<void> _deleteTempFile(String? path) async {
    if (path == null) return;
    try {
      final file = File(path);
      if (await file.exists()) await file.delete();
    } on FileSystemException catch (e) {
      debugPrint('[GeminiTtsEngine] temp file cleanup failed: $e');
    }
  }

  /// Releases the audio player and removes the last temporary WAV file.
  ///
  /// Any [speak] request still in progress is cancelled.
  void dispose() {
    _generation++;
    _speaking = false;
    final stalePath = _lastTempPath;
    _lastTempPath = null;
    // Kept synchronous for existing callers: the file is removed in the
    // background once the player has let go of it.
    _player.dispose().whenComplete(() => _deleteTempFile(stalePath));
  }
}