Add Micro auto

This commit is contained in:
2026-04-05 10:44:46 +02:00
parent 56086ec557
commit 73a78fd7a5
2 changed files with 251 additions and 155 deletions

View File

@@ -1,51 +0,0 @@
services:
postgres:
image: postgres:16-alpine
environment:
POSTGRES_DB: tutor
POSTGRES_USER: tutor
POSTGRES_PASSWORD: tutor
ports:
- "5432:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
redis:
image: redis:7-alpine
ports:
- "6379:6379"
backend:
build: ./backend
environment:
OPENAI_API_KEY: ${OPENAI_API_KEY}
DATABASE_URL: postgresql+psycopg2://tutor:tutor@postgres:5432/tutor
REDIS_URL: redis://redis:6379/0
APP_ENV: development
FRONTEND_ORIGIN: http://localhost:3000
ports:
- "8000:8000"
depends_on:
- postgres
- redis
volumes:
- ./backend:/app
frontend:
build: ./frontend
environment:
VITE_ALLOWED_HOST: prof.open-squared.tech
VITE_DEV_API_PROXY_TARGET: http://backend:8000
VITE_HMR_HOST: prof.open-squared.tech
VITE_HMR_PROTOCOL: wss
VITE_HMR_CLIENT_PORT: 443
ports:
- "3000:3000"
depends_on:
- backend
volumes:
- ./frontend:/app
- /app/node_modules
volumes:
postgres_data:

View File

@@ -1,6 +1,8 @@
import React, { useEffect, useMemo, useRef, useState } from 'react' import React, { useEffect, useMemo, useRef, useState } from 'react'
const API_BASE = '/api' const API_BASE = '/api'
const AUTO_STOP_SILENCE_MS = 2500
const SPEECH_START_THRESHOLD = 0.05
async function parseApiResponse(res) { async function parseApiResponse(res) {
const contentType = res.headers.get('content-type') || '' const contentType = res.headers.get('content-type') || ''
@@ -84,14 +86,28 @@ export default function App() {
const [assessmentAnswer, setAssessmentAnswer] = useState('') const [assessmentAnswer, setAssessmentAnswer] = useState('')
const [speaking, setSpeaking] = useState(false) const [speaking, setSpeaking] = useState(false)
const [isRecording, setIsRecording] = useState(false) const [isRecording, setIsRecording] = useState(false)
const [isAutoListening, setIsAutoListening] = useState(false)
const [isTranscribing, setIsTranscribing] = useState(false) const [isTranscribing, setIsTranscribing] = useState(false)
const [voiceStatus, setVoiceStatus] = useState('') const [voiceStatus, setVoiceStatus] = useState('')
const [errorMessage, setErrorMessage] = useState('') const [errorMessage, setErrorMessage] = useState('')
const recognitionRef = useRef(null) const [voices, setVoices] = useState([])
const [selectedVoiceURI, setSelectedVoiceURI] = useState('')
const mediaRecorderRef = useRef(null) const mediaRecorderRef = useRef(null)
const mediaStreamRef = useRef(null) const mediaStreamRef = useRef(null)
const recordedChunksRef = useRef([]) const recordedChunksRef = useRef([])
const recordingMimeTypeRef = useRef('') const recordingMimeTypeRef = useRef('')
const audioContextRef = useRef(null)
const analyserRef = useRef(null)
const sourceNodeRef = useRef(null)
const animationFrameRef = useRef(null)
const silenceStartedAtRef = useRef(null)
const isRecordingRef = useRef(false)
const isAutoListeningRef = useRef(false)
const isTranscribingRef = useRef(false)
const availableVoices = useMemo(() => {
return [...voices].sort((a, b) => scoreVoice(b) - scoreVoice(a))
}, [voices])
const selectedStudent = useMemo( const selectedStudent = useMemo(
() => students.find((student) => String(student.id) === String(selectedStudentId)), () => students.find((student) => String(student.id) === String(selectedStudentId)),
@@ -102,6 +118,18 @@ export default function App() {
loadStudents() loadStudents()
}, []) }, [])
// Mirror volatile state into refs so long-lived callbacks (the MediaRecorder
// handlers and the requestAnimationFrame monitoring loop) always read the
// current value instead of a stale closure snapshot.
useEffect(() => {
isRecordingRef.current = isRecording
}, [isRecording])
useEffect(() => {
isAutoListeningRef.current = isAutoListening
}, [isAutoListening])
useEffect(() => {
isTranscribingRef.current = isTranscribing
}, [isTranscribing])
useEffect(() => { useEffect(() => {
if (selectedStudentId) { if (selectedStudentId) {
loadProgress(selectedStudentId) loadProgress(selectedStudentId)
@@ -109,11 +137,24 @@ export default function App() {
}, [selectedStudentId]) }, [selectedStudentId])
useEffect(() => { useEffect(() => {
if (!('speechSynthesis' in window)) return undefined
const loadVoices = () => {
const nextVoices = window.speechSynthesis.getVoices()
setVoices(nextVoices)
setSelectedVoiceURI((currentVoiceURI) => {
if (currentVoiceURI) return currentVoiceURI
const preferredVoice = [...nextVoices].sort((a, b) => scoreVoice(b) - scoreVoice(a))[0]
return preferredVoice?.voiceURI || ''
})
}
loadVoices()
window.speechSynthesis.onvoiceschanged = loadVoices
return () => { return () => {
if (recognitionRef.current) { window.speechSynthesis.onvoiceschanged = null
recognitionRef.current.abort() deactivateAutoListening(false)
}
stopMediaStream()
} }
}, []) }, [])
@@ -168,26 +209,6 @@ export default function App() {
setMessages((prev) => [...prev, { role, content, id: crypto.randomUUID() }]) setMessages((prev) => [...prev, { role, content, id: crypto.randomUUID() }])
} }
async function sendMessage(e) {
e.preventDefault()
if (!selectedStudentId || !input.trim()) return
const text = input.trim()
appendMessage('user', text)
setInput('')
try {
setErrorMessage('')
const data = await apiFetch('/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ student_id: Number(selectedStudentId), message: text }),
})
appendMessage('assistant', data.reply)
speak(data.reply)
} catch (error) {
setErrorMessage(error.message || 'Impossible denvoyer le message.')
}
}
async function loadProgress(studentId) { async function loadProgress(studentId) {
try { try {
setErrorMessage('') setErrorMessage('')
@@ -238,7 +259,11 @@ export default function App() {
if (!('speechSynthesis' in window)) return if (!('speechSynthesis' in window)) return
window.speechSynthesis.cancel() window.speechSynthesis.cancel()
const utterance = new SpeechSynthesisUtterance(text) const utterance = new SpeechSynthesisUtterance(text)
utterance.lang = 'fr-FR' const selectedVoice = voices.find((voice) => voice.voiceURI === selectedVoiceURI)
utterance.lang = selectedVoice?.lang || 'fr-FR'
if (selectedVoice) {
utterance.voice = selectedVoice
}
utterance.onstart = () => setSpeaking(true) utterance.onstart = () => setSpeaking(true)
utterance.onend = () => setSpeaking(false) utterance.onend = () => setSpeaking(false)
utterance.onerror = () => setSpeaking(false) utterance.onerror = () => setSpeaking(false)
@@ -246,6 +271,20 @@ export default function App() {
} }
function stopMediaStream() { function stopMediaStream() {
if (animationFrameRef.current) {
cancelAnimationFrame(animationFrameRef.current)
animationFrameRef.current = null
}
if (sourceNodeRef.current) {
sourceNodeRef.current.disconnect()
sourceNodeRef.current = null
}
if (audioContextRef.current) {
audioContextRef.current.close().catch(() => {})
audioContextRef.current = null
}
analyserRef.current = null
silenceStartedAtRef.current = null
if (mediaStreamRef.current) { if (mediaStreamRef.current) {
mediaStreamRef.current.getTracks().forEach((track) => track.stop()) mediaStreamRef.current.getTracks().forEach((track) => track.stop())
mediaStreamRef.current = null mediaStreamRef.current = null
@@ -255,7 +294,23 @@ export default function App() {
recordingMimeTypeRef.current = '' recordingMimeTypeRef.current = ''
} }
async function transcribeRecording(audioBlob, mimeType) { async function submitUserMessage(text) {
if (!selectedStudentId) {
throw new Error('Choisis un élève avant denvoyer un message.')
}
appendMessage('user', text)
setInput('')
setErrorMessage('')
const data = await apiFetch('/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ student_id: Number(selectedStudentId), message: text }),
})
appendMessage('assistant', data.reply)
speak(data.reply)
}
async function transcribeRecording(audioBlob, mimeType, { autoSend = false } = {}) {
const extension = mimeType.includes('mp4') ? 'mp4' : mimeType.includes('ogg') ? 'ogg' : 'webm' const extension = mimeType.includes('mp4') ? 'mp4' : mimeType.includes('ogg') ? 'ogg' : 'webm'
const formData = new FormData() const formData = new FormData()
formData.append('file', new File([audioBlob], `voice-input.${extension}`, { type: mimeType || 'audio/webm' })) formData.append('file', new File([audioBlob], `voice-input.${extension}`, { type: mimeType || 'audio/webm' }))
@@ -268,8 +323,21 @@ export default function App() {
method: 'POST', method: 'POST',
body: formData, body: formData,
}) })
setInput(data.text || '') const transcript = (data.text || '').trim()
setVoiceStatus(data.text ? 'Texte dicté prêt à être envoyé.' : 'Aucun texte reconnu.') setInput(transcript)
if (!transcript) {
setVoiceStatus('Aucun texte reconnu.')
return
}
if (autoSend && selectedStudentId) {
setVoiceStatus('Texte reconnu, envoi automatique...')
await submitUserMessage(transcript)
setVoiceStatus('Message vocal envoyé automatiquement.')
} else {
setVoiceStatus('Texte dicté prêt à être envoyé.')
}
} catch (error) { } catch (error) {
setVoiceStatus(error.message || 'Impossible de transcrire cet enregistrement.') setVoiceStatus(error.message || 'Impossible de transcrire cet enregistrement.')
} finally { } finally {
@@ -277,103 +345,157 @@ export default function App() {
} }
} }
async function startRecordedVoiceInput() { function calculateVolume(analyser) {
if (!navigator.mediaDevices?.getUserMedia || typeof MediaRecorder === 'undefined') { const data = new Uint8Array(analyser.fftSize)
setVoiceStatus('La dictée vocale nest pas prise en charge par ce navigateur.') analyser.getByteTimeDomainData(data)
return
let sumSquares = 0
for (const value of data) {
const normalized = (value - 128) / 128
sumSquares += normalized * normalized
} }
return Math.sqrt(sumSquares / data.length)
}
async function startSegmentRecording() {
if (!mediaStreamRef.current || isRecordingRef.current || isTranscribingRef.current) return
const mimeType = getSupportedRecordingMimeType() const mimeType = getSupportedRecordingMimeType()
if (!mimeType) { const recorder = mimeType
setVoiceStatus('Aucun format audio compatible nest disponible dans ce navigateur.') ? new MediaRecorder(mediaStreamRef.current, { mimeType })
: new MediaRecorder(mediaStreamRef.current)
mediaRecorderRef.current = recorder
recordedChunksRef.current = []
recordingMimeTypeRef.current = mimeType || recorder.mimeType || 'audio/webm'
silenceStartedAtRef.current = null
recorder.ondataavailable = (event) => {
if (event.data && event.data.size > 0) {
recordedChunksRef.current.push(event.data)
}
}
recorder.onstop = async () => {
const finalMimeType = recordingMimeTypeRef.current || recorder.mimeType || 'audio/webm'
const audioBlob = new Blob(recordedChunksRef.current, { type: finalMimeType })
recordedChunksRef.current = []
mediaRecorderRef.current = null
setIsRecording(false)
if (audioBlob.size > 0) {
await transcribeRecording(audioBlob, finalMimeType, { autoSend: true })
} else if (isAutoListeningRef.current) {
setVoiceStatus('Micro actif. Parle quand tu veux.')
}
}
recorder.onerror = () => {
setVoiceStatus('Le navigateur a rencontré une erreur pendant lenregistrement.')
setIsRecording(false)
}
recorder.start()
setIsRecording(true)
setVoiceStatus('Je técoute...')
}
// Stop the in-flight MediaRecorder segment (no-op when none is active).
// Stopping fires the recorder's onstop handler, which assembles the recorded
// chunks into a Blob and hands it to transcription.
function stopSegmentRecording(statusMessage = 'Silence détecté, transcription...') {
const recorder = mediaRecorderRef.current
if (recorder && recorder.state !== 'inactive') {
// Clear the silence timer before stopping so the next segment starts fresh.
silenceStartedAtRef.current = null
recorder.stop()
setVoiceStatus(statusMessage)
}
}
// requestAnimationFrame loop that watches microphone volume while auto-listen
// mode is on: it starts a recording segment when the RMS volume crosses
// SPEECH_START_THRESHOLD and stops it once AUTO_STOP_SILENCE_MS of
// uninterrupted silence has elapsed.
function monitorMicrophone() {
// Exit the loop when the analyser is gone or auto-listening was turned off.
if (!analyserRef.current || !isAutoListeningRef.current) return
const volume = calculateVolume(analyserRef.current)
const now = Date.now()
if (volume >= SPEECH_START_THRESHOLD) {
// Speech detected: reset the silence timer, and begin a new segment
// unless one is already recording or a transcription is in progress.
silenceStartedAtRef.current = null
if (!isRecordingRef.current && !isTranscribingRef.current) {
startSegmentRecording()
}
} else if (isRecordingRef.current) {
// Below the threshold while recording: open or extend the silence window,
// and stop the segment once it has lasted AUTO_STOP_SILENCE_MS.
if (!silenceStartedAtRef.current) {
silenceStartedAtRef.current = now
} else if (now - silenceStartedAtRef.current >= AUTO_STOP_SILENCE_MS) {
stopSegmentRecording()
}
}
// Re-arm the loop; the guard at the top ends it when listening stops.
animationFrameRef.current = requestAnimationFrame(monitorMicrophone)
}
async function activateAutoListening() {
if (!navigator.mediaDevices?.getUserMedia || typeof MediaRecorder === 'undefined') {
setVoiceStatus('Le micro automatique nest pas pris en charge par ce navigateur.')
return return
} }
try { try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true }) const stream = await navigator.mediaDevices.getUserMedia({
const recorder = new MediaRecorder(stream, { mimeType }) audio: {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
},
})
const audioContext = new window.AudioContext()
const analyser = audioContext.createAnalyser()
analyser.fftSize = 2048
analyser.smoothingTimeConstant = 0.85
const sourceNode = audioContext.createMediaStreamSource(stream)
sourceNode.connect(analyser)
audioContextRef.current = audioContext
analyserRef.current = analyser
sourceNodeRef.current = sourceNode
mediaStreamRef.current = stream mediaStreamRef.current = stream
mediaRecorderRef.current = recorder setIsAutoListening(true)
recordedChunksRef.current = [] setVoiceStatus('Micro actif. Parle quand tu veux, jenverrai après 2,5 s de silence.')
recordingMimeTypeRef.current = mimeType monitorMicrophone()
recorder.ondataavailable = (event) => {
if (event.data && event.data.size > 0) {
recordedChunksRef.current.push(event.data)
}
}
recorder.onstop = async () => {
const finalMimeType = recordingMimeTypeRef.current || mimeType
const audioBlob = new Blob(recordedChunksRef.current, { type: finalMimeType })
stopMediaStream()
setIsRecording(false)
if (audioBlob.size > 0) {
await transcribeRecording(audioBlob, finalMimeType)
} else {
setVoiceStatus('Aucun son détecté. Réessaie en parlant plus près du micro.')
}
}
recorder.onerror = () => {
setVoiceStatus('Le navigateur a rencontré une erreur pendant lenregistrement.')
setIsRecording(false)
stopMediaStream()
}
recorder.start()
setIsRecording(true)
setVoiceStatus('Enregistrement en cours... clique à nouveau pour arrêter.')
} catch { } catch {
setVoiceStatus('Accès au micro refusé ou indisponible.') setVoiceStatus('Accès au micro refusé ou indisponible.')
setIsRecording(false)
stopMediaStream() stopMediaStream()
} }
} }
function stopRecordedVoiceInput() { function deactivateAutoListening(resetStatus = true) {
const recorder = mediaRecorderRef.current setIsAutoListening(false)
if (recorder && recorder.state !== 'inactive') { if (mediaRecorderRef.current && mediaRecorderRef.current.state !== 'inactive') {
recorder.stop() mediaRecorderRef.current.onstop = null
setVoiceStatus('Finalisation de lenregistrement...') mediaRecorderRef.current.stop()
} }
} setIsRecording(false)
stopMediaStream()
function startBrowserRecognition() { if (resetStatus) {
const Recognition = window.SpeechRecognition || window.webkitSpeechRecognition setVoiceStatus('Micro automatique désactivé.')
if (!Recognition) return false
const recognition = new Recognition()
recognition.lang = 'fr-FR'
recognition.interimResults = false
recognition.maxAlternatives = 1
recognition.onresult = (event) => {
const transcript = event.results[0][0].transcript
setInput(transcript)
setVoiceStatus('Texte dicté prêt à être envoyé.')
} }
recognition.onerror = () => {
setVoiceStatus('La reconnaissance vocale du navigateur a échoué. Essaie lenregistrement audio.')
}
recognition.onstart = () => {
setVoiceStatus('Écoute en cours...')
}
recognitionRef.current = recognition
recognition.start()
return true
} }
async function startVoiceInput() { async function startVoiceInput() {
if (isRecording) { if (isAutoListening) {
stopRecordedVoiceInput() deactivateAutoListening()
return return
} }
await activateAutoListening()
}
const startedNativeRecognition = startBrowserRecognition() async function sendMessage(e) {
if (!startedNativeRecognition) { e.preventDefault()
await startRecordedVoiceInput() if (!selectedStudentId || !input.trim()) return
const text = input.trim()
try {
await submitUserMessage(text)
} catch (error) {
setInput(text)
setErrorMessage(error.message || 'Impossible denvoyer le message.')
} }
} }
@@ -463,8 +585,20 @@ export default function App() {
onChange={(event) => setInput(event.target.value)} onChange={(event) => setInput(event.target.value)}
placeholder="Pose une question ou demande une explication..." placeholder="Pose une question ou demande une explication..."
/> />
<select
value={selectedVoiceURI}
onChange={(event) => setSelectedVoiceURI(event.target.value)}
title="Choisir la voix du professeur"
>
<option value="">Voix du professeur</option>
{availableVoices.map((voice) => (
<option key={voice.voiceURI} value={voice.voiceURI}>
{voice.name} · {voice.lang}
</option>
))}
</select>
<button type="button" onClick={startVoiceInput} disabled={isTranscribing}> <button type="button" onClick={startVoiceInput} disabled={isTranscribing}>
{isRecording ? 'Arrêter' : isTranscribing ? 'Transcription...' : 'Dicter'} {isAutoListening ? 'Couper le micro' : isTranscribing ? 'Transcription...' : 'Micro auto'}
</button> </button>
<button type="submit">Envoyer</button> <button type="submit">Envoyer</button>
</form> </form>
@@ -475,3 +609,16 @@ export default function App() {
</div> </div>
) )
} }
// Heuristic ranking for a SpeechSynthesis voice; higher scores sort first.
// French-language voices dominate (+100), then well-known vendors (+20),
// quality markers (+15), and familiar French voice names (+10).
function scoreVoice(voice) {
  const haystack = `${voice.name} ${voice.voiceURI}`.toLowerCase()
  const isFrench = (voice.lang || '').toLowerCase().startsWith('fr')
  const hasVendor = ['google', 'microsoft'].some((term) => haystack.includes(term))
  const hasQuality = ['natural', 'premium', 'enhanced'].some((term) => haystack.includes(term))
  const hasKnownName = ['hortense', 'amelie', 'thomas'].some((term) => haystack.includes(term))
  return (isFrench ? 100 : 0) + (hasVendor ? 20 : 0) + (hasQuality ? 15 : 0) + (hasKnownName ? 10 : 0)
}