V4.9.4 feature (#4470)

* Training status (#4424) * dataset data training state (#4311) * dataset data training state * fix * fix ts * fix * fix api format * fix * fix * perf: count training * format * fix: dataset training state (#4417) * fix * add test * fix * fix * fix test * fix test * perf: training count * count * loading status --------- Co-authored-by: heheer <heheer@sealos.io> * doc * website sync feature (#4429) * perf: introduce BullMQ for website sync (#4403) * perf: introduce BullMQ for website sync * feat: new redis module * fix: remove graceful shutdown * perf: improve UI in dataset detail - Updated the "change" icon SVG file. - Modified i18n strings. - Added new i18n string "immediate_sync". - Improved UI in dataset detail page, including button icons and background colors. * refactor: Add chunkSettings to DatasetSchema * perf: website sync ux * env template * fix: clean up website dataset when updating chunk settings (#4420) * perf: check setting updated * perf: worker currency * feat: init script for website sync refactor (#4425) * website feature doc --------- Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com> * pro migration (#4388) (#4433) * pro migration * reuse customPdfParseType Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com> * perf: remove loading ui * feat: config chat file expired time * Redis cache (#4436) * perf: add Redis cache for vector counting (#4432) * feat: cache * perf: get cache key --------- Co-authored-by: a.e. 
<49438478+I-Info@users.noreply.github.com> * perf: mobile voice input (#4437) * update:Mobile voice interaction (#4362) * Add files via upload * Add files via upload * Update ollama.md * Update ollama.md * Add files via upload * Update useSpeech.ts * Update ChatInput.tsx * Update useSpeech.ts * Update ChatInput.tsx * Update useSpeech.ts * Update constants.ts * Add files via upload * Update ChatInput.tsx * Update useSpeech.ts * Update useSpeech.ts * Update useSpeech.ts * Update ChatInput.tsx * Add files via upload * Update common.json * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update useSpeech.ts * Update useSpeech.ts * Update common.json * Update common.json * Update common.json * Update VoiceInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update useSpeech.ts * Update common.json * Update chat.json * Update common.json * Update chat.json * Update common.json * Update chat.json * Update VoiceInput.tsx * Update ChatInput.tsx * Update useSpeech.ts * Update VoiceInput.tsx * speech ui * 优化语音输入组件，调整输入框显示逻辑，修复语音输入遮罩层样式，更新画布背景透明度，增强用户交互体验。 (#4435) * perf: mobile voice input --------- Co-authored-by: dreamer6680 <1468683855@qq.com> * Test completion v2 (#4438) * add v2 completions (#4364) * add v2 completions * completion config * config version * fix * frontend * doc * fix * fix: completions v2 api --------- Co-authored-by: heheer <heheer@sealos.io> * package * Test mongo log (#4443) * feat: mongodb-log (#4426) * perf: mongo log * feat: completions stop reasoner * mongo db log --------- Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> * update doc * Update doc * fix external var ui (#4444) * action * fix: ts (#4458) * preview doc action add docs preview permission update preview action update action * update doc (#4460) * update preview action * update doc * remove * update * schema * update mq export;perf: redis cache 
(#4465) * perf: redis cache * update mq export * perf: website sync error tip * add error worker * website sync ui (#4466) * Updated the dynamic display of the voice input pop-up (#4469) * Update VoiceInput.tsx * Update VoiceInput.tsx * Update VoiceInput.tsx * fix: voice input --------- Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com> Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com> Co-authored-by: dreamer6680 <1468683855@qq.com> Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
2025-04-08 12:05:04 +08:00
parent 5839325f77
commit f642c9603b
151 changed files with 5434 additions and 1354 deletions
--- a/projects/app/src/web/common/hooks/useSpeech.ts
+++ b/projects/app/src/web/common/hooks/useSpeech.ts
@@ -7,16 +7,21 @@ import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';

 export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) => {
  const { t } = useTranslation();
-  const mediaRecorder = useRef<MediaRecorder>();
-  const [mediaStream, setMediaStream] = useState<MediaStream>();
  const { toast } = useToast();
+
  const [isSpeaking, setIsSpeaking] = useState(false);
  const [isTransCription, setIsTransCription] = useState(false);
-  const [audioSecond, setAudioSecond] = useState(0);
-  const intervalRef = useRef<any>();
-  const startTimestamp = useRef(0);
-  const cancelWhisperSignal = useRef(false);

+  const mediaRecorder = useRef<MediaRecorder>();
+  const [mediaStream, setMediaStream] = useState<MediaStream>();
+
+  const timeIntervalRef = useRef<any>();
+  const cancelWhisperSignal = useRef(false);
+  const stopCalledRef = useRef(false);
+
+  const startTimestamp = useRef(0);
+
+  const [audioSecond, setAudioSecond] = useState(0);
  const speakingTimeString = useMemo(() => {
    const minutes: number = Math.floor(audioSecond / 60);
    const remainingSeconds: number = Math.floor(audioSecond % 60);
@@ -25,17 +30,16 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
    return `${formattedMinutes}:${formattedSeconds}`;
  }, [audioSecond]);

-  const renderAudioGraph = useCallback((analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
+  const renderAudioGraphPc = useCallback((analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
    const bufferLength = analyser.frequencyBinCount;
-    const backgroundColor = 'white';
    const dataArray = new Uint8Array(bufferLength);
    analyser.getByteTimeDomainData(dataArray);
    const canvasCtx = canvas?.getContext('2d');
-    const width = 300;
-    const height = 200;
+    const width = canvas.width;
+    const height = canvas.height;
    if (!canvasCtx) return;
    canvasCtx.clearRect(0, 0, width, height);
-    canvasCtx.fillStyle = backgroundColor;
+    canvasCtx.fillStyle = 'white';
    canvasCtx.fillRect(0, 0, width, height);
    const barWidth = (width / bufferLength) * 2.5;
    let x = 0;
@@ -49,127 +53,212 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
      x += barWidth + 1;
    }
  }, []);
+  const renderAudioGraphMobile = useCallback(
+    (analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
+      const canvasCtx = canvas?.getContext('2d');
+      if (!canvasCtx) return;

-  const startSpeak = async (onFinish: (text: string) => void) => {
-    if (!navigator?.mediaDevices?.getUserMedia) {
-      return toast({
-        status: 'warning',
-        title: t('common:common.speech.not support')
-      });
-    }
-    try {
+      const bufferLength = analyser.frequencyBinCount;
+      const dataArray = new Uint8Array(bufferLength);
+      analyser.getByteTimeDomainData(dataArray);
+
+      const width = canvas.width;
+      const height = canvas.height;
+      canvasCtx.clearRect(0, 0, width, height);
+
+      // Set transparent background
+      canvasCtx.fillStyle = 'rgba(255, 255, 255, 0)';
+      canvasCtx.fillRect(0, 0, width, height);
+
+      const centerY = height / 2;
+      const barWidth = (width / bufferLength) * 15;
+      const gap = 2; // 添加间隙
+      let x = width * 0.1;
+
+      let sum = 0;
+      let maxDiff = 0;
+
+      for (let i = 0; i < bufferLength; i++) {
+        sum += dataArray[i];
+        maxDiff = Math.max(maxDiff, Math.abs(dataArray[i] - 128));
+      }
+      const average = sum / bufferLength;
+
+      // draw initial rectangle waveform
+      canvasCtx.beginPath();
+      canvasCtx.fillStyle = '#FFFFFF';
+
+      const initialHeight = height * 0.1;
+      for (let i = 0; i < width * 0.8; i += barWidth + gap) {
+        canvasCtx.fillRect(i + width * 0.1, centerY - initialHeight, barWidth, initialHeight);
+        canvasCtx.fillRect(i + width * 0.1, centerY, barWidth, initialHeight);
+      }
+
+      // draw dynamic waveform
+      canvasCtx.beginPath();
+      for (let i = 0; i < bufferLength; i += 4) {
+        const value = dataArray[i];
+        const normalizedValue = (value - average) / 128;
+        const amplification = 2.5;
+        const barHeight = normalizedValue * height * 0.4 * amplification;
+
+        canvasCtx.fillStyle = '#FFFFFF';
+
+        canvasCtx.fillRect(x, centerY - Math.abs(barHeight), barWidth, Math.abs(barHeight));
+        canvasCtx.fillRect(x, centerY, barWidth, Math.abs(barHeight));
+
+        x += barWidth + gap; // 增加间隔
+
+        if (x > width * 0.9) break;
+      }
+    },
+    []
+  );
+
+  const startSpeak = useCallback(
+    async (onFinish: (text: string) => void) => {
+      if (!navigator?.mediaDevices?.getUserMedia) {
+        return toast({
+          status: 'warning',
+          title: t('common:common.speech.not support')
+        });
+      }
+
+      // Init status
+      if (timeIntervalRef.current) {
+        clearInterval(timeIntervalRef.current);
+      }
      cancelWhisperSignal.current = false;
+      stopCalledRef.current = false;

-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      setMediaStream(stream);
-
-      mediaRecorder.current = new MediaRecorder(stream);
-      const chunks: Blob[] = [];
      setIsSpeaking(true);
+      setAudioSecond(0);

-      mediaRecorder.current.onstart = () => {
-        startTimestamp.current = Date.now();
-        setAudioSecond(0);
-        intervalRef.current = setInterval(() => {
-          const currentTimestamp = Date.now();
-          const duration = (currentTimestamp - startTimestamp.current) / 1000;
-          setAudioSecond(duration);
-        }, 1000);
-      };
+      try {
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        setMediaStream(stream);

-      mediaRecorder.current.ondataavailable = (e) => {
-        chunks.push(e.data);
-      };
+        mediaRecorder.current = new MediaRecorder(stream);
+        const chunks: Blob[] = [];

-      mediaRecorder.current.onstop = async () => {
-        if (!cancelWhisperSignal.current) {
-          const formData = new FormData();
-          const { options, filename } = (() => {
-            if (MediaRecorder.isTypeSupported('video/webm; codecs=vp9')) {
-              return {
-                options: { mimeType: 'video/webm; codecs=vp9' },
-                filename: 'recording.mp3'
-              };
-            }
-            if (MediaRecorder.isTypeSupported('video/webm')) {
+        mediaRecorder.current.onstart = () => {
+          startTimestamp.current = Date.now();
+          timeIntervalRef.current = setInterval(() => {
+            const currentTimestamp = Date.now();
+            const duration = (currentTimestamp - startTimestamp.current) / 1000;
+            setAudioSecond(duration);
+          }, 1000);
+        };
+        mediaRecorder.current.ondataavailable = (e) => {
+          chunks.push(e.data);
+        };
+        mediaRecorder.current.onstop = async () => {
+          // close media stream
+          stream.getTracks().forEach((track) => track.stop());
+          setIsSpeaking(false);
+
+          if (timeIntervalRef.current) {
+            clearInterval(timeIntervalRef.current);
+          }
+
+          if (!cancelWhisperSignal.current) {
+            const formData = new FormData();
+            const { options, filename } = (() => {
+              if (MediaRecorder.isTypeSupported('video/webm; codecs=vp9')) {
+                return {
+                  options: { mimeType: 'video/webm; codecs=vp9' },
+                  filename: 'recording.mp3'
+                };
+              }
+              if (MediaRecorder.isTypeSupported('video/webm')) {
+                return {
+                  options: { type: 'video/webm' },
+                  filename: 'recording.mp3'
+                };
+              }
+              if (MediaRecorder.isTypeSupported('video/mp4')) {
+                return {
+                  options: { mimeType: 'video/mp4', videoBitsPerSecond: 100000 },
+                  filename: 'recording.mp4'
+                };
+              }
              return {
                options: { type: 'video/webm' },
                filename: 'recording.mp3'
              };
-            }
-            if (MediaRecorder.isTypeSupported('video/mp4')) {
-              return {
-                options: { mimeType: 'video/mp4', videoBitsPerSecond: 100000 },
-                filename: 'recording.mp4'
-              };
-            }
-            return {
-              options: { type: 'video/webm' },
-              filename: 'recording.mp3'
-            };
-          })();
+            })();

-          const blob = new Blob(chunks, options);
-          const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
-          formData.append('file', blob, filename);
-          formData.append(
-            'data',
-            JSON.stringify({
-              ...props,
-              duration
-            })
-          );
+            const blob = new Blob(chunks, options);
+            const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
+            formData.append('file', blob, filename);
+            formData.append(
+              'data',
+              JSON.stringify({
+                ...props,
+                duration
+              })
+            );

-          setIsTransCription(true);
-          try {
-            const result = await POST<string>('/v1/audio/transcriptions', formData, {
-              timeout: 60000,
-              headers: {
-                'Content-Type': 'multipart/form-data; charset=utf-8'
-              }
-            });
-            onFinish(result);
-          } catch (error) {
-            toast({
-              status: 'warning',
-              title: getErrText(error, t('common:common.speech.error tip'))
-            });
+            setIsTransCription(true);
+            try {
+              const result = await POST<string>('/v1/audio/transcriptions', formData, {
+                timeout: 60000,
+                headers: {
+                  'Content-Type': 'multipart/form-data; charset=utf-8'
+                }
+              });
+              onFinish(result);
+            } catch (error) {
+              toast({
+                status: 'warning',
+                title: getErrText(error, t('common:common.speech.error tip'))
+              });
+            }
+            setIsTransCription(false);
          }
+        };
+        mediaRecorder.current.onerror = (e) => {
+          if (timeIntervalRef.current) {
+            clearInterval(timeIntervalRef.current);
+          }
+          console.log('error', e);
+          setIsSpeaking(false);
+        };
+
+        // If onclick stop, stop speak
+        if (stopCalledRef.current) {
+          mediaRecorder.current.stop();
+        } else {
+          mediaRecorder.current.start();
        }
+      } catch (error) {
+        toast({
+          status: 'warning',
+          title: getErrText(error, 'Whisper error')
+        });
+        console.log(error);
+      }
+    },
+    [toast, t, props]
+  );

-        // close media stream
-        stream.getTracks().forEach((track) => track.stop());
-
-        setIsTransCription(false);
-        setIsSpeaking(false);
-      };
-
-      mediaRecorder.current.onerror = (e) => {
-        console.log('error', e);
-        setIsSpeaking(false);
-      };
-
-      mediaRecorder.current.start();
-    } catch (error) {
-      toast({
-        status: 'warning',
-        title: getErrText(error, 'Whisper error')
-      });
-      console.log(error);
-    }
-  };
-
-  const stopSpeak = (cancel = false) => {
+  const stopSpeak = useCallback((cancel = false) => {
    cancelWhisperSignal.current = cancel;
-    if (mediaRecorder.current) {
-      mediaRecorder.current?.stop();
-      clearInterval(intervalRef.current);
-    }
-  };
+    stopCalledRef.current = true;

+    if (timeIntervalRef.current) {
+      clearInterval(timeIntervalRef.current);
+    }
+
+    if (mediaRecorder.current && mediaRecorder.current.state !== 'inactive') {
+      mediaRecorder.current.stop();
+    }
+  }, []);
+
+  // Leave page, stop speak
  useEffect(() => {
    return () => {
-      clearInterval(intervalRef.current);
+      clearInterval(timeIntervalRef.current);
      if (mediaRecorder.current && mediaRecorder.current.state !== 'inactive') {
        mediaRecorder.current.stop();
      }
@@ -184,14 +273,15 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
    if (audioSecond >= 60) {
      stopSpeak();
    }
-  }, [audioSecond]);
+  }, [audioSecond, stopSpeak]);

  return {
    startSpeak,
    stopSpeak,
    isSpeaking,
    isTransCription,
-    renderAudioGraph,
+    renderAudioGraphPc,
+    renderAudioGraphMobile,
    stream: mediaStream,
    speakingTimeString
  };