V4.9.4 feature (#4470)
* Training status (#4424) * dataset data training state (#4311) * dataset data training state * fix * fix ts * fix * fix api format * fix * fix * perf: count training * format * fix: dataset training state (#4417) * fix * add test * fix * fix * fix test * fix test * perf: training count * count * loading status --------- Co-authored-by: heheer <heheer@sealos.io> * doc * website sync feature (#4429) * perf: introduce BullMQ for website sync (#4403) * perf: introduce BullMQ for website sync * feat: new redis module * fix: remove graceful shutdown * perf: improve UI in dataset detail - Updated the "change" icon SVG file. - Modified i18n strings. - Added new i18n string "immediate_sync". - Improved UI in dataset detail page, including button icons and background colors. * refactor: Add chunkSettings to DatasetSchema * perf: website sync ux * env template * fix: clean up website dataset when updating chunk settings (#4420) * perf: check setting updated * perf: worker currency * feat: init script for website sync refactor (#4425) * website feature doc --------- Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com> * pro migration (#4388) (#4433) * pro migration * reuse customPdfParseType Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com> * perf: remove loading ui * feat: config chat file expired time * Redis cache (#4436) * perf: add Redis cache for vector counting (#4432) * feat: cache * perf: get cache key --------- Co-authored-by: a.e. 
<49438478+I-Info@users.noreply.github.com> * perf: mobile voice input (#4437) * update:Mobile voice interaction (#4362) * Add files via upload * Add files via upload * Update ollama.md * Update ollama.md * Add files via upload * Update useSpeech.ts * Update ChatInput.tsx * Update useSpeech.ts * Update ChatInput.tsx * Update useSpeech.ts * Update constants.ts * Add files via upload * Update ChatInput.tsx * Update useSpeech.ts * Update useSpeech.ts * Update useSpeech.ts * Update ChatInput.tsx * Add files via upload * Update common.json * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update useSpeech.ts * Update useSpeech.ts * Update common.json * Update common.json * Update common.json * Update VoiceInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update VoiceInput.tsx * Update ChatInput.tsx * Update useSpeech.ts * Update common.json * Update chat.json * Update common.json * Update chat.json * Update common.json * Update chat.json * Update VoiceInput.tsx * Update ChatInput.tsx * Update useSpeech.ts * Update VoiceInput.tsx * speech ui * 优化语音输入组件,调整输入框显示逻辑,修复语音输入遮罩层样式,更新画布背景透明度,增强用户交互体验。 (#4435) * perf: mobile voice input --------- Co-authored-by: dreamer6680 <1468683855@qq.com> * Test completion v2 (#4438) * add v2 completions (#4364) * add v2 completions * completion config * config version * fix * frontend * doc * fix * fix: completions v2 api --------- Co-authored-by: heheer <heheer@sealos.io> * package * Test mongo log (#4443) * feat: mongodb-log (#4426) * perf: mongo log * feat: completions stop reasoner * mongo db log --------- Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> * update doc * Update doc * fix external var ui (#4444) * action * fix: ts (#4458) * preview doc action add docs preview permission update preview action update action * update doc (#4460) * update preview action * update doc * remove * update * schema * update mq export;perf: redis cache 
(#4465) * perf: redis cache * update mq export * perf: website sync error tip * add error worker * website sync ui (#4466) * Updated the dynamic display of the voice input pop-up (#4469) * Update VoiceInput.tsx * Update VoiceInput.tsx * Update VoiceInput.tsx * fix: voice input --------- Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com> Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com> Co-authored-by: dreamer6680 <1468683855@qq.com> Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
This commit is contained in:
@@ -7,16 +7,21 @@ import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
|
||||
|
||||
export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) => {
|
||||
const { t } = useTranslation();
|
||||
const mediaRecorder = useRef<MediaRecorder>();
|
||||
const [mediaStream, setMediaStream] = useState<MediaStream>();
|
||||
const { toast } = useToast();
|
||||
|
||||
const [isSpeaking, setIsSpeaking] = useState(false);
|
||||
const [isTransCription, setIsTransCription] = useState(false);
|
||||
const [audioSecond, setAudioSecond] = useState(0);
|
||||
const intervalRef = useRef<any>();
|
||||
const startTimestamp = useRef(0);
|
||||
const cancelWhisperSignal = useRef(false);
|
||||
|
||||
const mediaRecorder = useRef<MediaRecorder>();
|
||||
const [mediaStream, setMediaStream] = useState<MediaStream>();
|
||||
|
||||
const timeIntervalRef = useRef<any>();
|
||||
const cancelWhisperSignal = useRef(false);
|
||||
const stopCalledRef = useRef(false);
|
||||
|
||||
const startTimestamp = useRef(0);
|
||||
|
||||
const [audioSecond, setAudioSecond] = useState(0);
|
||||
const speakingTimeString = useMemo(() => {
|
||||
const minutes: number = Math.floor(audioSecond / 60);
|
||||
const remainingSeconds: number = Math.floor(audioSecond % 60);
|
||||
@@ -25,17 +30,16 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
|
||||
return `${formattedMinutes}:${formattedSeconds}`;
|
||||
}, [audioSecond]);
|
||||
|
||||
const renderAudioGraph = useCallback((analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
|
||||
const renderAudioGraphPc = useCallback((analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
|
||||
const bufferLength = analyser.frequencyBinCount;
|
||||
const backgroundColor = 'white';
|
||||
const dataArray = new Uint8Array(bufferLength);
|
||||
analyser.getByteTimeDomainData(dataArray);
|
||||
const canvasCtx = canvas?.getContext('2d');
|
||||
const width = 300;
|
||||
const height = 200;
|
||||
const width = canvas.width;
|
||||
const height = canvas.height;
|
||||
if (!canvasCtx) return;
|
||||
canvasCtx.clearRect(0, 0, width, height);
|
||||
canvasCtx.fillStyle = backgroundColor;
|
||||
canvasCtx.fillStyle = 'white';
|
||||
canvasCtx.fillRect(0, 0, width, height);
|
||||
const barWidth = (width / bufferLength) * 2.5;
|
||||
let x = 0;
|
||||
@@ -49,127 +53,212 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
|
||||
x += barWidth + 1;
|
||||
}
|
||||
}, []);
|
||||
const renderAudioGraphMobile = useCallback(
|
||||
(analyser: AnalyserNode, canvas: HTMLCanvasElement) => {
|
||||
const canvasCtx = canvas?.getContext('2d');
|
||||
if (!canvasCtx) return;
|
||||
|
||||
const startSpeak = async (onFinish: (text: string) => void) => {
|
||||
if (!navigator?.mediaDevices?.getUserMedia) {
|
||||
return toast({
|
||||
status: 'warning',
|
||||
title: t('common:common.speech.not support')
|
||||
});
|
||||
}
|
||||
try {
|
||||
const bufferLength = analyser.frequencyBinCount;
|
||||
const dataArray = new Uint8Array(bufferLength);
|
||||
analyser.getByteTimeDomainData(dataArray);
|
||||
|
||||
const width = canvas.width;
|
||||
const height = canvas.height;
|
||||
canvasCtx.clearRect(0, 0, width, height);
|
||||
|
||||
// Set transparent background
|
||||
canvasCtx.fillStyle = 'rgba(255, 255, 255, 0)';
|
||||
canvasCtx.fillRect(0, 0, width, height);
|
||||
|
||||
const centerY = height / 2;
|
||||
const barWidth = (width / bufferLength) * 15;
|
||||
const gap = 2; // 添加间隙
|
||||
let x = width * 0.1;
|
||||
|
||||
let sum = 0;
|
||||
let maxDiff = 0;
|
||||
|
||||
for (let i = 0; i < bufferLength; i++) {
|
||||
sum += dataArray[i];
|
||||
maxDiff = Math.max(maxDiff, Math.abs(dataArray[i] - 128));
|
||||
}
|
||||
const average = sum / bufferLength;
|
||||
|
||||
// draw initial rectangle waveform
|
||||
canvasCtx.beginPath();
|
||||
canvasCtx.fillStyle = '#FFFFFF';
|
||||
|
||||
const initialHeight = height * 0.1;
|
||||
for (let i = 0; i < width * 0.8; i += barWidth + gap) {
|
||||
canvasCtx.fillRect(i + width * 0.1, centerY - initialHeight, barWidth, initialHeight);
|
||||
canvasCtx.fillRect(i + width * 0.1, centerY, barWidth, initialHeight);
|
||||
}
|
||||
|
||||
// draw dynamic waveform
|
||||
canvasCtx.beginPath();
|
||||
for (let i = 0; i < bufferLength; i += 4) {
|
||||
const value = dataArray[i];
|
||||
const normalizedValue = (value - average) / 128;
|
||||
const amplification = 2.5;
|
||||
const barHeight = normalizedValue * height * 0.4 * amplification;
|
||||
|
||||
canvasCtx.fillStyle = '#FFFFFF';
|
||||
|
||||
canvasCtx.fillRect(x, centerY - Math.abs(barHeight), barWidth, Math.abs(barHeight));
|
||||
canvasCtx.fillRect(x, centerY, barWidth, Math.abs(barHeight));
|
||||
|
||||
x += barWidth + gap; // 增加间隔
|
||||
|
||||
if (x > width * 0.9) break;
|
||||
}
|
||||
},
|
||||
[]
|
||||
);
|
||||
|
||||
const startSpeak = useCallback(
|
||||
async (onFinish: (text: string) => void) => {
|
||||
if (!navigator?.mediaDevices?.getUserMedia) {
|
||||
return toast({
|
||||
status: 'warning',
|
||||
title: t('common:common.speech.not support')
|
||||
});
|
||||
}
|
||||
|
||||
// Init status
|
||||
if (timeIntervalRef.current) {
|
||||
clearInterval(timeIntervalRef.current);
|
||||
}
|
||||
cancelWhisperSignal.current = false;
|
||||
stopCalledRef.current = false;
|
||||
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
setMediaStream(stream);
|
||||
|
||||
mediaRecorder.current = new MediaRecorder(stream);
|
||||
const chunks: Blob[] = [];
|
||||
setIsSpeaking(true);
|
||||
setAudioSecond(0);
|
||||
|
||||
mediaRecorder.current.onstart = () => {
|
||||
startTimestamp.current = Date.now();
|
||||
setAudioSecond(0);
|
||||
intervalRef.current = setInterval(() => {
|
||||
const currentTimestamp = Date.now();
|
||||
const duration = (currentTimestamp - startTimestamp.current) / 1000;
|
||||
setAudioSecond(duration);
|
||||
}, 1000);
|
||||
};
|
||||
try {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
setMediaStream(stream);
|
||||
|
||||
mediaRecorder.current.ondataavailable = (e) => {
|
||||
chunks.push(e.data);
|
||||
};
|
||||
mediaRecorder.current = new MediaRecorder(stream);
|
||||
const chunks: Blob[] = [];
|
||||
|
||||
mediaRecorder.current.onstop = async () => {
|
||||
if (!cancelWhisperSignal.current) {
|
||||
const formData = new FormData();
|
||||
const { options, filename } = (() => {
|
||||
if (MediaRecorder.isTypeSupported('video/webm; codecs=vp9')) {
|
||||
return {
|
||||
options: { mimeType: 'video/webm; codecs=vp9' },
|
||||
filename: 'recording.mp3'
|
||||
};
|
||||
}
|
||||
if (MediaRecorder.isTypeSupported('video/webm')) {
|
||||
mediaRecorder.current.onstart = () => {
|
||||
startTimestamp.current = Date.now();
|
||||
timeIntervalRef.current = setInterval(() => {
|
||||
const currentTimestamp = Date.now();
|
||||
const duration = (currentTimestamp - startTimestamp.current) / 1000;
|
||||
setAudioSecond(duration);
|
||||
}, 1000);
|
||||
};
|
||||
mediaRecorder.current.ondataavailable = (e) => {
|
||||
chunks.push(e.data);
|
||||
};
|
||||
mediaRecorder.current.onstop = async () => {
|
||||
// close media stream
|
||||
stream.getTracks().forEach((track) => track.stop());
|
||||
setIsSpeaking(false);
|
||||
|
||||
if (timeIntervalRef.current) {
|
||||
clearInterval(timeIntervalRef.current);
|
||||
}
|
||||
|
||||
if (!cancelWhisperSignal.current) {
|
||||
const formData = new FormData();
|
||||
const { options, filename } = (() => {
|
||||
if (MediaRecorder.isTypeSupported('video/webm; codecs=vp9')) {
|
||||
return {
|
||||
options: { mimeType: 'video/webm; codecs=vp9' },
|
||||
filename: 'recording.mp3'
|
||||
};
|
||||
}
|
||||
if (MediaRecorder.isTypeSupported('video/webm')) {
|
||||
return {
|
||||
options: { type: 'video/webm' },
|
||||
filename: 'recording.mp3'
|
||||
};
|
||||
}
|
||||
if (MediaRecorder.isTypeSupported('video/mp4')) {
|
||||
return {
|
||||
options: { mimeType: 'video/mp4', videoBitsPerSecond: 100000 },
|
||||
filename: 'recording.mp4'
|
||||
};
|
||||
}
|
||||
return {
|
||||
options: { type: 'video/webm' },
|
||||
filename: 'recording.mp3'
|
||||
};
|
||||
}
|
||||
if (MediaRecorder.isTypeSupported('video/mp4')) {
|
||||
return {
|
||||
options: { mimeType: 'video/mp4', videoBitsPerSecond: 100000 },
|
||||
filename: 'recording.mp4'
|
||||
};
|
||||
}
|
||||
return {
|
||||
options: { type: 'video/webm' },
|
||||
filename: 'recording.mp3'
|
||||
};
|
||||
})();
|
||||
})();
|
||||
|
||||
const blob = new Blob(chunks, options);
|
||||
const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
|
||||
formData.append('file', blob, filename);
|
||||
formData.append(
|
||||
'data',
|
||||
JSON.stringify({
|
||||
...props,
|
||||
duration
|
||||
})
|
||||
);
|
||||
const blob = new Blob(chunks, options);
|
||||
const duration = Math.round((Date.now() - startTimestamp.current) / 1000);
|
||||
formData.append('file', blob, filename);
|
||||
formData.append(
|
||||
'data',
|
||||
JSON.stringify({
|
||||
...props,
|
||||
duration
|
||||
})
|
||||
);
|
||||
|
||||
setIsTransCription(true);
|
||||
try {
|
||||
const result = await POST<string>('/v1/audio/transcriptions', formData, {
|
||||
timeout: 60000,
|
||||
headers: {
|
||||
'Content-Type': 'multipart/form-data; charset=utf-8'
|
||||
}
|
||||
});
|
||||
onFinish(result);
|
||||
} catch (error) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: getErrText(error, t('common:common.speech.error tip'))
|
||||
});
|
||||
setIsTransCription(true);
|
||||
try {
|
||||
const result = await POST<string>('/v1/audio/transcriptions', formData, {
|
||||
timeout: 60000,
|
||||
headers: {
|
||||
'Content-Type': 'multipart/form-data; charset=utf-8'
|
||||
}
|
||||
});
|
||||
onFinish(result);
|
||||
} catch (error) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: getErrText(error, t('common:common.speech.error tip'))
|
||||
});
|
||||
}
|
||||
setIsTransCription(false);
|
||||
}
|
||||
};
|
||||
mediaRecorder.current.onerror = (e) => {
|
||||
if (timeIntervalRef.current) {
|
||||
clearInterval(timeIntervalRef.current);
|
||||
}
|
||||
console.log('error', e);
|
||||
setIsSpeaking(false);
|
||||
};
|
||||
|
||||
// If onclick stop, stop speak
|
||||
if (stopCalledRef.current) {
|
||||
mediaRecorder.current.stop();
|
||||
} else {
|
||||
mediaRecorder.current.start();
|
||||
}
|
||||
} catch (error) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: getErrText(error, 'Whisper error')
|
||||
});
|
||||
console.log(error);
|
||||
}
|
||||
},
|
||||
[toast, t, props]
|
||||
);
|
||||
|
||||
// close media stream
|
||||
stream.getTracks().forEach((track) => track.stop());
|
||||
|
||||
setIsTransCription(false);
|
||||
setIsSpeaking(false);
|
||||
};
|
||||
|
||||
mediaRecorder.current.onerror = (e) => {
|
||||
console.log('error', e);
|
||||
setIsSpeaking(false);
|
||||
};
|
||||
|
||||
mediaRecorder.current.start();
|
||||
} catch (error) {
|
||||
toast({
|
||||
status: 'warning',
|
||||
title: getErrText(error, 'Whisper error')
|
||||
});
|
||||
console.log(error);
|
||||
}
|
||||
};
|
||||
|
||||
const stopSpeak = (cancel = false) => {
|
||||
const stopSpeak = useCallback((cancel = false) => {
|
||||
cancelWhisperSignal.current = cancel;
|
||||
if (mediaRecorder.current) {
|
||||
mediaRecorder.current?.stop();
|
||||
clearInterval(intervalRef.current);
|
||||
}
|
||||
};
|
||||
stopCalledRef.current = true;
|
||||
|
||||
if (timeIntervalRef.current) {
|
||||
clearInterval(timeIntervalRef.current);
|
||||
}
|
||||
|
||||
if (mediaRecorder.current && mediaRecorder.current.state !== 'inactive') {
|
||||
mediaRecorder.current.stop();
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Leave page, stop speak
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
clearInterval(intervalRef.current);
|
||||
clearInterval(timeIntervalRef.current);
|
||||
if (mediaRecorder.current && mediaRecorder.current.state !== 'inactive') {
|
||||
mediaRecorder.current.stop();
|
||||
}
|
||||
@@ -184,14 +273,15 @@ export const useSpeech = (props?: OutLinkChatAuthProps & { appId?: string }) =>
|
||||
if (audioSecond >= 60) {
|
||||
stopSpeak();
|
||||
}
|
||||
}, [audioSecond]);
|
||||
}, [audioSecond, stopSpeak]);
|
||||
|
||||
return {
|
||||
startSpeak,
|
||||
stopSpeak,
|
||||
isSpeaking,
|
||||
isTransCription,
|
||||
renderAudioGraph,
|
||||
renderAudioGraphPc,
|
||||
renderAudioGraphMobile,
|
||||
stream: mediaStream,
|
||||
speakingTimeString
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user