Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fbbc32361b | ||
|
|
dc329041f3 | ||
|
|
5feb2e19bf | ||
|
|
ec22cd8320 | ||
|
|
8c7efcbd1a | ||
|
|
afc5947bfb | ||
|
|
40189a6899 | ||
|
|
b73829a25c | ||
|
|
a7c5d3cc05 | ||
|
|
cc36a13f17 | ||
|
|
943abbe0fb | ||
|
|
b13c3c4da5 | ||
|
|
c12aa7fdf7 | ||
|
|
e08e8aa00b | ||
|
|
85e11abc0a | ||
|
|
becee69d6a | ||
|
|
042b0c535a | ||
|
|
f97c29b41e | ||
|
|
4d6616cbfa | ||
|
|
cf37992b5c | ||
|
|
6c4026ccef |
BIN
public/imgs/wxerweima300.jpg
Normal file
BIN
public/imgs/wxerweima300.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 50 KiB |
@@ -5,15 +5,30 @@ import { TrainingItemType } from '../types/training';
|
||||
import { RequestPaging } from '../types/index';
|
||||
import { Obj2Query } from '@/utils/tools';
|
||||
|
||||
/**
|
||||
* 获取模型列表
|
||||
*/
|
||||
export const getMyModels = () => GET<ModelSchema[]>('/model/list');
|
||||
|
||||
/**
|
||||
* 创建一个模型
|
||||
*/
|
||||
export const postCreateModel = (data: { name: string; serviceModelName: string }) =>
|
||||
POST<ModelSchema>('/model/create', data);
|
||||
|
||||
/**
|
||||
* 根据 ID 删除模型
|
||||
*/
|
||||
export const delModelById = (id: string) => DELETE(`/model/del?modelId=${id}`);
|
||||
|
||||
/**
|
||||
* 根据 ID 获取模型
|
||||
*/
|
||||
export const getModelById = (id: string) => GET<ModelSchema>(`/model/detail?modelId=${id}`);
|
||||
|
||||
/**
|
||||
* 根据 ID 更新模型
|
||||
*/
|
||||
export const putModelById = (id: string, data: ModelUpdateParams) =>
|
||||
PUT(`/model/update?modelId=${id}`, data);
|
||||
|
||||
@@ -35,29 +50,58 @@ export const getModelTrainings = (id: string) =>
|
||||
type GetModelDataListProps = RequestPaging & {
|
||||
modelId: string;
|
||||
};
|
||||
/**
|
||||
* 获取模型的知识库数据
|
||||
*/
|
||||
export const getModelDataList = (props: GetModelDataListProps) =>
|
||||
GET(`/model/data/getModelData?${Obj2Query(props)}`);
|
||||
|
||||
/**
|
||||
* 获取导出数据(不分页)
|
||||
*/
|
||||
export const getExportDataList = (modelId: string) =>
|
||||
GET<string>(`/model/data/exportModelData?modelId=${modelId}`);
|
||||
|
||||
export const getModelSplitDataList = (modelId: string) =>
|
||||
GET<ModelSplitDataSchema[]>(`/model/data/getSplitData?modelId=${modelId}`);
|
||||
/**
|
||||
* 获取模型正在拆分数据的数量
|
||||
*/
|
||||
export const getModelSplitDataListLen = (modelId: string) =>
|
||||
GET<number>(`/model/data/getSplitData?modelId=${modelId}`);
|
||||
|
||||
/**
|
||||
* 获取 web 页面内容
|
||||
*/
|
||||
export const getWebContent = (url: string) => POST<string>(`/model/data/fetchingUrlData`, { url });
|
||||
|
||||
/**
|
||||
* 手动输入数据
|
||||
*/
|
||||
export const postModelDataInput = (data: {
|
||||
modelId: string;
|
||||
data: { text: ModelDataSchema['text']; q: ModelDataSchema['q'] }[];
|
||||
}) => POST<number>(`/model/data/pushModelDataInput`, data);
|
||||
|
||||
export const postModelDataFileText = (data: { modelId: string; text: string; prompt: string }) =>
|
||||
/**
|
||||
* 拆分数据
|
||||
*/
|
||||
export const postModelDataSplitData = (data: { modelId: string; text: string; prompt: string }) =>
|
||||
POST(`/model/data/splitData`, data);
|
||||
|
||||
/**
|
||||
* json导入数据
|
||||
*/
|
||||
export const postModelDataJsonData = (
|
||||
modelId: string,
|
||||
jsonData: { prompt: string; completion: string; vector?: number[] }[]
|
||||
) => POST(`/model/data/pushModelDataJson`, { modelId, data: jsonData });
|
||||
|
||||
export const putModelDataById = (data: { dataId: string; text: string }) =>
|
||||
/**
|
||||
* 更新模型数据
|
||||
*/
|
||||
export const putModelDataById = (data: { dataId: string; text: string; q?: string }) =>
|
||||
PUT('/model/data/putModelData', data);
|
||||
/**
|
||||
* 删除一条模型数据
|
||||
*/
|
||||
export const delOneModelData = (dataId: string) =>
|
||||
DELETE(`/model/data/delModelDataById?dataId=${dataId}`);
|
||||
|
||||
@@ -23,13 +23,13 @@ const WxConcat = ({ onClose }: { onClose: () => void }) => {
|
||||
<ModalBody textAlign={'center'}>
|
||||
<Image
|
||||
style={{ margin: 'auto' }}
|
||||
src={'/imgs/wxcode300.jpg'}
|
||||
src={'/imgs/wxerweima300.jpg'}
|
||||
width={200}
|
||||
height={200}
|
||||
alt=""
|
||||
/>
|
||||
<Box mt={2}>
|
||||
微信号:{' '}
|
||||
微信号:
|
||||
<Box as={'span'} userSelect={'all'}>
|
||||
YNyiqi
|
||||
</Box>
|
||||
|
||||
@@ -11,8 +11,8 @@ export const introPage = `
|
||||
[Git 仓库](https://github.com/c121914yu/FastGPT)
|
||||
|
||||
### 交流群/问题反馈
|
||||
wx: YNyiqi
|
||||

|
||||
wx号: YNyiqi
|
||||

|
||||
|
||||
|
||||
### 快速开始
|
||||
@@ -36,6 +36,15 @@ wx: YNyiqi
|
||||
4. 使用该模型对话。
|
||||
|
||||
注意:使用知识库模型对话时,tokens 消耗会加快。
|
||||
|
||||
### 价格表
|
||||
如果使用了自己的 Api Key,不会计费。可以在账号页,看到详细账单。单纯使用 chatGPT 模型进行对话,只有一个计费项目。使用知识库时,包含**对话**和**索引**生成两个计费项。
|
||||
| 计费项 | 价格: 元/ 1K tokens(包含上下文)|
|
||||
| --- | --- |
|
||||
| chatgpt - 对话 | 0.03 |
|
||||
| 知识库 - 对话 | 0.03 |
|
||||
| 知识库 - 索引 | 0.01 |
|
||||
| 文件拆分 | 0.03 |
|
||||
`;
|
||||
|
||||
export const chatProblem = `
|
||||
|
||||
@@ -4,13 +4,15 @@ import type { RedisModelDataItemType } from '@/types/redis';
|
||||
export enum ChatModelNameEnum {
|
||||
GPT35 = 'gpt-3.5-turbo',
|
||||
VECTOR_GPT = 'VECTOR_GPT',
|
||||
GPT3 = 'text-davinci-003'
|
||||
GPT3 = 'text-davinci-003',
|
||||
VECTOR = 'text-embedding-ada-002'
|
||||
}
|
||||
|
||||
export const ChatModelNameMap = {
|
||||
[ChatModelNameEnum.GPT35]: 'gpt-3.5-turbo',
|
||||
[ChatModelNameEnum.VECTOR_GPT]: 'gpt-3.5-turbo',
|
||||
[ChatModelNameEnum.GPT3]: 'text-davinci-003'
|
||||
[ChatModelNameEnum.GPT3]: 'text-davinci-003',
|
||||
[ChatModelNameEnum.VECTOR]: 'text-embedding-ada-002'
|
||||
};
|
||||
|
||||
export type ModelConstantsData = {
|
||||
@@ -21,7 +23,6 @@ export type ModelConstantsData = {
|
||||
maxToken: number;
|
||||
contextMaxToken: number;
|
||||
maxTemperature: number;
|
||||
trainedMaxToken: number; // 训练后最大多少tokens
|
||||
price: number; // 多少钱 / 1token,单位: 0.00001元
|
||||
};
|
||||
|
||||
@@ -33,7 +34,6 @@ export const modelList: ModelConstantsData[] = [
|
||||
trainName: '',
|
||||
maxToken: 4000,
|
||||
contextMaxToken: 7500,
|
||||
trainedMaxToken: 2000,
|
||||
maxTemperature: 2,
|
||||
price: 3
|
||||
},
|
||||
@@ -43,8 +43,7 @@ export const modelList: ModelConstantsData[] = [
|
||||
model: ChatModelNameEnum.VECTOR_GPT,
|
||||
trainName: 'vector',
|
||||
maxToken: 4000,
|
||||
contextMaxToken: 7500,
|
||||
trainedMaxToken: 2000,
|
||||
contextMaxToken: 7000,
|
||||
maxTemperature: 1,
|
||||
price: 3
|
||||
}
|
||||
@@ -55,7 +54,6 @@ export const modelList: ModelConstantsData[] = [
|
||||
// trainName: 'davinci',
|
||||
// maxToken: 4000,
|
||||
// contextMaxToken: 7500,
|
||||
// trainedMaxToken: 2000,
|
||||
// maxTemperature: 2,
|
||||
// price: 30
|
||||
// }
|
||||
|
||||
@@ -3,6 +3,7 @@ export enum BillTypeEnum {
|
||||
splitData = 'splitData',
|
||||
QA = 'QA',
|
||||
abstract = 'abstract',
|
||||
vector = 'vector',
|
||||
return = 'return'
|
||||
}
|
||||
export enum PageTypeEnum {
|
||||
@@ -16,5 +17,6 @@ export const BillTypeMap: Record<`${BillTypeEnum}`, string> = {
|
||||
[BillTypeEnum.splitData]: 'QA拆分',
|
||||
[BillTypeEnum.QA]: 'QA拆分',
|
||||
[BillTypeEnum.abstract]: '摘要总结',
|
||||
[BillTypeEnum.vector]: '索引生成',
|
||||
[BillTypeEnum.return]: '退款'
|
||||
};
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { AppProps, NextWebVitalsMetric } from 'next/app';
|
||||
import { useEffect } from 'react';
|
||||
import type { AppProps } from 'next/app';
|
||||
import Script from 'next/script';
|
||||
import Head from 'next/head';
|
||||
import { ChakraProvider, ColorModeScript } from '@chakra-ui/react';
|
||||
@@ -9,6 +10,7 @@ import NProgress from 'nprogress'; //nprogress module
|
||||
import Router from 'next/router';
|
||||
import 'nprogress/nprogress.css';
|
||||
import '../styles/reset.scss';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
|
||||
//Binding events.
|
||||
Router.events.on('routeChangeStart', () => NProgress.start());
|
||||
@@ -27,6 +29,17 @@ const queryClient = new QueryClient({
|
||||
});
|
||||
|
||||
export default function App({ Component, pageProps }: AppProps) {
|
||||
const { toast } = useToast();
|
||||
// 校验是否支持 click 事件
|
||||
useEffect(() => {
|
||||
if (typeof document.createElement('div').click !== 'function') {
|
||||
toast({
|
||||
title: '你的浏览器版本过低',
|
||||
status: 'warning'
|
||||
});
|
||||
}
|
||||
}, [toast]);
|
||||
|
||||
return (
|
||||
<>
|
||||
<Head>
|
||||
|
||||
@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { createParser, ParsedEvent, ReconnectInterval } from 'eventsource-parser';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { getOpenAIApi, authChat } from '@/service/utils/chat';
|
||||
import { httpsAgent } from '@/service/utils/tools';
|
||||
import { httpsAgent, openaiChatFilter } from '@/service/utils/tools';
|
||||
import { ChatCompletionRequestMessage, ChatCompletionRequestMessageRoleEnum } from 'openai';
|
||||
import { ChatItemType } from '@/types/chat';
|
||||
import { jsonRes } from '@/service/response';
|
||||
@@ -10,7 +10,6 @@ import type { ModelSchema } from '@/types/mongoSchema';
|
||||
import { PassThrough } from 'stream';
|
||||
import { modelList } from '@/constants/model';
|
||||
import { pushChatBill } from '@/service/events/pushBill';
|
||||
import { openaiChatFilter } from '@/service/utils/tools';
|
||||
|
||||
/* 发送提示词 */
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
|
||||
@@ -87,10 +87,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
temperature: temperature,
|
||||
prompt: promptText,
|
||||
stream: true,
|
||||
max_tokens:
|
||||
model.trainingTimes > 0
|
||||
? modelConstantsData.trainedMaxToken
|
||||
: modelConstantsData.maxToken,
|
||||
max_tokens: modelConstantsData.maxToken,
|
||||
presence_penalty: -0.5, // 越大,越容易出现新内容
|
||||
frequency_penalty: 0.5, // 越大,重复内容越少
|
||||
stop: [`###`, '。!?.!.']
|
||||
|
||||
277
src/pages/api/chat/lafGpt.ts
Normal file
277
src/pages/api/chat/lafGpt.ts
Normal file
@@ -0,0 +1,277 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { createParser, ParsedEvent, ReconnectInterval } from 'eventsource-parser';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { getOpenAIApi, authChat } from '@/service/utils/chat';
|
||||
import { httpsAgent, openaiChatFilter, systemPromptFilter } from '@/service/utils/tools';
|
||||
import { ChatCompletionRequestMessage, ChatCompletionRequestMessageRoleEnum } from 'openai';
|
||||
import { ChatItemType } from '@/types/chat';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import type { ModelSchema } from '@/types/mongoSchema';
|
||||
import { PassThrough } from 'stream';
|
||||
import { modelList } from '@/constants/model';
|
||||
import { pushChatBill } from '@/service/events/pushBill';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
import { VecModelDataPrefix } from '@/constants/redis';
|
||||
import { vectorToBuffer } from '@/utils/tools';
|
||||
import { openaiCreateEmbedding } from '@/service/utils/openai';
|
||||
|
||||
/**
 * Chat endpoint that answers a prompt using the model's knowledge base.
 *
 * Flow, as implemented below:
 *  1. Authenticate the chat (`authChat`) and resolve the model constants.
 *  2. Ask the chat model (temperature 0) to break the user's request into
 *     numbered implementation steps and append that breakdown to `prompt.value`.
 *  3. Embed the enriched prompt and run a redis `FT.SEARCH` vector-range query
 *     (range 0.25) restricted to the chat's model id.
 *  4. Build a SYSTEM message from the matched texts, trim the conversation with
 *     `openaiChatFilter`, and request a streamed chat completion.
 *  5. Pipe the streamed tokens to the response (newlines sent as `<br/>`) and
 *     push a bill for prompt + response text.
 *
 * Errors raised before streaming starts are returned as a 500 JSON response;
 * errors after streaming has started simply end the stream.
 *
 * @param req - expects `body.chatId`, `body.prompt` and an `authorization` header
 * @param res - receives either the token stream or a JSON error
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  let step = 0; // set to 1 once the streamed response has started
  const stream = new PassThrough();
  stream.on('error', () => {
    console.log('error: ', 'stream error');
    stream.destroy();
  });
  res.on('close', () => {
    stream.destroy();
  });
  res.on('error', () => {
    console.log('error: ', 'request error');
    stream.destroy();
  });

  try {
    const { chatId, prompt } = req.body as {
      prompt: ChatItemType;
      chatId: string;
    };

    const { authorization } = req.headers;
    if (!chatId || !prompt) {
      throw new Error('缺少参数');
    }

    await connectToDatabase();
    const redis = await connectRedis();
    const startTime = Date.now();

    const { chat, userApiKey, systemKey, userId } = await authChat(chatId, authorization);

    const model: ModelSchema = chat.modelId;
    const modelConstantsData = modelList.find((item) => item.model === model.service.modelName);
    if (!modelConstantsData) {
      throw new Error('模型加载异常');
    }

    // The user's own key takes precedence over the platform key.
    const chatAPI = getOpenAIApi(userApiKey || systemKey);

    // First round-trip: have the chat model decompose the request into steps.
    const promptResponse = await chatAPI.createChatCompletion(
      {
        model: model.service.chatModel,
        temperature: 0,
        messages: [
          {
            role: 'system',
            content: `服务端逻辑生成器。根据用户输入的需求,拆解成代码实现的步骤,并按格式返回: 1.\n2.\n3.\n ......

下面是一些例子:
实现一个手机号注册账号的方法,包含两个函数
* 发送手机验证码函数:
1. 从 query 中获取 phone
2. 校验手机号格式是否正确,不正确返回{error: "手机号格式错误"}
3. 给 phone 发送一个短信验证码,验证码长度为6位字符串,内容为:你正在注册laf, 验证码为:code
4. 数据库添加数据,表为"codes",内容为 {phone, code}
* 注册函数
1. 从 body 中获取 phone 和 code
2. 校验手机号格式是否正确,不正确返回{error: "手机号格式错误"}
2. 获取数据库数据,表为"codes",查找是否有符合 phone, code 等于body参数的记录,没有的话返回 {error:"验证码不正确"}
4. 添加数据库数据,表为"users" ,内容为{phone, code, createTime}
5. 删除数据库数据,删除 code 记录
---------------
更新博客记录。传入blogId,blogText,tags,还需要记录更新的时间
1. 从 body 中获取 blogId,blogText 和 tags
2. 校验 blogId 是否为空,为空则返回 {error: "博客ID不能为空"}
3. 校验 blogText 是否为空,为空则返回 {error: "博客内容不能为空"}
4. 校验 tags 是否为数组,不是则返回 {error: "标签必须为数组"}
5. 获取当前时间,记录为 updateTime
6. 更新数据库数据,表为"blogs",更新符合 blogId 的记录的内容为{blogText, tags, updateTime}
7. 返回结果 {message: "更新博客记录成功"}`
          },
          {
            role: 'user',
            content: prompt.value
          }
        ]
      },
      {
        timeout: 40000,
        httpsAgent
      }
    );

    const promptResolve = promptResponse.data.choices?.[0]?.message?.content || '';
    if (!promptResolve) {
      throw new Error('gpt 异常');
    }

    // The step breakdown becomes part of the prompt that is embedded and sent.
    prompt.value += `\n${promptResolve}`;
    console.log('prompt resolve success, time:', `${(Date.now() - startTime) / 1000}s`);

    // Embed the enriched prompt.
    const { vector: promptVector } = await openaiCreateEmbedding({
      isPay: !userApiKey,
      apiKey: userApiKey || systemKey,
      userId,
      text: prompt.value
    });

    // Conversation so far plus the new prompt.
    const prompts = [...chat.content, prompt];

    // Vector-range search in redis, scoped to this model and sorted by distance.
    const redisData: any[] = await redis.sendCommand([
      'FT.SEARCH',
      `idx:${VecModelDataPrefix}:hash`,
      `@modelId:{${String(
        chat.modelId._id
      )}} @vector:[VECTOR_RANGE 0.25 $blob]=>{$YIELD_DISTANCE_AS: score}`,
      'RETURN',
      '1',
      'text',
      'SORTBY',
      'score',
      'PARAMS',
      '2',
      'blob',
      vectorToBuffer(promptVector),
      'LIMIT',
      '0',
      '20',
      'DIALECT',
      '2'
    ]);

    // The reply is read at even indices 2..20 (the first ten hits); each entry is
    // expected to hold the returned `text` value at position 1.
    const formatRedisPrompt = Array.from({ length: 10 }, (_, n) => (n + 1) * 2)
      .map((i) => (redisData[i] ? (redisData[i][1] as string) || '' : ''))
      .filter((item) => item);

    if (formatRedisPrompt.length === 0) {
      throw new Error('对不起,我没有找到你的问题');
    }

    // Cap the knowledge-base text passed to the model (limit argument: 3400).
    const systemPrompt = systemPromptFilter(formatRedisPrompt, 3400);

    prompts.unshift({
      obj: 'SYSTEM',
      value: `${model.systemPrompt} 知识库内容是最新的,知识库内容为: "${systemPrompt}"`
    });

    // Keep the conversation within the model's context budget.
    const filterPrompts = openaiChatFilter(prompts, modelConstantsData.contextMaxToken);

    // Translate internal speaker tags into chat-completion roles.
    const map = {
      Human: ChatCompletionRequestMessageRoleEnum.User,
      AI: ChatCompletionRequestMessageRoleEnum.Assistant,
      SYSTEM: ChatCompletionRequestMessageRoleEnum.System
    };
    const formatPrompts: ChatCompletionRequestMessage[] = filterPrompts.map(
      (item: ChatItemType) => ({
        role: map[item.obj],
        content: item.value
      })
    );

    // model.temperature is scaled by 1/10 onto [0, maxTemperature].
    const temperature = modelConstantsData.maxTemperature * (model.temperature / 10);

    // Second round-trip: the actual streamed answer.
    const chatResponse = await chatAPI.createChatCompletion(
      {
        model: model.service.chatModel,
        temperature: temperature,
        messages: formatPrompts,
        frequency_penalty: 0.5, // higher => less repetition
        presence_penalty: -0.5, // higher => more new content
        stream: true
      },
      {
        timeout: 40000,
        responseType: 'stream',
        httpsAgent
      }
    );

    console.log('api response time:', `${(Date.now() - startTime) / 1000}s`);

    // Start the streamed response.
    res.setHeader('Content-Type', 'text/event-stream;charset=utf-8');
    res.setHeader('Access-Control-Allow-Origin', '*');
    res.setHeader('X-Accel-Buffering', 'no');
    res.setHeader('Cache-Control', 'no-cache, no-transform');
    step = 1;

    let responseContent = '';
    stream.pipe(res);

    const onParse = async (event: ParsedEvent | ReconnectInterval) => {
      if (event.type !== 'event') return;
      const data = event.data;
      if (data === '[DONE]') return;
      try {
        const json = JSON.parse(data);
        const content: string = json?.choices?.[0]?.delta?.content || '';
        // Skip empty deltas and a newline that would open the answer.
        if (!content || (responseContent === '' && content === '\n')) return;

        responseContent += content;
        !stream.destroyed && stream.push(content.replace(/\n/g, '<br/>'));
      } catch (error) {
        // A malformed event must not abort the stream, but should be visible.
        console.log('parse error', error);
      }
    };

    // One parser and one streaming decoder for the whole response: SSE events and
    // multi-byte characters may be split across network chunks, so state has to
    // carry over from one chunk to the next.
    const parser = createParser(onParse);
    const decoder = new TextDecoder();
    try {
      for await (const chunk of chatResponse.data as any) {
        if (stream.destroyed) {
          // The client went away; ignore the rest of the upstream response.
          break;
        }
        parser.feed(decoder.decode(chunk, { stream: true }));
      }
    } catch (error) {
      console.log('pipe error', error);
    }
    // close stream
    !stream.destroyed && stream.push(null);
    stream.destroy();

    const promptsContent = formatPrompts.map((item) => item.content).join('');
    // Billing only applies when the platform key was used.
    pushChatBill({
      isPay: !userApiKey,
      modelName: model.service.modelName,
      userId,
      chatId,
      text: promptsContent + responseContent
    });
  } catch (err: any) {
    if (step === 1) {
      // Headers were already sent; the only option is to end the stream.
      console.log('error,结束');
      stream.destroy();
    } else {
      res.status(500);
      jsonRes(res, {
        code: 500,
        error: err
      });
    }
  }
}
|
||||
@@ -13,6 +13,7 @@ import { pushChatBill } from '@/service/events/pushBill';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
import { VecModelDataPrefix } from '@/constants/redis';
|
||||
import { vectorToBuffer } from '@/utils/tools';
|
||||
import { openaiCreateEmbedding } from '@/service/utils/openai';
|
||||
|
||||
/* 发送提示词 */
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
@@ -56,22 +57,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
// 读取对话内容
|
||||
const prompts = [...chat.content, prompt];
|
||||
|
||||
// 获取 chatAPI
|
||||
const chatAPI = getOpenAIApi(userApiKey || systemKey);
|
||||
|
||||
// 把输入的内容转成向量
|
||||
const promptVector = await chatAPI
|
||||
.createEmbedding(
|
||||
{
|
||||
model: 'text-embedding-ada-002',
|
||||
input: prompt.value
|
||||
},
|
||||
{
|
||||
timeout: 120000,
|
||||
httpsAgent
|
||||
}
|
||||
)
|
||||
.then((res) => res?.data?.data?.[0]?.embedding || []);
|
||||
// 获取提示词的向量
|
||||
const { vector: promptVector, chatAPI } = await openaiCreateEmbedding({
|
||||
isPay: !userApiKey,
|
||||
apiKey: userApiKey || systemKey,
|
||||
userId,
|
||||
text: prompt.value
|
||||
});
|
||||
|
||||
// 搜索系统提示词, 按相似度从 redis 中搜出相关的 q 和 text
|
||||
const redisData: any[] = await redis.sendCommand([
|
||||
@@ -79,7 +71,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
`idx:${VecModelDataPrefix}:hash`,
|
||||
`@modelId:{${String(
|
||||
chat.modelId._id
|
||||
)}} @vector:[VECTOR_RANGE 0.15 $blob]=>{$YIELD_DISTANCE_AS: score}`,
|
||||
)}} @vector:[VECTOR_RANGE 0.22 $blob]=>{$YIELD_DISTANCE_AS: score}`,
|
||||
// `@modelId:{${String(chat.modelId._id)}}=>[KNN 10 @vector $blob AS score]`,
|
||||
'RETURN',
|
||||
'1',
|
||||
|
||||
@@ -4,7 +4,6 @@ import { connectToDatabase } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
import { VecModelDataIdx } from '@/constants/redis';
|
||||
import { BufferToVector } from '@/utils/tools';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
|
||||
36
src/pages/api/model/data/fetchingUrlData.ts
Normal file
36
src/pages/api/model/data/fetchingUrlData.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import axios from 'axios';
|
||||
import { httpsAgent } from '@/service/utils/tools';
|
||||
|
||||
/**
 * Fetch the content of a web page on behalf of an authenticated user.
 *
 * Reads `url` from the request body, verifies the caller's token, requests the
 * page with axios and returns the response body in `data`.
 *
 * Any failure (missing or invalid url, failed auth, failed request) is returned
 * as a JSON response with `code: 500`.
 *
 * NOTE(review): this endpoint issues server-side requests to a caller-supplied
 * address. Only the URL scheme is validated here; requests to internal hosts
 * are not blocked — consider an allow/deny list if that matters for deployment.
 *
 * @param req - expects `body.url` and an `authorization` header
 * @param res - receives `{ data }` on success or an error payload
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const { url } = req.body as { url: string };
    if (!url) {
      throw new Error('缺少 url');
    }

    // Accept only absolute http(s) URLs before doing any further work.
    if (typeof url !== 'string') {
      throw new Error('url 格式错误');
    }
    let target: URL;
    try {
      target = new URL(url);
    } catch {
      throw new Error('url 格式错误');
    }
    if (target.protocol !== 'http:' && target.protocol !== 'https:') {
      throw new Error('url 格式错误');
    }

    await connectToDatabase();

    const { authorization } = req.headers;

    await authToken(authorization);

    const response = await axios.get(url, {
      httpsAgent
    });
    const data = response.data as string;

    jsonRes(res, { data });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
|
||||
@@ -24,7 +24,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data
|
||||
data: data.map((item) => item.textList).flat().length
|
||||
});
|
||||
} catch (err) {
|
||||
jsonRes(res, {
|
||||
|
||||
@@ -58,7 +58,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
})
|
||||
);
|
||||
|
||||
generateVector(true);
|
||||
generateVector();
|
||||
|
||||
jsonRes(res, {
|
||||
data: insertRes.filter((item) => item.status === 'rejected').length
|
||||
|
||||
@@ -66,7 +66,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
})
|
||||
);
|
||||
|
||||
generateVector(true);
|
||||
generateVector();
|
||||
|
||||
jsonRes(res, {
|
||||
data: insertRedisRes.filter((item) => item.status === 'rejected').length
|
||||
|
||||
@@ -2,13 +2,12 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
import { ModelDataStatusEnum } from '@/constants/redis';
|
||||
import { generateVector } from '@/service/events/generateVector';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
let { dataId, text } = req.body as {
|
||||
dataId: string;
|
||||
text: string;
|
||||
};
|
||||
const { dataId, text, q } = req.body as { dataId: string; text: string; q?: string };
|
||||
const { authorization } = req.headers;
|
||||
|
||||
if (!authorization) {
|
||||
@@ -31,7 +30,17 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
}
|
||||
|
||||
// 更新
|
||||
await redis.hSet(dataId, 'text', text);
|
||||
await redis.sendCommand([
|
||||
'HMSET',
|
||||
dataId,
|
||||
...(q ? ['q', q, 'status', ModelDataStatusEnum.waiting] : []),
|
||||
'text',
|
||||
text
|
||||
]);
|
||||
|
||||
if (q) {
|
||||
generateVector();
|
||||
}
|
||||
|
||||
jsonRes(res);
|
||||
} catch (err) {
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { Chat, Model, Training, connectToDatabase } from '@/service/mongo';
|
||||
import { authToken, getUserApiOpenai } from '@/service/utils/tools';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { getUserApiOpenai } from '@/service/utils/openai';
|
||||
import { TrainingStatusEnum } from '@/constants/model';
|
||||
import { TrainingItemType } from '@/types/training';
|
||||
import { httpsAgent } from '@/service/utils/tools';
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, Model, Training } from '@/service/mongo';
|
||||
import { getOpenAIApi } from '@/service/utils/chat';
|
||||
import { authToken, getUserApiOpenai } from '@/service/utils/tools';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { getUserApiOpenai } from '@/service/utils/openai';
|
||||
import type { ModelSchema } from '@/types/mongoSchema';
|
||||
import { TrainingItemType } from '@/types/training';
|
||||
import { ModelStatusEnum, TrainingStatusEnum } from '@/constants/model';
|
||||
|
||||
@@ -3,7 +3,8 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, Model, Training } from '@/service/mongo';
|
||||
import formidable from 'formidable';
|
||||
import { authToken, getUserApiOpenai } from '@/service/utils/tools';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { getUserApiOpenai } from '@/service/utils/openai';
|
||||
import { join } from 'path';
|
||||
import fs from 'fs';
|
||||
import type { ModelSchema } from '@/types/mongoSchema';
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, DataItem, Data } from '@/service/mongo';
|
||||
import { connectToDatabase, SplitData } from '@/service/mongo';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||
try {
|
||||
@@ -10,20 +10,18 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
}
|
||||
await connectToDatabase();
|
||||
|
||||
// await DataItem.updateMany(
|
||||
// {},
|
||||
// {
|
||||
// type: 'QA'
|
||||
// // times: 2
|
||||
// }
|
||||
// );
|
||||
const data = await SplitData.aggregate([
|
||||
{ $match: { textList: { $exists: true, $ne: [] } } },
|
||||
{ $sample: { size: 1 } }
|
||||
]);
|
||||
|
||||
await Data.updateMany(
|
||||
{},
|
||||
{
|
||||
type: 'QA'
|
||||
}
|
||||
);
|
||||
const dataItem: any = data[0];
|
||||
const textList: string[] = dataItem.textList.slice(-5);
|
||||
console.log(textList);
|
||||
console.log(dataItem.textList.slice(0, -5));
|
||||
await SplitData.findByIdAndUpdate(dataItem._id, {
|
||||
textList: dataItem.textList.slice(0, -5)
|
||||
});
|
||||
|
||||
jsonRes(res, {
|
||||
data: {}
|
||||
@@ -4,8 +4,7 @@ import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, Training, Model } from '@/service/mongo';
|
||||
import type { TrainingItemType } from '@/types/training';
|
||||
import { TrainingStatusEnum, ModelStatusEnum } from '@/constants/model';
|
||||
import { getOpenAIApi } from '@/service/utils/chat';
|
||||
import { getUserApiOpenai } from '@/service/utils/tools';
|
||||
import { getUserApiOpenai } from '@/service/utils/openai';
|
||||
import { OpenAiTuneStatusEnum } from '@/service/constants/training';
|
||||
import { sendTrainSucceed } from '@/service/utils/sendEmail';
|
||||
import { httpsAgent } from '@/service/utils/tools';
|
||||
|
||||
@@ -120,6 +120,7 @@ const Chat = ({ chatId }: { chatId: string }) => {
|
||||
const urlMap: Record<string, string> = {
|
||||
[ChatModelNameEnum.GPT35]: '/api/chat/chatGpt',
|
||||
[ChatModelNameEnum.VECTOR_GPT]: '/api/chat/vectorGpt',
|
||||
// [ChatModelNameEnum.VECTOR_GPT]: '/api/chat/lafGpt',
|
||||
[ChatModelNameEnum.GPT3]: '/api/chat/gpt3'
|
||||
};
|
||||
|
||||
@@ -191,14 +192,22 @@ const Chat = ({ chatId }: { chatId: string }) => {
|
||||
* 发送一个内容
|
||||
*/
|
||||
const sendPrompt = useCallback(async () => {
|
||||
if (isChatting) {
|
||||
toast({
|
||||
title: '正在聊天中...请等待结束',
|
||||
status: 'warning'
|
||||
});
|
||||
return;
|
||||
}
|
||||
const storeInput = inputVal;
|
||||
// 去除空行
|
||||
const val = inputVal
|
||||
.trim()
|
||||
.split('\n')
|
||||
.filter((val) => val)
|
||||
.join('\n');
|
||||
if (!chatData?.modelId || !val || !ChatBox.current || isChatting) {
|
||||
const val = inputVal.trim().replace(/\n\s*/g, '\n');
|
||||
|
||||
if (!chatData?.modelId || !val) {
|
||||
toast({
|
||||
title: '内容为空',
|
||||
status: 'warning'
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -452,9 +461,8 @@ const Chat = ({ chatId }: { chatId: string }) => {
|
||||
</Box>
|
||||
{/* 发送区 */}
|
||||
<Box m={media('20px auto', '0 auto')} w={'100%'} maxW={media('min(750px, 100%)', 'auto')}>
|
||||
<Flex
|
||||
alignItems={'flex-end'}
|
||||
py={5}
|
||||
<Box
|
||||
py={'18px'}
|
||||
position={'relative'}
|
||||
boxShadow={`0 0 15px rgba(0,0,0,0.1)`}
|
||||
border={media('1px solid', '0')}
|
||||
@@ -465,10 +473,8 @@ const Chat = ({ chatId }: { chatId: string }) => {
|
||||
{/* 输入框 */}
|
||||
<Textarea
|
||||
ref={TextareaDom}
|
||||
flex={1}
|
||||
w={0}
|
||||
py={0}
|
||||
pr={0}
|
||||
pr={['45px', '55px']}
|
||||
border={'none'}
|
||||
_focusVisible={{
|
||||
border: 'none'
|
||||
@@ -482,6 +488,8 @@ const Chat = ({ chatId }: { chatId: string }) => {
|
||||
maxHeight={'150px'}
|
||||
maxLength={-1}
|
||||
overflowY={'auto'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
wordBreak={'break-all'}
|
||||
color={useColorModeValue('blackAlpha.700', 'white')}
|
||||
onChange={(e) => {
|
||||
const textarea = e.target;
|
||||
@@ -501,27 +509,34 @@ const Chat = ({ chatId }: { chatId: string }) => {
|
||||
}}
|
||||
/>
|
||||
{/* 发送和等待按键 */}
|
||||
<Box px={4} onClick={sendPrompt}>
|
||||
<Flex
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
h={'30px'}
|
||||
w={'30px'}
|
||||
position={'absolute'}
|
||||
right={['12px', '20px']}
|
||||
bottom={'15px'}
|
||||
onClick={sendPrompt}
|
||||
>
|
||||
{isChatting ? (
|
||||
<Image
|
||||
style={{ transform: 'translateY(4px)' }}
|
||||
src={'/icon/chatting.svg'}
|
||||
width={30}
|
||||
height={30}
|
||||
fill
|
||||
alt={''}
|
||||
/>
|
||||
) : (
|
||||
<Box cursor={'pointer'}>
|
||||
<Icon
|
||||
name={'chatSend'}
|
||||
width={'20px'}
|
||||
height={'20px'}
|
||||
fill={useColorModeValue('#718096', 'white')}
|
||||
></Icon>
|
||||
</Box>
|
||||
<Icon
|
||||
name={'chatSend'}
|
||||
width={['18px', '20px']}
|
||||
height={['18px', '20px']}
|
||||
cursor={'pointer'}
|
||||
fill={useColorModeValue('#718096', 'white')}
|
||||
></Icon>
|
||||
)}
|
||||
</Box>
|
||||
</Flex>
|
||||
</Flex>
|
||||
</Box>
|
||||
</Box>
|
||||
</Flex>
|
||||
</Flex>
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import React, { useState, useCallback } from 'react';
|
||||
import {
|
||||
Box,
|
||||
IconButton,
|
||||
Flex,
|
||||
Button,
|
||||
Modal,
|
||||
@@ -9,37 +8,40 @@ import {
|
||||
ModalContent,
|
||||
ModalHeader,
|
||||
ModalCloseButton,
|
||||
Input,
|
||||
Textarea
|
||||
} from '@chakra-ui/react';
|
||||
import { useForm, useFieldArray } from 'react-hook-form';
|
||||
import { postModelDataInput } from '@/api/model';
|
||||
import { useForm } from 'react-hook-form';
|
||||
import { postModelDataInput, putModelDataById } from '@/api/model';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { DeleteIcon } from '@chakra-ui/icons';
|
||||
import { customAlphabet } from 'nanoid';
|
||||
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
|
||||
|
||||
type FormData = { text: string; q: string };
|
||||
export type FormData = { dataId?: string; text: string; q: string };
|
||||
|
||||
const InputDataModal = ({
|
||||
onClose,
|
||||
onSuccess,
|
||||
modelId
|
||||
modelId,
|
||||
defaultValues = {
|
||||
text: '',
|
||||
q: ''
|
||||
}
|
||||
}: {
|
||||
onClose: () => void;
|
||||
onSuccess: () => void;
|
||||
modelId: string;
|
||||
defaultValues?: FormData;
|
||||
}) => {
|
||||
const [importing, setImporting] = useState(false);
|
||||
const { toast } = useToast();
|
||||
|
||||
const { register, handleSubmit, control } = useForm<FormData>({
|
||||
defaultValues: {
|
||||
text: '',
|
||||
q: ''
|
||||
}
|
||||
const { register, handleSubmit } = useForm<FormData>({
|
||||
defaultValues
|
||||
});
|
||||
|
||||
/**
|
||||
* 确认导入新数据
|
||||
*/
|
||||
const sureImportData = useCallback(
|
||||
async (e: FormData) => {
|
||||
setImporting(true);
|
||||
@@ -72,6 +74,26 @@ const InputDataModal = ({
|
||||
[modelId, onClose, onSuccess, toast]
|
||||
);
|
||||
|
||||
const updateData = useCallback(
|
||||
async (e: FormData) => {
|
||||
if (!e.dataId) return;
|
||||
if (e.text === defaultValues.text && e.q === defaultValues.q) return;
|
||||
|
||||
await putModelDataById({
|
||||
dataId: e.dataId,
|
||||
text: e.text,
|
||||
q: e.q === defaultValues.q ? '' : e.q
|
||||
});
|
||||
toast({
|
||||
title: '修改回答成功',
|
||||
status: 'success'
|
||||
});
|
||||
onClose();
|
||||
onSuccess();
|
||||
},
|
||||
[defaultValues.q, onClose, onSuccess, toast]
|
||||
);
|
||||
|
||||
return (
|
||||
<Modal isOpen={true} onClose={onClose} isCentered>
|
||||
<ModalOverlay />
|
||||
@@ -125,7 +147,10 @@ const InputDataModal = ({
|
||||
<Button variant={'outline'} mr={3} onClick={onClose}>
|
||||
取消
|
||||
</Button>
|
||||
<Button isLoading={importing} onClick={handleSubmit(sureImportData)}>
|
||||
<Button
|
||||
isLoading={importing}
|
||||
onClick={handleSubmit(defaultValues.dataId ? updateData : sureImportData)}
|
||||
>
|
||||
确认导入
|
||||
</Button>
|
||||
</Flex>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useCallback } from 'react';
|
||||
import React, { useCallback, useState } from 'react';
|
||||
import {
|
||||
Box,
|
||||
TableContainer,
|
||||
@@ -12,7 +12,6 @@ import {
|
||||
Flex,
|
||||
Button,
|
||||
useDisclosure,
|
||||
Textarea,
|
||||
Menu,
|
||||
MenuButton,
|
||||
MenuList,
|
||||
@@ -25,22 +24,22 @@ import { usePagination } from '@/hooks/usePagination';
|
||||
import {
|
||||
getModelDataList,
|
||||
delOneModelData,
|
||||
putModelDataById,
|
||||
getModelSplitDataList,
|
||||
getModelSplitDataListLen,
|
||||
getExportDataList
|
||||
} from '@/api/model';
|
||||
import { DeleteIcon, RepeatIcon } from '@chakra-ui/icons';
|
||||
import { DeleteIcon, RepeatIcon, EditIcon } from '@chakra-ui/icons';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { useLoading } from '@/hooks/useLoading';
|
||||
import dynamic from 'next/dynamic';
|
||||
import { useMutation, useQuery } from '@tanstack/react-query';
|
||||
import type { FormData as InputDataType } from './InputDataModal';
|
||||
|
||||
const InputModel = dynamic(() => import('./InputDataModal'));
|
||||
const SelectFileModel = dynamic(() => import('./SelectFileModal'));
|
||||
const SelectUrlModel = dynamic(() => import('./SelectUrlModal'));
|
||||
const SelectJsonModel = dynamic(() => import('./SelectJsonModal'));
|
||||
|
||||
const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
const { toast } = useToast();
|
||||
const { Loading } = useLoading();
|
||||
|
||||
const {
|
||||
@@ -58,38 +57,26 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
}
|
||||
});
|
||||
|
||||
const updateAnswer = useCallback(
|
||||
async (dataId: string, text: string) => {
|
||||
await putModelDataById({
|
||||
dataId,
|
||||
text
|
||||
});
|
||||
toast({
|
||||
title: '修改回答成功',
|
||||
status: 'success'
|
||||
});
|
||||
},
|
||||
[toast]
|
||||
);
|
||||
const [editInputData, setEditInputData] = useState<InputDataType>();
|
||||
|
||||
const {
|
||||
isOpen: isOpenInputModal,
|
||||
onOpen: onOpenInputModal,
|
||||
onClose: onCloseInputModal
|
||||
} = useDisclosure();
|
||||
const {
|
||||
isOpen: isOpenSelectFileModal,
|
||||
onOpen: onOpenSelectFileModal,
|
||||
onClose: onCloseSelectFileModal
|
||||
} = useDisclosure();
|
||||
const {
|
||||
isOpen: isOpenSelectUrlModal,
|
||||
onOpen: onOpenSelectUrlModal,
|
||||
onClose: onCloseSelectUrlModal
|
||||
} = useDisclosure();
|
||||
const {
|
||||
isOpen: isOpenSelectJsonModal,
|
||||
onOpen: onOpenSelectJsonModal,
|
||||
onClose: onCloseSelectJsonModal
|
||||
} = useDisclosure();
|
||||
|
||||
const { data: splitDataList, refetch } = useQuery(['getModelSplitDataList'], () =>
|
||||
getModelSplitDataList(model._id)
|
||||
const { data: splitDataLen, refetch } = useQuery(['getModelSplitDataList'], () =>
|
||||
getModelSplitDataListLen(model._id)
|
||||
);
|
||||
|
||||
const refetchData = useCallback(
|
||||
@@ -151,16 +138,24 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
导入
|
||||
</MenuButton>
|
||||
<MenuList>
|
||||
<MenuItem onClick={onOpenInputModal}>手动输入</MenuItem>
|
||||
<MenuItem
|
||||
onClick={() =>
|
||||
setEditInputData({
|
||||
text: '',
|
||||
q: ''
|
||||
})
|
||||
}
|
||||
>
|
||||
手动输入
|
||||
</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectFileModal}>文件导入</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectUrlModal}>网站地址导入</MenuItem>
|
||||
<MenuItem onClick={onOpenSelectJsonModal}>JSON导入</MenuItem>
|
||||
</MenuList>
|
||||
</Menu>
|
||||
</Flex>
|
||||
{splitDataList && splitDataList.length > 0 && (
|
||||
<Box fontSize={'xs'}>
|
||||
{splitDataList.map((item) => item.textList).flat().length}条数据正在拆分...
|
||||
</Box>
|
||||
{!!(splitDataLen && splitDataLen > 0) && (
|
||||
<Box fontSize={'xs'}>{splitDataLen}条数据正在拆分...</Box>
|
||||
)}
|
||||
<Box mt={4}>
|
||||
<TableContainer minH={'500px'}>
|
||||
@@ -170,34 +165,44 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
<Th>Question</Th>
|
||||
<Th>Text</Th>
|
||||
<Th>Status</Th>
|
||||
<Th></Th>
|
||||
<Th>操作</Th>
|
||||
</Tr>
|
||||
</Thead>
|
||||
<Tbody>
|
||||
{modelDataList.map((item) => (
|
||||
<Tr key={item.id}>
|
||||
<Td w={'350px'}>
|
||||
<Box fontSize={'xs'} w={'100%'} whiteSpace={'pre-wrap'} _notLast={{ mb: 1 }}>
|
||||
<Td minW={'200px'}>
|
||||
<Box fontSize={'xs'} whiteSpace={'pre-wrap'}>
|
||||
{item.q}
|
||||
</Box>
|
||||
</Td>
|
||||
<Td minW={'200px'}>
|
||||
<Textarea
|
||||
<Box
|
||||
w={'100%'}
|
||||
h={'100%'}
|
||||
defaultValue={item.text}
|
||||
fontSize={'xs'}
|
||||
resize={'both'}
|
||||
onBlur={(e) => {
|
||||
const oldVal = modelDataList.find((data) => item.id === data.id)?.text;
|
||||
if (oldVal !== e.target.value) {
|
||||
updateAnswer(item.id, e.target.value);
|
||||
}
|
||||
}}
|
||||
></Textarea>
|
||||
whiteSpace={'pre-wrap'}
|
||||
maxH={'250px'}
|
||||
overflowY={'auto'}
|
||||
>
|
||||
{item.text}
|
||||
</Box>
|
||||
</Td>
|
||||
<Td w={'100px'}>{ModelDataStatusMap[item.status]}</Td>
|
||||
<Td>{ModelDataStatusMap[item.status]}</Td>
|
||||
<Td>
|
||||
<IconButton
|
||||
mr={5}
|
||||
icon={<EditIcon />}
|
||||
variant={'outline'}
|
||||
aria-label={'delete'}
|
||||
size={'sm'}
|
||||
onClick={() =>
|
||||
setEditInputData({
|
||||
dataId: item.id,
|
||||
q: item.q,
|
||||
text: item.text
|
||||
})
|
||||
}
|
||||
/>
|
||||
<IconButton
|
||||
icon={<DeleteIcon />}
|
||||
variant={'outline'}
|
||||
@@ -221,8 +226,13 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
</Box>
|
||||
|
||||
<Loading loading={isLoading} fixed={false} />
|
||||
{isOpenInputModal && (
|
||||
<InputModel modelId={model._id} onClose={onCloseInputModal} onSuccess={refetchData} />
|
||||
{editInputData !== undefined && (
|
||||
<InputModel
|
||||
modelId={model._id}
|
||||
defaultValues={editInputData}
|
||||
onClose={() => setEditInputData(undefined)}
|
||||
onSuccess={refetchData}
|
||||
/>
|
||||
)}
|
||||
{isOpenSelectFileModal && (
|
||||
<SelectFileModel
|
||||
@@ -231,6 +241,13 @@ const ModelDataCard = ({ model }: { model: ModelSchema }) => {
|
||||
onSuccess={refetchData}
|
||||
/>
|
||||
)}
|
||||
{isOpenSelectUrlModal && (
|
||||
<SelectUrlModel
|
||||
modelId={model._id}
|
||||
onClose={onCloseSelectUrlModal}
|
||||
onSuccess={refetchData}
|
||||
/>
|
||||
)}
|
||||
{isOpenSelectJsonModal && (
|
||||
<SelectJsonModel
|
||||
modelId={model._id}
|
||||
|
||||
@@ -9,7 +9,8 @@ import {
|
||||
ModalHeader,
|
||||
ModalCloseButton,
|
||||
ModalBody,
|
||||
Input
|
||||
Input,
|
||||
Textarea
|
||||
} from '@chakra-ui/react';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { useSelectFile } from '@/hooks/useSelectFile';
|
||||
@@ -18,7 +19,8 @@ import { encode } from 'gpt-token-utils';
|
||||
import { useConfirm } from '@/hooks/useConfirm';
|
||||
import { readTxtContent, readPdfContent, readDocContent } from '@/utils/tools';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
import { postModelDataFileText } from '@/api/model';
|
||||
import { postModelDataSplitData } from '@/api/model';
|
||||
import { formatPrice } from '@/utils/user';
|
||||
|
||||
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
|
||||
|
||||
@@ -66,10 +68,9 @@ const SelectFileModal = ({
|
||||
})
|
||||
)
|
||||
)
|
||||
.join('\n')
|
||||
.replace(/\n+/g, '\n');
|
||||
.join(' ')
|
||||
.replace(/(\\n|\n)+/g, '\n');
|
||||
setFileText(fileTexts);
|
||||
console.log(encode(fileTexts));
|
||||
} catch (error: any) {
|
||||
console.log(error);
|
||||
toast({
|
||||
@@ -85,7 +86,7 @@ const SelectFileModal = ({
|
||||
const { mutate, isLoading } = useMutation({
|
||||
mutationFn: async () => {
|
||||
if (!fileText) return;
|
||||
await postModelDataFileText({
|
||||
await postModelDataSplitData({
|
||||
modelId,
|
||||
text: fileText,
|
||||
prompt: `下面是${prompt || '一段长文本'}`
|
||||
@@ -126,10 +127,11 @@ const SelectFileModal = ({
|
||||
</Button>
|
||||
<Box mt={2} maxW={['100%', '70%']}>
|
||||
支持 {fileExtension} 文件。模型会自动对文本进行 QA 拆分,需要较长训练时间,拆分需要消耗
|
||||
tokens,大约0.04元/1k tokens,请确保账号余额充足。
|
||||
tokens,账号余额不足时,未拆分的数据会被删除。
|
||||
</Box>
|
||||
<Box mt={2}>
|
||||
一共 {fileText.length} 个字,{encode(fileText).length} 个tokens
|
||||
一共 {encode(fileText).length} 个tokens,大约 {formatPrice(encode(fileText).length * 4)}
|
||||
元
|
||||
</Box>
|
||||
<Flex w={'100%'} alignItems={'center'} my={4}>
|
||||
<Box flex={'0 0 auto'} mr={2}>
|
||||
@@ -142,18 +144,18 @@ const SelectFileModal = ({
|
||||
size={'sm'}
|
||||
/>
|
||||
</Flex>
|
||||
<Box
|
||||
<Textarea
|
||||
flex={'1 0 0'}
|
||||
h={0}
|
||||
w={'100%'}
|
||||
overflowY={'auto'}
|
||||
p={2}
|
||||
backgroundColor={'blackAlpha.50'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
placeholder="文件内容"
|
||||
maxLength={-1}
|
||||
resize={'none'}
|
||||
fontSize={'xs'}
|
||||
>
|
||||
{fileText}
|
||||
</Box>
|
||||
whiteSpace={'pre-wrap'}
|
||||
value={fileText}
|
||||
onChange={(e) => setFileText(e.target.value)}
|
||||
/>
|
||||
</ModalBody>
|
||||
|
||||
<Flex px={6} pt={2} pb={4}>
|
||||
|
||||
168
src/pages/model/detail/components/SelectUrlModal.tsx
Normal file
168
src/pages/model/detail/components/SelectUrlModal.tsx
Normal file
@@ -0,0 +1,168 @@
|
||||
import React, { useState } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Flex,
|
||||
Button,
|
||||
Modal,
|
||||
ModalOverlay,
|
||||
ModalContent,
|
||||
ModalHeader,
|
||||
ModalCloseButton,
|
||||
ModalBody,
|
||||
Input,
|
||||
Textarea
|
||||
} from '@chakra-ui/react';
|
||||
import { useToast } from '@/hooks/useToast';
|
||||
import { customAlphabet } from 'nanoid';
|
||||
import { encode } from 'gpt-token-utils';
|
||||
import { useConfirm } from '@/hooks/useConfirm';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
import { postModelDataSplitData, getWebContent } from '@/api/model';
|
||||
import { formatPrice } from '@/utils/user';
|
||||
|
||||
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
|
||||
|
||||
const SelectUrlModal = ({
|
||||
onClose,
|
||||
onSuccess,
|
||||
modelId
|
||||
}: {
|
||||
onClose: () => void;
|
||||
onSuccess: () => void;
|
||||
modelId: string;
|
||||
}) => {
|
||||
const { toast } = useToast();
|
||||
const [webUrl, setWebUrl] = useState('');
|
||||
const [webText, setWebText] = useState('');
|
||||
const [prompt, setPrompt] = useState(''); // 提示词
|
||||
const { openConfirm, ConfirmChild } = useConfirm({
|
||||
content: '确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,任务讲被终止。'
|
||||
});
|
||||
|
||||
const { mutate: onclickImport, isLoading: isImporting } = useMutation({
|
||||
mutationFn: async () => {
|
||||
if (!webText) return;
|
||||
await postModelDataSplitData({
|
||||
modelId,
|
||||
text: webText,
|
||||
prompt: `下面是${prompt || '一段长文本'}`
|
||||
});
|
||||
toast({
|
||||
title: '导入数据成功,需要一段拆解和训练',
|
||||
status: 'success'
|
||||
});
|
||||
onClose();
|
||||
onSuccess();
|
||||
},
|
||||
onError(error) {
|
||||
console.log(error);
|
||||
toast({
|
||||
title: '导入数据失败',
|
||||
status: 'error'
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
const { mutate: onclickFetchingUrl, isLoading: isFetching } = useMutation({
|
||||
mutationFn: async () => {
|
||||
if (!webUrl) return;
|
||||
const res = await getWebContent(webUrl);
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(res, 'text/html');
|
||||
const data = htmlDoc?.body?.innerText || '';
|
||||
|
||||
if (!data) {
|
||||
throw new Error('获取不到数据');
|
||||
}
|
||||
setWebText(data.replace(/\s+/g, ' '));
|
||||
},
|
||||
onError(error) {
|
||||
console.log(error);
|
||||
toast({
|
||||
status: 'error',
|
||||
title: '获取网站内容失败'
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
return (
|
||||
<Modal isOpen={true} onClose={onClose} isCentered>
|
||||
<ModalOverlay />
|
||||
<ModalContent maxW={'min(900px, 90vw)'} m={0} position={'relative'} h={'90vh'}>
|
||||
<ModalHeader>网站地址导入</ModalHeader>
|
||||
<ModalCloseButton />
|
||||
|
||||
<ModalBody
|
||||
display={'flex'}
|
||||
flexDirection={'column'}
|
||||
p={4}
|
||||
h={'100%'}
|
||||
alignItems={'center'}
|
||||
justifyContent={'center'}
|
||||
fontSize={'sm'}
|
||||
>
|
||||
<Box mt={2} maxW={['100%', '70%']}>
|
||||
根据网站地址,获取网站文本内容(请注意获取后的内容,不是每个网站内容都能获取到的)。模型会对文本进行
|
||||
QA 拆分,需要较长训练时间,拆分需要消耗 tokens,账号余额不足时,未拆分的数据会被删除。
|
||||
</Box>
|
||||
<Box mt={2}>
|
||||
一共 {encode(webText).length} 个tokens,大约 {formatPrice(encode(webText).length * 4)}元
|
||||
</Box>
|
||||
<Flex w={'100%'} alignItems={'center'} my={4}>
|
||||
<Box flex={'0 0 70px'}>网站地址</Box>
|
||||
<Input
|
||||
mx={2}
|
||||
placeholder="需要获取内容的地址。例如:https://fastgpt.ahapocket.cn"
|
||||
value={webUrl}
|
||||
onChange={(e) => setWebUrl(e.target.value)}
|
||||
size={'sm'}
|
||||
/>
|
||||
<Button isLoading={isFetching} onClick={() => onclickFetchingUrl()}>
|
||||
获取
|
||||
</Button>
|
||||
</Flex>
|
||||
<Flex w={'100%'} alignItems={'center'} my={4}>
|
||||
<Box flex={'0 0 70px'} mr={2}>
|
||||
下面是
|
||||
</Box>
|
||||
<Input
|
||||
placeholder="内容提示词。例如: Laf的介绍/关于gpt4的论文/一段长文本"
|
||||
value={prompt}
|
||||
onChange={(e) => setPrompt(e.target.value)}
|
||||
size={'sm'}
|
||||
/>
|
||||
</Flex>
|
||||
<Textarea
|
||||
flex={'1 0 0'}
|
||||
h={0}
|
||||
w={'100%'}
|
||||
placeholder="网站的内容"
|
||||
maxLength={-1}
|
||||
resize={'none'}
|
||||
fontSize={'xs'}
|
||||
whiteSpace={'pre-wrap'}
|
||||
value={webText}
|
||||
onChange={(e) => setWebText(e.target.value)}
|
||||
/>
|
||||
</ModalBody>
|
||||
|
||||
<Flex px={6} pt={2} pb={4}>
|
||||
<Box flex={1}></Box>
|
||||
<Button variant={'outline'} mr={3} onClick={onClose}>
|
||||
取消
|
||||
</Button>
|
||||
<Button
|
||||
isLoading={isImporting}
|
||||
isDisabled={webText === ''}
|
||||
onClick={openConfirm(onclickImport)}
|
||||
>
|
||||
确认导入
|
||||
</Button>
|
||||
</Flex>
|
||||
</ModalContent>
|
||||
<ConfirmChild />
|
||||
</Modal>
|
||||
);
|
||||
};
|
||||
|
||||
export default SelectUrlModal;
|
||||
@@ -95,7 +95,7 @@ const PayModal = ({ onClose }: { onClose: () => void }) => {
|
||||
{!payId && (
|
||||
<>
|
||||
{/* 价格表 */}
|
||||
<TableContainer mb={4}>
|
||||
{/* <TableContainer mb={4}>
|
||||
<Table>
|
||||
<Thead>
|
||||
<Tr>
|
||||
@@ -112,7 +112,7 @@ const PayModal = ({ onClose }: { onClose: () => void }) => {
|
||||
))}
|
||||
</Tbody>
|
||||
</Table>
|
||||
</TableContainer>
|
||||
</TableContainer> */}
|
||||
<Grid gridTemplateColumns={'repeat(4,1fr)'} gridGap={5} mb={4}>
|
||||
{[5, 10, 20, 50].map((item) => (
|
||||
<Button
|
||||
|
||||
@@ -6,6 +6,9 @@ export const openaiError: Record<string, string> = {
|
||||
'Too Many Requests': '请求次数太多了,请慢点~',
|
||||
'Bad Gateway': '网关异常,请重试'
|
||||
};
|
||||
export const openaiError2: Record<string, string> = {
|
||||
insufficient_quota: 'API 余额不足'
|
||||
};
|
||||
export const proxyError: Record<string, boolean> = {
|
||||
ECONNABORTED: true,
|
||||
ECONNRESET: true
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { DataItem } from '@/service/mongo';
|
||||
import { getOpenAIApi } from '@/service/utils/chat';
|
||||
import { httpsAgent, getOpenApiKey } from '@/service/utils/tools';
|
||||
import { httpsAgent } from '@/service/utils/tools';
|
||||
import { getOpenApiKey } from '../utils/openai';
|
||||
import type { ChatCompletionRequestMessage } from 'openai';
|
||||
import { DataItemSchema } from '@/types/mongoSchema';
|
||||
import { ChatModelNameEnum } from '@/constants/model';
|
||||
@@ -38,7 +39,7 @@ export async function generateAbstract(next = false): Promise<any> {
|
||||
// 获取 openapi Key
|
||||
let userApiKey, systemKey;
|
||||
try {
|
||||
const key = await getOpenApiKey(dataItem.userId, true);
|
||||
const key = await getOpenApiKey(dataItem.userId);
|
||||
userApiKey = key.userApiKey;
|
||||
systemKey = key.systemKey;
|
||||
} catch (error: any) {
|
||||
@@ -83,36 +84,6 @@ export async function generateAbstract(next = false): Promise<any> {
|
||||
const rawContent: string = abstractResponse?.data.choices[0].message?.content || '';
|
||||
// 从 content 中提取摘要内容
|
||||
const splitContents = splitText(rawContent);
|
||||
// console.log(rawContent);
|
||||
// 生成词向量
|
||||
// const vectorResponse = await Promise.allSettled(
|
||||
// splitContents.map((item) =>
|
||||
// chatAPI.createEmbedding(
|
||||
// {
|
||||
// model: 'text-embedding-ada-002',
|
||||
// input: item.abstract
|
||||
// },
|
||||
// {
|
||||
// timeout: 120000,
|
||||
// httpsAgent
|
||||
// }
|
||||
// )
|
||||
// )
|
||||
// );
|
||||
// 筛选成功的向量请求
|
||||
// const vectorSuccessResponse = vectorResponse
|
||||
// .map((item: any, i) => {
|
||||
// if (item.status !== 'fulfilled') {
|
||||
// // 没有词向量的【摘要】不要
|
||||
// console.log('获取词向量错误: ', item);
|
||||
// return '';
|
||||
// }
|
||||
// return {
|
||||
// abstract: splitContents[i].abstract,
|
||||
// abstractVector: item?.value?.data?.data?.[0]?.embedding
|
||||
// };
|
||||
// })
|
||||
// .filter((item) => item);
|
||||
|
||||
// 插入数据库,并修改状态
|
||||
await DataItem.findByIdAndUpdate(dataItem._id, {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { SplitData } from '@/service/mongo';
|
||||
import { getOpenAIApi } from '@/service/utils/chat';
|
||||
import { httpsAgent, getOpenApiKey } from '@/service/utils/tools';
|
||||
import { httpsAgent } from '@/service/utils/tools';
|
||||
import { getOpenApiKey } from '../utils/openai';
|
||||
import type { ChatCompletionRequestMessage } from 'openai';
|
||||
import { ChatModelNameEnum } from '@/constants/model';
|
||||
import { pushSplitDataBill } from '@/service/events/pushBill';
|
||||
@@ -8,18 +9,24 @@ import { generateVector } from './generateVector';
|
||||
import { connectRedis } from '../redis';
|
||||
import { VecModelDataPrefix } from '@/constants/redis';
|
||||
import { customAlphabet } from 'nanoid';
|
||||
import { ModelSplitDataSchema } from '@/types/mongoSchema';
|
||||
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
|
||||
|
||||
export async function generateQA(next = false): Promise<any> {
|
||||
if (global.generatingQA && !next) return;
|
||||
if (global.generatingQA === true && !next) return;
|
||||
global.generatingQA = true;
|
||||
|
||||
let dataId = null;
|
||||
|
||||
try {
|
||||
const redis = await connectRedis();
|
||||
// 找出一个需要生成的 dataItem
|
||||
const dataItem = await SplitData.findOne({
|
||||
textList: { $exists: true, $ne: [] }
|
||||
});
|
||||
const data = await SplitData.aggregate([
|
||||
{ $match: { textList: { $exists: true, $ne: [] } } },
|
||||
{ $sample: { size: 1 } }
|
||||
]);
|
||||
|
||||
const dataItem: ModelSplitDataSchema = data[0];
|
||||
|
||||
if (!dataItem) {
|
||||
console.log('没有需要生成 QA 的数据');
|
||||
@@ -27,17 +34,15 @@ export async function generateQA(next = false): Promise<any> {
|
||||
return;
|
||||
}
|
||||
|
||||
// 源文本
|
||||
const text = dataItem.textList[dataItem.textList.length - 1];
|
||||
if (!text) {
|
||||
await SplitData.findByIdAndUpdate(dataItem._id, { $pop: { textList: 1 } }); // 弹出无效文本
|
||||
throw new Error('无文本');
|
||||
}
|
||||
dataId = dataItem._id;
|
||||
|
||||
// 获取 5 个源文本
|
||||
const textList: string[] = dataItem.textList.slice(-5);
|
||||
|
||||
// 获取 openapi Key
|
||||
let userApiKey, systemKey;
|
||||
try {
|
||||
const key = await getOpenApiKey(dataItem.userId, true);
|
||||
const key = await getOpenApiKey(dataItem.userId);
|
||||
userApiKey = key.userApiKey;
|
||||
systemKey = key.systemKey;
|
||||
} catch (error: any) {
|
||||
@@ -47,13 +52,13 @@ export async function generateQA(next = false): Promise<any> {
|
||||
textList: [],
|
||||
errorText: error.message
|
||||
});
|
||||
throw new Error('账号余额不足');
|
||||
throw new Error(error?.message);
|
||||
}
|
||||
|
||||
throw new Error('获取 openai key 失败');
|
||||
}
|
||||
|
||||
console.log('正在生成一组QA, ID:', dataItem._id);
|
||||
console.log(`正在生成一组QA, 包含 ${textList.length} 组文本。ID: ${dataItem._id}`);
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
@@ -67,33 +72,50 @@ export async function generateQA(next = false): Promise<any> {
|
||||
};
|
||||
|
||||
// 请求 chatgpt 获取回答
|
||||
const response = await chatAPI
|
||||
.createChatCompletion(
|
||||
{
|
||||
model: ChatModelNameEnum.GPT35,
|
||||
temperature: 0.8,
|
||||
n: 1,
|
||||
messages: [
|
||||
systemPrompt,
|
||||
const response = await Promise.allSettled(
|
||||
textList.map((text) =>
|
||||
chatAPI
|
||||
.createChatCompletion(
|
||||
{
|
||||
role: 'user',
|
||||
content: text
|
||||
model: ChatModelNameEnum.GPT35,
|
||||
temperature: 0.8,
|
||||
n: 1,
|
||||
messages: [
|
||||
systemPrompt,
|
||||
{
|
||||
role: 'user',
|
||||
content: text
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
timeout: 180000,
|
||||
httpsAgent
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
timeout: 120000,
|
||||
httpsAgent
|
||||
}
|
||||
)
|
||||
.then((res) => ({
|
||||
rawContent: res?.data.choices[0].message?.content || '', // chatgpt原本的回复
|
||||
result: splitText(res?.data.choices[0].message?.content || '') // 格式化后的QA对
|
||||
}))
|
||||
)
|
||||
.then((res) => ({
|
||||
rawContent: res?.data.choices[0].message?.content || '', // chatgpt原本的回复
|
||||
result: splitText(res?.data.choices[0].message?.content || '') // 格式化后的QA对
|
||||
}));
|
||||
);
|
||||
|
||||
// 获取成功的回答
|
||||
const successResponse: {
|
||||
rawContent: string;
|
||||
result: {
|
||||
q: string;
|
||||
a: string;
|
||||
}[];
|
||||
}[] = response.filter((item) => item.status === 'fulfilled').map((item: any) => item.value);
|
||||
|
||||
const resultList = successResponse.map((item) => item.result).flat();
|
||||
|
||||
await Promise.allSettled([
|
||||
SplitData.findByIdAndUpdate(dataItem._id, { $pop: { textList: 1 } }), // 弹出已经拆分的文本
|
||||
...response.result.map((item) => {
|
||||
SplitData.findByIdAndUpdate(dataItem._id, {
|
||||
textList: dataItem.textList.slice(0, -5)
|
||||
}), // 删掉后5个数据
|
||||
...resultList.map((item) => {
|
||||
// 插入 redis
|
||||
return redis.sendCommand([
|
||||
'HMSET',
|
||||
@@ -116,26 +138,46 @@ export async function generateQA(next = false): Promise<any> {
|
||||
'生成QA成功,time:',
|
||||
`${(Date.now() - startTime) / 1000}s`,
|
||||
'QA数量:',
|
||||
response.result.length
|
||||
resultList.length
|
||||
);
|
||||
|
||||
// 计费
|
||||
pushSplitDataBill({
|
||||
isPay: !userApiKey && response.result.length > 0,
|
||||
isPay: !userApiKey && resultList.length > 0,
|
||||
userId: dataItem.userId,
|
||||
type: 'QA',
|
||||
text: systemPrompt.content + text + response.rawContent
|
||||
text:
|
||||
systemPrompt.content +
|
||||
textList.join('') +
|
||||
successResponse.map((item) => item.rawContent).join('')
|
||||
});
|
||||
|
||||
generateQA(true);
|
||||
generateVector(true);
|
||||
generateVector();
|
||||
} catch (error: any) {
|
||||
console.log(error);
|
||||
console.log('生成QA错误:', error?.response);
|
||||
// log
|
||||
if (error?.response) {
|
||||
console.log('openai error: 生成QA错误');
|
||||
console.log(error.response?.status, error.response?.statusText, error.response?.data);
|
||||
} else {
|
||||
console.log('生成QA错误:', error);
|
||||
}
|
||||
|
||||
if (dataId && error?.response?.data?.error?.type === 'insufficient_quota') {
|
||||
console.log('api 余额不足');
|
||||
|
||||
await SplitData.findByIdAndUpdate(dataId, {
|
||||
textList: [],
|
||||
errorText: 'api 余额不足'
|
||||
});
|
||||
|
||||
generateQA(true);
|
||||
return;
|
||||
}
|
||||
|
||||
setTimeout(() => {
|
||||
generateQA(true);
|
||||
}, 5000);
|
||||
}, 4000);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -154,10 +196,7 @@ function splitText(text: string) {
|
||||
// 如果Q和A都存在,就将其添加到结果中
|
||||
result.push({
|
||||
q,
|
||||
a: a // 过滤空行
|
||||
.split('\n')
|
||||
.filter((item) => item)
|
||||
.join('\n')
|
||||
a: a.trim().replace(/\n\s*/g, '\n')
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
import { getOpenAIApi } from '@/service/utils/chat';
|
||||
import { httpsAgent } from '@/service/utils/tools';
|
||||
import { connectRedis } from '../redis';
|
||||
import { VecModelDataIdx } from '@/constants/redis';
|
||||
import { vectorToBuffer } from '@/utils/tools';
|
||||
import { ModelDataStatusEnum } from '@/constants/redis';
|
||||
import { openaiCreateEmbedding, getOpenApiKey } from '../utils/openai';
|
||||
|
||||
export async function generateVector(next = false): Promise<any> {
|
||||
if (global.generatingVector && !next) return;
|
||||
global.generatingVector = true;
|
||||
|
||||
let dataId = null;
|
||||
try {
|
||||
const redis = await connectRedis();
|
||||
|
||||
@@ -17,7 +16,7 @@ export async function generateVector(next = false): Promise<any> {
|
||||
VecModelDataIdx,
|
||||
`@status:{${ModelDataStatusEnum.waiting}}`,
|
||||
{
|
||||
RETURN: ['q'],
|
||||
RETURN: ['q', 'userId'],
|
||||
LIMIT: {
|
||||
from: 0,
|
||||
size: 1
|
||||
@@ -31,30 +30,36 @@ export async function generateVector(next = false): Promise<any> {
|
||||
return;
|
||||
}
|
||||
|
||||
const dataItem: { id: string; q: string } = {
|
||||
const dataItem: { id: string; q: string; userId: string } = {
|
||||
id: searchRes.documents[0].id,
|
||||
q: String(searchRes.documents[0]?.value?.q || '')
|
||||
q: String(searchRes.documents[0]?.value?.q || ''),
|
||||
userId: String(searchRes.documents[0]?.value?.userId || '')
|
||||
};
|
||||
|
||||
// 获取 openapi Key
|
||||
const openAiKey = process.env.OPENAIKEY as string;
|
||||
dataId = dataItem.id;
|
||||
|
||||
// 获取 openai 请求实例
|
||||
const chatAPI = getOpenAIApi(openAiKey);
|
||||
// 获取 openapi Key
|
||||
let userApiKey, systemKey;
|
||||
try {
|
||||
const res = await getOpenApiKey(dataItem.userId);
|
||||
userApiKey = res.userApiKey;
|
||||
systemKey = res.systemKey;
|
||||
} catch (error: any) {
|
||||
if (error?.code === 501) {
|
||||
await redis.del(dataItem.id);
|
||||
throw new Error(error?.message);
|
||||
}
|
||||
|
||||
throw new Error('获取 openai key 失败');
|
||||
}
|
||||
|
||||
// 生成词向量
|
||||
const vector = await chatAPI
|
||||
.createEmbedding(
|
||||
{
|
||||
model: 'text-embedding-ada-002',
|
||||
input: dataItem.q
|
||||
},
|
||||
{
|
||||
timeout: 120000,
|
||||
httpsAgent
|
||||
}
|
||||
)
|
||||
.then((res) => res?.data?.data?.[0]?.embedding || []);
|
||||
const { vector } = await openaiCreateEmbedding({
|
||||
text: dataItem.q,
|
||||
userId: dataItem.userId,
|
||||
isPay: !userApiKey,
|
||||
apiKey: userApiKey || systemKey
|
||||
});
|
||||
|
||||
// 更新 redis 向量和状态数据
|
||||
await redis.sendCommand([
|
||||
@@ -70,23 +75,33 @@ export async function generateVector(next = false): Promise<any> {
|
||||
|
||||
console.log(`生成向量成功: ${dataItem.id}`);
|
||||
|
||||
setTimeout(() => {
|
||||
generateVector(true);
|
||||
}, 2000);
|
||||
generateVector(true);
|
||||
} catch (error: any) {
|
||||
console.log('error: 生成向量错误', error?.response?.statusText);
|
||||
!error?.response && console.log(error);
|
||||
// log
|
||||
if (error?.response) {
|
||||
console.log('openai error: 生成向量错误');
|
||||
console.log(error.response?.status, error.response?.statusText, error.response?.data);
|
||||
} else {
|
||||
console.log('生成向量错误:', error);
|
||||
}
|
||||
|
||||
if (dataId && error?.response?.data?.error?.type === 'insufficient_quota') {
|
||||
console.log('api 余额不足,删除 redis 模型数据');
|
||||
const redis = await connectRedis();
|
||||
redis.del(dataId);
|
||||
generateVector(true);
|
||||
return;
|
||||
}
|
||||
if (error?.response?.statusText === 'Too Many Requests') {
|
||||
console.log('生成向量次数限制,1分钟后尝试');
|
||||
// 限制次数,1分钟后再试
|
||||
setTimeout(() => {
|
||||
generateVector(true);
|
||||
}, 60000);
|
||||
return;
|
||||
}
|
||||
|
||||
setTimeout(() => {
|
||||
generateVector(true);
|
||||
}, 3000);
|
||||
}, 4000);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import { connectToDatabase, Bill, User } from '../mongo';
|
||||
import { modelList, ChatModelNameEnum } from '@/constants/model';
|
||||
import { encode } from 'gpt-token-utils';
|
||||
import { formatPrice } from '@/utils/user';
|
||||
import { BillTypeEnum } from '@/constants/user';
|
||||
import type { DataType } from '@/types/data';
|
||||
|
||||
export const pushChatBill = async ({
|
||||
@@ -23,8 +24,7 @@ export const pushChatBill = async ({
|
||||
// 计算 token 数量
|
||||
const tokens = encode(text);
|
||||
|
||||
console.log('text len: ', text.length);
|
||||
console.log('token len:', tokens.length);
|
||||
console.log(`chat generate success. text len: ${text.length}. token len: ${tokens.length}`);
|
||||
|
||||
if (isPay) {
|
||||
await connectToDatabase();
|
||||
@@ -34,7 +34,7 @@ export const pushChatBill = async ({
|
||||
// 计算价格
|
||||
const unitPrice = modelItem?.price || 5;
|
||||
const price = unitPrice * tokens.length;
|
||||
console.log(`chat bill, unit price: ${unitPrice}, price: ${formatPrice(price)}元`);
|
||||
console.log(`unit price: ${unitPrice}, price: ${formatPrice(price)}元`);
|
||||
|
||||
try {
|
||||
// 插入 Bill 记录
|
||||
@@ -82,18 +82,19 @@ export const pushSplitDataBill = async ({
|
||||
// 计算 token 数量
|
||||
const tokens = encode(text);
|
||||
|
||||
console.log('text len: ', text.length);
|
||||
console.log('token len:', tokens.length);
|
||||
console.log(
|
||||
`splitData generate success. text len: ${text.length}. token len: ${tokens.length}`
|
||||
);
|
||||
|
||||
if (isPay) {
|
||||
try {
|
||||
// 获取模型单价格, 都是用 gpt35 拆分
|
||||
const modelItem = modelList.find((item) => item.model === ChatModelNameEnum.GPT35);
|
||||
const unitPrice = modelItem?.price || 5;
|
||||
const unitPrice = modelItem?.price || 3;
|
||||
// 计算价格
|
||||
const price = unitPrice * tokens.length;
|
||||
|
||||
console.log(`splitData bill, price: ${formatPrice(price)}元`);
|
||||
console.log(`price: ${formatPrice(price)}元`);
|
||||
|
||||
// 插入 Bill 记录
|
||||
const res = await Bill.create({
|
||||
@@ -123,13 +124,11 @@ export const pushSplitDataBill = async ({
|
||||
export const pushGenerateVectorBill = async ({
|
||||
isPay,
|
||||
userId,
|
||||
text,
|
||||
type
|
||||
text
|
||||
}: {
|
||||
isPay: boolean;
|
||||
userId: string;
|
||||
text: string;
|
||||
type: DataType;
|
||||
}) => {
|
||||
await connectToDatabase();
|
||||
|
||||
@@ -139,24 +138,21 @@ export const pushGenerateVectorBill = async ({
|
||||
// 计算 token 数量
|
||||
const tokens = encode(text);
|
||||
|
||||
console.log('text len: ', text.length);
|
||||
console.log('token len:', tokens.length);
|
||||
console.log(`vector generate success. text len: ${text.length}. token len: ${tokens.length}`);
|
||||
|
||||
if (isPay) {
|
||||
try {
|
||||
// 获取模型单价格, 都是用 gpt35 拆分
|
||||
const modelItem = modelList.find((item) => item.model === ChatModelNameEnum.GPT35);
|
||||
const unitPrice = modelItem?.price || 5;
|
||||
const unitPrice = 1;
|
||||
// 计算价格
|
||||
const price = unitPrice * tokens.length;
|
||||
|
||||
console.log(`splitData bill, price: ${formatPrice(price)}元`);
|
||||
console.log(`price: ${formatPrice(price)}元`);
|
||||
|
||||
// 插入 Bill 记录
|
||||
const res = await Bill.create({
|
||||
userId,
|
||||
type,
|
||||
modelName: ChatModelNameEnum.GPT35,
|
||||
type: BillTypeEnum.vector,
|
||||
modelName: ChatModelNameEnum.VECTOR,
|
||||
textLen: text.length,
|
||||
tokenLen: tokens.length,
|
||||
price
|
||||
|
||||
@@ -16,7 +16,7 @@ const BillSchema = new Schema({
|
||||
},
|
||||
modelName: {
|
||||
type: String,
|
||||
enum: modelList.map((item) => item.model),
|
||||
enum: [...modelList.map((item) => item.model), 'text-embedding-ada-002'],
|
||||
required: true
|
||||
},
|
||||
chatId: {
|
||||
|
||||
@@ -29,7 +29,7 @@ export async function connectToDatabase(): Promise<void> {
|
||||
|
||||
generateQA();
|
||||
// generateAbstract();
|
||||
generateVector();
|
||||
generateVector(true);
|
||||
}
|
||||
|
||||
export * from './models/authCode';
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
import { ChatItemType } from '../types/chat';
|
||||
|
||||
/** In-memory store of chat messages, keyed by chat window id. */
export const chatWindows: Map<string, ChatItemType[]> = new Map();
|
||||
|
||||
/**
 * Get the messages stored for a chat window.
 *
 * @param id - chat window id used as the key in `chatWindows`
 * @returns the stored message array, or a new empty array when nothing is stored
 */
export const getWindowMessages = (id: string) => {
  const stored = chatWindows.get(id);
  return stored ? stored : [];
};
|
||||
|
||||
/**
 * Append a message to a chat window, creating the window's list if needed.
 *
 * @param id - chat window id used as the key in `chatWindows`
 * @param prompt - message to append
 * @returns the window's message array after the append (the same array that is stored)
 */
export const pushWindowMessage = (id: string, prompt: ChatItemType) => {
  let history = chatWindows.get(id);
  if (!history) {
    history = [];
  }
  history.push(prompt);
  // Always write back so a newly created list becomes the stored one.
  chatWindows.set(id, history);
  return history;
};
|
||||
|
||||
/**
 * Remove a chat window and all of its stored messages.
 *
 * @param id - chat window id used as the key in `chatWindows`
 */
export const deleteWindow = (id: string): void => {
  chatWindows.delete(id);
};
|
||||
@@ -1,5 +1,5 @@
|
||||
import { NextApiResponse } from 'next';
|
||||
import { openaiError, proxyError } from './errorCode';
|
||||
import { openaiError, openaiError2, proxyError } from './errorCode';
|
||||
|
||||
export interface ResponseType<T = any> {
|
||||
code: number;
|
||||
@@ -25,13 +25,19 @@ export const jsonRes = <T = any>(
|
||||
msg = error;
|
||||
} else if (proxyError[error?.code]) {
|
||||
msg = '服务器代理出错';
|
||||
} else if (openaiError2[error?.response?.data?.error?.type]) {
|
||||
msg = openaiError2[error?.response?.data?.error?.type];
|
||||
} else if (openaiError[error?.response?.statusText]) {
|
||||
msg = openaiError[error.response.statusText];
|
||||
}
|
||||
console.log('error->');
|
||||
console.log('code:', error.code);
|
||||
console.log('statusText:', error?.response?.statusText);
|
||||
console.log('msg:', msg);
|
||||
// request 时候报错
|
||||
if (error?.response) {
|
||||
console.log('statusText:', error?.response?.statusText);
|
||||
console.log('type:', error?.response?.data?.error?.type);
|
||||
}
|
||||
}
|
||||
|
||||
res.json({
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import { Configuration, OpenAIApi } from 'openai';
|
||||
import { Chat } from '../mongo';
|
||||
import type { ChatPopulate } from '@/types/mongoSchema';
|
||||
import { authToken, getOpenApiKey } from './tools';
|
||||
import { authToken } from './tools';
|
||||
import { getOpenApiKey } from './openai';
|
||||
|
||||
export const getOpenAIApi = (apiKey: string) => {
|
||||
const configuration = new Configuration({
|
||||
@@ -35,10 +36,7 @@ export const authChat = async (chatId: string, authorization?: string) => {
|
||||
}
|
||||
|
||||
// 获取 user 的 apiKey
|
||||
const { user, userApiKey, systemKey } = await getOpenApiKey(
|
||||
chat.userId as unknown as string,
|
||||
false
|
||||
);
|
||||
const { user, userApiKey, systemKey } = await getOpenApiKey(chat.userId as unknown as string);
|
||||
|
||||
// filter 掉被 deleted 的内容
|
||||
chat.content = chat.content.filter((item) => item.deleted !== true);
|
||||
|
||||
101
src/service/utils/openai.ts
Normal file
101
src/service/utils/openai.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
import axios from 'axios';
|
||||
import { getOpenAIApi } from '@/service/utils/chat';
|
||||
import { httpsAgent } from './tools';
|
||||
import { User } from '../models/user';
|
||||
import { formatPrice } from '@/utils/user';
|
||||
import { ChatModelNameEnum } from '@/constants/model';
|
||||
import { pushGenerateVectorBill } from '../events/pushBill';
|
||||
|
||||
/**
 * Build an OpenAI client from the user's own API key.
 *
 * Looks up the user, picks the account entry whose `type` is `'openai'`, and
 * wraps its `value` with `getOpenAIApi`.
 *
 * @param userId - id passed to `User.findById`
 * @returns `{ user, openai, apiKey }` when the user has an OpenAI key
 * @throws rejects with a string message when no key is found (also when the user does not exist)
 */
export const getUserApiOpenai = async (userId: string) => {
  const user = await User.findById(userId);

  const openaiAccount = user?.accounts?.find((account: any) => account.type === 'openai');
  const apiKey = openaiAccount?.value;

  if (apiKey) {
    return {
      user,
      openai: getOpenAIApi(apiKey),
      apiKey
    };
  }

  return Promise.reject('缺少ApiKey, 无法请求');
};
|
||||
|
||||
/**
 * Resolve which OpenAI key to use for a user.
 *
 * The user's own key wins. Otherwise the platform key from
 * `process.env.OPENAIKEY` is returned, provided the user's balance is positive.
 * Callers that end up on the platform key are expected to record a bill.
 *
 * @param userId - id passed to `User.findById`
 * @returns `{ user, userApiKey, systemKey }`; exactly one of the two keys is non-empty
 * @throws rejects with `{ code: 501, message }` when the user is missing or the balance is not positive
 */
export const getOpenApiKey = async (userId: string) => {
  const user = await User.findById(userId);
  if (!user) {
    return Promise.reject({
      code: 501,
      message: '找不到用户'
    });
  }

  const ownKey = user.accounts?.find((account: any) => account.type === 'openai')?.value;

  // The user brought their own key: no platform key, no balance check.
  if (ownKey) {
    return {
      user,
      userApiKey: ownKey,
      systemKey: ''
    };
  }

  // Falling back to the platform key requires a positive account balance.
  const balance = formatPrice(user.balance);
  if (balance <= 0) {
    return Promise.reject({
      code: 501,
      message: '账号余额不足'
    });
  }

  return {
    user,
    userApiKey: '',
    systemKey: process.env.OPENAIKEY as string
  };
};
|
||||
|
||||
/**
 * Create an embedding vector for `text` and record the usage bill.
 *
 * @param isPay - forwarded to `pushGenerateVectorBill`
 * @param userId - user the bill is recorded against
 * @param apiKey - OpenAI key used to build the client
 * @param text - input text to embed
 * @returns `{ vector, chatAPI }`: the embedding (an empty array when the response
 *          carries none) and the client that produced it, for reuse by the caller
 * @throws rejects with the underlying request error when the embedding call fails
 */
export const openaiCreateEmbedding = async ({
  isPay,
  userId,
  apiKey,
  text
}: {
  isPay: boolean;
  userId: string;
  apiKey: string;
  text: string;
}) => {
  const chatAPI = getOpenAIApi(apiKey);

  // Convert the input text into a vector.
  const response = await chatAPI.createEmbedding(
    {
      model: ChatModelNameEnum.VECTOR,
      input: text
    },
    {
      timeout: 60000,
      httpsAgent
    }
  );
  const vector = response?.data?.data?.[0]?.embedding || [];

  // Billing is fire-and-forget so it never delays the caller. It is an async
  // function, so its rejection must be handled here; otherwise a billing
  // failure surfaces as an unhandled promise rejection.
  pushGenerateVectorBill({
    isPay,
    userId,
    text
  }).catch((err) => {
    console.log('push generate vector bill error->', err);
  });

  return {
    vector,
    chatAPI
  };
};
|
||||
@@ -34,7 +34,7 @@ export const sendCode = (email: string, code: string, type: `${EmailTypeEnum}`)
|
||||
};
|
||||
mailTransport.sendMail(options, function (err, msg) {
|
||||
if (err) {
|
||||
console.log('error->', err);
|
||||
console.log('send email error->', err);
|
||||
reject('邮箱异常');
|
||||
} else {
|
||||
resolve('');
|
||||
@@ -53,7 +53,7 @@ export const sendTrainSucceed = (email: string, modelName: string) => {
|
||||
};
|
||||
mailTransport.sendMail(options, function (err, msg) {
|
||||
if (err) {
|
||||
console.log('error->', err);
|
||||
console.log('send email error->', err);
|
||||
reject('邮箱异常');
|
||||
} else {
|
||||
resolve('');
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
import crypto from 'crypto';
|
||||
import jwt from 'jsonwebtoken';
|
||||
import { User } from '../models/user';
|
||||
import tunnel from 'tunnel';
|
||||
import { formatPrice } from '@/utils/user';
|
||||
import { ChatItemType } from '@/types/chat';
|
||||
import { encode } from 'gpt-token-utils';
|
||||
import { getOpenAIApi } from '@/service/utils/chat';
|
||||
import axios from 'axios';
|
||||
|
||||
/* 密码加密 */
|
||||
export const hashPassword = (psw: string) => {
|
||||
@@ -56,90 +52,6 @@ export const httpsAgent =
|
||||
})
|
||||
: undefined;
|
||||
|
||||
/**
 * Check whether an OpenAI API key still has usable credit.
 *
 * Queries the OpenAI credit-grants endpoint with the key as a bearer token.
 *
 * @param apiKey - OpenAI API key to check
 * @returns `false` when `total_available` is at or below 0.2, otherwise `true`
 */
export const checkKeyGrant = async (apiKey: string) => {
  const requestConfig = {
    headers: {
      Authorization: `Bearer ${apiKey}`
    },
    httpsAgent
  };
  const grant = await axios.get(
    'https://api.openai.com/dashboard/billing/credit_grants',
    requestConfig
  );

  const exhausted = grant.data?.total_available <= 0.2;
  return !exhausted;
};
|
||||
|
||||
/**
 * Build an OpenAI client from the user's own API key, after a credit check.
 *
 * @param userId - id passed to `User.findById`
 * @returns `{ user, openai, apiKey }` when the key exists and `checkKeyGrant` passes
 * @throws rejects with a string message when no key is found, or with
 *         `{ code: 501, message }` when `checkKeyGrant` reports no remaining credit
 */
export const getUserApiOpenai = async (userId: string) => {
  const user = await User.findById(userId);

  const openaiAccount = user?.accounts?.find((account: any) => account.type === 'openai');
  const apiKey = openaiAccount?.value;
  if (!apiKey) {
    return Promise.reject('缺少ApiKey, 无法请求');
  }

  // Verify the key's remaining credit before handing out a client.
  const keyHasCredit = await checkKeyGrant(apiKey);
  if (!keyHasCredit) {
    return Promise.reject({
      code: 501,
      message: 'API 余额不足'
    });
  }

  return {
    user,
    openai: getOpenAIApi(apiKey),
    apiKey
  };
};
|
||||
|
||||
/**
 * Resolve which OpenAI key to use for a user.
 *
 * The user's own key wins, optionally verified with `checkKeyGrant`. Otherwise
 * the platform key from `process.env.OPENAIKEY` is returned, provided the
 * user's balance is positive. Callers on the platform key are expected to
 * record a bill.
 *
 * @param userId - id passed to `User.findById`
 * @param checkGrant - when true, verify the user's own key still has credit
 * @returns `{ user, userApiKey, systemKey }`; exactly one of the two keys is non-empty
 * @throws rejects with a string when the user is missing, or with
 *         `{ code: 501, message }` when the key or the account has no credit left
 */
export const getOpenApiKey = async (userId: string, checkGrant = false) => {
  const user = await User.findById(userId);
  if (!user) {
    return Promise.reject('找不到用户');
  }

  const ownKey = user.accounts?.find((account: any) => account.type === 'openai')?.value;

  if (ownKey) {
    // The credit check on the user's own key is opt-in.
    if (checkGrant && !(await checkKeyGrant(ownKey))) {
      return Promise.reject({
        code: 501,
        message: 'API 余额不足'
      });
    }

    return {
      user,
      userApiKey: ownKey,
      systemKey: ''
    };
  }

  // Falling back to the platform key requires a positive account balance.
  const balance = formatPrice(user.balance);
  if (balance <= 0) {
    return Promise.reject({
      code: 501,
      message: '账号余额不足'
    });
  }

  return {
    user,
    userApiKey: '',
    systemKey: process.env.OPENAIKEY as string
  };
};
|
||||
|
||||
/* tokens 截断 */
|
||||
export const openaiChatFilter = (prompts: ChatItemType[], maxTokens: number) => {
|
||||
let res: ChatItemType[] = [];
|
||||
|
||||
@@ -75,8 +75,8 @@ export const readPdfContent = (file: File) =>
|
||||
const readPDFPage = async (doc: any, pageNo: number) => {
|
||||
const page = await doc.getPage(pageNo);
|
||||
const tokenizedText = await page.getTextContent();
|
||||
const pageText = tokenizedText.items.map((token: any) => token.str).join('');
|
||||
return pageText.replaceAll(/\s+/g, '\n');
|
||||
const pageText = tokenizedText.items.map((token: any) => token.str).join(' ');
|
||||
return pageText;
|
||||
};
|
||||
|
||||
let reader = new FileReader();
|
||||
@@ -109,11 +109,16 @@ export const readDocContent = (file: File) =>
|
||||
new Promise<string>((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.readAsArrayBuffer(file);
|
||||
reader.onload = ({ target }) => {
|
||||
reader.onload = async ({ target }) => {
|
||||
if (!target?.result) return reject('读取 doc 文件失败');
|
||||
return mammoth.extractRawText({ arrayBuffer: target.result as ArrayBuffer }).then((res) => {
|
||||
resolve(res.value);
|
||||
});
|
||||
try {
|
||||
const res = await mammoth.extractRawText({
|
||||
arrayBuffer: target.result as ArrayBuffer
|
||||
});
|
||||
resolve(res?.value);
|
||||
} catch (error) {
|
||||
reject('读取 doc 文件失败, 请转换成 PDF');
|
||||
}
|
||||
};
|
||||
reader.onerror = (err) => {
|
||||
console.log('error doc read:', err);
|
||||
@@ -129,15 +134,7 @@ export const vectorToBuffer = (vector: number[]) => {
|
||||
|
||||
return buffer;
|
||||
};
|
||||
/**
 * Decode a 'binary' (latin1) string of raw float32 bytes into a number array.
 *
 * NOTE(review): presumably the inverse of `vectorToBuffer` followed by
 * `toString('binary')`; confirm against the writer side.
 *
 * @param bufferStr - string in which each character holds one byte of float32 data
 * @returns the decoded floats; trailing bytes that do not fill a whole float32 are ignored
 */
export const BufferToVector = (bufferStr: string) => {
  // Decode the argument itself, not a literal containing its name.
  const bytes = Buffer.from(bufferStr, 'binary');
  const count = Math.floor(bytes.byteLength / Float32Array.BYTES_PER_ELEMENT);

  // Copy the bytes into a fresh Float32Array's backing store. A Buffer may sit
  // at an unaligned offset inside a shared pool, so viewing it in place can
  // throw; passing the Buffer itself to the constructor would copy byte
  // values element-wise instead of reinterpreting them.
  const floats = new Float32Array(count);
  new Uint8Array(floats.buffer).set(
    bytes.subarray(0, count * Float32Array.BYTES_PER_ELEMENT)
  );

  return Array.from(floats);
};
|
||||
|
||||
export function formatVector(vector: number[]) {
|
||||
let formattedVector = vector.slice(0, 1536); // 截取前1536个元素
|
||||
if (vector.length > 1536) {
|
||||
|
||||
Reference in New Issue
Block a user