perf: 知识库数据结构
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { createParser, ParsedEvent, ReconnectInterval } from 'eventsource-parser';
|
||||
import { connectToDatabase, ModelData } from '@/service/mongo';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { getOpenAIApi, authChat } from '@/service/utils/chat';
|
||||
import { httpsAgent, openaiChatFilter, systemPromptFilter } from '@/service/utils/tools';
|
||||
import { ChatCompletionRequestMessage, ChatCompletionRequestMessageRoleEnum } from 'openai';
|
||||
@@ -11,7 +11,7 @@ import { PassThrough } from 'stream';
|
||||
import { modelList } from '@/constants/model';
|
||||
import { pushChatBill } from '@/service/events/pushBill';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
import { VecModelDataIndex } from '@/constants/redis';
|
||||
import { VecModelDataPrefix } from '@/constants/redis';
|
||||
import { vectorToBuffer } from '@/utils/tools';
|
||||
|
||||
/* 发送提示词 */
|
||||
@@ -73,17 +73,17 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
)
|
||||
.then((res) => res?.data?.data?.[0]?.embedding || []);
|
||||
|
||||
// 搜索系统提示词, 按相似度从 redis 中搜出前3条不同 dataId 的数据
|
||||
// 搜索系统提示词, 按相似度从 redis 中搜出相关的 q 和 text
|
||||
const redisData: any[] = await redis.sendCommand([
|
||||
'FT.SEARCH',
|
||||
`idx:${VecModelDataIndex}:hash`,
|
||||
`idx:${VecModelDataPrefix}:hash`,
|
||||
`@modelId:{${String(
|
||||
chat.modelId._id
|
||||
)}} @vector:[VECTOR_RANGE 0.15 $blob]=>{$YIELD_DISTANCE_AS: score}`,
|
||||
// `@modelId:{${String(chat.modelId._id)}}=>[KNN 10 @vector $blob AS score]`,
|
||||
'RETURN',
|
||||
'1',
|
||||
'dataId',
|
||||
'text',
|
||||
'SORTBY',
|
||||
'score',
|
||||
'PARAMS',
|
||||
@@ -97,42 +97,28 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
||||
'2'
|
||||
]);
|
||||
|
||||
// 格式化响应值,获取去重后的id
|
||||
let formatIds = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
|
||||
// 格式化响应值,获取 qa
|
||||
const formatRedisPrompt = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
|
||||
.map((i) => {
|
||||
if (!redisData[i] || !redisData[i][1]) return '';
|
||||
return redisData[i][1];
|
||||
if (!redisData[i]) return '';
|
||||
const text = (redisData[i][1] as string) || '';
|
||||
|
||||
if (!text) return '';
|
||||
|
||||
return text;
|
||||
})
|
||||
.filter((item) => item);
|
||||
formatIds = Array.from(new Set(formatIds));
|
||||
|
||||
if (formatIds.length === 0) {
|
||||
if (formatRedisPrompt.length === 0) {
|
||||
throw new Error('对不起,我没有找到你的问题');
|
||||
}
|
||||
|
||||
// 从 mongo 中取出原文作为提示词
|
||||
const textArr = (
|
||||
await Promise.all(
|
||||
[2, 4, 6, 8, 10, 12, 14, 16, 18, 20].map((i) => {
|
||||
if (!redisData[i] || !redisData[i][1]) return '';
|
||||
return ModelData.findById(redisData[i][1])
|
||||
.select('text q')
|
||||
.then((res) => {
|
||||
if (!res) return '';
|
||||
// const questions = res.q.map((item) => item.text).join(' ');
|
||||
const answer = res.text;
|
||||
return `${answer}`;
|
||||
});
|
||||
})
|
||||
)
|
||||
).filter((item) => item);
|
||||
|
||||
// textArr 筛选,最多 3000 tokens
|
||||
const systemPrompt = systemPromptFilter(textArr, 3400);
|
||||
const systemPrompt = systemPromptFilter(formatRedisPrompt, 3400);
|
||||
|
||||
prompts.unshift({
|
||||
obj: 'SYSTEM',
|
||||
value: `${model.systemPrompt}。 我的知识库: "${systemPrompt}"`
|
||||
value: `${model.systemPrompt} 我的知识库: "${systemPrompt}"`
|
||||
});
|
||||
|
||||
// 控制在 tokens 数量,防止超出
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, ModelData } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
import { VecModelDataIndex } from '@/constants/redis';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -23,25 +21,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
// 凭证校验
|
||||
const userId = await authToken(authorization);
|
||||
|
||||
await connectToDatabase();
|
||||
const redis = await connectRedis();
|
||||
|
||||
const data = await ModelData.findById(dataId);
|
||||
|
||||
await ModelData.deleteOne({
|
||||
_id: dataId,
|
||||
userId
|
||||
});
|
||||
|
||||
// 删除 redis 数据
|
||||
data?.q.forEach(async (item) => {
|
||||
try {
|
||||
await redis.json.del(`${VecModelDataIndex}:${item.id}`);
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
});
|
||||
|
||||
// 校验是否为该用户的数据
|
||||
const dataItemUserId = await redis.hGet(dataId, 'userId');
|
||||
if (dataItemUserId !== userId) {
|
||||
throw new Error('无权操作');
|
||||
}
|
||||
// 删除
|
||||
await redis.del(dataId);
|
||||
jsonRes(res);
|
||||
} catch (err) {
|
||||
console.log(err);
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, ModelData } from '@/service/mongo';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
import { VecModelDataIdx } from '@/constants/redis';
|
||||
import { SearchOptions } from 'redis';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -32,24 +35,34 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
const userId = await authToken(authorization);
|
||||
|
||||
await connectToDatabase();
|
||||
const redis = await connectRedis();
|
||||
|
||||
const data = await ModelData.find({
|
||||
modelId,
|
||||
userId
|
||||
})
|
||||
.sort({ _id: -1 }) // 按照创建时间倒序排列
|
||||
.skip((pageNum - 1) * pageSize)
|
||||
.limit(pageSize);
|
||||
// 从 redis 中获取数据
|
||||
const searchRes = await redis.ft.search(
|
||||
VecModelDataIdx,
|
||||
`@modelId:{${modelId}} @userId:{${userId}}`,
|
||||
{
|
||||
RETURN: ['q', 'text', 'status'],
|
||||
LIMIT: {
|
||||
from: (pageNum - 1) * pageSize,
|
||||
size: pageSize
|
||||
},
|
||||
SORTBY: {
|
||||
BY: 'modelId',
|
||||
DIRECTION: 'DESC'
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
jsonRes(res, {
|
||||
data: {
|
||||
pageNum,
|
||||
pageSize,
|
||||
data,
|
||||
total: await ModelData.countDocuments({
|
||||
modelId,
|
||||
userId
|
||||
})
|
||||
data: searchRes.documents.map((item) => ({
|
||||
id: item.id,
|
||||
...item.value
|
||||
})),
|
||||
total: searchRes.total
|
||||
}
|
||||
});
|
||||
} catch (err) {
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, ModelData, Model } from '@/service/mongo';
|
||||
import { connectToDatabase, Model } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { ModelDataSchema } from '@/types/mongoSchema';
|
||||
import { generateVector } from '@/service/events/generateVector';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
import { VecModelDataPrefix, ModelDataStatusEnum } from '@/constants/redis';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -25,6 +27,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
const userId = await authToken(authorization);
|
||||
|
||||
await connectToDatabase();
|
||||
const redis = await connectRedis();
|
||||
|
||||
// 验证是否是该用户的 model
|
||||
const model = await Model.findOne({
|
||||
@@ -36,19 +39,29 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
throw new Error('无权操作该模型');
|
||||
}
|
||||
|
||||
// push data
|
||||
await ModelData.insertMany(
|
||||
data.map((item) => ({
|
||||
...item,
|
||||
modelId,
|
||||
userId
|
||||
}))
|
||||
const insertRes = await Promise.allSettled(
|
||||
data.map((item) => {
|
||||
return redis.sendCommand([
|
||||
'HMSET',
|
||||
`${VecModelDataPrefix}:${item.q.id}`,
|
||||
'userId',
|
||||
userId,
|
||||
'modelId',
|
||||
modelId,
|
||||
'q',
|
||||
item.q.text,
|
||||
'text',
|
||||
item.text,
|
||||
'status',
|
||||
ModelDataStatusEnum.waiting
|
||||
]);
|
||||
})
|
||||
);
|
||||
|
||||
generateVector(true);
|
||||
|
||||
jsonRes(res, {
|
||||
data: model
|
||||
data: insertRes.filter((item) => item.status === 'rejected').length
|
||||
});
|
||||
} catch (err) {
|
||||
jsonRes(res, {
|
||||
|
||||
78
src/pages/api/model/data/pushModelDataJson.ts
Normal file
78
src/pages/api/model/data/pushModelDataJson.ts
Normal file
@@ -0,0 +1,78 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, Model } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { generateVector } from '@/service/events/generateVector';
|
||||
import { vectorToBuffer, formatVector } from '@/utils/tools';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
import { VecModelDataPrefix, ModelDataStatusEnum } from '@/constants/redis';
|
||||
import { customAlphabet } from 'nanoid';
|
||||
// ID generator: produces 12-character IDs drawn from lowercase letters and digits.
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
|
||||
|
||||
/**
 * API route: bulk-import prompt/completion pairs into a model's knowledge base in Redis.
 *
 * Request body: `modelId` and `data` (an array of `{ prompt, completion, vector? }`).
 * The `authorization` header is required.
 *
 * Each item is written as a Redis hash under `${VecModelDataPrefix}:<generated id>`.
 * Items that already carry a vector are stored with status `ready`; the others are
 * stored with status `waiting`.
 *
 * Responds with the number of items that failed to insert. Any error thrown before
 * or after the inserts is returned with code 500.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    const { modelId, data } = req.body as {
      modelId: string;
      data: { prompt: string; completion: string; vector?: number[] }[];
    };
    const { authorization } = req.headers;

    if (!authorization) {
      throw new Error('无权操作');
    }

    if (!modelId || !Array.isArray(data)) {
      throw new Error('缺少参数');
    }

    // Validate the credential and resolve the calling user.
    const userId = await authToken(authorization);

    await connectToDatabase();
    const redis = await connectRedis();

    // Make sure the model belongs to the calling user.
    const model = await Model.findOne({
      _id: modelId,
      userId
    });

    if (!model) {
      throw new Error('无权操作该模型');
    }

    // Insert into Redis. The mapper is async on purpose: a synchronous throw while
    // building the command (e.g. encoding a malformed vector) then becomes a rejection
    // for that single item instead of aborting the whole request.
    const insertRedisRes = await Promise.allSettled(
      data.map(async (item) => {
        const vector = item.vector;

        return redis.sendCommand([
          'HMSET',
          `${VecModelDataPrefix}:${nanoid()}`,
          'userId',
          userId,
          'modelId',
          String(modelId),
          ...(vector ? ['vector', vectorToBuffer(formatVector(vector))] : []),
          'q',
          item.prompt,
          'text',
          item.completion,
          'status',
          vector ? ModelDataStatusEnum.ready : ModelDataStatusEnum.waiting
        ]);
      })
    );

    // Fired without awaiting; presumably processes the items stored as `waiting` — confirm
    // against generateVector's implementation.
    generateVector(true);

    // Report how many inserts were rejected.
    jsonRes(res, {
      data: insertRedisRes.filter((item) => item.status === 'rejected').length
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
|
||||
@@ -1,57 +0,0 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, DataItem, ModelData } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { customAlphabet } from 'nanoid';
|
||||
// ID generator: produces 12-character IDs drawn from lowercase letters and digits.
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
|
||||
|
||||
/**
 * API route: copy the split results of the given data sets into a model's data collection.
 *
 * Request body: `dataIds` (array of data-set ids) and `modelId`.
 * For every id, the caller's `DataItem` documents are loaded (fields `result` and `text`),
 * then one `ModelData` document is inserted per item, with each `result[].q` stored as a
 * question entry carrying a freshly generated id.
 *
 * Responds with the loaded data items; any thrown error is returned with code 500.
 *
 * NOTE(review): unlike sibling routes, ownership of `modelId` is not verified here — confirm
 * whether that is intended.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const { dataIds, modelId } = req.body as { dataIds: string[]; modelId: string };

    // Reject a missing model id or a non-array `dataIds` up front, so the caller gets the
    // parameter error rather than an unrelated TypeError from `.map` further down.
    if (!modelId || !Array.isArray(dataIds)) {
      throw new Error('参数错误');
    }
    await connectToDatabase();

    const { authorization } = req.headers;

    const userId = await authToken(authorization);

    // Load the caller's items for every requested data set and flatten them into one list.
    const dataItems = (
      await Promise.all(
        dataIds.map((dataId) =>
          DataItem.find<{ _id: string; result: { q: string }[]; text: string }>(
            {
              userId,
              dataId
            },
            'result text'
          )
        )
      )
    ).flat();

    // Push the data into the model's data collection.
    await ModelData.insertMany(
      dataItems.map((item) => ({
        modelId: modelId,
        userId,
        text: item.text,
        q: item.result.map((question) => ({
          id: nanoid(),
          text: question.q
        }))
      }))
    );

    jsonRes(res, {
      data: dataItems
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
|
||||
@@ -1,7 +1,7 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, ModelData } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
try {
|
||||
@@ -22,17 +22,16 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
// 凭证校验
|
||||
const userId = await authToken(authorization);
|
||||
|
||||
await connectToDatabase();
|
||||
const redis = await connectRedis();
|
||||
|
||||
await ModelData.updateOne(
|
||||
{
|
||||
_id: dataId,
|
||||
userId
|
||||
},
|
||||
{
|
||||
text
|
||||
}
|
||||
);
|
||||
// 校验是否为该用户的数据
|
||||
const dataItemUserId = await redis.hGet(dataId, 'userId');
|
||||
if (dataItemUserId !== userId) {
|
||||
throw new Error('无权操作');
|
||||
}
|
||||
|
||||
// 更新
|
||||
await redis.hSet(dataId, 'text', text);
|
||||
|
||||
jsonRes(res);
|
||||
} catch (err) {
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { Chat, Model, Training, connectToDatabase, ModelData } from '@/service/mongo';
|
||||
import { Chat, Model, Training, connectToDatabase } from '@/service/mongo';
|
||||
import { authToken, getUserApiOpenai } from '@/service/utils/tools';
|
||||
import { TrainingStatusEnum } from '@/constants/model';
|
||||
import { getOpenAIApi } from '@/service/utils/chat';
|
||||
import { TrainingItemType } from '@/types/training';
|
||||
import { httpsAgent } from '@/service/utils/tools';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
import { VecModelDataIndex } from '@/constants/redis';
|
||||
import { VecModelDataIdx } from '@/constants/redis';
|
||||
|
||||
/* 获取我的模型 */
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
@@ -26,39 +25,38 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
// 凭证校验
|
||||
const userId = await authToken(authorization);
|
||||
|
||||
// 验证是否是该用户的 model
|
||||
const model = await Model.findOne({
|
||||
_id: modelId,
|
||||
userId
|
||||
});
|
||||
|
||||
if (!model) {
|
||||
throw new Error('无权操作该模型');
|
||||
}
|
||||
|
||||
await connectToDatabase();
|
||||
const redis = await connectRedis();
|
||||
|
||||
const modelDataList = await ModelData.find({
|
||||
// 获取 redis 中模型关联的所有数据
|
||||
const searchRes = await redis.ft.search(
|
||||
VecModelDataIdx,
|
||||
`@modelId:{${modelId}} @userId:{${userId}}`,
|
||||
{
|
||||
LIMIT: {
|
||||
from: 0,
|
||||
size: 10000
|
||||
}
|
||||
}
|
||||
);
|
||||
// 删除 redis 内容
|
||||
await Promise.all(searchRes.documents.map((item) => redis.del(item.id)));
|
||||
|
||||
// 删除对应的聊天
|
||||
await Chat.deleteMany({
|
||||
modelId
|
||||
});
|
||||
|
||||
// 删除 redis
|
||||
modelDataList?.forEach((modelData) =>
|
||||
modelData.q.forEach(async (item) => {
|
||||
try {
|
||||
await redis.json.del(`${VecModelDataIndex}:${item.id}`);
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
let requestQueue: any[] = [];
|
||||
// 删除对应的聊天
|
||||
requestQueue.push(
|
||||
Chat.deleteMany({
|
||||
modelId
|
||||
})
|
||||
);
|
||||
|
||||
// 删除数据集
|
||||
requestQueue.push(
|
||||
ModelData.deleteMany({
|
||||
modelId
|
||||
})
|
||||
);
|
||||
|
||||
// 查看是否正在训练
|
||||
const training: TrainingItemType | null = await Training.findOne({
|
||||
modelId,
|
||||
@@ -78,21 +76,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
}
|
||||
|
||||
// 删除对应训练记录
|
||||
requestQueue.push(
|
||||
Training.deleteMany({
|
||||
modelId
|
||||
})
|
||||
);
|
||||
await Training.deleteMany({
|
||||
modelId
|
||||
});
|
||||
|
||||
// 删除模型
|
||||
requestQueue.push(
|
||||
Model.deleteOne({
|
||||
_id: modelId,
|
||||
userId
|
||||
})
|
||||
);
|
||||
|
||||
await Promise.all(requestQueue);
|
||||
await Model.deleteOne({
|
||||
_id: modelId,
|
||||
userId
|
||||
});
|
||||
|
||||
jsonRes(res);
|
||||
} catch (err) {
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@/service/response';
|
||||
import { connectToDatabase, Bill } from '@/service/mongo';
|
||||
import { authToken } from '@/service/utils/tools';
|
||||
import type { BillSchema } from '@/types/mongoSchema';
|
||||
import { VecModelDataIndex } from '@/constants/redis';
|
||||
import { connectRedis } from '@/service/redis';
|
||||
import { vectorToBuffer } from '@/utils/tools';
|
||||
|
||||
// Zero padding appended to both sample vectors (12 leading components + 1524 zeros = 1536 entries).
const zeroPadding: number[] = new Array(1524).fill(0);

// Sample vector used by the development-only test route.
let vectorData = [
  -0.025028639,
  -0.010407282,
  0.026523087,
  -0.0107438695,
  -0.006967359,
  0.010043768,
  -0.012043097,
  0.008724345,
  -0.028919589,
  -0.0117738275,
  0.0050690062,
  0.02961969,
  ...zeroPadding
];

// Second sample vector; it differs from the first in the sign of several leading components.
let vectorData2 = [
  0.025028639,
  0.010407282,
  0.026523087,
  0.0107438695,
  -0.006967359,
  0.010043768,
  -0.012043097,
  0.008724345,
  0.028919589,
  0.0117738275,
  0.0050690062,
  0.02961969,
  ...zeroPadding
];
|
||||
|
||||
/**
 * Development-only test route for Redis vector search.
 *
 * Writes one sample hash (`model:data:333`) containing a vector, a model id and a data id,
 * then runs an `FT.SEARCH` vector-range query against `idx:model:data:hash` and returns the
 * raw reply. Outside the development environment it responds with a 500 error.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const isDevelopment = process.env.NODE_ENV === 'development';
    if (!isDevelopment) {
      throw new Error('不是开发环境');
    }
    await connectToDatabase();

    const client = await connectRedis();

    // Store the sample hash that the search below is expected to match.
    await client.sendCommand([
      'HMSET',
      'model:data:333',
      'vector',
      vectorToBuffer(vectorData2),
      'modelId',
      '1133',
      'dataId',
      'safadfa'
    ]);

    // Vector-range search (radius 0.15) restricted to modelId 1133, sorted by the yielded distance.
    const searchReply = await client.sendCommand([
      'FT.SEARCH',
      'idx:model:data:hash',
      '@modelId:{1133} @vector:[VECTOR_RANGE 0.15 $blob]=>{$YIELD_DISTANCE_AS: score}',
      'RETURN',
      '2',
      'modelId',
      'dataId',
      'PARAMS',
      '2',
      'blob',
      vectorToBuffer(vectorData2),
      'SORTBY',
      'score',
      'DIALECT',
      '2'
    ]);

    jsonRes(res, { data: searchReply });
  } catch (err) {
    jsonRes(res, { code: 500, error: err });
  }
}
|
||||
Reference in New Issue
Block a user