perf: 知识库数据结构

This commit is contained in:
archer
2023-04-01 22:31:56 +08:00
parent 5759cbeae0
commit ae4243b522
26 changed files with 611 additions and 518 deletions

View File

@@ -1,6 +1,6 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { createParser, ParsedEvent, ReconnectInterval } from 'eventsource-parser';
import { connectToDatabase, ModelData } from '@/service/mongo';
import { connectToDatabase } from '@/service/mongo';
import { getOpenAIApi, authChat } from '@/service/utils/chat';
import { httpsAgent, openaiChatFilter, systemPromptFilter } from '@/service/utils/tools';
import { ChatCompletionRequestMessage, ChatCompletionRequestMessageRoleEnum } from 'openai';
@@ -11,7 +11,7 @@ import { PassThrough } from 'stream';
import { modelList } from '@/constants/model';
import { pushChatBill } from '@/service/events/pushBill';
import { connectRedis } from '@/service/redis';
import { VecModelDataIndex } from '@/constants/redis';
import { VecModelDataPrefix } from '@/constants/redis';
import { vectorToBuffer } from '@/utils/tools';
/* 发送提示词 */
@@ -73,17 +73,17 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
)
.then((res) => res?.data?.data?.[0]?.embedding || []);
// 搜索系统提示词, 按相似度从 redis 中搜出前3条不同 dataId 的数据
// 搜索系统提示词, 按相似度从 redis 中搜出相关的 q 和 text
const redisData: any[] = await redis.sendCommand([
'FT.SEARCH',
`idx:${VecModelDataIndex}:hash`,
`idx:${VecModelDataPrefix}:hash`,
`@modelId:{${String(
chat.modelId._id
)}} @vector:[VECTOR_RANGE 0.15 $blob]=>{$YIELD_DISTANCE_AS: score}`,
// `@modelId:{${String(chat.modelId._id)}}=>[KNN 10 @vector $blob AS score]`,
'RETURN',
'1',
'dataId',
'text',
'SORTBY',
'score',
'PARAMS',
@@ -97,42 +97,28 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
'2'
]);
// 格式化响应值,获取去重后的id
let formatIds = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
// 格式化响应值,获取 qa
const formatRedisPrompt = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
.map((i) => {
if (!redisData[i] || !redisData[i][1]) return '';
return redisData[i][1];
if (!redisData[i]) return '';
const text = (redisData[i][1] as string) || '';
if (!text) return '';
return text;
})
.filter((item) => item);
formatIds = Array.from(new Set(formatIds));
if (formatIds.length === 0) {
if (formatRedisPrompt.length === 0) {
throw new Error('对不起,我没有找到你的问题');
}
// 从 mongo 中取出原文作为提示词
const textArr = (
await Promise.all(
[2, 4, 6, 8, 10, 12, 14, 16, 18, 20].map((i) => {
if (!redisData[i] || !redisData[i][1]) return '';
return ModelData.findById(redisData[i][1])
.select('text q')
.then((res) => {
if (!res) return '';
// const questions = res.q.map((item) => item.text).join(' ');
const answer = res.text;
return `${answer}`;
});
})
)
).filter((item) => item);
// textArr 筛选,最多 3000 tokens
const systemPrompt = systemPromptFilter(textArr, 3400);
const systemPrompt = systemPromptFilter(formatRedisPrompt, 3400);
prompts.unshift({
obj: 'SYSTEM',
value: `${model.systemPrompt} 我的知识库: "${systemPrompt}"`
value: `${model.systemPrompt} 我的知识库: "${systemPrompt}"`
});
// 控制在 tokens 数量,防止超出

View File

@@ -1,9 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, ModelData } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import { connectRedis } from '@/service/redis';
import { VecModelDataIndex } from '@/constants/redis';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -23,25 +21,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// 凭证校验
const userId = await authToken(authorization);
await connectToDatabase();
const redis = await connectRedis();
const data = await ModelData.findById(dataId);
await ModelData.deleteOne({
_id: dataId,
userId
});
// 删除 redis 数据
data?.q.forEach(async (item) => {
try {
await redis.json.del(`${VecModelDataIndex}:${item.id}`);
} catch (error) {
console.log(error);
}
});
// 校验是否为该用户的数据
const dataItemUserId = await redis.hGet(dataId, 'userId');
if (dataItemUserId !== userId) {
throw new Error('无权操作');
}
// 删除
await redis.del(dataId);
jsonRes(res);
} catch (err) {
console.log(err);

View File

@@ -1,7 +1,10 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, ModelData } from '@/service/mongo';
import { connectToDatabase } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import { connectRedis } from '@/service/redis';
import { VecModelDataIdx } from '@/constants/redis';
import { SearchOptions } from 'redis';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -32,24 +35,34 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const userId = await authToken(authorization);
await connectToDatabase();
const redis = await connectRedis();
const data = await ModelData.find({
modelId,
userId
})
.sort({ _id: -1 }) // 按照创建时间倒序排列
.skip((pageNum - 1) * pageSize)
.limit(pageSize);
// 从 redis 中获取数据
const searchRes = await redis.ft.search(
VecModelDataIdx,
`@modelId:{${modelId}} @userId:{${userId}}`,
{
RETURN: ['q', 'text', 'status'],
LIMIT: {
from: (pageNum - 1) * pageSize,
size: pageSize
},
SORTBY: {
BY: 'modelId',
DIRECTION: 'DESC'
}
}
);
jsonRes(res, {
data: {
pageNum,
pageSize,
data,
total: await ModelData.countDocuments({
modelId,
userId
})
data: searchRes.documents.map((item) => ({
id: item.id,
...item.value
})),
total: searchRes.total
}
});
} catch (err) {

View File

@@ -1,9 +1,11 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, ModelData, Model } from '@/service/mongo';
import { connectToDatabase, Model } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import { ModelDataSchema } from '@/types/mongoSchema';
import { generateVector } from '@/service/events/generateVector';
import { connectRedis } from '@/service/redis';
import { VecModelDataPrefix, ModelDataStatusEnum } from '@/constants/redis';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -25,6 +27,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const userId = await authToken(authorization);
await connectToDatabase();
const redis = await connectRedis();
// 验证是否是该用户的 model
const model = await Model.findOne({
@@ -36,19 +39,29 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
throw new Error('无权操作该模型');
}
// push data
await ModelData.insertMany(
data.map((item) => ({
...item,
modelId,
userId
}))
const insertRes = await Promise.allSettled(
data.map((item) => {
return redis.sendCommand([
'HMSET',
`${VecModelDataPrefix}:${item.q.id}`,
'userId',
userId,
'modelId',
modelId,
'q',
item.q.text,
'text',
item.text,
'status',
ModelDataStatusEnum.waiting
]);
})
);
generateVector(true);
jsonRes(res, {
data: model
data: insertRes.filter((item) => item.status === 'rejected').length
});
} catch (err) {
jsonRes(res, {

View File

@@ -0,0 +1,78 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, Model } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import { generateVector } from '@/service/events/generateVector';
import { vectorToBuffer, formatVector } from '@/utils/tools';
import { connectRedis } from '@/service/redis';
import { VecModelDataPrefix, ModelDataStatusEnum } from '@/constants/redis';
import { customAlphabet } from 'nanoid';
// Generator for 12-character ids (lowercase letters and digits) used as the redis key suffix.
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);

/**
 * Insert prompt/completion pairs for a model directly into redis.
 *
 * Request body:
 *  - `modelId`: id of the model the data belongs to.
 *  - `data`: array of `{ prompt, completion, vector? }`.
 *
 * Each item is written with HMSET as a hash under `${VecModelDataPrefix}:<nanoid>`
 * with the fields `userId`, `modelId`, `q` (the prompt), `text` (the completion),
 * `status`, and `vector` when one was supplied. Items with a usable vector are
 * marked `ready`; the others are marked `waiting`.
 *
 * Response: `data` is the number of items whose insert failed. Any error thrown
 * before or after the inserts is reported through `jsonRes` with code 500.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    const { modelId, data } = req.body as {
      modelId: string;
      data: { prompt: string; completion: string; vector?: number[] }[];
    };
    const { authorization } = req.headers;

    if (!authorization) {
      throw new Error('无权操作');
    }

    if (!modelId || !Array.isArray(data)) {
      throw new Error('缺少参数');
    }

    // Credential check.
    const userId = await authToken(authorization);

    await connectToDatabase();
    const redis = await connectRedis();

    // Make sure the model belongs to this user.
    const model = await Model.findOne({
      _id: modelId,
      userId
    });

    if (!model) {
      throw new Error('无权操作该模型');
    }

    // Insert into redis. The callback is async so that a synchronous throw for one
    // malformed item becomes a rejected entry (counted below) instead of aborting
    // the whole request after earlier items were already written.
    const insertRedisRes = await Promise.allSettled(
      data.map(async (item) => {
        // An empty array is truthy; treat it as "no vector" so the item is not
        // marked ready without a real embedding.
        const vector =
          Array.isArray(item.vector) && item.vector.length > 0 ? item.vector : undefined;

        return redis.sendCommand([
          'HMSET',
          `${VecModelDataPrefix}:${nanoid()}`,
          'userId',
          userId,
          'modelId',
          String(modelId),
          ...(vector ? ['vector', vectorToBuffer(formatVector(vector))] : []),
          'q',
          item.prompt,
          'text',
          item.completion,
          'status',
          vector ? ModelDataStatusEnum.ready : ModelDataStatusEnum.waiting
        ]);
      })
    );

    // Not awaited. NOTE(review): presumably starts background vector generation
    // for the items stored as `waiting` — confirm against generateVector.
    generateVector(true);

    jsonRes(res, {
      data: insertRedisRes.filter((item) => item.status === 'rejected').length
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

View File

@@ -1,57 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, DataItem, ModelData } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import { customAlphabet } from 'nanoid';
// Generator for 12-character ids (lowercase letters and digits) assigned to each question.
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);

/**
 * Copy split data items into a model's data set.
 *
 * Request body: `dataIds` (ids whose DataItem records are loaded for the
 * authenticated user) and `modelId` (the model the new ModelData records are
 * attached to). Every loaded item becomes one ModelData document holding the
 * item's text and one `{ id, text }` question entry per element of its `result`.
 *
 * Response: `data` is the list of loaded data items. Any thrown error is
 * reported through `jsonRes` with code 500.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const { dataIds, modelId } = req.body as { dataIds: string[]; modelId: string };

    if (!dataIds) {
      throw new Error('参数错误');
    }

    await connectToDatabase();

    const { authorization } = req.headers;
    const userId = await authToken(authorization);

    // One query per data id, all issued in parallel and flattened into one list.
    const pendingQueries = dataIds.map((dataId) =>
      DataItem.find<{ _id: string; result: { q: string }[]; text: string }>(
        {
          userId,
          dataId
        },
        'result text'
      )
    );
    const dataItems = (await Promise.all(pendingQueries)).flat();

    // Build the model-data documents; every question gets a freshly generated id.
    const modelDataDocs = dataItems.map((dataItem) => {
      const questions = dataItem.result.map((entry) => ({
        id: nanoid(),
        text: entry.q
      }));

      return {
        modelId: modelId,
        userId,
        text: dataItem.text,
        q: questions
      };
    });

    // push data
    await ModelData.insertMany(modelDataDocs);

    jsonRes(res, {
      data: dataItems
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

View File

@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, ModelData } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import { connectRedis } from '@/service/redis';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -22,17 +22,16 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// 凭证校验
const userId = await authToken(authorization);
await connectToDatabase();
const redis = await connectRedis();
await ModelData.updateOne(
{
_id: dataId,
userId
},
{
text
}
);
// 校验是否为该用户的数据
const dataItemUserId = await redis.hGet(dataId, 'userId');
if (dataItemUserId !== userId) {
throw new Error('无权操作');
}
// 更新
await redis.hSet(dataId, 'text', text);
jsonRes(res);
} catch (err) {

View File

@@ -1,13 +1,12 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { Chat, Model, Training, connectToDatabase, ModelData } from '@/service/mongo';
import { Chat, Model, Training, connectToDatabase } from '@/service/mongo';
import { authToken, getUserApiOpenai } from '@/service/utils/tools';
import { TrainingStatusEnum } from '@/constants/model';
import { getOpenAIApi } from '@/service/utils/chat';
import { TrainingItemType } from '@/types/training';
import { httpsAgent } from '@/service/utils/tools';
import { connectRedis } from '@/service/redis';
import { VecModelDataIndex } from '@/constants/redis';
import { VecModelDataIdx } from '@/constants/redis';
/* 获取我的模型 */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -26,39 +25,38 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
// 凭证校验
const userId = await authToken(authorization);
// 验证是否是该用户的 model
const model = await Model.findOne({
_id: modelId,
userId
});
if (!model) {
throw new Error('无权操作该模型');
}
await connectToDatabase();
const redis = await connectRedis();
const modelDataList = await ModelData.find({
// 获取 redis 中模型关联的所有数据
const searchRes = await redis.ft.search(
VecModelDataIdx,
`@modelId:{${modelId}} @userId:{${userId}}`,
{
LIMIT: {
from: 0,
size: 10000
}
}
);
// 删除 redis 内容
await Promise.all(searchRes.documents.map((item) => redis.del(item.id)));
// 删除对应的聊天
await Chat.deleteMany({
modelId
});
// 删除 redis
modelDataList?.forEach((modelData) =>
modelData.q.forEach(async (item) => {
try {
await redis.json.del(`${VecModelDataIndex}:${item.id}`);
} catch (error) {
console.log(error);
}
})
);
let requestQueue: any[] = [];
// 删除对应的聊天
requestQueue.push(
Chat.deleteMany({
modelId
})
);
// 删除数据集
requestQueue.push(
ModelData.deleteMany({
modelId
})
);
// 查看是否正在训练
const training: TrainingItemType | null = await Training.findOne({
modelId,
@@ -78,21 +76,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
}
// 删除对应训练记录
requestQueue.push(
Training.deleteMany({
modelId
})
);
await Training.deleteMany({
modelId
});
// 删除模型
requestQueue.push(
Model.deleteOne({
_id: modelId,
userId
})
);
await Promise.all(requestQueue);
await Model.deleteOne({
_id: modelId,
userId
});
jsonRes(res);
} catch (err) {

View File

@@ -1,68 +0,0 @@
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, Bill } from '@/service/mongo';
import { authToken } from '@/service/utils/tools';
import type { BillSchema } from '@/types/mongoSchema';
import { VecModelDataIndex } from '@/constants/redis';
import { connectRedis } from '@/service/redis';
import { vectorToBuffer } from '@/utils/tools';
// Sample vectors for the redis search check: 12 hand-written leading components
// followed by 1524 zeros, 1536 numbers in total.
const vectorData: number[] = [
  -0.025028639,
  -0.010407282,
  0.026523087,
  -0.0107438695,
  -0.006967359,
  0.010043768,
  -0.012043097,
  0.008724345,
  -0.028919589,
  -0.0117738275,
  0.0050690062,
  0.02961969,
  ...Array.from({ length: 1524 }, () => 0)
];

// Same magnitudes as vectorData, but with several of the leading components sign-flipped.
const vectorData2: number[] = [
  0.025028639,
  0.010407282,
  0.026523087,
  0.0107438695,
  -0.006967359,
  0.010043768,
  -0.012043097,
  0.008724345,
  0.028919589,
  0.0117738275,
  0.0050690062,
  0.02961969,
  ...Array.from({ length: 1524 }, () => 0)
];
/**
 * Development-only route for checking redis vector search.
 *
 * Writes one hash (`model:data:333`) containing a vector, a modelId and a
 * dataId, then runs an FT.SEARCH range query (radius 0.15) against
 * `idx:model:data:hash` using the same vector, and returns the raw search
 * response. Outside of a development environment the request fails with a 500.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const isDevelopment = process.env.NODE_ENV === 'development';
    if (!isDevelopment) {
      throw new Error('不是开发环境');
    }

    await connectToDatabase();
    const redis = await connectRedis();

    // Seed a single hash entry to search for.
    const seedCommand = [
      'HMSET',
      'model:data:333',
      'vector',
      vectorToBuffer(vectorData2),
      'modelId',
      '1133',
      'dataId',
      'safadfa'
    ];
    await redis.sendCommand(seedCommand);

    // search
    const searchCommand = [
      'FT.SEARCH',
      'idx:model:data:hash',
      '@modelId:{1133} @vector:[VECTOR_RANGE 0.15 $blob]=>{$YIELD_DISTANCE_AS: score}',
      'RETURN',
      '2',
      'modelId',
      'dataId',
      'PARAMS',
      '2',
      'blob',
      vectorToBuffer(vectorData2),
      'SORTBY',
      'score',
      'DIALECT',
      '2'
    ];
    const searchResult = await redis.sendCommand(searchCommand);

    jsonRes(res, {
      data: searchResult
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}