feat: 数据集导出

This commit is contained in:
archer
2023-04-03 00:18:21 +08:00
parent 05b2e9e99c
commit 16a31de1c7
9 changed files with 35 additions and 17 deletions

View File

@@ -33,7 +33,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
VecModelDataIdx,
`@modelId:{${modelId}} @userId:{${userId}}`,
{
RETURN: ['q', 'text', 'vector'],
RETURN: ['q', 'text', 'rawVector'],
LIMIT: {
from: 0,
size: 10000
@@ -42,15 +42,23 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
);
const data = searchRes.documents
.filter((item) => item?.value?.vector)
.filter((item) => {
if (!item?.value?.rawVector) return false;
try {
JSON.parse(item.value.rawVector as string);
return true;
} catch (error) {
return false;
}
})
.map((item: any) => ({
prompt: item.value.q,
completion: item.value.text,
vector: BufferToVector(item.value.vector)
vector: JSON.parse(item.value.rawVector)
}));
jsonRes(res, {
data
data: JSON.stringify(data)
});
} catch (err) {
jsonRes(res, {

View File

@@ -53,7 +53,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
userId,
'modelId',
String(modelId),
...(vector ? ['vector', vectorToBuffer(formatVector(vector))] : []),
...(vector
? ['vector', vectorToBuffer(formatVector(vector)), 'rawVector', JSON.stringify(vector)]
: []),
'q',
item.prompt,
'text',

View File

@@ -36,12 +36,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const textList: string[] = [];
let splitText = '';
/* 取 3k ~ 4K tokens 内容 */
chunks.forEach((chunk) => {
splitText += chunk;
const tokens = encode(splitText).length;
if (tokens >= 980) {
const tokens = encode(splitText + chunk).length;
if (tokens >= 4000) {
// 超过 4000不要这块内容
textList.push(splitText);
splitText = chunk;
} else if (tokens >= 3000) {
// 超过 3000取内容
textList.push(splitText + chunk);
splitText = '';
} else {
//没超过 3000继续添加
splitText += chunk;
}
});