External dataset (#1497)

* perf: read rawText and chunk code

* perf: read raw text

* perf: read rawtext

* perf: token count

* log
This commit is contained in:
Archer
2024-05-16 11:47:53 +08:00
committed by GitHub
parent d5073f98ab
commit c6d9b15897
36 changed files with 531 additions and 267 deletions

View File

@@ -1,41 +1,50 @@
/*
Read db file content and response 3000 words
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { NextAPI } from '@/service/middle/entry';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { ApiRequestProps } from '@fastgpt/service/type/next';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { fileId, csvFormat } = req.body as { fileId: string; csvFormat?: boolean };
export type PreviewContextProps = {
type: DatasetSourceReadTypeEnum;
sourceId: string;
isQAImport?: boolean;
selector?: string;
};
if (!fileId) {
throw new Error('fileId is empty');
}
async function handler(req: ApiRequestProps<PreviewContextProps>, res: NextApiResponse<any>) {
const { type, sourceId, isQAImport, selector } = req.body;
const { teamId } = await authFile({ req, authToken: true, fileId });
const { rawText } = await readFileContentFromMongo({
teamId,
bucketName: BucketNameEnum.dataset,
fileId,
csvFormat
});
jsonRes(res, {
data: {
previewContent: rawText.slice(0, 3000),
totalLength: rawText.length
}
});
} catch (error) {
jsonRes(res, {
code: 500,
error
});
if (!sourceId) {
throw new Error('fileId is empty');
}
const { teamId } = await (async () => {
if (type === DatasetSourceReadTypeEnum.fileLocal) {
return authFile({ req, authToken: true, authApiKey: true, fileId: sourceId });
}
return authCert({ req, authApiKey: true, authToken: true });
})();
const rawText = await readDatasetSourceRawText({
teamId,
type,
sourceId: sourceId,
isQAImport,
selector
});
jsonRes(res, {
data: {
previewContent: rawText.slice(0, 3000),
totalLength: rawText.length
}
});
}
export default NextAPI(handler);