4.7.1-alpha (#1120)

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
Archer
2024-04-03 18:14:09 +08:00
committed by GitHub
parent 9ae581e09b
commit 8a46372418
76 changed files with 3129 additions and 2104 deletions

View File

@@ -6,16 +6,9 @@ import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoFileSchema } from './schema';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { readFileRawText } from '../read/rawText';
import { ReadFileByBufferParams } from '../read/type';
import { readMarkdown } from '../read/markdown';
import { readHtmlRawText } from '../read/html';
import { readPdfFile } from '../read/pdf';
import { readWordFile } from '../read/word';
import { readCsvRawText } from '../read/csv';
import { MongoRwaTextBuffer } from '../../buffer/rawText/schema';
import { readPptxRawText } from '../read/pptx';
import { readXlsxRawText } from '../read/xlsx';
import { readFileRawContent } from '../read/utils';
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
MongoFileSchema;
@@ -146,7 +139,7 @@ export const readFileEncode = async ({
return encoding as BufferEncoding;
};
export const readFileContent = async ({
export const readFileContentFromMongo = async ({
teamId,
bucketName,
fileId,
@@ -205,47 +198,14 @@ export const readFileContent = async ({
}
};
const { rawText } = await (async () => {
switch (extension) {
case 'txt':
return readFileRawText(params);
case 'md':
return readMarkdown(params);
case 'html':
return readHtmlRawText(params);
case 'pdf':
return readPdfFile(params);
case 'docx':
return readWordFile(params);
case 'pptx':
return readPptxRawText(params);
case 'xlsx':
const xlsxResult = await readXlsxRawText(params);
if (csvFormat) {
return {
rawText: xlsxResult.formatText || ''
};
}
return {
rawText: xlsxResult.rawText
};
case 'csv':
const csvResult = await readCsvRawText(params);
if (csvFormat) {
return {
rawText: csvResult.formatText || ''
};
}
return {
rawText: csvResult.rawText
};
default:
return Promise.reject('Only support .txt, .md, .html, .pdf, .docx, pptx, .csv, .xlsx');
}
})();
const { rawText } = await readFileRawContent({
extension,
csvFormat,
params
});
if (rawText.trim()) {
await MongoRwaTextBuffer.create({
MongoRwaTextBuffer.create({
sourceId: fileId,
rawText,
metadata: {

View File

@@ -2,6 +2,15 @@ import { markdownProcess } from '@fastgpt/global/common/string/markdown';
import { uploadMongoImg } from '../image/controller';
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
import { addHours } from 'date-fns';
import { ReadFileByBufferParams } from './type';
import { readFileRawText } from '../read/rawText';
import { readMarkdown } from '../read/markdown';
import { readHtmlRawText } from '../read/html';
import { readPdfFile } from '../read/pdf';
import { readWordFile } from '../read/word';
import { readCsvRawText } from '../read/csv';
import { readPptxRawText } from '../read/pptx';
import { readXlsxRawText } from '../read/xlsx';
export const initMarkdownText = ({
teamId,
@@ -23,3 +32,50 @@ export const initMarkdownText = ({
expiredTime: addHours(new Date(), 2)
})
});
export const readFileRawContent = async ({
extension,
csvFormat,
params
}: {
csvFormat?: boolean;
extension: string;
params: ReadFileByBufferParams;
}) => {
switch (extension) {
case 'txt':
return readFileRawText(params);
case 'md':
return readMarkdown(params);
case 'html':
return readHtmlRawText(params);
case 'pdf':
return readPdfFile(params);
case 'docx':
return readWordFile(params);
case 'pptx':
return readPptxRawText(params);
case 'xlsx':
const xlsxResult = await readXlsxRawText(params);
if (csvFormat) {
return {
rawText: xlsxResult.formatText || ''
};
}
return {
rawText: xlsxResult.rawText
};
case 'csv':
const csvResult = await readCsvRawText(params);
if (csvFormat) {
return {
rawText: csvResult.formatText || ''
};
}
return {
rawText: csvResult.rawText
};
default:
return Promise.reject('Only support .txt, .md, .html, .pdf, .docx, pptx, .csv, .xlsx');
}
};

View File

@@ -55,8 +55,8 @@ export const clearTmpUploadFiles = () => {
fs.stat(filePath, (err, stats) => {
if (err) return;
// 如果文件是在1小时前上传的,则认为是临时文件并删除它
if (Date.now() - stats.mtime.getTime() > 1 * 60 * 60 * 1000) {
// 如果文件是在2小时前上传的,则认为是临时文件并删除它
if (Date.now() - stats.mtime.getTime() > 2 * 60 * 60 * 1000) {
fs.unlink(filePath, (err) => {
if (err) return;
console.log(`Deleted temp file: ${filePath}`);

View File

@@ -0,0 +1,15 @@
export enum TimerIdEnum {
checkInValidDatasetFiles = 'checkInValidDatasetFiles',
checkInvalidDatasetData = 'checkInvalidDatasetData',
checkInvalidVector = 'checkInvalidVector',
clearExpiredSubPlan = 'clearExpiredSubPlan',
updateStandardPlan = 'updateStandardPlan'
}
export const timerIdMap = {
[TimerIdEnum.checkInValidDatasetFiles]: 'checkInValidDatasetFiles',
[TimerIdEnum.checkInvalidDatasetData]: 'checkInvalidDatasetData',
[TimerIdEnum.checkInvalidVector]: 'checkInvalidVector',
[TimerIdEnum.clearExpiredSubPlan]: 'clearExpiredSubPlan',
[TimerIdEnum.updateStandardPlan]: 'updateStandardPlan'
};

View File

@@ -0,0 +1,29 @@
import { connectionMongo, type Model } from '../../mongo';
import { timerIdMap } from './constants';
const { Schema, model, models } = connectionMongo;
import { TimerLockSchemaType } from './type.d';
export const collectionName = 'systemtimerlocks';
const TimerLockSchema = new Schema({
timerId: {
type: String,
required: true,
unique: true,
enum: Object.keys(timerIdMap)
},
expiredTime: {
type: Date,
required: true
}
});
try {
TimerLockSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 5 });
} catch (error) {
console.log(error);
}
export const MongoTimerLock: Model<TimerLockSchemaType> =
models[collectionName] || model(collectionName, TimerLockSchema);
MongoTimerLock.syncIndexes();

View File

@@ -0,0 +1,5 @@
export type TimerLockSchemaType = {
_id: string;
timerId: string;
expiredTime: Date;
};

View File

@@ -0,0 +1,25 @@
import { TimerIdEnum } from './constants';
import { MongoTimerLock } from './schema';
import { addMinutes } from 'date-fns';
/*
利用唯一健,使得同一时间只有一个任务在执行,后创建的锁,会因唯一健创建失败,从而无法继续执行任务
*/
export const checkTimerLock = async ({
timerId,
lockMinuted
}: {
timerId: `${TimerIdEnum}`;
lockMinuted: number;
}) => {
try {
await MongoTimerLock.create({
timerId,
expiredTime: addMinutes(new Date(), lockMinuted)
});
return true;
} catch (error) {
return false;
}
};