Dataset: save the raw file and record its file_id on training data

This commit is contained in:
archer
2023-09-03 22:39:09 +08:00
parent 086ea83fac
commit a754ceaf3b
37 changed files with 347 additions and 144 deletions

View File

@@ -38,7 +38,7 @@ export async function generateQA(): Promise<any> {
prompt: 1,
q: 1,
source: 1,
model: 1
file_id: 1
});
// task preemption
@@ -136,7 +136,8 @@ A2:
kbId,
data: responseList.map((item) => ({
...item,
source: data.source
source: data.source,
file_id: data.file_id
})),
userId,
mode: TrainingModeEnum.index

View File

@@ -38,6 +38,7 @@ export async function generateVector(): Promise<any> {
q: 1,
a: 1,
source: 1,
file_id: 1,
vectorModel: 1
});
@@ -74,6 +75,7 @@ export async function generateVector(): Promise<any> {
q: dataItems[i].q,
a: dataItems[i].a,
source: data.source,
file_id: data.file_id,
vector
}))
});

View File

@@ -49,6 +49,10 @@ const TrainingDataSchema = new Schema({
source: {
type: String,
default: ''
},
file_id: {
type: String,
default: ''
}
});

View File

@@ -42,7 +42,7 @@ export async function dispatchKBSearch(props: Record<string, any>): Promise<KBSe
const res: any = await PgClient.query(
`BEGIN;
SET LOCAL ivfflat.probes = ${global.systemEnv.pgIvfflatProbe || 10};
select kb_id,id,q,a,source from ${PgTrainingTableName} where kb_id IN (${kbList
select kb_id,id,q,a,source,file_id from ${PgTrainingTableName} where kb_id IN (${kbList
.map((item) => `'${item.kbId}'`)
.join(',')}) AND vector <#> '[${vectors[0]}]' < -${similarity} order by vector <#> '[${
vectors[0]

View File

@@ -1,8 +1,8 @@
import { Pool } from 'pg';
import type { QueryResultRow } from 'pg';
import { PgTrainingTableName } from '@/constants/plugin';
import { exit } from 'process';
import { addLog } from './utils/tools';
import { DatasetItemType } from '@/types/plugin';
export const connectPg = async (): Promise<Pool> => {
if (global.pgClient) {
@@ -45,7 +45,7 @@ type DeleteProps = {
where: WhereProps;
};
type ValuesProps = { key: string; value: string | number }[];
type ValuesProps = { key: string; value?: string | number }[];
type UpdateProps = {
values: ValuesProps;
where: WhereProps;
@@ -168,18 +168,16 @@ export const insertKbItem = ({
}: {
userId: string;
kbId: string;
data: {
data: (DatasetItemType & {
vector: number[];
q: string;
a: string;
source?: string;
}[];
})[];
}) => {
return PgClient.insert(PgTrainingTableName, {
values: data.map((item) => [
{ key: 'user_id', value: userId },
{ key: 'kb_id', value: kbId },
{ key: 'source', value: item.source?.slice(0, 30)?.trim() || '' },
{ key: 'file_id', value: item.file_id },
{ key: 'q', value: item.q.replace(/'/g, '"') },
{ key: 'a', value: item.a.replace(/'/g, '"') },
{ key: 'vector', value: `[${item.vector}]` }
@@ -196,10 +194,11 @@ export async function initPg() {
id BIGSERIAL PRIMARY KEY,
vector VECTOR(1536) NOT NULL,
user_id VARCHAR(50) NOT NULL,
kb_id VARCHAR(50) NOT NULL,
kb_id VARCHAR(50),
source VARCHAR(100),
file_id VARCHAR(100),
q TEXT NOT NULL,
a TEXT NOT NULL
a TEXT
);
CREATE INDEX IF NOT EXISTS modelData_userId_index ON ${PgTrainingTableName} USING HASH (user_id);
CREATE INDEX IF NOT EXISTS modelData_kbId_index ON ${PgTrainingTableName} USING HASH (kb_id);