dataset save raw file
This commit is contained in:
@@ -38,7 +38,7 @@ export async function generateQA(): Promise<any> {
|
||||
prompt: 1,
|
||||
q: 1,
|
||||
source: 1,
|
||||
model: 1
|
||||
file_id: 1
|
||||
});
|
||||
|
||||
// task preemption
|
||||
@@ -136,7 +136,8 @@ A2:
|
||||
kbId,
|
||||
data: responseList.map((item) => ({
|
||||
...item,
|
||||
source: data.source
|
||||
source: data.source,
|
||||
file_id: data.file_id
|
||||
})),
|
||||
userId,
|
||||
mode: TrainingModeEnum.index
|
||||
|
||||
@@ -38,6 +38,7 @@ export async function generateVector(): Promise<any> {
|
||||
q: 1,
|
||||
a: 1,
|
||||
source: 1,
|
||||
file_id: 1,
|
||||
vectorModel: 1
|
||||
});
|
||||
|
||||
@@ -74,6 +75,7 @@ export async function generateVector(): Promise<any> {
|
||||
q: dataItems[i].q,
|
||||
a: dataItems[i].a,
|
||||
source: data.source,
|
||||
file_id: data.file_id,
|
||||
vector
|
||||
}))
|
||||
});
|
||||
|
||||
@@ -49,6 +49,10 @@ const TrainingDataSchema = new Schema({
|
||||
source: {
|
||||
type: String,
|
||||
default: ''
|
||||
},
|
||||
file_id: {
|
||||
type: String,
|
||||
default: ''
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ export async function dispatchKBSearch(props: Record<string, any>): Promise<KBSe
|
||||
const res: any = await PgClient.query(
|
||||
`BEGIN;
|
||||
SET LOCAL ivfflat.probes = ${global.systemEnv.pgIvfflatProbe || 10};
|
||||
select kb_id,id,q,a,source from ${PgTrainingTableName} where kb_id IN (${kbList
|
||||
select kb_id,id,q,a,source,file_id from ${PgTrainingTableName} where kb_id IN (${kbList
|
||||
.map((item) => `'${item.kbId}'`)
|
||||
.join(',')}) AND vector <#> '[${vectors[0]}]' < -${similarity} order by vector <#> '[${
|
||||
vectors[0]
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { Pool } from 'pg';
|
||||
import type { QueryResultRow } from 'pg';
|
||||
import { PgTrainingTableName } from '@/constants/plugin';
|
||||
import { exit } from 'process';
|
||||
import { addLog } from './utils/tools';
|
||||
import { DatasetItemType } from '@/types/plugin';
|
||||
|
||||
export const connectPg = async (): Promise<Pool> => {
|
||||
if (global.pgClient) {
|
||||
@@ -45,7 +45,7 @@ type DeleteProps = {
|
||||
where: WhereProps;
|
||||
};
|
||||
|
||||
type ValuesProps = { key: string; value: string | number }[];
|
||||
type ValuesProps = { key: string; value?: string | number }[];
|
||||
type UpdateProps = {
|
||||
values: ValuesProps;
|
||||
where: WhereProps;
|
||||
@@ -168,18 +168,16 @@ export const insertKbItem = ({
|
||||
}: {
|
||||
userId: string;
|
||||
kbId: string;
|
||||
data: {
|
||||
data: (DatasetItemType & {
|
||||
vector: number[];
|
||||
q: string;
|
||||
a: string;
|
||||
source?: string;
|
||||
}[];
|
||||
})[];
|
||||
}) => {
|
||||
return PgClient.insert(PgTrainingTableName, {
|
||||
values: data.map((item) => [
|
||||
{ key: 'user_id', value: userId },
|
||||
{ key: 'kb_id', value: kbId },
|
||||
{ key: 'source', value: item.source?.slice(0, 30)?.trim() || '' },
|
||||
{ key: 'file_id', value: item.file_id },
|
||||
{ key: 'q', value: item.q.replace(/'/g, '"') },
|
||||
{ key: 'a', value: item.a.replace(/'/g, '"') },
|
||||
{ key: 'vector', value: `[${item.vector}]` }
|
||||
@@ -196,10 +194,11 @@ export async function initPg() {
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
vector VECTOR(1536) NOT NULL,
|
||||
user_id VARCHAR(50) NOT NULL,
|
||||
kb_id VARCHAR(50) NOT NULL,
|
||||
kb_id VARCHAR(50),
|
||||
source VARCHAR(100),
|
||||
file_id VARCHAR(100),
|
||||
q TEXT NOT NULL,
|
||||
a TEXT NOT NULL
|
||||
a TEXT
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS modelData_userId_index ON ${PgTrainingTableName} USING HASH (user_id);
|
||||
CREATE INDEX IF NOT EXISTS modelData_kbId_index ON ${PgTrainingTableName} USING HASH (kb_id);
|
||||
|
||||
Reference in New Issue
Block a user