mirror of
https://github.com/EthanMarti/infio-copilot.git
synced 2026-05-08 16:10:09 +00:00
fix trans tool
This commit is contained in:
@@ -6,6 +6,7 @@ import { createAndInitDb } from '../pgworker'
|
||||
|
||||
import { CommandManager } from './modules/command/command-manager'
|
||||
import { ConversationManager } from './modules/conversation/conversation-manager'
|
||||
import { InsightManager } from './modules/insight/insight-manager'
|
||||
import { VectorManager } from './modules/vector/vector-manager'
|
||||
|
||||
export class DBManager {
|
||||
@@ -14,6 +15,7 @@ export class DBManager {
|
||||
private vectorManager: VectorManager
|
||||
private CommandManager: CommandManager
|
||||
private conversationManager: ConversationManager
|
||||
private insightManager: InsightManager
|
||||
|
||||
constructor(app: App) {
|
||||
this.app = app
|
||||
@@ -26,6 +28,7 @@ export class DBManager {
|
||||
dbManager.vectorManager = new VectorManager(app, dbManager)
|
||||
dbManager.CommandManager = new CommandManager(app, dbManager)
|
||||
dbManager.conversationManager = new ConversationManager(app, dbManager)
|
||||
dbManager.insightManager = new InsightManager(app, dbManager)
|
||||
|
||||
return dbManager
|
||||
}
|
||||
@@ -46,6 +49,10 @@ export class DBManager {
|
||||
return this.conversationManager
|
||||
}
|
||||
|
||||
getInsightManager(): InsightManager {
|
||||
return this.insightManager
|
||||
}
|
||||
|
||||
async cleanup() {
|
||||
this.db?.close()
|
||||
this.db = null
|
||||
|
||||
2
src/database/modules/insight/index.ts
Normal file
2
src/database/modules/insight/index.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
export { InsightRepository } from './insight-repository'
|
||||
export { InsightManager } from './insight-manager'
|
||||
321
src/database/modules/insight/insight-manager.ts
Normal file
321
src/database/modules/insight/insight-manager.ts
Normal file
@@ -0,0 +1,321 @@
|
||||
import { App, TFile } from 'obsidian'
|
||||
|
||||
import { InsertSourceInsight, SelectSourceInsight } from '../../schema'
|
||||
import { EmbeddingModel } from '../../../types/embedding'
|
||||
import { DBManager } from '../../database-manager'
|
||||
|
||||
import { InsightRepository } from './insight-repository'
|
||||
|
||||
/**
 * Service-layer facade for "source insights": generated insight texts that are
 * stored together with an embedding and the vault source (document, tag or
 * folder) they were derived from.
 *
 * Most methods delegate directly to {@link InsightRepository}; this class adds
 * the camelCase -> snake_case mapping for writes, vault-aware clean-up helpers,
 * statistics and an in-memory text search.
 */
export class InsightManager {
  private app: App
  private repository: InsightRepository
  private dbManager: DBManager

  /**
   * @param app Obsidian application handle; used to check whether a source path still exists in the vault.
   * @param dbManager Database manager whose PGlite client backs the repository.
   */
  constructor(app: App, dbManager: DBManager) {
    this.app = app
    this.dbManager = dbManager
    this.repository = new InsightRepository(app, dbManager.getPgClient())
  }

  /**
   * Converts the camelCase insight payload accepted by the public API into the
   * snake_case row shape expected by the repository.
   */
  private static toInsertRow(
    data: Parameters<InsightManager['storeInsight']>[0],
  ): InsertSourceInsight {
    return {
      insight_type: data.insightType,
      insight: data.insight,
      source_type: data.sourceType,
      source_path: data.sourcePath,
      embedding: data.embedding,
    }
  }

  /**
   * Counts how often each key occurs.
   *
   * Counting happens in a Map so that keys which collide with
   * `Object.prototype` members (e.g. "constructor", "toString") start at 0
   * instead of picking up the inherited function.
   */
  private static tally(keys: string[]): Record<string, number> {
    const counts = new Map<string, number>()
    for (const key of keys) {
      counts.set(key, (counts.get(key) ?? 0) + 1)
    }
    return Object.fromEntries(counts)
  }

  /**
   * Runs a vector similarity search over the stored insights.
   *
   * @param queryVector Embedding of the query.
   * @param embeddingModel Model whose dimension selects the insight table.
   * @param options Similarity threshold, result limit and optional filters.
   * @returns Matching insights (without their embedding) plus a `similarity` score.
   */
  async performSimilaritySearch(
    queryVector: number[],
    embeddingModel: EmbeddingModel,
    options: {
      minSimilarity: number
      limit: number
      insightTypes?: string[]
      sourceTypes?: ('document' | 'tag' | 'folder')[]
      sourcePaths?: string[]
    },
  ): Promise<
    (Omit<SelectSourceInsight, 'embedding'> & {
      similarity: number
    })[]
  > {
    return await this.repository.performSimilaritySearch(queryVector, embeddingModel, options)
  }

  /**
   * Stores a single insight.
   */
  async storeInsight(
    insightData: {
      insightType: string
      insight: string
      sourceType: 'document' | 'tag' | 'folder'
      sourcePath: string
      embedding: number[]
    },
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    await this.storeBatchInsights([insightData], embeddingModel)
  }

  /**
   * Stores several insights with one repository insert call.
   */
  async storeBatchInsights(
    insightsData: Array<{
      insightType: string
      insight: string
      sourceType: 'document' | 'tag' | 'folder'
      sourcePath: string
      embedding: number[]
    }>,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const rows = insightsData.map(data => InsightManager.toInsertRow(data))
    await this.repository.insertInsights(rows, embeddingModel)
  }

  /**
   * Updates an existing insight. Only the fields present in `updates`
   * (i.e. not `undefined`) are forwarded to the repository.
   *
   * @param id Primary key of the insight row.
   */
  async updateInsight(
    id: number,
    updates: {
      insightType?: string
      insight?: string
      sourceType?: 'document' | 'tag' | 'folder'
      sourcePath?: string
      embedding?: number[]
    },
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const patch: Partial<InsertSourceInsight> = {}

    if (updates.insightType !== undefined) {
      patch.insight_type = updates.insightType
    }
    if (updates.insight !== undefined) {
      patch.insight = updates.insight
    }
    if (updates.sourceType !== undefined) {
      patch.source_type = updates.sourceType
    }
    if (updates.sourcePath !== undefined) {
      patch.source_path = updates.sourcePath
    }
    if (updates.embedding !== undefined) {
      patch.embedding = updates.embedding
    }

    await this.repository.updateInsight(id, patch, embeddingModel)
  }

  /**
   * Returns every insight stored for the given embedding model.
   */
  async getAllInsights(embeddingModel: EmbeddingModel): Promise<SelectSourceInsight[]> {
    return await this.repository.getAllInsights(embeddingModel)
  }

  /**
   * Returns the insights whose source path equals `sourcePath`.
   */
  async getInsightsBySourcePath(
    sourcePath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    return await this.repository.getInsightsBySourcePath(sourcePath, embeddingModel)
  }

  /**
   * Returns the insights of the given insight type.
   */
  async getInsightsByType(
    insightType: string,
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    return await this.repository.getInsightsByType(insightType, embeddingModel)
  }

  /**
   * Returns the insights of the given source type.
   */
  async getInsightsBySourceType(
    sourceType: 'document' | 'tag' | 'folder',
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    return await this.repository.getInsightsBySourceType(sourceType, embeddingModel)
  }

  /**
   * Deletes all insights that belong to one source path.
   */
  async deleteInsightsBySourcePath(
    sourcePath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    await this.repository.deleteInsightsBySourcePath(sourcePath, embeddingModel)
  }

  /**
   * Deletes all insights that belong to any of the given source paths.
   */
  async deleteInsightsBySourcePaths(
    sourcePaths: string[],
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    await this.repository.deleteInsightsBySourcePaths(sourcePaths, embeddingModel)
  }

  /**
   * Deletes all insights of the given insight type.
   */
  async deleteInsightsByType(
    insightType: string,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    await this.repository.deleteInsightsByType(insightType, embeddingModel)
  }

  /**
   * Removes every insight stored for the given embedding model.
   */
  async clearAllInsights(embeddingModel: EmbeddingModel): Promise<void> {
    await this.repository.clearAllInsights(embeddingModel)
  }

  /**
   * Removes the insights attached to a file that has been deleted.
   */
  async cleanInsightsForDeletedFile(
    file: TFile,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    await this.repository.deleteInsightsBySourcePath(file.path, embeddingModel)
  }

  /**
   * Re-points the insights of a renamed file from `oldPath` to `newPath`.
   *
   * Rows are updated one at a time, in the order the repository returns them.
   */
  async updateInsightsForRenamedFile(
    oldPath: string,
    newPath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const affected = await this.repository.getInsightsBySourcePath(oldPath, embeddingModel)

    for (const insight of affected) {
      await this.repository.updateInsight(insight.id, { source_path: newPath }, embeddingModel)
    }
  }

  /**
   * Bulk clean-up: deletes the insights of every `document` source whose path
   * no longer resolves to a file or folder in the vault.
   *
   * Tag and folder insights are left untouched.
   */
  async cleanInsightsForDeletedFiles(embeddingModel: EmbeddingModel): Promise<void> {
    const allInsights = await this.repository.getAllInsights(embeddingModel)

    // A document usually owns several insights; a Set keeps each stale path once
    // so the DELETE receives no duplicate entries.
    const stalePaths = new Set<string>()
    for (const insight of allInsights) {
      if (insight.source_type !== 'document') {
        continue
      }
      if (!this.app.vault.getAbstractFileByPath(insight.source_path)) {
        stalePaths.add(insight.source_path)
      }
    }

    if (stalePaths.size > 0) {
      await this.repository.deleteInsightsBySourcePaths(Array.from(stalePaths), embeddingModel)
    }
  }

  /**
   * Computes simple statistics over all stored insights.
   *
   * @returns The total count plus per-insight-type and per-source-type counts.
   */
  async getInsightStats(embeddingModel: EmbeddingModel): Promise<{
    total: number
    byType: Record<string, number>
    bySourceType: Record<string, number>
  }> {
    const allInsights = await this.repository.getAllInsights(embeddingModel)

    const insightTypes: string[] = []
    const sourceTypes: string[] = []
    for (const insight of allInsights) {
      insightTypes.push(insight.insight_type)
      sourceTypes.push(insight.source_type)
    }

    return {
      total: allInsights.length,
      byType: InsightManager.tally(insightTypes),
      bySourceType: InsightManager.tally(sourceTypes),
    }
  }

  /**
   * Case-insensitive substring search over insight text and insight type
   * (plain text matching, not a vector search).
   *
   * All insights are loaded and filtered in memory.
   *
   * @param searchText Text to look for.
   * @param options Optional filters. `limit` caps the number of results; a
   *   limit of 0 (or less) yields an empty list, an omitted limit means "no cap".
   */
  async searchInsightsByText(
    searchText: string,
    embeddingModel: EmbeddingModel,
    options?: {
      insightTypes?: string[]
      sourceTypes?: ('document' | 'tag' | 'folder')[]
      limit?: number
    }
  ): Promise<SelectSourceInsight[]> {
    const allInsights = await this.repository.getAllInsights(embeddingModel)

    // Lower-case the needle once instead of once per row.
    const needle = searchText.toLowerCase()
    const insightTypes = options?.insightTypes
    const sourceTypes = options?.sourceTypes
    const limit = options?.limit

    const matches = allInsights.filter(insight => {
      const textHit =
        insight.insight.toLowerCase().includes(needle) ||
        insight.insight_type.toLowerCase().includes(needle)
      if (!textHit) {
        return false
      }
      if (insightTypes && !insightTypes.includes(insight.insight_type)) {
        return false
      }
      if (sourceTypes && !sourceTypes.includes(insight.source_type)) {
        return false
      }
      return true
    })

    if (limit == null) {
      return matches
    }
    return matches.slice(0, Math.max(0, limit))
  }
}
|
||||
274
src/database/modules/insight/insight-repository.ts
Normal file
274
src/database/modules/insight/insight-repository.ts
Normal file
@@ -0,0 +1,274 @@
|
||||
import { PGliteInterface } from '@electric-sql/pglite'
|
||||
import { App } from 'obsidian'
|
||||
|
||||
import { EmbeddingModel } from '../../../types/embedding'
|
||||
import { DatabaseNotInitializedException } from '../../exception'
|
||||
import { InsertSourceInsight, SelectSourceInsight, sourceInsightTables } from '../../schema'
|
||||
|
||||
/**
 * Data-access layer for the per-dimension `source_insight_*` tables.
 *
 * Every method resolves the table from the embedding model's dimension and
 * throws `DatabaseNotInitializedException` when no PGlite client is available.
 * Table names are interpolated into SQL, but they only ever come from the
 * `sourceInsightTables` lookup; all caller-supplied values are bound as
 * query parameters.
 */
export class InsightRepository {
  private app: App
  private db: PGliteInterface | null

  /**
   * @param app Obsidian application handle (stored; not used by the queries below).
   * @param pgClient PGlite client, or `null` when the database is not initialized.
   */
  constructor(app: App, pgClient: PGliteInterface | null) {
    this.app = app
    this.db = pgClient
  }

  /**
   * Returns the database client.
   *
   * @throws DatabaseNotInitializedException when the client is missing.
   */
  private requireDb(): PGliteInterface {
    if (!this.db) {
      throw new DatabaseNotInitializedException()
    }
    return this.db
  }

  /**
   * Resolves the insight table name for the model's embedding dimension.
   *
   * @throws Error when no table is defined for that dimension.
   */
  private getTableName(embeddingModel: EmbeddingModel): string {
    const tableDefinition = sourceInsightTables[embeddingModel.dimension]
    if (!tableDefinition) {
      throw new Error(`No source insight table definition found for model: ${embeddingModel.id}`)
    }
    return tableDefinition.name
  }

  /**
   * Returns all insights, newest first.
   */
  async getAllInsights(embeddingModel: EmbeddingModel): Promise<SelectSourceInsight[]> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    const result = await db.query<SelectSourceInsight>(
      `SELECT * FROM "${tableName}" ORDER BY created_at DESC`
    )
    return result.rows
  }

  /**
   * Returns the insights of one source path, newest first.
   */
  async getInsightsBySourcePath(
    sourcePath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    const result = await db.query<SelectSourceInsight>(
      `SELECT * FROM "${tableName}" WHERE source_path = $1 ORDER BY created_at DESC`,
      [sourcePath]
    )
    return result.rows
  }

  /**
   * Returns the insights of one insight type, newest first.
   */
  async getInsightsByType(
    insightType: string,
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    const result = await db.query<SelectSourceInsight>(
      `SELECT * FROM "${tableName}" WHERE insight_type = $1 ORDER BY created_at DESC`,
      [insightType]
    )
    return result.rows
  }

  /**
   * Returns the insights of one source type, newest first.
   */
  async getInsightsBySourceType(
    sourceType: 'document' | 'tag' | 'folder',
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    const result = await db.query<SelectSourceInsight>(
      `SELECT * FROM "${tableName}" WHERE source_type = $1 ORDER BY created_at DESC`,
      [sourceType]
    )
    return result.rows
  }

  /**
   * Deletes every insight attached to `sourcePath`.
   */
  async deleteInsightsBySourcePath(
    sourcePath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    await db.query(
      `DELETE FROM "${tableName}" WHERE source_path = $1`,
      [sourcePath]
    )
  }

  /**
   * Deletes every insight attached to any of `sourcePaths`.
   * An empty list is a no-op and issues no query.
   */
  async deleteInsightsBySourcePaths(
    sourcePaths: string[],
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    if (sourcePaths.length === 0) {
      return
    }
    await db.query(
      `DELETE FROM "${tableName}" WHERE source_path = ANY($1)`,
      [sourcePaths]
    )
  }

  /**
   * Deletes every insight of the given insight type.
   */
  async deleteInsightsByType(
    insightType: string,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    await db.query(
      `DELETE FROM "${tableName}" WHERE insight_type = $1`,
      [insightType]
    )
  }

  /**
   * Deletes all rows of the model's insight table.
   */
  async clearAllInsights(embeddingModel: EmbeddingModel): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    await db.query(`DELETE FROM "${tableName}"`)
  }

  /**
   * Inserts a batch of insights with a single multi-row INSERT.
   *
   * An empty batch is a no-op: an INSERT with an empty VALUES list would be
   * invalid SQL.
   */
  async insertInsights(
    data: InsertSourceInsight[],
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    if (data.length === 0) {
      return
    }

    // One "($n, ..., $n+5)" placeholder tuple per row; the tuple width must
    // match the number of values pushed per row into `params` below.
    const columnsPerRow = 6
    const values = data.map((_row, index) => {
      const offset = index * columnsPerRow
      return `($${offset + 1}, $${offset + 2}, $${offset + 3}, $${offset + 4}, $${offset + 5}, $${offset + 6})`
    }).join(',')

    const updatedAt = new Date()
    const params = data.flatMap(insight => [
      insight.insight_type,
      insight.insight.replace(/\0/g, ''), // strip NUL bytes from the insight text
      insight.source_type,
      insight.source_path,
      `[${insight.embedding.join(',')}]`, // vector literal in "[a,b,c]" text form
      updatedAt,
    ])

    await db.query(
      `INSERT INTO "${tableName}" (insight_type, insight, source_type, source_path, embedding, updated_at)
      VALUES ${values}`,
      params
    )
  }

  /**
   * Updates the given fields of one insight and always refreshes `updated_at`.
   *
   * @param id Primary key of the row to update.
   * @param data Fields to change; `undefined` fields are left untouched.
   */
  async updateInsight(
    id: number,
    data: Partial<InsertSourceInsight>,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)

    const assignments: string[] = []
    const params: unknown[] = []
    // Appends "column = $n" and binds the value; n is the 1-based position in `params`.
    const assign = (column: string, value: unknown): void => {
      params.push(value)
      assignments.push(`${column} = $${params.length}`)
    }

    if (data.insight_type !== undefined) {
      assign('insight_type', data.insight_type)
    }
    if (data.insight !== undefined) {
      assign('insight', data.insight.replace(/\0/g, ''))
    }
    if (data.source_type !== undefined) {
      assign('source_type', data.source_type)
    }
    if (data.source_path !== undefined) {
      assign('source_path', data.source_path)
    }
    if (data.embedding !== undefined) {
      assign('embedding', `[${data.embedding.join(',')}]`)
    }
    assign('updated_at', new Date())

    params.push(id)

    await db.query(
      `UPDATE "${tableName}" SET ${assignments.join(', ')} WHERE id = $${params.length}`,
      params
    )
  }

  /**
   * Finds the insights most similar to `queryVector`.
   *
   * Similarity is computed as `1 - (embedding <=> query)`. Only rows above
   * `options.minSimilarity` are returned, best match first, capped at
   * `options.limit`. Each optional filter is applied only when its array is
   * non-empty.
   */
  async performSimilaritySearch(
    queryVector: number[],
    embeddingModel: EmbeddingModel,
    options: {
      minSimilarity: number
      limit: number
      insightTypes?: string[]
      sourceTypes?: ('document' | 'tag' | 'folder')[]
      sourcePaths?: string[]
    },
  ): Promise<
    (Omit<SelectSourceInsight, 'embedding'> & {
      similarity: number
    })[]
  > {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)

    // $1 = query vector, $2 = similarity threshold, $3 = limit; filters follow from $4.
    const whereConditions = ['1 - (embedding <=> $1::vector) > $2']
    const params: unknown[] = [`[${queryVector.join(',')}]`, options.minSimilarity, options.limit]
    const addArrayFilter = (column: string, allowed?: string[]): void => {
      if (allowed && allowed.length > 0) {
        params.push(allowed)
        whereConditions.push(`${column} = ANY($${params.length})`)
      }
    }

    addArrayFilter('insight_type', options.insightTypes)
    addArrayFilter('source_type', options.sourceTypes)
    addArrayFilter('source_path', options.sourcePaths)

    const query = `
      SELECT
        id, insight_type, insight, source_type, source_path, created_at, updated_at,
        1 - (embedding <=> $1::vector) as similarity
      FROM "${tableName}"
      WHERE ${whereConditions.join(' AND ')}
      ORDER BY similarity DESC
      LIMIT $3
    `

    type SearchResult = Omit<SelectSourceInsight, 'embedding'> & { similarity: number }
    const result = await db.query<SearchResult>(query, params)
    return result.rows
  }
}
|
||||
@@ -136,7 +136,7 @@ export class VectorRepository {
|
||||
const tableName = this.getTableName(embeddingModel)
|
||||
|
||||
let scopeCondition = ''
|
||||
const params: any[] = [`[${queryVector.join(',')}]`, options.minSimilarity, options.limit]
|
||||
const params: unknown[] = [`[${queryVector.join(',')}]`, options.minSimilarity, options.limit]
|
||||
let paramIndex = 4
|
||||
|
||||
if (options.scope) {
|
||||
|
||||
@@ -176,3 +176,63 @@ export type SelectMessage = {
|
||||
similarity_search_results?: string | null
|
||||
created_at: Date
|
||||
}
|
||||
|
||||
/* Source Insight Table */
|
||||
/**
 * One row of a `source_insight_*` table.
 */
export type SourceInsightRecord = {
  /** Primary key of the row. */
  id: number
  /** Category label of the insight. */
  insight_type: string
  /** The insight text itself. */
  insight: string
  /** Kind of vault source the insight was derived from. */
  source_type: 'document' | 'tag' | 'folder'
  /** Path (or identifier) of that source. */
  source_path: string
  /** Embedding vector of the insight. */
  embedding: number[]
  /** Row creation timestamp. */
  created_at: Date
  /** Timestamp of the last modification. */
  updated_at: Date
}

/** Shape of a row as read back from the database. */
export type SelectSourceInsight = SourceInsightRecord

/** Shape of a row to be written: everything except the id and the two timestamps. */
export type InsertSourceInsight = Pick<
  SourceInsightRecord,
  'insight_type' | 'insight' | 'source_type' | 'source_path' | 'embedding'
>
|
||||
|
||||
/**
 * Builds the table definition of `source_insight_<dimension>`.
 *
 * The B-tree indexes on `source_path` and `insight_type` are always defined,
 * because they cover plain TEXT columns and do not depend on the embedding
 * size. Only the HNSW index on `embedding` is restricted to dimensions up to
 * 2000.
 *
 * @param dimension Number of dimensions of the embedding column.
 * @returns The table definition, including its index definitions.
 */
const createSourceInsightTable = (dimension: number): TableDefinition => {
  // NOTE(review): presumably mirrors pgvector's 2,000-dimension limit for
  // HNSW indexes on `vector` columns - confirm against the pgvector docs.
  const hnswMaxDimensions = 2000

  const table: TableDefinition = {
    name: `source_insight_${dimension}`,
    columns: {
      id: { type: 'SERIAL', primaryKey: true },
      insight_type: { type: 'TEXT', notNull: true },
      insight: { type: 'TEXT', notNull: true },
      source_type: { type: 'TEXT', notNull: true },
      source_path: { type: 'TEXT', notNull: true },
      embedding: { type: 'VECTOR', dimensions: dimension },
      created_at: { type: 'TIMESTAMP', notNull: true, defaultNow: true },
      updated_at: { type: 'TIMESTAMP', notNull: true, defaultNow: true }
    }
  }

  // Lookup indexes that are valid for every embedding dimension.
  const lookupIndices: NonNullable<TableDefinition['indices']> = {
    [`insightSourceIndex_${dimension}`]: {
      type: 'BTREE',
      columns: ['source_path']
    },
    [`insightTypeIndex_${dimension}`]: {
      type: 'BTREE',
      columns: ['insight_type']
    }
  }

  if (dimension <= hnswMaxDimensions) {
    // Keep the vector index first so the definition order matches the previous output.
    table.indices = {
      [`insightEmbeddingIndex_${dimension}`]: {
        type: 'HNSW',
        columns: ['embedding'],
        options: 'vector_cosine_ops'
      },
      ...lookupIndices
    }
  } else {
    table.indices = lookupIndices
  }

  return table
}
|
||||
|
||||
/**
 * Source insight table definitions keyed by embedding dimension, one entry
 * per dimension listed in SUPPORT_EMBEDDING_SIMENTION.
 */
export const sourceInsightTables: Record<number, TableDefinition> = {}
for (const dimension of SUPPORT_EMBEDDING_SIMENTION) {
  sourceInsightTables[dimension] = createSourceInsightTable(dimension)
}
|
||||
|
||||
@@ -94,6 +94,119 @@ export const migrations: Record<string, SqlMigration> = {
|
||||
ON "embeddings_384" ("path");
|
||||
`
|
||||
},
|
||||
source_insight: {
|
||||
description: "Creates source insight tables and indexes for different embedding models",
|
||||
sql: `
|
||||
-- Create source insight tables for different embedding dimensions
|
||||
CREATE TABLE IF NOT EXISTS "source_insight_1536" (
|
||||
"id" serial PRIMARY KEY NOT NULL,
|
||||
"insight_type" text NOT NULL,
|
||||
"insight" text NOT NULL,
|
||||
"source_type" text NOT NULL,
|
||||
"source_path" text NOT NULL,
|
||||
"embedding" vector(1536),
|
||||
"created_at" timestamp DEFAULT now() NOT NULL,
|
||||
"updated_at" timestamp DEFAULT now() NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS "source_insight_1024" (
|
||||
"id" serial PRIMARY KEY NOT NULL,
|
||||
"insight_type" text NOT NULL,
|
||||
"insight" text NOT NULL,
|
||||
"source_type" text NOT NULL,
|
||||
"source_path" text NOT NULL,
|
||||
"embedding" vector(1024),
|
||||
"created_at" timestamp DEFAULT now() NOT NULL,
|
||||
"updated_at" timestamp DEFAULT now() NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS "source_insight_768" (
|
||||
"id" serial PRIMARY KEY NOT NULL,
|
||||
"insight_type" text NOT NULL,
|
||||
"insight" text NOT NULL,
|
||||
"source_type" text NOT NULL,
|
||||
"source_path" text NOT NULL,
|
||||
"embedding" vector(768),
|
||||
"created_at" timestamp DEFAULT now() NOT NULL,
|
||||
"updated_at" timestamp DEFAULT now() NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS "source_insight_512" (
|
||||
"id" serial PRIMARY KEY NOT NULL,
|
||||
"insight_type" text NOT NULL,
|
||||
"insight" text NOT NULL,
|
||||
"source_type" text NOT NULL,
|
||||
"source_path" text NOT NULL,
|
||||
"embedding" vector(512),
|
||||
"created_at" timestamp DEFAULT now() NOT NULL,
|
||||
"updated_at" timestamp DEFAULT now() NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS "source_insight_384" (
|
||||
"id" serial PRIMARY KEY NOT NULL,
|
||||
"insight_type" text NOT NULL,
|
||||
"insight" text NOT NULL,
|
||||
"source_type" text NOT NULL,
|
||||
"source_path" text NOT NULL,
|
||||
"embedding" vector(384),
|
||||
"created_at" timestamp DEFAULT now() NOT NULL,
|
||||
"updated_at" timestamp DEFAULT now() NOT NULL
|
||||
);
|
||||
|
||||
-- Create HNSW indexes for embedding similarity search
|
||||
CREATE INDEX IF NOT EXISTS "insightEmbeddingIndex_1536"
|
||||
ON "source_insight_1536"
|
||||
USING hnsw ("embedding" vector_cosine_ops);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightEmbeddingIndex_1024"
|
||||
ON "source_insight_1024"
|
||||
USING hnsw ("embedding" vector_cosine_ops);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightEmbeddingIndex_768"
|
||||
ON "source_insight_768"
|
||||
USING hnsw ("embedding" vector_cosine_ops);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightEmbeddingIndex_512"
|
||||
ON "source_insight_512"
|
||||
USING hnsw ("embedding" vector_cosine_ops);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightEmbeddingIndex_384"
|
||||
ON "source_insight_384"
|
||||
USING hnsw ("embedding" vector_cosine_ops);
|
||||
|
||||
-- Create B-tree indexes for source_path field
|
||||
CREATE INDEX IF NOT EXISTS "insightSourceIndex_1536"
|
||||
ON "source_insight_1536" ("source_path");
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightSourceIndex_1024"
|
||||
ON "source_insight_1024" ("source_path");
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightSourceIndex_768"
|
||||
ON "source_insight_768" ("source_path");
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightSourceIndex_512"
|
||||
ON "source_insight_512" ("source_path");
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightSourceIndex_384"
|
||||
ON "source_insight_384" ("source_path");
|
||||
|
||||
-- Create B-tree indexes for insight_type field
|
||||
CREATE INDEX IF NOT EXISTS "insightTypeIndex_1536"
|
||||
ON "source_insight_1536" ("insight_type");
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightTypeIndex_1024"
|
||||
ON "source_insight_1024" ("insight_type");
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightTypeIndex_768"
|
||||
ON "source_insight_768" ("insight_type");
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightTypeIndex_512"
|
||||
ON "source_insight_512" ("insight_type");
|
||||
|
||||
CREATE INDEX IF NOT EXISTS "insightTypeIndex_384"
|
||||
ON "source_insight_384" ("insight_type");
|
||||
`
|
||||
},
|
||||
template: {
|
||||
description: "Creates template table with UUID support",
|
||||
sql: `
|
||||
|
||||
Reference in New Issue
Block a user