fix trans tool

This commit is contained in:
duanfuxiang
2025-06-29 08:28:50 +08:00
parent 772270863c
commit f3a0252ab6
25 changed files with 1173 additions and 441 deletions

View File

@@ -0,0 +1,2 @@
export { InsightRepository } from './insight-repository'
export { InsightManager } from './insight-manager'

View File

@@ -0,0 +1,321 @@
import { App, TFile } from 'obsidian'
import { InsertSourceInsight, SelectSourceInsight } from '../../schema'
import { EmbeddingModel } from '../../../types/embedding'
import { DBManager } from '../../database-manager'
import { InsightRepository } from './insight-repository'
/**
 * Service layer for source insights.
 *
 * Most methods forward directly to {@link InsightRepository}; a few add
 * vault-aware behavior (cleanup of insights whose file no longer exists,
 * path rewrite on rename) or in-memory aggregation (stats, text search).
 *
 * Every method takes the embedding model because the repository selects the
 * backing table from it.
 */
export class InsightManager {
  private app: App
  private repository: InsightRepository
  private dbManager: DBManager

  constructor(app: App, dbManager: DBManager) {
    this.app = app
    this.dbManager = dbManager
    this.repository = new InsightRepository(app, dbManager.getPgClient())
  }

  /**
   * Convert the camelCase insight payload accepted by this class into the
   * snake_case row shape expected by the repository.
   */
  private toInsertRow(data: {
    insightType: string
    insight: string
    sourceType: 'document' | 'tag' | 'folder'
    sourcePath: string
    embedding: number[]
  }): InsertSourceInsight {
    return {
      insight_type: data.insightType,
      insight: data.insight,
      source_type: data.sourceType,
      source_path: data.sourcePath,
      embedding: data.embedding,
    }
  }

  /**
   * Run a vector similarity search over stored insights.
   *
   * @param queryVector - Embedding of the query.
   * @param embeddingModel - Model whose insight table is searched.
   * @param options - Similarity threshold, row limit and optional filters;
   *   passed to the repository unchanged.
   * @returns Matching insights (without their embedding) plus a `similarity` score.
   */
  async performSimilaritySearch(
    queryVector: number[],
    embeddingModel: EmbeddingModel,
    options: {
      minSimilarity: number
      limit: number
      insightTypes?: string[]
      sourceTypes?: ('document' | 'tag' | 'folder')[]
      sourcePaths?: string[]
    },
  ): Promise<
    (Omit<SelectSourceInsight, 'embedding'> & {
      similarity: number
    })[]
  > {
    return await this.repository.performSimilaritySearch(
      queryVector,
      embeddingModel,
      options,
    )
  }

  /**
   * Store a single insight. Equivalent to a one-element batch.
   */
  async storeInsight(
    insightData: {
      insightType: string
      insight: string
      sourceType: 'document' | 'tag' | 'folder'
      sourcePath: string
      embedding: number[]
    },
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    await this.storeBatchInsights([insightData], embeddingModel)
  }

  /**
   * Store several insights with one repository insert.
   *
   * An empty batch is a no-op: the repository builds its INSERT statement
   * from the rows it is given, so there is nothing valid to send for zero rows.
   */
  async storeBatchInsights(
    insightsData: Array<{
      insightType: string
      insight: string
      sourceType: 'document' | 'tag' | 'folder'
      sourcePath: string
      embedding: number[]
    }>,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    if (insightsData.length === 0) {
      return
    }
    const rows = insightsData.map((data) => this.toInsertRow(data))
    await this.repository.insertInsights(rows, embeddingModel)
  }

  /**
   * Update an existing insight. Only the fields present in `updates`
   * (i.e. not `undefined`) are forwarded to the repository.
   *
   * @param id - Primary key of the insight row.
   */
  async updateInsight(
    id: number,
    updates: {
      insightType?: string
      insight?: string
      sourceType?: 'document' | 'tag' | 'folder'
      sourcePath?: string
      embedding?: number[]
    },
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const updateData: Partial<InsertSourceInsight> = {}
    if (updates.insightType !== undefined) {
      updateData.insight_type = updates.insightType
    }
    if (updates.insight !== undefined) {
      updateData.insight = updates.insight
    }
    if (updates.sourceType !== undefined) {
      updateData.source_type = updates.sourceType
    }
    if (updates.sourcePath !== undefined) {
      updateData.source_path = updates.sourcePath
    }
    if (updates.embedding !== undefined) {
      updateData.embedding = updates.embedding
    }
    await this.repository.updateInsight(id, updateData, embeddingModel)
  }

  /** Fetch every insight stored for the given embedding model. */
  async getAllInsights(embeddingModel: EmbeddingModel): Promise<SelectSourceInsight[]> {
    return await this.repository.getAllInsights(embeddingModel)
  }

  /** Fetch the insights whose `source_path` equals `sourcePath`. */
  async getInsightsBySourcePath(
    sourcePath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    return await this.repository.getInsightsBySourcePath(sourcePath, embeddingModel)
  }

  /** Fetch the insights whose `insight_type` equals `insightType`. */
  async getInsightsByType(
    insightType: string,
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    return await this.repository.getInsightsByType(insightType, embeddingModel)
  }

  /** Fetch the insights whose `source_type` equals `sourceType`. */
  async getInsightsBySourceType(
    sourceType: 'document' | 'tag' | 'folder',
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    return await this.repository.getInsightsBySourceType(sourceType, embeddingModel)
  }

  /** Delete all insights recorded for one source path. */
  async deleteInsightsBySourcePath(
    sourcePath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    await this.repository.deleteInsightsBySourcePath(sourcePath, embeddingModel)
  }

  /** Delete all insights recorded for any of the given source paths. */
  async deleteInsightsBySourcePaths(
    sourcePaths: string[],
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    await this.repository.deleteInsightsBySourcePaths(sourcePaths, embeddingModel)
  }

  /** Delete all insights of one insight type. */
  async deleteInsightsByType(
    insightType: string,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    await this.repository.deleteInsightsByType(insightType, embeddingModel)
  }

  /** Remove every insight stored for the given embedding model. */
  async clearAllInsights(embeddingModel: EmbeddingModel): Promise<void> {
    await this.repository.clearAllInsights(embeddingModel)
  }

  /** Remove the insights attached to a file that has been deleted. */
  async cleanInsightsForDeletedFile(
    file: TFile,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    await this.repository.deleteInsightsBySourcePath(file.path, embeddingModel)
  }

  /**
   * Re-point insights from `oldPath` to `newPath` after a file rename.
   *
   * Only rows whose `source_path` equals `oldPath` exactly are touched.
   * Rows are updated one at a time, in the order the repository returns them.
   */
  async updateInsightsForRenamedFile(
    oldPath: string,
    newPath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const insights = await this.repository.getInsightsBySourcePath(oldPath, embeddingModel)
    for (const insight of insights) {
      await this.repository.updateInsight(
        insight.id,
        { source_path: newPath },
        embeddingModel,
      )
    }
  }

  /**
   * Bulk cleanup: delete insights of source type `document` whose path no
   * longer resolves in the vault.
   *
   * Paths are collected into a set so that a file with several insights is
   * looked up and listed for deletion only once.
   */
  async cleanInsightsForDeletedFiles(embeddingModel: EmbeddingModel): Promise<void> {
    const allInsights = await this.repository.getAllInsights(embeddingModel)
    const checkedPaths = new Set<string>()
    const missingPaths = new Set<string>()

    for (const insight of allInsights) {
      if (insight.source_type !== 'document' || checkedPaths.has(insight.source_path)) {
        continue
      }
      checkedPaths.add(insight.source_path)
      // A null lookup means the vault no longer has an entry at this path.
      if (!this.app.vault.getAbstractFileByPath(insight.source_path)) {
        missingPaths.add(insight.source_path)
      }
    }

    if (missingPaths.size > 0) {
      await this.repository.deleteInsightsBySourcePaths([...missingPaths], embeddingModel)
    }
  }

  /**
   * Count stored insights overall, per insight type and per source type.
   *
   * Counting is done in `Map`s and converted to plain objects at the end, so
   * a type named like an `Object.prototype` member (e.g. "constructor") is
   * counted as a number rather than colliding with the inherited property.
   */
  async getInsightStats(embeddingModel: EmbeddingModel): Promise<{
    total: number
    byType: Record<string, number>
    bySourceType: Record<string, number>
  }> {
    const allInsights = await this.repository.getAllInsights(embeddingModel)
    const typeCounts = new Map<string, number>()
    const sourceTypeCounts = new Map<string, number>()

    for (const insight of allInsights) {
      typeCounts.set(insight.insight_type, (typeCounts.get(insight.insight_type) ?? 0) + 1)
      sourceTypeCounts.set(
        insight.source_type,
        (sourceTypeCounts.get(insight.source_type) ?? 0) + 1,
      )
    }

    return {
      total: allInsights.length,
      byType: Object.fromEntries(typeCounts),
      bySourceType: Object.fromEntries(sourceTypeCounts),
    }
  }

  /**
   * Plain-text (non-vector) search over insights.
   *
   * Loads all insights and filters them in memory: a row matches when its
   * `insight` text or its `insight_type` contains `searchText`,
   * case-insensitively.
   *
   * @param options.insightTypes - Keep only these insight types. An empty
   *   array is treated as "no filter", matching `performSimilaritySearch`.
   * @param options.sourceTypes - Keep only these source types. An empty array
   *   is treated as "no filter".
   * @param options.limit - Maximum number of rows to return. A falsy value
   *   (`undefined` or `0`) means no limit.
   */
  async searchInsightsByText(
    searchText: string,
    embeddingModel: EmbeddingModel,
    options?: {
      insightTypes?: string[]
      sourceTypes?: ('document' | 'tag' | 'folder')[]
      limit?: number
    },
  ): Promise<SelectSourceInsight[]> {
    const allInsights = await this.repository.getAllInsights(embeddingModel)

    // Lowercase the search text once instead of twice per row.
    const needle = searchText.toLowerCase()
    const allowedTypes = options?.insightTypes
    const allowedSourceTypes = options?.sourceTypes

    const matches = allInsights.filter((insight) => {
      const textMatches =
        insight.insight.toLowerCase().includes(needle) ||
        insight.insight_type.toLowerCase().includes(needle)
      if (!textMatches) {
        return false
      }
      if (allowedTypes && allowedTypes.length > 0 && !allowedTypes.includes(insight.insight_type)) {
        return false
      }
      if (
        allowedSourceTypes &&
        allowedSourceTypes.length > 0 &&
        !allowedSourceTypes.includes(insight.source_type)
      ) {
        return false
      }
      return true
    })

    return options?.limit ? matches.slice(0, options.limit) : matches
  }
}

View File

@@ -0,0 +1,274 @@
import { PGliteInterface } from '@electric-sql/pglite'
import { App } from 'obsidian'
import { EmbeddingModel } from '../../../types/embedding'
import { DatabaseNotInitializedException } from '../../exception'
import { InsertSourceInsight, SelectSourceInsight, sourceInsightTables } from '../../schema'
/**
 * Data-access layer for source insights stored in PGlite.
 *
 * The table to operate on is chosen per call from `sourceInsightTables`,
 * keyed by the embedding model's dimension. Every public method throws
 * `DatabaseNotInitializedException` when no database client was supplied.
 */
export class InsightRepository {
  private app: App
  private db: PGliteInterface | null

  constructor(app: App, pgClient: PGliteInterface | null) {
    this.app = app
    this.db = pgClient
  }

  /**
   * Return the database client, or throw if the repository was constructed
   * without one.
   *
   * @throws DatabaseNotInitializedException when `db` is null.
   */
  private requireDb(): PGliteInterface {
    if (!this.db) {
      throw new DatabaseNotInitializedException()
    }
    return this.db
  }

  /**
   * Resolve the insight table name for an embedding model.
   *
   * @throws Error when no table is defined for the model's dimension.
   */
  private getTableName(embeddingModel: EmbeddingModel): string {
    const tableDefinition = sourceInsightTables[embeddingModel.dimension]
    if (!tableDefinition) {
      throw new Error(`No source insight table definition found for model: ${embeddingModel.id}`)
    }
    return tableDefinition.name
  }

  /** Format an embedding as the bracketed text literal sent for vector columns, e.g. `[1,2,3]`. */
  private toVectorLiteral(embedding: readonly number[]): string {
    return `[${embedding.join(',')}]`
  }

  /** Fetch all insights, newest first (ordered by `created_at` descending). */
  async getAllInsights(embeddingModel: EmbeddingModel): Promise<SelectSourceInsight[]> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    const result = await db.query<SelectSourceInsight>(
      `SELECT * FROM "${tableName}" ORDER BY created_at DESC`
    )
    return result.rows
  }

  /** Fetch insights whose `source_path` equals `sourcePath`, newest first. */
  async getInsightsBySourcePath(
    sourcePath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    const result = await db.query<SelectSourceInsight>(
      `SELECT * FROM "${tableName}" WHERE source_path = $1 ORDER BY created_at DESC`,
      [sourcePath]
    )
    return result.rows
  }

  /** Fetch insights whose `insight_type` equals `insightType`, newest first. */
  async getInsightsByType(
    insightType: string,
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    const result = await db.query<SelectSourceInsight>(
      `SELECT * FROM "${tableName}" WHERE insight_type = $1 ORDER BY created_at DESC`,
      [insightType]
    )
    return result.rows
  }

  /** Fetch insights whose `source_type` equals `sourceType`, newest first. */
  async getInsightsBySourceType(
    sourceType: 'document' | 'tag' | 'folder',
    embeddingModel: EmbeddingModel,
  ): Promise<SelectSourceInsight[]> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    const result = await db.query<SelectSourceInsight>(
      `SELECT * FROM "${tableName}" WHERE source_type = $1 ORDER BY created_at DESC`,
      [sourceType]
    )
    return result.rows
  }

  /** Delete every insight whose `source_path` equals `sourcePath`. */
  async deleteInsightsBySourcePath(
    sourcePath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    await db.query(
      `DELETE FROM "${tableName}" WHERE source_path = $1`,
      [sourcePath]
    )
  }

  /**
   * Delete every insight whose `source_path` is in `sourcePaths`.
   * An empty list cannot match any row, so no query is issued for it.
   */
  async deleteInsightsBySourcePaths(
    sourcePaths: string[],
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    if (sourcePaths.length === 0) {
      return
    }
    await db.query(
      `DELETE FROM "${tableName}" WHERE source_path = ANY($1)`,
      [sourcePaths]
    )
  }

  /** Delete every insight whose `insight_type` equals `insightType`. */
  async deleteInsightsByType(
    insightType: string,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    await db.query(
      `DELETE FROM "${tableName}" WHERE insight_type = $1`,
      [insightType]
    )
  }

  /** Delete all rows from the model's insight table. */
  async clearAllInsights(embeddingModel: EmbeddingModel): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    await db.query(`DELETE FROM "${tableName}"`)
  }

  /**
   * Insert a batch of insights with a single multi-row INSERT.
   *
   * Null bytes are stripped from the insight text, embeddings are sent as
   * bracketed vector literals, and `updated_at` is set to the current time.
   * An empty batch is a no-op, because an INSERT with an empty VALUES list is
   * not valid SQL.
   */
  async insertInsights(
    data: InsertSourceInsight[],
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)
    if (data.length === 0) {
      return
    }

    // Six bound parameters per row, numbered consecutively across rows.
    const columnsPerRow = 6
    const values = data
      .map((_, rowIndex) => {
        const base = rowIndex * columnsPerRow
        const slots = Array.from({ length: columnsPerRow }, (_unused, i) => `$${base + i + 1}`)
        return `(${slots.join(', ')})`
      })
      .join(',')

    // One timestamp for the whole batch.
    const now = new Date()
    const params = data.flatMap((insight) => [
      insight.insight_type,
      insight.insight.replace(/\0/g, ''), // strip null bytes
      insight.source_type,
      insight.source_path,
      this.toVectorLiteral(insight.embedding),
      now, // updated_at
    ])

    await db.query(
      `INSERT INTO "${tableName}" (insight_type, insight, source_type, source_path, embedding, updated_at)
VALUES ${values}`,
      params
    )
  }

  /**
   * Update the provided fields of one insight and refresh `updated_at`.
   *
   * Fields that are `undefined` in `data` are left untouched. Because
   * `updated_at` is always written, the SET clause is never empty.
   *
   * @param id - Primary key of the row to update.
   */
  async updateInsight(
    id: number,
    data: Partial<InsertSourceInsight>,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)

    // [column, value] pairs in the order they appear in the SET clause.
    const assignments: Array<[string, unknown]> = []
    if (data.insight_type !== undefined) {
      assignments.push(['insight_type', data.insight_type])
    }
    if (data.insight !== undefined) {
      assignments.push(['insight', data.insight.replace(/\0/g, '')])
    }
    if (data.source_type !== undefined) {
      assignments.push(['source_type', data.source_type])
    }
    if (data.source_path !== undefined) {
      assignments.push(['source_path', data.source_path])
    }
    if (data.embedding !== undefined) {
      assignments.push(['embedding', this.toVectorLiteral(data.embedding)])
    }
    assignments.push(['updated_at', new Date()])

    const setClause = assignments
      .map(([column], index) => `${column} = $${index + 1}`)
      .join(', ')
    // The row id is bound last, right after the SET values.
    const params: unknown[] = [...assignments.map(([, value]) => value), id]

    await db.query(
      `UPDATE "${tableName}" SET ${setClause} WHERE id = $${params.length}`,
      params
    )
  }

  /**
   * Vector similarity search.
   *
   * Similarity is computed as `1 - (embedding <=> query)`; rows must exceed
   * `options.minSimilarity` and are returned best-first, capped at
   * `options.limit`. Each optional filter is applied only when its array is
   * non-empty.
   *
   * @returns Matching rows without the `embedding` column, plus `similarity`.
   */
  async performSimilaritySearch(
    queryVector: number[],
    embeddingModel: EmbeddingModel,
    options: {
      minSimilarity: number
      limit: number
      insightTypes?: string[]
      sourceTypes?: ('document' | 'tag' | 'folder')[]
      sourcePaths?: string[]
    },
  ): Promise<
    (Omit<SelectSourceInsight, 'embedding'> & {
      similarity: number
    })[]
  > {
    const db = this.requireDb()
    const tableName = this.getTableName(embeddingModel)

    // Fixed slots: $1 = query vector, $2 = similarity threshold, $3 = limit.
    // Optional filters are appended from $4 onwards.
    const params: unknown[] = [
      this.toVectorLiteral(queryVector),
      options.minSimilarity,
      options.limit,
    ]
    const whereConditions = ['1 - (embedding <=> $1::vector) > $2']

    const optionalFilters: Array<[string, string[] | undefined]> = [
      ['insight_type', options.insightTypes],
      ['source_type', options.sourceTypes],
      ['source_path', options.sourcePaths],
    ]
    for (const [column, allowed] of optionalFilters) {
      if (allowed && allowed.length > 0) {
        params.push(allowed)
        // The placeholder number is the position the value was just pushed at.
        whereConditions.push(`${column} = ANY($${params.length})`)
      }
    }

    const query = `
SELECT
id, insight_type, insight, source_type, source_path, created_at, updated_at,
1 - (embedding <=> $1::vector) as similarity
FROM "${tableName}"
WHERE ${whereConditions.join(' AND ')}
ORDER BY similarity DESC
LIMIT $3
`
    type SearchResult = Omit<SelectSourceInsight, 'embedding'> & { similarity: number }
    const result = await db.query<SearchResult>(query, params)
    return result.rows
  }
}

View File

@@ -136,7 +136,7 @@ export class VectorRepository {
const tableName = this.getTableName(embeddingModel)
let scopeCondition = ''
const params: any[] = [`[${queryVector.join(',')}]`, options.minSimilarity, options.limit]
const params: unknown[] = [`[${queryVector.join(',')}]`, options.minSimilarity, options.limit]
let paramIndex = 4
if (options.scope) {