mirror of
https://github.com/EthanMarti/infio-copilot.git
synced 2026-05-08 16:10:09 +00:00
init
This commit is contained in:
162
src/database/modules/conversation/conversation-manager.ts
Normal file
162
src/database/modules/conversation/conversation-manager.ts
Normal file
@@ -0,0 +1,162 @@
|
||||
import { SerializedEditorState } from 'lexical'
|
||||
import { App } from 'obsidian'
|
||||
|
||||
import { editorStateToPlainText } from '../../../components/chat-view/chat-input/utils/editor-state-to-plain-text'
|
||||
import { ChatAssistantMessage, ChatConversationMeta, ChatMessage, ChatUserMessage } from '../../../types/chat'
|
||||
import { ContentPart } from '../../../types/llm/request'
|
||||
import { Mentionable, SerializedMentionable } from '../../../types/mentionable'
|
||||
import { deserializeMentionable, serializeMentionable } from '../../../utils/mentionable'
|
||||
import { DBManager } from '../../database-manager'
|
||||
import { InsertMessage } from '../../schema'
|
||||
|
||||
import { ConversationRepository } from './conversation-repository'
|
||||
|
||||
export class ConversationManager {
|
||||
private app: App
|
||||
private repository: ConversationRepository
|
||||
private dbManager: DBManager
|
||||
|
||||
constructor(app: App, dbManager: DBManager) {
|
||||
this.app = app
|
||||
this.dbManager = dbManager
|
||||
const db = dbManager.getPgClient()
|
||||
if (!db) throw new Error('Database not initialized')
|
||||
this.repository = new ConversationRepository(app, db)
|
||||
}
|
||||
|
||||
async createConversation(id: string, title = 'New chat'): Promise<void> {
|
||||
const conversation = {
|
||||
id,
|
||||
title,
|
||||
createdAt: new Date(),
|
||||
updatedAt: new Date(),
|
||||
}
|
||||
await this.repository.create(conversation)
|
||||
await this.dbManager.save()
|
||||
}
|
||||
|
||||
async saveConversation(id: string, messages: ChatMessage[]): Promise<void> {
|
||||
const conversation = await this.repository.findById(id)
|
||||
if (!conversation) {
|
||||
let title = 'New chat'
|
||||
if (messages.length > 0 && messages[0].role === 'user') {
|
||||
const query = editorStateToPlainText(messages[0].content)
|
||||
if (query.length > 20) {
|
||||
title = `${query.slice(0, 20)}...`
|
||||
} else {
|
||||
title = query
|
||||
}
|
||||
}
|
||||
await this.createConversation(id, title)
|
||||
}
|
||||
|
||||
// Delete existing messages
|
||||
await this.repository.deleteAllMessagesFromConversation(id)
|
||||
|
||||
// Insert new messages
|
||||
for (const message of messages) {
|
||||
const insertMessage = this.serializeMessage(message, id)
|
||||
await this.repository.createMessage(insertMessage)
|
||||
}
|
||||
|
||||
// Update conversation timestamp
|
||||
await this.repository.update(id, { updatedAt: new Date() })
|
||||
await this.dbManager.save()
|
||||
}
|
||||
|
||||
async findConversation(id: string): Promise<ChatMessage[] | null> {
|
||||
const conversation = await this.repository.findById(id)
|
||||
if (!conversation) {
|
||||
return null
|
||||
}
|
||||
|
||||
const messages = await this.repository.findMessagesByConversationId(id)
|
||||
return messages.map(msg => this.deserializeMessage(msg))
|
||||
}
|
||||
|
||||
async deleteConversation(id: string): Promise<void> {
|
||||
await this.repository.delete(id)
|
||||
await this.dbManager.save()
|
||||
}
|
||||
|
||||
getAllConversations(callback: (conversations: ChatConversationMeta[]) => void): void {
|
||||
const db = this.dbManager.getPgClient()
|
||||
db?.live.query('SELECT * FROM conversations ORDER BY updated_at', [], (results) => {
|
||||
callback(results.rows.map(conv => ({
|
||||
id: conv.id,
|
||||
title: conv.title,
|
||||
schemaVersion: 2,
|
||||
createdAt: conv.createdAt instanceof Date ? conv.createdAt.getTime() : conv.createdAt,
|
||||
updatedAt: conv.updatedAt instanceof Date ? conv.updatedAt.getTime() : conv.updatedAt,
|
||||
})))
|
||||
})
|
||||
}
|
||||
|
||||
async updateConversationTitle(id: string, title: string): Promise<void> {
|
||||
await this.repository.update(id, { title })
|
||||
await this.dbManager.save()
|
||||
}
|
||||
|
||||
private serializeMessage(message: ChatMessage, conversationId: string): InsertMessage {
|
||||
const base = {
|
||||
id: message.id,
|
||||
conversationId,
|
||||
role: message.role,
|
||||
createdAt: new Date(),
|
||||
}
|
||||
|
||||
if (message.role === 'user') {
|
||||
const userMessage: ChatUserMessage = message
|
||||
return {
|
||||
...base,
|
||||
content: userMessage.content ? JSON.stringify(userMessage.content) : null,
|
||||
promptContent: userMessage.promptContent
|
||||
? typeof userMessage.promptContent === 'string'
|
||||
? userMessage.promptContent
|
||||
: JSON.stringify(userMessage.promptContent)
|
||||
: null,
|
||||
mentionables: JSON.stringify(userMessage.mentionables.map(serializeMentionable)),
|
||||
similaritySearchResults: userMessage.similaritySearchResults
|
||||
? JSON.stringify(userMessage.similaritySearchResults)
|
||||
: null,
|
||||
}
|
||||
} else {
|
||||
const assistantMessage: ChatAssistantMessage = message
|
||||
return {
|
||||
...base,
|
||||
content: assistantMessage.content,
|
||||
metadata: assistantMessage.metadata ? JSON.stringify(assistantMessage.metadata) : null,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private deserializeMessage(message: InsertMessage): ChatMessage {
|
||||
if (message.role === 'user') {
|
||||
return {
|
||||
id: message.id,
|
||||
role: 'user',
|
||||
content: message.content ? JSON.parse(message.content) as SerializedEditorState : null,
|
||||
promptContent: message.promptContent
|
||||
? message.promptContent.startsWith('{')
|
||||
? JSON.parse(message.promptContent) as ContentPart[]
|
||||
: message.promptContent
|
||||
: null,
|
||||
mentionables: message.mentionables
|
||||
? (JSON.parse(message.mentionables) as SerializedMentionable[])
|
||||
.map(m => deserializeMentionable(m, this.app))
|
||||
.filter((m: Mentionable | null): m is Mentionable => m !== null)
|
||||
: [],
|
||||
similaritySearchResults: message.similaritySearchResults
|
||||
? JSON.parse(message.similaritySearchResults)
|
||||
: undefined,
|
||||
}
|
||||
} else {
|
||||
return {
|
||||
id: message.id,
|
||||
role: 'assistant',
|
||||
content: message.content || '',
|
||||
metadata: message.metadata ? JSON.parse(message.metadata) : undefined,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
131
src/database/modules/conversation/conversation-repository.ts
Normal file
131
src/database/modules/conversation/conversation-repository.ts
Normal file
@@ -0,0 +1,131 @@
|
||||
import { PGliteInterface } from '@electric-sql/pglite'
|
||||
import { App } from 'obsidian'
|
||||
|
||||
import {
|
||||
InsertConversation,
|
||||
InsertMessage,
|
||||
SelectConversation,
|
||||
SelectMessage,
|
||||
} from '../../schema'
|
||||
|
||||
type QueryResult<T> = {
|
||||
rows: T[]
|
||||
}
|
||||
|
||||
export class ConversationRepository {
|
||||
private app: App
|
||||
private db: PGliteInterface
|
||||
|
||||
constructor(app: App, db: PGliteInterface) {
|
||||
this.app = app
|
||||
this.db = db
|
||||
}
|
||||
|
||||
async create(conversation: InsertConversation): Promise<SelectConversation> {
|
||||
const result = await this.db.query<SelectConversation>(
|
||||
`INSERT INTO conversations (id, title, created_at, updated_at)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
RETURNING *`,
|
||||
[
|
||||
conversation.id,
|
||||
conversation.title,
|
||||
conversation.createdAt || new Date(),
|
||||
conversation.updatedAt || new Date()
|
||||
]
|
||||
) as QueryResult<SelectConversation>
|
||||
return result.rows[0]
|
||||
}
|
||||
|
||||
async createMessage(message: InsertMessage): Promise<SelectMessage> {
|
||||
const result = await this.db.query<SelectMessage>(
|
||||
`INSERT INTO messages (
|
||||
id, conversation_id, role, content,
|
||||
prompt_content, metadata, mentionables,
|
||||
similarity_search_results, created_at
|
||||
)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
||||
RETURNING *`,
|
||||
[
|
||||
message.id,
|
||||
message.conversationId,
|
||||
message.role,
|
||||
message.content,
|
||||
message.promptContent,
|
||||
message.metadata,
|
||||
message.mentionables,
|
||||
message.similaritySearchResults,
|
||||
message.createdAt || new Date()
|
||||
]
|
||||
) as QueryResult<SelectMessage>
|
||||
return result.rows[0]
|
||||
}
|
||||
|
||||
async findById(id: string): Promise<SelectConversation | undefined> {
|
||||
const result = await this.db.query<SelectConversation>(
|
||||
`SELECT * FROM conversations WHERE id = $1 LIMIT 1`,
|
||||
[id]
|
||||
) as QueryResult<SelectConversation>
|
||||
return result.rows[0]
|
||||
}
|
||||
|
||||
async findMessagesByConversationId(conversationId: string): Promise<SelectMessage[]> {
|
||||
const result = await this.db.query<SelectMessage>(
|
||||
`SELECT * FROM messages
|
||||
WHERE conversation_id = $1
|
||||
ORDER BY created_at`,
|
||||
[conversationId]
|
||||
) as QueryResult<SelectMessage>
|
||||
return result.rows
|
||||
}
|
||||
|
||||
async findAll(): Promise<SelectConversation[]> {
|
||||
const result = await this.db.query<SelectConversation>(
|
||||
`SELECT * FROM conversations ORDER BY updated_at DESC`
|
||||
) as QueryResult<SelectConversation>
|
||||
return result.rows
|
||||
}
|
||||
|
||||
async update(id: string, data: Partial<InsertConversation>): Promise<SelectConversation> {
|
||||
const setClauses: string[] = []
|
||||
const values: any[] = []
|
||||
let paramIndex = 1
|
||||
|
||||
if (data.title !== undefined) {
|
||||
setClauses.push(`title = $${paramIndex}`)
|
||||
values.push(data.title)
|
||||
paramIndex++
|
||||
}
|
||||
|
||||
// Always update updated_at
|
||||
setClauses.push(`updated_at = $${paramIndex}`)
|
||||
values.push(new Date())
|
||||
paramIndex++
|
||||
|
||||
// Add id as the last parameter
|
||||
values.push(id)
|
||||
|
||||
const result = await this.db.query<SelectConversation>(
|
||||
`UPDATE conversations
|
||||
SET ${setClauses.join(', ')}
|
||||
WHERE id = $${paramIndex}
|
||||
RETURNING *`,
|
||||
values
|
||||
) as QueryResult<SelectConversation>
|
||||
return result.rows[0]
|
||||
}
|
||||
|
||||
async delete(id: string): Promise<boolean> {
|
||||
const result = await this.db.query<SelectConversation>(
|
||||
`DELETE FROM conversations WHERE id = $1 RETURNING *`,
|
||||
[id]
|
||||
) as QueryResult<SelectConversation>
|
||||
return result.rows.length > 0
|
||||
}
|
||||
|
||||
async deleteAllMessagesFromConversation(conversationId: string): Promise<void> {
|
||||
await this.db.query(
|
||||
`DELETE FROM messages WHERE conversation_id = $1`,
|
||||
[conversationId]
|
||||
)
|
||||
}
|
||||
}
|
||||
51
src/database/modules/template/template-manager.ts
Normal file
51
src/database/modules/template/template-manager.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import fuzzysort from 'fuzzysort'
|
||||
import { App } from 'obsidian'
|
||||
|
||||
import { DBManager } from '../../database-manager'
|
||||
import { DuplicateTemplateException } from '../../exception'
|
||||
import { InsertTemplate, SelectTemplate } from '../../schema'
|
||||
|
||||
import { TemplateRepository } from './template-repository'
|
||||
|
||||
export class TemplateManager {
|
||||
private app: App
|
||||
private repository: TemplateRepository
|
||||
private dbManager: DBManager
|
||||
|
||||
constructor(app: App, dbManager: DBManager) {
|
||||
this.app = app
|
||||
this.dbManager = dbManager
|
||||
this.repository = new TemplateRepository(app, dbManager.getPgClient())
|
||||
}
|
||||
|
||||
async createTemplate(template: InsertTemplate): Promise<SelectTemplate> {
|
||||
const existingTemplate = await this.repository.findByName(template.name)
|
||||
if (existingTemplate) {
|
||||
throw new DuplicateTemplateException(template.name)
|
||||
}
|
||||
const created = await this.repository.create(template)
|
||||
await this.dbManager.save()
|
||||
return created
|
||||
}
|
||||
|
||||
async findAllTemplates(): Promise<SelectTemplate[]> {
|
||||
return await this.repository.findAll()
|
||||
}
|
||||
|
||||
async searchTemplates(query: string): Promise<SelectTemplate[]> {
|
||||
const templates = await this.findAllTemplates()
|
||||
const results = fuzzysort.go(query, templates, {
|
||||
keys: ['name'],
|
||||
threshold: 0.2,
|
||||
limit: 20,
|
||||
all: true,
|
||||
})
|
||||
return results.map((result) => result.obj)
|
||||
}
|
||||
|
||||
async deleteTemplate(id: string): Promise<boolean> {
|
||||
const deleted = await this.repository.delete(id)
|
||||
await this.dbManager.save()
|
||||
return deleted
|
||||
}
|
||||
}
|
||||
98
src/database/modules/template/template-repository.ts
Normal file
98
src/database/modules/template/template-repository.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import { PGliteInterface } from '@electric-sql/pglite'
|
||||
import { App } from 'obsidian'
|
||||
|
||||
import { DatabaseNotInitializedException } from '../../exception'
|
||||
import { type InsertTemplate, type SelectTemplate } from '../../schema'
|
||||
|
||||
export class TemplateRepository {
|
||||
private app: App
|
||||
private db: PGliteInterface | null
|
||||
|
||||
constructor(app: App, pgClient: PGliteInterface | null) {
|
||||
this.app = app
|
||||
this.db = pgClient
|
||||
}
|
||||
|
||||
async create(template: InsertTemplate): Promise<SelectTemplate> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
|
||||
const result = await this.db.query<SelectTemplate>(
|
||||
`INSERT INTO "template" (name, content)
|
||||
VALUES ($1, $2)
|
||||
RETURNING *`,
|
||||
[template.name, template.content]
|
||||
)
|
||||
return result.rows[0]
|
||||
}
|
||||
|
||||
async findAll(): Promise<SelectTemplate[]> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
const result = await this.db.query<SelectTemplate>(
|
||||
`SELECT * FROM "template"`
|
||||
)
|
||||
return result.rows
|
||||
}
|
||||
|
||||
async findByName(name: string): Promise<SelectTemplate | null> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
const result = await this.db.query<SelectTemplate>(
|
||||
`SELECT * FROM "template" WHERE name = $1`,
|
||||
[name]
|
||||
)
|
||||
return result.rows[0] ?? null
|
||||
}
|
||||
|
||||
async update(
|
||||
id: string,
|
||||
template: Partial<InsertTemplate>,
|
||||
): Promise<SelectTemplate | null> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
|
||||
const setClauses: string[] = []
|
||||
const params: any[] = []
|
||||
let paramIndex = 1
|
||||
|
||||
if (template.name !== undefined) {
|
||||
setClauses.push(`name = $${paramIndex}`)
|
||||
params.push(template.name)
|
||||
paramIndex++
|
||||
}
|
||||
|
||||
if (template.content !== undefined) {
|
||||
setClauses.push(`content = $${paramIndex}`)
|
||||
params.push(template.content)
|
||||
paramIndex++
|
||||
}
|
||||
|
||||
setClauses.push(`updated_at = now()`)
|
||||
params.push(id)
|
||||
|
||||
const result = await this.db.query<SelectTemplate>(
|
||||
`UPDATE "template"
|
||||
SET ${setClauses.join(', ')}
|
||||
WHERE id = $${paramIndex}
|
||||
RETURNING *`,
|
||||
params
|
||||
)
|
||||
return result.rows[0] ?? null
|
||||
}
|
||||
|
||||
async delete(id: string): Promise<boolean> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
const result = await this.db.query<SelectTemplate>(
|
||||
`DELETE FROM "template" WHERE id = $1 RETURNING *`,
|
||||
[id]
|
||||
)
|
||||
return result.rows.length > 0
|
||||
}
|
||||
}
|
||||
277
src/database/modules/vector/vector-manager.ts
Normal file
277
src/database/modules/vector/vector-manager.ts
Normal file
@@ -0,0 +1,277 @@
|
||||
import { backOff } from 'exponential-backoff'
|
||||
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'
|
||||
import { minimatch } from 'minimatch'
|
||||
import { App, Notice, TFile } from 'obsidian'
|
||||
import pLimit from 'p-limit'
|
||||
|
||||
import { IndexProgress } from '../../../components/chat-view/QueryProgress'
|
||||
import {
|
||||
LLMAPIKeyInvalidException,
|
||||
LLMAPIKeyNotSetException,
|
||||
LLMBaseUrlNotSetException,
|
||||
LLMRateLimitExceededException,
|
||||
} from '../../../core/llm/exception'
|
||||
import { InsertVector, SelectVector } from '../../../database/schema'
|
||||
import { EmbeddingModel } from '../../../types/embedding'
|
||||
import { openSettingsModalWithError } from '../../../utils/open-settings-modal'
|
||||
import { DBManager } from '../../database-manager'
|
||||
|
||||
import { VectorRepository } from './vector-repository'
|
||||
|
||||
export class VectorManager {
|
||||
private app: App
|
||||
private repository: VectorRepository
|
||||
private dbManager: DBManager
|
||||
|
||||
constructor(app: App, dbManager: DBManager) {
|
||||
this.app = app
|
||||
this.dbManager = dbManager
|
||||
this.repository = new VectorRepository(app, dbManager.getPgClient())
|
||||
}
|
||||
|
||||
async performSimilaritySearch(
|
||||
queryVector: number[],
|
||||
embeddingModel: EmbeddingModel,
|
||||
options: {
|
||||
minSimilarity: number
|
||||
limit: number
|
||||
scope?: {
|
||||
files: string[]
|
||||
folders: string[]
|
||||
}
|
||||
},
|
||||
): Promise<
|
||||
(Omit<SelectVector, 'embedding'> & {
|
||||
similarity: number
|
||||
})[]
|
||||
> {
|
||||
return await this.repository.performSimilaritySearch(
|
||||
queryVector,
|
||||
embeddingModel,
|
||||
options,
|
||||
)
|
||||
}
|
||||
|
||||
async updateVaultIndex(
|
||||
embeddingModel: EmbeddingModel,
|
||||
options: {
|
||||
chunkSize: number
|
||||
excludePatterns: string[]
|
||||
includePatterns: string[]
|
||||
reindexAll?: boolean
|
||||
},
|
||||
updateProgress?: (indexProgress: IndexProgress) => void,
|
||||
): Promise<void> {
|
||||
let filesToIndex: TFile[]
|
||||
if (options.reindexAll) {
|
||||
filesToIndex = await this.getFilesToIndex({
|
||||
embeddingModel: embeddingModel,
|
||||
excludePatterns: options.excludePatterns,
|
||||
includePatterns: options.includePatterns,
|
||||
reindexAll: true,
|
||||
})
|
||||
await this.repository.clearAllVectors(embeddingModel)
|
||||
} else {
|
||||
await this.deleteVectorsForDeletedFiles(embeddingModel)
|
||||
filesToIndex = await this.getFilesToIndex({
|
||||
embeddingModel: embeddingModel,
|
||||
excludePatterns: options.excludePatterns,
|
||||
includePatterns: options.includePatterns,
|
||||
})
|
||||
await this.repository.deleteVectorsForMultipleFiles(
|
||||
filesToIndex.map((file) => file.path),
|
||||
embeddingModel,
|
||||
)
|
||||
}
|
||||
|
||||
if (filesToIndex.length === 0) {
|
||||
return
|
||||
}
|
||||
|
||||
const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
|
||||
'markdown',
|
||||
{
|
||||
chunkSize: options.chunkSize,
|
||||
// TODO: Use token-based chunking after migrating to WebAssembly-based tiktoken
|
||||
// Current token counting method is too slow for practical use
|
||||
// lengthFunction: async (text) => {
|
||||
// return await tokenCount(text)
|
||||
// },
|
||||
},
|
||||
)
|
||||
|
||||
const contentChunks: InsertVector[] = (
|
||||
await Promise.all(
|
||||
filesToIndex.map(async (file) => {
|
||||
const fileContent = await this.app.vault.cachedRead(file)
|
||||
const fileDocuments = await textSplitter.createDocuments([
|
||||
fileContent,
|
||||
])
|
||||
return fileDocuments.map((chunk): InsertVector => {
|
||||
return {
|
||||
path: file.path,
|
||||
mtime: file.stat.mtime,
|
||||
content: chunk.pageContent,
|
||||
metadata: {
|
||||
startLine: chunk.metadata.loc.lines.from as number,
|
||||
endLine: chunk.metadata.loc.lines.to as number,
|
||||
},
|
||||
}
|
||||
})
|
||||
}),
|
||||
)
|
||||
).flat()
|
||||
|
||||
updateProgress?.({
|
||||
completedChunks: 0,
|
||||
totalChunks: contentChunks.length,
|
||||
totalFiles: filesToIndex.length,
|
||||
})
|
||||
|
||||
const embeddingProgress = { completed: 0, inserted: 0 }
|
||||
const embeddingChunks: InsertVector[] = []
|
||||
const batchSize = 100
|
||||
const limit = pLimit(50)
|
||||
const abortController = new AbortController()
|
||||
const tasks = contentChunks.map((chunk) =>
|
||||
limit(async () => {
|
||||
if (abortController.signal.aborted) {
|
||||
throw new Error('Operation was aborted')
|
||||
}
|
||||
try {
|
||||
await backOff(
|
||||
async () => {
|
||||
const embedding = await embeddingModel.getEmbedding(chunk.content)
|
||||
const embeddedChunk = {
|
||||
path: chunk.path,
|
||||
mtime: chunk.mtime,
|
||||
content: chunk.content,
|
||||
embedding,
|
||||
metadata: chunk.metadata,
|
||||
}
|
||||
embeddingChunks.push(embeddedChunk)
|
||||
embeddingProgress.completed++
|
||||
updateProgress?.({
|
||||
completedChunks: embeddingProgress.completed,
|
||||
totalChunks: contentChunks.length,
|
||||
totalFiles: filesToIndex.length,
|
||||
})
|
||||
|
||||
// Insert vectors in batches
|
||||
if (
|
||||
embeddingChunks.length >=
|
||||
embeddingProgress.inserted + batchSize ||
|
||||
embeddingChunks.length === contentChunks.length
|
||||
) {
|
||||
await this.repository.insertVectors(
|
||||
embeddingChunks.slice(
|
||||
embeddingProgress.inserted,
|
||||
embeddingProgress.inserted + batchSize,
|
||||
),
|
||||
embeddingModel,
|
||||
)
|
||||
embeddingProgress.inserted += batchSize
|
||||
}
|
||||
},
|
||||
{
|
||||
numOfAttempts: 5,
|
||||
startingDelay: 1000,
|
||||
timeMultiple: 1.5,
|
||||
jitter: 'full',
|
||||
},
|
||||
)
|
||||
} catch (error) {
|
||||
abortController.abort()
|
||||
throw error
|
||||
}
|
||||
}),
|
||||
)
|
||||
|
||||
try {
|
||||
await Promise.all(tasks)
|
||||
} catch (error) {
|
||||
if (
|
||||
error instanceof LLMAPIKeyNotSetException ||
|
||||
error instanceof LLMAPIKeyInvalidException ||
|
||||
error instanceof LLMBaseUrlNotSetException
|
||||
) {
|
||||
openSettingsModalWithError(this.app, (error as Error).message)
|
||||
} else if (error instanceof LLMRateLimitExceededException) {
|
||||
new Notice(error.message)
|
||||
} else {
|
||||
console.error('Error embedding chunks:', error)
|
||||
throw error
|
||||
}
|
||||
} finally {
|
||||
await this.dbManager.save()
|
||||
}
|
||||
}
|
||||
|
||||
private async deleteVectorsForDeletedFiles(embeddingModel: EmbeddingModel) {
|
||||
const indexedFilePaths =
|
||||
await this.repository.getIndexedFilePaths(embeddingModel)
|
||||
for (const filePath of indexedFilePaths) {
|
||||
if (!this.app.vault.getAbstractFileByPath(filePath)) {
|
||||
await this.repository.deleteVectorsForMultipleFiles(
|
||||
[filePath],
|
||||
embeddingModel,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private async getFilesToIndex({
|
||||
embeddingModel,
|
||||
excludePatterns,
|
||||
includePatterns,
|
||||
reindexAll,
|
||||
}: {
|
||||
embeddingModel: EmbeddingModel
|
||||
excludePatterns: string[]
|
||||
includePatterns: string[]
|
||||
reindexAll?: boolean
|
||||
}): Promise<TFile[]> {
|
||||
let filesToIndex = this.app.vault.getMarkdownFiles()
|
||||
|
||||
filesToIndex = filesToIndex.filter((file) => {
|
||||
return !excludePatterns.some((pattern) => minimatch(file.path, pattern))
|
||||
})
|
||||
|
||||
if (includePatterns.length > 0) {
|
||||
filesToIndex = filesToIndex.filter((file) => {
|
||||
return includePatterns.some((pattern) => minimatch(file.path, pattern))
|
||||
})
|
||||
}
|
||||
|
||||
if (reindexAll) {
|
||||
return filesToIndex
|
||||
}
|
||||
|
||||
// Check for updated or new files
|
||||
filesToIndex = await Promise.all(
|
||||
filesToIndex.map(async (file) => {
|
||||
const fileChunks = await this.repository.getVectorsByFilePath(
|
||||
file.path,
|
||||
embeddingModel,
|
||||
)
|
||||
if (fileChunks.length === 0) {
|
||||
// File is not indexed, so we need to index it
|
||||
const fileContent = await this.app.vault.cachedRead(file)
|
||||
if (fileContent.length === 0) {
|
||||
// Ignore empty files
|
||||
return null
|
||||
}
|
||||
return file
|
||||
}
|
||||
const outOfDate = file.stat.mtime > fileChunks[0].mtime
|
||||
if (outOfDate) {
|
||||
// File has changed, so we need to re-index it
|
||||
return file
|
||||
}
|
||||
return null
|
||||
}),
|
||||
).then((files) => files.filter(Boolean) as TFile[])
|
||||
|
||||
return filesToIndex
|
||||
}
|
||||
}
|
||||
180
src/database/modules/vector/vector-repository.ts
Normal file
180
src/database/modules/vector/vector-repository.ts
Normal file
@@ -0,0 +1,180 @@
|
||||
import { PGliteInterface } from '@electric-sql/pglite'
|
||||
import { App } from 'obsidian'
|
||||
|
||||
import { EmbeddingModel } from '../../../types/embedding'
|
||||
import { DatabaseNotInitializedException } from '../../exception'
|
||||
import { InsertVector, SelectVector, vectorTables } from '../../schema'
|
||||
|
||||
export class VectorRepository {
|
||||
private app: App
|
||||
private db: PGliteInterface | null
|
||||
|
||||
constructor(app: App, pgClient: PGliteInterface | null) {
|
||||
this.app = app
|
||||
this.db = pgClient
|
||||
}
|
||||
|
||||
private getTableName(embeddingModel: EmbeddingModel): string {
|
||||
const tableDefinition = vectorTables[embeddingModel.dimension]
|
||||
if (!tableDefinition) {
|
||||
throw new Error(`No table definition found for model: ${embeddingModel.id}`)
|
||||
}
|
||||
return tableDefinition.name
|
||||
}
|
||||
|
||||
async getIndexedFilePaths(embeddingModel: EmbeddingModel): Promise<string[]> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
const tableName = this.getTableName(embeddingModel)
|
||||
const result = await this.db.query<{ path: string }>(
|
||||
`SELECT DISTINCT path FROM "${tableName}"`
|
||||
)
|
||||
return result.rows.map((row: { path: string }) => row.path)
|
||||
}
|
||||
|
||||
async getVectorsByFilePath(
|
||||
filePath: string,
|
||||
embeddingModel: EmbeddingModel,
|
||||
): Promise<SelectVector[]> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
const tableName = this.getTableName(embeddingModel)
|
||||
const result = await this.db.query<SelectVector>(
|
||||
`SELECT * FROM "${tableName}" WHERE path = $1`,
|
||||
[filePath]
|
||||
)
|
||||
return result.rows
|
||||
}
|
||||
|
||||
async deleteVectorsForSingleFile(
|
||||
filePath: string,
|
||||
embeddingModel: EmbeddingModel,
|
||||
): Promise<void> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
const tableName = this.getTableName(embeddingModel)
|
||||
await this.db.query(
|
||||
`DELETE FROM "${tableName}" WHERE path = $1`,
|
||||
[filePath]
|
||||
)
|
||||
}
|
||||
|
||||
async deleteVectorsForMultipleFiles(
|
||||
filePaths: string[],
|
||||
embeddingModel: EmbeddingModel,
|
||||
): Promise<void> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
const tableName = this.getTableName(embeddingModel)
|
||||
await this.db.query(
|
||||
`DELETE FROM "${tableName}" WHERE path = ANY($1)`,
|
||||
[filePaths]
|
||||
)
|
||||
}
|
||||
|
||||
async clearAllVectors(embeddingModel: EmbeddingModel): Promise<void> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
const tableName = this.getTableName(embeddingModel)
|
||||
await this.db.query(`DELETE FROM "${tableName}"`)
|
||||
}
|
||||
|
||||
async insertVectors(
|
||||
data: InsertVector[],
|
||||
embeddingModel: EmbeddingModel,
|
||||
): Promise<void> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
const tableName = this.getTableName(embeddingModel)
|
||||
|
||||
// 构建批量插入的 SQL
|
||||
const values = data.map((vector, index) => {
|
||||
const offset = index * 5
|
||||
return `($${offset + 1}, $${offset + 2}, $${offset + 3}, $${offset + 4}, $${offset + 5})`
|
||||
}).join(',')
|
||||
|
||||
const params = data.flatMap(vector => [
|
||||
vector.path,
|
||||
vector.mtime,
|
||||
vector.content,
|
||||
`[${vector.embedding.join(',')}]`, // 转换为PostgreSQL vector格式
|
||||
vector.metadata
|
||||
])
|
||||
|
||||
await this.db.query(
|
||||
`INSERT INTO "${tableName}" (path, mtime, content, embedding, metadata)
|
||||
VALUES ${values}`,
|
||||
params
|
||||
)
|
||||
}
|
||||
|
||||
async performSimilaritySearch(
|
||||
queryVector: number[],
|
||||
embeddingModel: EmbeddingModel,
|
||||
options: {
|
||||
minSimilarity: number
|
||||
limit: number
|
||||
scope?: {
|
||||
files: string[]
|
||||
folders: string[]
|
||||
}
|
||||
},
|
||||
): Promise<
|
||||
(Omit<SelectVector, 'embedding'> & {
|
||||
similarity: number
|
||||
})[]
|
||||
> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
const tableName = this.getTableName(embeddingModel)
|
||||
|
||||
let scopeCondition = ''
|
||||
const params: any[] = [`[${queryVector.join(',')}]`, options.minSimilarity, options.limit]
|
||||
let paramIndex = 4
|
||||
|
||||
if (options.scope) {
|
||||
const conditions: string[] = []
|
||||
|
||||
if (options.scope.files.length > 0) {
|
||||
conditions.push(`path = ANY($${paramIndex})`)
|
||||
params.push(options.scope.files)
|
||||
paramIndex++
|
||||
}
|
||||
|
||||
if (options.scope.folders.length > 0) {
|
||||
const folderConditions = options.scope.folders.map((folder, idx) => {
|
||||
params.push(`${folder}/%`)
|
||||
return `path LIKE $${paramIndex + idx}`
|
||||
})
|
||||
conditions.push(`(${folderConditions.join(' OR ')})`)
|
||||
paramIndex += options.scope.folders.length
|
||||
}
|
||||
|
||||
if (conditions.length > 0) {
|
||||
scopeCondition = `AND (${conditions.join(' OR ')})`
|
||||
}
|
||||
}
|
||||
|
||||
const query = `
|
||||
SELECT
|
||||
id, path, mtime, content, metadata,
|
||||
1 - (embedding <=> $1::vector) as similarity
|
||||
FROM "${tableName}"
|
||||
WHERE 1 - (embedding <=> $1::vector) > $2
|
||||
${scopeCondition}
|
||||
ORDER BY similarity DESC
|
||||
LIMIT $3
|
||||
`
|
||||
|
||||
type SearchResult = Omit<SelectVector, 'embedding'> & { similarity: number }
|
||||
const result = await this.db.query<SearchResult>(query, params)
|
||||
return result.rows
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user