mirror of
https://github.com/EthanMarti/infio-copilot.git
synced 2026-05-08 16:10:09 +00:00
use web worker to run pglite
This commit is contained in:
@@ -1,38 +1,33 @@
|
||||
import { PGlite } from '@electric-sql/pglite'
|
||||
// @ts-expect-error
|
||||
import { type PGliteWithLive, live } from '@electric-sql/pglite/live'
|
||||
import { App, normalizePath } from 'obsidian'
|
||||
import { type PGliteWithLive } from '@electric-sql/pglite/live'
|
||||
import { App } from 'obsidian'
|
||||
|
||||
import { PGLITE_DB_PATH } from '../constants'
|
||||
// import { PGLITE_DB_PATH } from '../constants'
|
||||
import { createAndInitDb } from '../pgworker'
|
||||
|
||||
import { ConversationManager } from './modules/conversation/conversation-manager'
|
||||
import { TemplateManager } from './modules/template/template-manager'
|
||||
import { VectorManager } from './modules/vector/vector-manager'
|
||||
import { pgliteResources } from './pglite-resources'
|
||||
import { migrations } from './sql'
|
||||
// import { pgliteResources } from './pglite-resources'
|
||||
// import { migrations } from './sql'
|
||||
|
||||
export class DBManager {
|
||||
private app: App
|
||||
private dbPath: string
|
||||
// private app: App
|
||||
// private dbPath: string
|
||||
private db: PGliteWithLive | null = null
|
||||
// private db: PgliteDatabase | null = null
|
||||
private vectorManager: VectorManager
|
||||
private templateManager: TemplateManager
|
||||
private conversationManager: ConversationManager
|
||||
|
||||
constructor(app: App, dbPath: string) {
|
||||
constructor(app: App) {
|
||||
this.app = app
|
||||
this.dbPath = dbPath
|
||||
// this.dbPath = dbPath
|
||||
}
|
||||
|
||||
static async create(app: App): Promise<DBManager> {
|
||||
const dbManager = new DBManager(app, normalizePath(PGLITE_DB_PATH))
|
||||
await dbManager.loadExistingDatabase()
|
||||
if (!dbManager.db) {
|
||||
await dbManager.createNewDatabase()
|
||||
}
|
||||
await dbManager.migrateDatabase()
|
||||
await dbManager.save()
|
||||
const dbManager = new DBManager(app)
|
||||
dbManager.db = await createAndInitDb()
|
||||
|
||||
dbManager.vectorManager = new VectorManager(app, dbManager)
|
||||
dbManager.templateManager = new TemplateManager(app, dbManager)
|
||||
@@ -57,81 +52,70 @@ export class DBManager {
|
||||
return this.conversationManager
|
||||
}
|
||||
|
||||
private async createNewDatabase() {
|
||||
const { fsBundle, wasmModule, vectorExtensionBundlePath } =
|
||||
await this.loadPGliteResources()
|
||||
this.db = await PGlite.create({
|
||||
fsBundle: fsBundle,
|
||||
wasmModule: wasmModule,
|
||||
extensions: {
|
||||
vector: vectorExtensionBundlePath,
|
||||
live,
|
||||
},
|
||||
})
|
||||
}
|
||||
// private async createNewDatabase() {
|
||||
// const { fsBundle, wasmModule, vectorExtensionBundlePath } =
|
||||
// await this.loadPGliteResources()
|
||||
// this.db = await PGlite.create({
|
||||
// fsBundle: fsBundle,
|
||||
// wasmModule: wasmModule,
|
||||
// extensions: {
|
||||
// vector: vectorExtensionBundlePath,
|
||||
// live,
|
||||
// },
|
||||
// })
|
||||
// }
|
||||
|
||||
private async loadExistingDatabase() {
|
||||
try {
|
||||
const databaseFileExists = await this.app.vault.adapter.exists(
|
||||
this.dbPath,
|
||||
)
|
||||
if (!databaseFileExists) {
|
||||
return null
|
||||
}
|
||||
const fileBuffer = await this.app.vault.adapter.readBinary(this.dbPath)
|
||||
const fileBlob = new Blob([fileBuffer], { type: 'application/x-gzip' })
|
||||
const { fsBundle, wasmModule, vectorExtensionBundlePath } =
|
||||
await this.loadPGliteResources()
|
||||
this.db = await PGlite.create({
|
||||
loadDataDir: fileBlob,
|
||||
fsBundle: fsBundle,
|
||||
wasmModule: wasmModule,
|
||||
extensions: {
|
||||
vector: vectorExtensionBundlePath,
|
||||
live
|
||||
},
|
||||
})
|
||||
// return drizzle(this.pgClient)
|
||||
} catch (error) {
|
||||
console.error('Error loading database:', error)
|
||||
console.log(this.dbPath)
|
||||
return null
|
||||
}
|
||||
}
|
||||
// private async loadExistingDatabase() {
|
||||
// try {
|
||||
// const databaseFileExists = await this.app.vault.adapter.exists(
|
||||
// this.dbPath,
|
||||
// )
|
||||
// if (!databaseFileExists) {
|
||||
// return null
|
||||
// }
|
||||
// const fileBuffer = await this.app.vault.adapter.readBinary(this.dbPath)
|
||||
// const fileBlob = new Blob([fileBuffer], { type: 'application/x-gzip' })
|
||||
// const { fsBundle, wasmModule, vectorExtensionBundlePath } =
|
||||
// await this.loadPGliteResources()
|
||||
// this.db = await PGlite.create({
|
||||
// loadDataDir: fileBlob,
|
||||
// fsBundle: fsBundle,
|
||||
// wasmModule: wasmModule,
|
||||
// extensions: {
|
||||
// vector: vectorExtensionBundlePath,
|
||||
// live
|
||||
// },
|
||||
// })
|
||||
// // return drizzle(this.pgClient)
|
||||
// } catch (error) {
|
||||
// console.error('Error loading database:', error)
|
||||
// console.log(this.dbPath)
|
||||
// return null
|
||||
// }
|
||||
// }
|
||||
|
||||
private async migrateDatabase(): Promise<void> {
|
||||
if (!this.db) {
|
||||
throw new Error('Database client not initialized');
|
||||
}
|
||||
// private async migrateDatabase(): Promise<void> {
|
||||
// if (!this.db) {
|
||||
// throw new Error('Database client not initialized');
|
||||
// }
|
||||
|
||||
try {
|
||||
// Execute SQL migrations
|
||||
for (const [_key, migration] of Object.entries(migrations)) {
|
||||
// Split SQL into individual commands and execute them one by one
|
||||
const commands = migration.sql.split('\n\n').filter(cmd => cmd.trim());
|
||||
for (const command of commands) {
|
||||
await this.db.query(command);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error executing SQL migrations:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
// try {
|
||||
// // Execute SQL migrations
|
||||
// for (const [_key, migration] of Object.entries(migrations)) {
|
||||
// // Split SQL into individual commands and execute them one by one
|
||||
// const commands = migration.sql.split('\n\n').filter(cmd => cmd.trim());
|
||||
// for (const command of commands) {
|
||||
// await this.db.query(command);
|
||||
// }
|
||||
// }
|
||||
// } catch (error) {
|
||||
// console.error('Error executing SQL migrations:', error);
|
||||
// throw error;
|
||||
// }
|
||||
// }
|
||||
|
||||
async save(): Promise<void> {
|
||||
if (!this.db) {
|
||||
return
|
||||
}
|
||||
try {
|
||||
const blob: Blob = await this.db.dumpDataDir('gzip')
|
||||
await this.app.vault.adapter.writeBinary(
|
||||
this.dbPath,
|
||||
Buffer.from(await blob.arrayBuffer()),
|
||||
)
|
||||
} catch (error) {
|
||||
console.error('Error saving database:', error)
|
||||
}
|
||||
console.log("need remove")
|
||||
}
|
||||
|
||||
async cleanup() {
|
||||
@@ -139,37 +123,37 @@ export class DBManager {
|
||||
this.db = null
|
||||
}
|
||||
|
||||
private async loadPGliteResources(): Promise<{
|
||||
fsBundle: Blob
|
||||
wasmModule: WebAssembly.Module
|
||||
vectorExtensionBundlePath: URL
|
||||
}> {
|
||||
try {
|
||||
// Convert base64 to binary data
|
||||
const wasmBinary = Buffer.from(pgliteResources.wasmBase64, 'base64')
|
||||
const dataBinary = Buffer.from(pgliteResources.dataBase64, 'base64')
|
||||
const vectorBinary = Buffer.from(pgliteResources.vectorBase64, 'base64')
|
||||
// private async loadPGliteResources(): Promise<{
|
||||
// fsBundle: Blob
|
||||
// wasmModule: WebAssembly.Module
|
||||
// vectorExtensionBundlePath: URL
|
||||
// }> {
|
||||
// try {
|
||||
// // Convert base64 to binary data
|
||||
// const wasmBinary = Buffer.from(pgliteResources.wasmBase64, 'base64')
|
||||
// const dataBinary = Buffer.from(pgliteResources.dataBase64, 'base64')
|
||||
// const vectorBinary = Buffer.from(pgliteResources.vectorBase64, 'base64')
|
||||
|
||||
// Create blobs from binary data
|
||||
const fsBundle = new Blob([dataBinary], {
|
||||
type: 'application/octet-stream',
|
||||
})
|
||||
const wasmModule = await WebAssembly.compile(wasmBinary)
|
||||
// // Create blobs from binary data
|
||||
// const fsBundle = new Blob([dataBinary], {
|
||||
// type: 'application/octet-stream',
|
||||
// })
|
||||
// const wasmModule = await WebAssembly.compile(wasmBinary)
|
||||
|
||||
// Create a blob URL for the vector extension
|
||||
const vectorBlob = new Blob([vectorBinary], {
|
||||
type: 'application/gzip',
|
||||
})
|
||||
const vectorExtensionBundlePath = URL.createObjectURL(vectorBlob)
|
||||
// // Create a blob URL for the vector extension
|
||||
// const vectorBlob = new Blob([vectorBinary], {
|
||||
// type: 'application/gzip',
|
||||
// })
|
||||
// const vectorExtensionBundlePath = URL.createObjectURL(vectorBlob)
|
||||
|
||||
return {
|
||||
fsBundle,
|
||||
wasmModule,
|
||||
vectorExtensionBundlePath: new URL(vectorExtensionBundlePath),
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error loading PGlite resources:', error)
|
||||
throw error
|
||||
}
|
||||
}
|
||||
// return {
|
||||
// fsBundle,
|
||||
// wasmModule,
|
||||
// vectorExtensionBundlePath: new URL(vectorExtensionBundlePath),
|
||||
// }
|
||||
// } catch (error) {
|
||||
// console.error('Error loading PGlite resources:', error)
|
||||
// throw error
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
@@ -30,7 +30,6 @@ export class ConversationManager {
|
||||
updatedAt: new Date(),
|
||||
}
|
||||
await this.repository.create(conversation)
|
||||
await this.dbManager.save()
|
||||
}
|
||||
|
||||
async saveConversation(id: string, messages: ChatMessage[]): Promise<void> {
|
||||
@@ -59,7 +58,6 @@ export class ConversationManager {
|
||||
|
||||
// Update conversation timestamp
|
||||
await this.repository.update(id, { updatedAt: new Date() })
|
||||
await this.dbManager.save()
|
||||
}
|
||||
|
||||
async findConversation(id: string): Promise<ChatMessage[] | null> {
|
||||
@@ -74,7 +72,6 @@ export class ConversationManager {
|
||||
|
||||
async deleteConversation(id: string): Promise<void> {
|
||||
await this.repository.delete(id)
|
||||
await this.dbManager.save()
|
||||
}
|
||||
|
||||
getAllConversations(callback: (conversations: ChatConversationMeta[]) => void): void {
|
||||
@@ -92,7 +89,6 @@ export class ConversationManager {
|
||||
|
||||
async updateConversationTitle(id: string, title: string): Promise<void> {
|
||||
await this.repository.update(id, { title })
|
||||
await this.dbManager.save()
|
||||
}
|
||||
|
||||
// convert ChatMessage to InsertMessage
|
||||
|
||||
@@ -24,7 +24,6 @@ export class TemplateManager {
|
||||
throw new DuplicateTemplateException(template.name)
|
||||
}
|
||||
const created = await this.repository.create(template)
|
||||
await this.dbManager.save()
|
||||
return created
|
||||
}
|
||||
|
||||
@@ -45,7 +44,6 @@ export class TemplateManager {
|
||||
|
||||
async deleteTemplate(id: string): Promise<boolean> {
|
||||
const deleted = await this.repository.delete(id)
|
||||
await this.dbManager.save()
|
||||
return deleted
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,10 +6,10 @@ import pLimit from 'p-limit'
|
||||
|
||||
import { IndexProgress } from '../../../components/chat-view/QueryProgress'
|
||||
import {
|
||||
LLMAPIKeyInvalidException,
|
||||
LLMAPIKeyNotSetException,
|
||||
LLMBaseUrlNotSetException,
|
||||
LLMRateLimitExceededException,
|
||||
LLMAPIKeyInvalidException,
|
||||
LLMAPIKeyNotSetException,
|
||||
LLMBaseUrlNotSetException,
|
||||
LLMRateLimitExceededException,
|
||||
} from '../../../core/llm/exception'
|
||||
import { InsertVector, SelectVector } from '../../../database/schema'
|
||||
import { EmbeddingModel } from '../../../types/embedding'
|
||||
@@ -19,260 +19,353 @@ import { DBManager } from '../../database-manager'
|
||||
import { VectorRepository } from './vector-repository'
|
||||
|
||||
export class VectorManager {
|
||||
private app: App
|
||||
private repository: VectorRepository
|
||||
private dbManager: DBManager
|
||||
private app: App
|
||||
private repository: VectorRepository
|
||||
private dbManager: DBManager
|
||||
|
||||
constructor(app: App, dbManager: DBManager) {
|
||||
this.app = app
|
||||
this.dbManager = dbManager
|
||||
this.repository = new VectorRepository(app, dbManager.getPgClient())
|
||||
}
|
||||
constructor(app: App, dbManager: DBManager) {
|
||||
this.app = app
|
||||
this.dbManager = dbManager
|
||||
this.repository = new VectorRepository(app, dbManager.getPgClient())
|
||||
}
|
||||
|
||||
async performSimilaritySearch(
|
||||
queryVector: number[],
|
||||
embeddingModel: EmbeddingModel,
|
||||
options: {
|
||||
minSimilarity: number
|
||||
limit: number
|
||||
scope?: {
|
||||
files: string[]
|
||||
folders: string[]
|
||||
}
|
||||
},
|
||||
): Promise<
|
||||
(Omit<SelectVector, 'embedding'> & {
|
||||
similarity: number
|
||||
})[]
|
||||
> {
|
||||
return await this.repository.performSimilaritySearch(
|
||||
queryVector,
|
||||
embeddingModel,
|
||||
options,
|
||||
)
|
||||
}
|
||||
async performSimilaritySearch(
|
||||
queryVector: number[],
|
||||
embeddingModel: EmbeddingModel,
|
||||
options: {
|
||||
minSimilarity: number
|
||||
limit: number
|
||||
scope?: {
|
||||
files: string[]
|
||||
folders: string[]
|
||||
}
|
||||
},
|
||||
): Promise<
|
||||
(Omit<SelectVector, 'embedding'> & {
|
||||
similarity: number
|
||||
})[]
|
||||
> {
|
||||
return await this.repository.performSimilaritySearch(
|
||||
queryVector,
|
||||
embeddingModel,
|
||||
options,
|
||||
)
|
||||
}
|
||||
|
||||
async updateVaultIndex(
|
||||
embeddingModel: EmbeddingModel,
|
||||
options: {
|
||||
chunkSize: number
|
||||
excludePatterns: string[]
|
||||
includePatterns: string[]
|
||||
reindexAll?: boolean
|
||||
},
|
||||
updateProgress?: (indexProgress: IndexProgress) => void,
|
||||
): Promise<void> {
|
||||
let filesToIndex: TFile[]
|
||||
if (options.reindexAll) {
|
||||
filesToIndex = await this.getFilesToIndex({
|
||||
embeddingModel: embeddingModel,
|
||||
excludePatterns: options.excludePatterns,
|
||||
includePatterns: options.includePatterns,
|
||||
reindexAll: true,
|
||||
})
|
||||
await this.repository.clearAllVectors(embeddingModel)
|
||||
} else {
|
||||
await this.deleteVectorsForDeletedFiles(embeddingModel)
|
||||
filesToIndex = await this.getFilesToIndex({
|
||||
embeddingModel: embeddingModel,
|
||||
excludePatterns: options.excludePatterns,
|
||||
includePatterns: options.includePatterns,
|
||||
})
|
||||
await this.repository.deleteVectorsForMultipleFiles(
|
||||
filesToIndex.map((file) => file.path),
|
||||
embeddingModel,
|
||||
)
|
||||
}
|
||||
async updateVaultIndex(
|
||||
embeddingModel: EmbeddingModel,
|
||||
options: {
|
||||
chunkSize: number
|
||||
excludePatterns: string[]
|
||||
includePatterns: string[]
|
||||
reindexAll?: boolean
|
||||
},
|
||||
updateProgress?: (indexProgress: IndexProgress) => void,
|
||||
): Promise<void> {
|
||||
let filesToIndex: TFile[]
|
||||
if (options.reindexAll) {
|
||||
filesToIndex = await this.getFilesToIndex({
|
||||
embeddingModel: embeddingModel,
|
||||
excludePatterns: options.excludePatterns,
|
||||
includePatterns: options.includePatterns,
|
||||
reindexAll: true,
|
||||
})
|
||||
await this.repository.clearAllVectors(embeddingModel)
|
||||
} else {
|
||||
await this.cleanVectorsForDeletedFiles(embeddingModel)
|
||||
filesToIndex = await this.getFilesToIndex({
|
||||
embeddingModel: embeddingModel,
|
||||
excludePatterns: options.excludePatterns,
|
||||
includePatterns: options.includePatterns,
|
||||
})
|
||||
await this.repository.deleteVectorsForMultipleFiles(
|
||||
filesToIndex.map((file) => file.path),
|
||||
embeddingModel,
|
||||
)
|
||||
}
|
||||
|
||||
if (filesToIndex.length === 0) {
|
||||
return
|
||||
}
|
||||
if (filesToIndex.length === 0) {
|
||||
return
|
||||
}
|
||||
|
||||
const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
|
||||
'markdown',
|
||||
{
|
||||
chunkSize: options.chunkSize,
|
||||
// TODO: Use token-based chunking after migrating to WebAssembly-based tiktoken
|
||||
// Current token counting method is too slow for practical use
|
||||
// lengthFunction: async (text) => {
|
||||
// return await tokenCount(text)
|
||||
// },
|
||||
},
|
||||
)
|
||||
const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
|
||||
'markdown',
|
||||
{
|
||||
chunkSize: options.chunkSize,
|
||||
// TODO: Use token-based chunking after migrating to WebAssembly-based tiktoken
|
||||
// Current token counting method is too slow for practical use
|
||||
// lengthFunction: async (text) => {
|
||||
// return await tokenCount(text)
|
||||
// },
|
||||
},
|
||||
)
|
||||
|
||||
const contentChunks: InsertVector[] = (
|
||||
await Promise.all(
|
||||
filesToIndex.map(async (file) => {
|
||||
const fileContent = await this.app.vault.cachedRead(file)
|
||||
const fileDocuments = await textSplitter.createDocuments([
|
||||
fileContent,
|
||||
])
|
||||
return fileDocuments.map((chunk): InsertVector => {
|
||||
return {
|
||||
path: file.path,
|
||||
mtime: file.stat.mtime,
|
||||
const contentChunks: InsertVector[] = (
|
||||
await Promise.all(
|
||||
filesToIndex.map(async (file) => {
|
||||
const fileContent = await this.app.vault.cachedRead(file)
|
||||
const fileDocuments = await textSplitter.createDocuments([
|
||||
fileContent,
|
||||
])
|
||||
return fileDocuments.map((chunk): InsertVector => {
|
||||
return {
|
||||
path: file.path,
|
||||
mtime: file.stat.mtime,
|
||||
content: chunk.pageContent,
|
||||
embedding: [],
|
||||
metadata: {
|
||||
startLine: chunk.metadata.loc.lines.from as number,
|
||||
endLine: chunk.metadata.loc.lines.to as number,
|
||||
},
|
||||
}
|
||||
})
|
||||
}),
|
||||
)
|
||||
).flat()
|
||||
metadata: {
|
||||
startLine: Number(chunk.metadata.loc.lines.from),
|
||||
endLine: Number(chunk.metadata.loc.lines.to),
|
||||
},
|
||||
}
|
||||
})
|
||||
}),
|
||||
)
|
||||
).flat()
|
||||
|
||||
updateProgress?.({
|
||||
completedChunks: 0,
|
||||
totalChunks: contentChunks.length,
|
||||
totalFiles: filesToIndex.length,
|
||||
})
|
||||
updateProgress?.({
|
||||
completedChunks: 0,
|
||||
totalChunks: contentChunks.length,
|
||||
totalFiles: filesToIndex.length,
|
||||
})
|
||||
|
||||
const embeddingProgress = { completed: 0, inserted: 0 }
|
||||
const embeddingChunks: InsertVector[] = []
|
||||
const batchSize = 100
|
||||
const limit = pLimit(50)
|
||||
const abortController = new AbortController()
|
||||
const tasks = contentChunks.map((chunk) =>
|
||||
limit(async () => {
|
||||
if (abortController.signal.aborted) {
|
||||
throw new Error('Operation was aborted')
|
||||
}
|
||||
try {
|
||||
await backOff(
|
||||
async () => {
|
||||
const embedding = await embeddingModel.getEmbedding(chunk.content)
|
||||
const embeddedChunk = {
|
||||
path: chunk.path,
|
||||
mtime: chunk.mtime,
|
||||
content: chunk.content,
|
||||
embedding,
|
||||
metadata: chunk.metadata,
|
||||
}
|
||||
embeddingChunks.push(embeddedChunk)
|
||||
embeddingProgress.completed++
|
||||
updateProgress?.({
|
||||
completedChunks: embeddingProgress.completed,
|
||||
totalChunks: contentChunks.length,
|
||||
totalFiles: filesToIndex.length,
|
||||
})
|
||||
const embeddingProgress = { completed: 0 }
|
||||
const embeddingChunks: InsertVector[] = []
|
||||
const batchSize = 100
|
||||
const limit = pLimit(50)
|
||||
const abortController = new AbortController()
|
||||
const tasks = contentChunks.map((chunk) =>
|
||||
limit(async () => {
|
||||
if (abortController.signal.aborted) {
|
||||
throw new Error('Operation was aborted')
|
||||
}
|
||||
try {
|
||||
await backOff(
|
||||
async () => {
|
||||
const embedding = await embeddingModel.getEmbedding(chunk.content)
|
||||
const embeddedChunk = {
|
||||
path: chunk.path,
|
||||
mtime: chunk.mtime,
|
||||
content: chunk.content,
|
||||
embedding,
|
||||
metadata: chunk.metadata,
|
||||
}
|
||||
embeddingChunks.push(embeddedChunk)
|
||||
embeddingProgress.completed++
|
||||
updateProgress?.({
|
||||
completedChunks: embeddingProgress.completed,
|
||||
totalChunks: contentChunks.length,
|
||||
totalFiles: filesToIndex.length,
|
||||
})
|
||||
},
|
||||
{
|
||||
numOfAttempts: 5,
|
||||
startingDelay: 1000,
|
||||
timeMultiple: 1.5,
|
||||
jitter: 'full',
|
||||
},
|
||||
)
|
||||
} catch (error) {
|
||||
abortController.abort()
|
||||
throw error
|
||||
}
|
||||
}),
|
||||
)
|
||||
|
||||
// Insert vectors in batches
|
||||
if (
|
||||
embeddingChunks.length >=
|
||||
embeddingProgress.inserted + batchSize ||
|
||||
embeddingChunks.length === contentChunks.length
|
||||
) {
|
||||
await this.repository.insertVectors(
|
||||
embeddingChunks.slice(
|
||||
embeddingProgress.inserted,
|
||||
embeddingProgress.inserted + batchSize,
|
||||
),
|
||||
embeddingModel,
|
||||
)
|
||||
embeddingProgress.inserted += batchSize
|
||||
}
|
||||
},
|
||||
{
|
||||
numOfAttempts: 5,
|
||||
startingDelay: 1000,
|
||||
timeMultiple: 1.5,
|
||||
jitter: 'full',
|
||||
},
|
||||
)
|
||||
} catch (error) {
|
||||
abortController.abort()
|
||||
throw error
|
||||
}
|
||||
}),
|
||||
)
|
||||
try {
|
||||
await Promise.all(tasks)
|
||||
|
||||
try {
|
||||
await Promise.all(tasks)
|
||||
} catch (error) {
|
||||
if (
|
||||
error instanceof LLMAPIKeyNotSetException ||
|
||||
error instanceof LLMAPIKeyInvalidException ||
|
||||
error instanceof LLMBaseUrlNotSetException
|
||||
) {
|
||||
openSettingsModalWithError(this.app, (error as Error).message)
|
||||
} else if (error instanceof LLMRateLimitExceededException) {
|
||||
new Notice(error.message)
|
||||
} else {
|
||||
console.error('Error embedding chunks:', error)
|
||||
throw error
|
||||
}
|
||||
} finally {
|
||||
await this.dbManager.save()
|
||||
}
|
||||
}
|
||||
// all embedding generated, batch insert
|
||||
if (embeddingChunks.length > 0) {
|
||||
// batch insert all vectors
|
||||
let inserted = 0
|
||||
while (inserted < embeddingChunks.length) {
|
||||
const chunksToInsert = embeddingChunks.slice(
|
||||
inserted,
|
||||
Math.min(inserted + batchSize, embeddingChunks.length)
|
||||
)
|
||||
await this.repository.insertVectors(chunksToInsert, embeddingModel)
|
||||
inserted += chunksToInsert.length
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (
|
||||
error instanceof LLMAPIKeyNotSetException ||
|
||||
error instanceof LLMAPIKeyInvalidException ||
|
||||
error instanceof LLMBaseUrlNotSetException
|
||||
) {
|
||||
openSettingsModalWithError(this.app, error.message)
|
||||
} else if (error instanceof LLMRateLimitExceededException) {
|
||||
new Notice(error.message)
|
||||
} else {
|
||||
console.error('Error embedding chunks:', error)
|
||||
throw error
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private async deleteVectorsForDeletedFiles(embeddingModel: EmbeddingModel) {
|
||||
const indexedFilePaths =
|
||||
await this.repository.getIndexedFilePaths(embeddingModel)
|
||||
for (const filePath of indexedFilePaths) {
|
||||
if (!this.app.vault.getAbstractFileByPath(filePath)) {
|
||||
await this.repository.deleteVectorsForMultipleFiles(
|
||||
[filePath],
|
||||
embeddingModel,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
async UpdateFileVectorIndex(
|
||||
embeddingModel: EmbeddingModel,
|
||||
chunkSize: number,
|
||||
file: TFile
|
||||
) {
|
||||
|
||||
private async getFilesToIndex({
|
||||
embeddingModel,
|
||||
excludePatterns,
|
||||
includePatterns,
|
||||
reindexAll,
|
||||
}: {
|
||||
embeddingModel: EmbeddingModel
|
||||
excludePatterns: string[]
|
||||
includePatterns: string[]
|
||||
reindexAll?: boolean
|
||||
}): Promise<TFile[]> {
|
||||
let filesToIndex = this.app.vault.getMarkdownFiles()
|
||||
// Delete existing vectors for the files
|
||||
await this.repository.deleteVectorsForSingleFile(
|
||||
file.path,
|
||||
embeddingModel,
|
||||
)
|
||||
|
||||
filesToIndex = filesToIndex.filter((file) => {
|
||||
return !excludePatterns.some((pattern) => minimatch(file.path, pattern))
|
||||
})
|
||||
// Embed the files
|
||||
const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
|
||||
'markdown',
|
||||
{
|
||||
chunkSize,
|
||||
},
|
||||
)
|
||||
const fileContent = await this.app.vault.cachedRead(file)
|
||||
const fileDocuments = await textSplitter.createDocuments([
|
||||
fileContent,
|
||||
])
|
||||
|
||||
if (includePatterns.length > 0) {
|
||||
filesToIndex = filesToIndex.filter((file) => {
|
||||
return includePatterns.some((pattern) => minimatch(file.path, pattern))
|
||||
})
|
||||
}
|
||||
const contentChunks: InsertVector[] = fileDocuments.map((chunk): InsertVector => {
|
||||
return {
|
||||
path: file.path,
|
||||
mtime: file.stat.mtime,
|
||||
content: chunk.pageContent,
|
||||
embedding: [],
|
||||
metadata: {
|
||||
startLine: Number(chunk.metadata.loc.lines.from),
|
||||
endLine: Number(chunk.metadata.loc.lines.to),
|
||||
},
|
||||
}
|
||||
})
|
||||
|
||||
if (reindexAll) {
|
||||
return filesToIndex
|
||||
}
|
||||
const embeddingChunks: InsertVector[] = []
|
||||
const limit = pLimit(50)
|
||||
const abortController = new AbortController()
|
||||
const tasks = contentChunks.map((chunk) =>
|
||||
limit(async () => {
|
||||
if (abortController.signal.aborted) {
|
||||
throw new Error('Operation was aborted')
|
||||
}
|
||||
try {
|
||||
await backOff(
|
||||
async () => {
|
||||
const embedding = await embeddingModel.getEmbedding(chunk.content)
|
||||
const embeddedChunk = {
|
||||
path: chunk.path,
|
||||
mtime: chunk.mtime,
|
||||
content: chunk.content,
|
||||
embedding,
|
||||
metadata: chunk.metadata,
|
||||
}
|
||||
embeddingChunks.push(embeddedChunk)
|
||||
},
|
||||
{
|
||||
numOfAttempts: 5,
|
||||
startingDelay: 1000,
|
||||
timeMultiple: 1.5,
|
||||
jitter: 'full',
|
||||
},
|
||||
)
|
||||
} catch (error) {
|
||||
abortController.abort()
|
||||
throw error
|
||||
}
|
||||
}),
|
||||
)
|
||||
|
||||
// Check for updated or new files
|
||||
filesToIndex = await Promise.all(
|
||||
filesToIndex.map(async (file) => {
|
||||
const fileChunks = await this.repository.getVectorsByFilePath(
|
||||
file.path,
|
||||
embeddingModel,
|
||||
)
|
||||
if (fileChunks.length === 0) {
|
||||
// File is not indexed, so we need to index it
|
||||
const fileContent = await this.app.vault.cachedRead(file)
|
||||
if (fileContent.length === 0) {
|
||||
// Ignore empty files
|
||||
return null
|
||||
}
|
||||
return file
|
||||
}
|
||||
const outOfDate = file.stat.mtime > fileChunks[0].mtime
|
||||
if (outOfDate) {
|
||||
// File has changed, so we need to re-index it
|
||||
return file
|
||||
}
|
||||
return null
|
||||
}),
|
||||
).then((files) => files.filter(Boolean))
|
||||
try {
|
||||
await Promise.all(tasks)
|
||||
|
||||
return filesToIndex
|
||||
}
|
||||
// all embedding generated, batch insert
|
||||
if (embeddingChunks.length > 0) {
|
||||
const batchSize = 100
|
||||
let inserted = 0
|
||||
while (inserted < embeddingChunks.length) {
|
||||
const chunksToInsert = embeddingChunks.slice(inserted, Math.min(inserted + batchSize, embeddingChunks.length))
|
||||
await this.repository.insertVectors(chunksToInsert, embeddingModel)
|
||||
inserted += chunksToInsert.length
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error embedding chunks:', error)
|
||||
}
|
||||
}
|
||||
|
||||
async DeleteFileVectorIndex(
|
||||
embeddingModel: EmbeddingModel,
|
||||
file: TFile
|
||||
) {
|
||||
await this.repository.deleteVectorsForSingleFile(file.path, embeddingModel)
|
||||
}
|
||||
|
||||
private async cleanVectorsForDeletedFiles(
|
||||
embeddingModel: EmbeddingModel,
|
||||
) {
|
||||
const indexedFilePaths = await this.repository.getAllIndexedFilePaths(embeddingModel)
|
||||
const needToDelete = indexedFilePaths.filter(filePath => !this.app.vault.getAbstractFileByPath(filePath))
|
||||
if (needToDelete.length > 0) {
|
||||
await this.repository.deleteVectorsForMultipleFiles(
|
||||
needToDelete,
|
||||
embeddingModel,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private async getFilesToIndex({
|
||||
embeddingModel,
|
||||
excludePatterns,
|
||||
includePatterns,
|
||||
reindexAll,
|
||||
}: {
|
||||
embeddingModel: EmbeddingModel
|
||||
excludePatterns: string[]
|
||||
includePatterns: string[]
|
||||
reindexAll?: boolean
|
||||
}): Promise<TFile[]> {
|
||||
let filesToIndex = this.app.vault.getMarkdownFiles()
|
||||
|
||||
filesToIndex = filesToIndex.filter((file) => {
|
||||
return !excludePatterns.some((pattern) => minimatch(file.path, pattern))
|
||||
})
|
||||
|
||||
if (includePatterns.length > 0) {
|
||||
filesToIndex = filesToIndex.filter((file) => {
|
||||
return includePatterns.some((pattern) => minimatch(file.path, pattern))
|
||||
})
|
||||
}
|
||||
|
||||
if (reindexAll) {
|
||||
return filesToIndex
|
||||
}
|
||||
|
||||
// Check for updated or new files
|
||||
filesToIndex = await Promise.all(
|
||||
filesToIndex.map(async (file) => {
|
||||
const fileChunks = await this.repository.getVectorsByFilePath(
|
||||
file.path,
|
||||
embeddingModel,
|
||||
)
|
||||
if (fileChunks.length === 0) {
|
||||
// File is not indexed, so we need to index it
|
||||
const fileContent = await this.app.vault.cachedRead(file)
|
||||
if (fileContent.length === 0) {
|
||||
// Ignore empty files
|
||||
return null
|
||||
}
|
||||
return file
|
||||
}
|
||||
const outOfDate = file.stat.mtime > fileChunks[0].mtime
|
||||
if (outOfDate) {
|
||||
// File has changed, so we need to re-index it
|
||||
return file
|
||||
}
|
||||
return null
|
||||
}),
|
||||
).then((files) => files.filter(Boolean))
|
||||
|
||||
return filesToIndex
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ export class VectorRepository {
|
||||
return tableDefinition.name
|
||||
}
|
||||
|
||||
async getIndexedFilePaths(embeddingModel: EmbeddingModel): Promise<string[]> {
|
||||
async getAllIndexedFilePaths(embeddingModel: EmbeddingModel): Promise<string[]> {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
@@ -80,7 +80,7 @@ export class VectorRepository {
|
||||
if (!this.db) {
|
||||
throw new DatabaseNotInitializedException()
|
||||
}
|
||||
const tableName = this.getTableName(embeddingModel)
|
||||
const tableName = this.getTableName(embeddingModel)
|
||||
await this.db.query(`DELETE FROM "${tableName}"`)
|
||||
}
|
||||
|
||||
@@ -160,7 +160,11 @@ export class VectorRepository {
|
||||
if (conditions.length > 0) {
|
||||
scopeCondition = `AND (${conditions.join(' OR ')})`
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const queryVectorLength = `SELECT count(1) FROM "${tableName}"`;
|
||||
const queryVectorLengthResult = await this.db.query(queryVectorLength)
|
||||
console.log('queryVectorLengthResult, ', queryVectorLengthResult)
|
||||
|
||||
const query = `
|
||||
SELECT
|
||||
|
||||
Reference in New Issue
Block a user