This commit is contained in:
duanfuxiang
2025-01-05 11:51:39 +08:00
commit 0c7ee142cb
215 changed files with 20611 additions and 0 deletions

View File

@@ -0,0 +1,180 @@
import { PGlite } from '@electric-sql/pglite'
import { type PGliteWithLive, live } from '@electric-sql/pglite/live'
// import { PgliteDatabase, drizzle } from 'drizzle-orm/pglite'
import { App, normalizePath } from 'obsidian'
import { PGLITE_DB_PATH } from '../constants'
import { ConversationManager } from './modules/conversation/conversation-manager'
import { TemplateManager } from './modules/template/template-manager'
import { VectorManager } from './modules/vector/vector-manager'
import { pgliteResources } from './pglite-resources'
import { migrations } from './sql'
/**
 * Owns the PGlite database lifecycle for the plugin: restoring the
 * persisted database file from the vault, creating a fresh instance when
 * absent, running SQL migrations, saving snapshots back to disk, and
 * exposing the feature-specific managers (vectors, templates,
 * conversations).
 */
export class DBManager {
  private app: App
  private dbPath: string
  private db: PGliteWithLive | null = null
  // private db: PgliteDatabase | null = null
  private vectorManager: VectorManager
  private templateManager: TemplateManager
  private conversationManager: ConversationManager

  constructor(app: App, dbPath: string) {
    this.app = app
    this.dbPath = dbPath
  }

  /**
   * Async factory: loads the snapshot from the vault if one exists,
   * otherwise boots a fresh database; then migrates, persists, and wires up
   * the sub-managers.
   */
  static async create(app: App): Promise<DBManager> {
    const dbManager = new DBManager(app, normalizePath(PGLITE_DB_PATH))
    await dbManager.loadExistingDatabase()
    if (!dbManager.db) {
      await dbManager.createNewDatabase()
    }
    await dbManager.migrateDatabase()
    await dbManager.save()
    dbManager.vectorManager = new VectorManager(app, dbManager)
    dbManager.templateManager = new TemplateManager(app, dbManager)
    dbManager.conversationManager = new ConversationManager(app, dbManager)
    console.log('Smart composer database initialized.')
    return dbManager
  }

  // getDb() {
  //   return this.db
  // }

  /** Raw PGlite client; null until `create()` has initialized it. */
  getPgClient() {
    return this.db
  }

  getVectorManager() {
    return this.vectorManager
  }

  getTemplateManager() {
    return this.templateManager
  }

  getConversationManager() {
    return this.conversationManager
  }

  /** Boots an empty PGlite instance with the vector + live extensions. */
  private async createNewDatabase() {
    const { fsBundle, wasmModule, vectorExtensionBundlePath } =
      await this.loadPGliteResources()
    this.db = await PGlite.create({
      fsBundle: fsBundle,
      wasmModule: wasmModule,
      extensions: {
        vector: vectorExtensionBundlePath,
        live,
      },
    })
  }

  /**
   * Restores the database from the gzipped snapshot stored in the vault.
   * Leaves `this.db` null (and returns null) when the file is missing or
   * loading fails, so the caller falls back to `createNewDatabase()`.
   */
  private async loadExistingDatabase() {
    try {
      const databaseFileExists = await this.app.vault.adapter.exists(
        this.dbPath,
      )
      if (!databaseFileExists) {
        return null
      }
      const fileBuffer = await this.app.vault.adapter.readBinary(this.dbPath)
      const fileBlob = new Blob([fileBuffer], { type: 'application/x-gzip' })
      const { fsBundle, wasmModule, vectorExtensionBundlePath } =
        await this.loadPGliteResources()
      this.db = await PGlite.create({
        loadDataDir: fileBlob,
        fsBundle: fsBundle,
        wasmModule: wasmModule,
        extensions: {
          vector: vectorExtensionBundlePath,
          live,
        },
      })
      // return drizzle(this.pgClient)
    } catch (error) {
      console.error('Error loading database:', error)
      return null
    }
  }

  /**
   * Runs every SQL migration in declaration order.
   *
   * Each migration's `sql` blob contains several statements separated by
   * single newlines (see src/database/sql.ts), so it must be run through
   * `exec` — PGlite's simple-protocol, multi-statement entry point. The
   * previous implementation split on '\n\n' (which never matches these
   * blobs) and ran the result through `query`, which accepts only a single
   * statement per call and would therefore fail.
   */
  private async migrateDatabase(): Promise<void> {
    if (!this.db) {
      throw new Error('Database client not initialized')
    }
    try {
      for (const [key, migration] of Object.entries(migrations)) {
        console.log(`Executing SQL migration: ${key}`)
        await this.db.exec(migration.sql)
      }
    } catch (error) {
      console.error('Error executing SQL migrations:', error)
      throw error
    }
  }

  /**
   * Serializes the whole database to a gzipped data dir and writes it into
   * the vault. Errors are logged but not rethrown: saving is best-effort.
   */
  async save(): Promise<void> {
    if (!this.db) {
      return
    }
    try {
      const blob: Blob = await this.db.dumpDataDir('gzip')
      await this.app.vault.adapter.writeBinary(
        this.dbPath,
        Buffer.from(await blob.arrayBuffer()),
      )
    } catch (error) {
      console.error('Error saving database:', error)
    }
  }

  /** Closes the client (awaited, so shutdown completes) and drops it. */
  async cleanup() {
    await this.db?.close()
    this.db = null
  }

  /**
   * Decodes the base64-embedded PGlite runtime: the filesystem bundle, the
   * compiled WASM module, and a blob URL for the pgvector extension bundle.
   */
  private async loadPGliteResources(): Promise<{
    fsBundle: Blob
    wasmModule: WebAssembly.Module
    vectorExtensionBundlePath: URL
  }> {
    try {
      // Convert base64 payloads back to binary data
      const wasmBinary = Buffer.from(pgliteResources.wasmBase64, 'base64')
      const dataBinary = Buffer.from(pgliteResources.dataBase64, 'base64')
      const vectorBinary = Buffer.from(pgliteResources.vectorBase64, 'base64')
      // Create blobs from binary data
      const fsBundle = new Blob([dataBinary], {
        type: 'application/octet-stream',
      })
      const wasmModule = await WebAssembly.compile(wasmBinary)
      // PGlite expects the vector extension as a URL, so expose the blob
      // through an object URL
      const vectorBlob = new Blob([vectorBinary], {
        type: 'application/gzip',
      })
      const vectorExtensionBundlePath = URL.createObjectURL(vectorBlob)
      return {
        fsBundle,
        wasmModule,
        vectorExtensionBundlePath: new URL(vectorExtensionBundlePath),
      }
    } catch (error) {
      console.error('Error loading PGlite resources:', error)
      throw error
    }
  }
}

20
src/database/exception.ts Normal file
View File

@@ -0,0 +1,20 @@
/** Base class for all database-layer errors raised by this plugin. */
export class DatabaseException extends Error {
  constructor(message: string) {
    super(message)
    this.name = 'DatabaseException'
  }
}

/** Raised when a database operation runs before the client is ready. */
export class DatabaseNotInitializedException extends DatabaseException {
  constructor(message?: string) {
    super(message ?? 'Database not initialized')
    this.name = 'DatabaseNotInitializedException'
  }
}

/** Raised when creating a template whose name is already taken. */
export class DuplicateTemplateException extends DatabaseException {
  constructor(templateName: string) {
    super(`Template with name "${templateName}" already exists`)
    this.name = 'DuplicateTemplateException'
  }
}

View File

@@ -0,0 +1,162 @@
import { SerializedEditorState } from 'lexical'
import { App } from 'obsidian'
import { editorStateToPlainText } from '../../../components/chat-view/chat-input/utils/editor-state-to-plain-text'
import { ChatAssistantMessage, ChatConversationMeta, ChatMessage, ChatUserMessage } from '../../../types/chat'
import { ContentPart } from '../../../types/llm/request'
import { Mentionable, SerializedMentionable } from '../../../types/mentionable'
import { deserializeMentionable, serializeMentionable } from '../../../utils/mentionable'
import { DBManager } from '../../database-manager'
import { InsertMessage } from '../../schema'
import { ConversationRepository } from './conversation-repository'
/**
 * Manages chat conversations and their messages: (de)serialization between
 * in-memory ChatMessage objects and database rows, plus a live
 * subscription over the conversation list.
 */
export class ConversationManager {
  private app: App
  private repository: ConversationRepository
  private dbManager: DBManager

  constructor(app: App, dbManager: DBManager) {
    this.app = app
    this.dbManager = dbManager
    const db = dbManager.getPgClient()
    if (!db) throw new Error('Database not initialized')
    this.repository = new ConversationRepository(app, db)
  }

  /** Creates an empty conversation row and persists the database file. */
  async createConversation(id: string, title = 'New chat'): Promise<void> {
    const conversation = {
      id,
      title,
      createdAt: new Date(),
      updatedAt: new Date(),
    }
    await this.repository.create(conversation)
    await this.dbManager.save()
  }

  /**
   * Replaces the stored message list of a conversation, creating the
   * conversation first (titled from the first user message) if it does not
   * exist yet.
   */
  async saveConversation(id: string, messages: ChatMessage[]): Promise<void> {
    const conversation = await this.repository.findById(id)
    if (!conversation) {
      let title = 'New chat'
      if (messages.length > 0 && messages[0].role === 'user') {
        const query = editorStateToPlainText(messages[0].content)
        // Truncate long first messages so the sidebar stays readable
        title = query.length > 20 ? `${query.slice(0, 20)}...` : query
      }
      await this.createConversation(id, title)
    }
    // Full rewrite: delete existing messages, then re-insert the new list
    await this.repository.deleteAllMessagesFromConversation(id)
    for (const message of messages) {
      const insertMessage = this.serializeMessage(message, id)
      await this.repository.createMessage(insertMessage)
    }
    // Bump the conversation timestamp so it sorts to the top
    await this.repository.update(id, { updatedAt: new Date() })
    await this.dbManager.save()
  }

  /** Returns the conversation's messages, or null if it does not exist. */
  async findConversation(id: string): Promise<ChatMessage[] | null> {
    const conversation = await this.repository.findById(id)
    if (!conversation) {
      return null
    }
    const messages = await this.repository.findMessagesByConversationId(id)
    return messages.map((msg) => this.deserializeMessage(msg))
  }

  async deleteConversation(id: string): Promise<void> {
    await this.repository.delete(id)
    await this.dbManager.save()
  }

  /**
   * Subscribes to the conversation list via a PGlite live query; `callback`
   * fires with the full list whenever it changes, most recently updated
   * first (matching ConversationRepository.findAll, which orders DESC).
   */
  getAllConversations(
    callback: (conversations: ChatConversationMeta[]) => void,
  ): void {
    const db = this.dbManager.getPgClient()
    db?.live.query(
      'SELECT * FROM conversations ORDER BY updated_at DESC',
      [],
      (results) => {
        // PGlite returns raw column names (snake_case per the schema in
        // sql.ts); the previous camelCase reads always yielded undefined.
        // camelCase is still accepted defensively.
        type ConversationRow = {
          id: string
          title: string
          created_at?: Date | number
          createdAt?: Date | number
          updated_at?: Date | number
          updatedAt?: Date | number
        }
        const toMillis = (value: Date | number | undefined): number =>
          value instanceof Date ? value.getTime() : value ?? 0
        callback(
          (results.rows as ConversationRow[]).map((conv) => ({
            id: conv.id,
            title: conv.title,
            schemaVersion: 2,
            createdAt: toMillis(conv.created_at ?? conv.createdAt),
            updatedAt: toMillis(conv.updated_at ?? conv.updatedAt),
          })),
        )
      },
    )
  }

  async updateConversationTitle(id: string, title: string): Promise<void> {
    await this.repository.update(id, { title })
    await this.dbManager.save()
  }

  /** Flattens a ChatMessage into an insertable row for the messages table. */
  private serializeMessage(
    message: ChatMessage,
    conversationId: string,
  ): InsertMessage {
    const base = {
      id: message.id,
      conversationId,
      role: message.role,
      createdAt: new Date(),
    }
    if (message.role === 'user') {
      const userMessage: ChatUserMessage = message
      return {
        ...base,
        content: userMessage.content
          ? JSON.stringify(userMessage.content)
          : null,
        // Strings are stored verbatim; ContentPart[] is JSON-encoded (and
        // therefore round-trips through parsePromptContent below)
        promptContent: userMessage.promptContent
          ? typeof userMessage.promptContent === 'string'
            ? userMessage.promptContent
            : JSON.stringify(userMessage.promptContent)
          : null,
        mentionables: JSON.stringify(
          userMessage.mentionables.map(serializeMentionable),
        ),
        similaritySearchResults: userMessage.similaritySearchResults
          ? JSON.stringify(userMessage.similaritySearchResults)
          : null,
      }
    } else {
      const assistantMessage: ChatAssistantMessage = message
      return {
        ...base,
        content: assistantMessage.content,
        metadata: assistantMessage.metadata
          ? JSON.stringify(assistantMessage.metadata)
          : null,
      }
    }
  }

  /**
   * Restores a stored prompt_content column. `ContentPart[]` values were
   * serialized with JSON.stringify and therefore start with '[' — the
   * previous check only looked for '{', so arrays were never parsed back.
   * '{' is still accepted for any legacy rows; a plain string that merely
   * looks like JSON falls through unchanged.
   */
  private parsePromptContent(raw: string): string | ContentPart[] {
    if (raw.startsWith('[') || raw.startsWith('{')) {
      try {
        return JSON.parse(raw) as ContentPart[]
      } catch {
        return raw
      }
    }
    return raw
  }

  /** Inflates a stored row back into a ChatMessage. */
  private deserializeMessage(message: InsertMessage): ChatMessage {
    if (message.role === 'user') {
      return {
        id: message.id,
        role: 'user',
        content: message.content
          ? (JSON.parse(message.content) as SerializedEditorState)
          : null,
        promptContent: message.promptContent
          ? this.parsePromptContent(message.promptContent)
          : null,
        mentionables: message.mentionables
          ? (JSON.parse(message.mentionables) as SerializedMentionable[])
              .map((m) => deserializeMentionable(m, this.app))
              .filter((m: Mentionable | null): m is Mentionable => m !== null)
          : [],
        similaritySearchResults: message.similaritySearchResults
          ? JSON.parse(message.similaritySearchResults)
          : undefined,
      }
    } else {
      return {
        id: message.id,
        role: 'assistant',
        content: message.content || '',
        metadata: message.metadata ? JSON.parse(message.metadata) : undefined,
      }
    }
  }
}

View File

@@ -0,0 +1,131 @@
import { PGliteInterface } from '@electric-sql/pglite'
import { App } from 'obsidian'
import {
InsertConversation,
InsertMessage,
SelectConversation,
SelectMessage,
} from '../../schema'
// Minimal shape of a PGlite query result (documentation aid).
type QueryResult<T> = {
  rows: T[]
}

/**
 * Raw SQL data-access layer for the `conversations` and `messages` tables.
 * All methods delegate to the injected PGlite client; (de)serialization of
 * message payloads is handled by the caller (ConversationManager).
 */
export class ConversationRepository {
  private app: App
  private db: PGliteInterface

  constructor(app: App, db: PGliteInterface) {
    this.app = app
    this.db = db
  }

  /** Inserts a conversation row and returns it. */
  async create(conversation: InsertConversation): Promise<SelectConversation> {
    const res = await this.db.query<SelectConversation>(
      `INSERT INTO conversations (id, title, created_at, updated_at)
       VALUES ($1, $2, $3, $4)
       RETURNING *`,
      [
        conversation.id,
        conversation.title,
        conversation.createdAt ?? new Date(),
        conversation.updatedAt ?? new Date(),
      ],
    )
    return res.rows[0]
  }

  /** Inserts a message row and returns it. */
  async createMessage(message: InsertMessage): Promise<SelectMessage> {
    const res = await this.db.query<SelectMessage>(
      `INSERT INTO messages (
        id, conversation_id, role, content,
        prompt_content, metadata, mentionables,
        similarity_search_results, created_at
      )
      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
      RETURNING *`,
      [
        message.id,
        message.conversationId,
        message.role,
        message.content,
        message.promptContent,
        message.metadata,
        message.mentionables,
        message.similaritySearchResults,
        message.createdAt ?? new Date(),
      ],
    )
    return res.rows[0]
  }

  /** Looks up a single conversation; undefined when absent. */
  async findById(id: string): Promise<SelectConversation | undefined> {
    const res = await this.db.query<SelectConversation>(
      `SELECT * FROM conversations WHERE id = $1 LIMIT 1`,
      [id],
    )
    return res.rows[0]
  }

  /** All messages of one conversation, oldest first. */
  async findMessagesByConversationId(
    conversationId: string,
  ): Promise<SelectMessage[]> {
    const res = await this.db.query<SelectMessage>(
      `SELECT * FROM messages
       WHERE conversation_id = $1
       ORDER BY created_at`,
      [conversationId],
    )
    return res.rows
  }

  /** Every conversation, most recently updated first. */
  async findAll(): Promise<SelectConversation[]> {
    const res = await this.db.query<SelectConversation>(
      `SELECT * FROM conversations ORDER BY updated_at DESC`,
    )
    return res.rows
  }

  /**
   * Partially updates a conversation. `updated_at` is always refreshed,
   * even when no other field is supplied.
   */
  async update(
    id: string,
    data: Partial<InsertConversation>,
  ): Promise<SelectConversation> {
    const assignments: string[] = []
    const values: unknown[] = []
    if (data.title !== undefined) {
      values.push(data.title)
      assignments.push(`title = $${values.length}`)
    }
    // Always touch updated_at
    values.push(new Date())
    assignments.push(`updated_at = $${values.length}`)
    // The id is the final placeholder
    values.push(id)
    const res = await this.db.query<SelectConversation>(
      `UPDATE conversations
       SET ${assignments.join(', ')}
       WHERE id = $${values.length}
       RETURNING *`,
      values,
    )
    return res.rows[0]
  }

  /** Deletes a conversation; true when a row was actually removed. */
  async delete(id: string): Promise<boolean> {
    const res = await this.db.query<SelectConversation>(
      `DELETE FROM conversations WHERE id = $1 RETURNING *`,
      [id],
    )
    return res.rows.length > 0
  }

  /** Removes every message belonging to the given conversation. */
  async deleteAllMessagesFromConversation(
    conversationId: string,
  ): Promise<void> {
    await this.db.query(`DELETE FROM messages WHERE conversation_id = $1`, [
      conversationId,
    ])
  }
}

View File

@@ -0,0 +1,51 @@
import fuzzysort from 'fuzzysort'
import { App } from 'obsidian'
import { DBManager } from '../../database-manager'
import { DuplicateTemplateException } from '../../exception'
import { InsertTemplate, SelectTemplate } from '../../schema'
import { TemplateRepository } from './template-repository'
/**
 * High-level API for prompt templates: create (with duplicate-name
 * protection), list, fuzzy-search, and delete. Persists the database file
 * after every mutation.
 */
export class TemplateManager {
  private app: App
  private repository: TemplateRepository
  private dbManager: DBManager

  constructor(app: App, dbManager: DBManager) {
    this.app = app
    this.dbManager = dbManager
    this.repository = new TemplateRepository(app, dbManager.getPgClient())
  }

  /**
   * Creates a template.
   * @throws DuplicateTemplateException when the name is already taken.
   */
  async createTemplate(template: InsertTemplate): Promise<SelectTemplate> {
    if (await this.repository.findByName(template.name)) {
      throw new DuplicateTemplateException(template.name)
    }
    const inserted = await this.repository.create(template)
    await this.dbManager.save()
    return inserted
  }

  async findAllTemplates(): Promise<SelectTemplate[]> {
    return await this.repository.findAll()
  }

  /** Fuzzy-matches templates by name; empty query returns all (capped). */
  async searchTemplates(query: string): Promise<SelectTemplate[]> {
    const candidates = await this.findAllTemplates()
    const matches = fuzzysort.go(query, candidates, {
      keys: ['name'],
      threshold: 0.2,
      limit: 20,
      all: true,
    })
    return matches.map((match) => match.obj)
  }

  /** Deletes a template; true when a row was actually removed. */
  async deleteTemplate(id: string): Promise<boolean> {
    const wasDeleted = await this.repository.delete(id)
    await this.dbManager.save()
    return wasDeleted
  }
}

View File

@@ -0,0 +1,98 @@
import { PGliteInterface } from '@electric-sql/pglite'
import { App } from 'obsidian'
import { DatabaseNotInitializedException } from '../../exception'
import { type InsertTemplate, type SelectTemplate } from '../../schema'
/**
 * Raw SQL data-access layer for the `template` table. Unlike the other
 * repositories this one tolerates a null client at construction time and
 * throws DatabaseNotInitializedException at call time instead.
 */
export class TemplateRepository {
  private app: App
  private db: PGliteInterface | null

  constructor(app: App, pgClient: PGliteInterface | null) {
    this.app = app
    this.db = pgClient
  }

  /** Returns the client or throws when it has not been initialized. */
  private requireDb(): PGliteInterface {
    if (!this.db) {
      throw new DatabaseNotInitializedException()
    }
    return this.db
  }

  /** Inserts a template (id/timestamps come from column defaults). */
  async create(template: InsertTemplate): Promise<SelectTemplate> {
    const db = this.requireDb()
    const res = await db.query<SelectTemplate>(
      `INSERT INTO "template" (name, content)
       VALUES ($1, $2)
       RETURNING *`,
      [template.name, template.content],
    )
    return res.rows[0]
  }

  async findAll(): Promise<SelectTemplate[]> {
    const db = this.requireDb()
    const res = await db.query<SelectTemplate>(`SELECT * FROM "template"`)
    return res.rows
  }

  /** Looks a template up by its (unique) name; null when absent. */
  async findByName(name: string): Promise<SelectTemplate | null> {
    const db = this.requireDb()
    const res = await db.query<SelectTemplate>(
      `SELECT * FROM "template" WHERE name = $1`,
      [name],
    )
    return res.rows[0] ?? null
  }

  /**
   * Partially updates a template; `updated_at` is always refreshed using
   * the database clock. Returns null when the id does not exist.
   */
  async update(
    id: string,
    template: Partial<InsertTemplate>,
  ): Promise<SelectTemplate | null> {
    const db = this.requireDb()
    const assignments: string[] = []
    const values: unknown[] = []
    if (template.name !== undefined) {
      values.push(template.name)
      assignments.push(`name = $${values.length}`)
    }
    if (template.content !== undefined) {
      values.push(template.content)
      assignments.push(`content = $${values.length}`)
    }
    assignments.push(`updated_at = now()`)
    // The id is the final placeholder
    values.push(id)
    const res = await db.query<SelectTemplate>(
      `UPDATE "template"
       SET ${assignments.join(', ')}
       WHERE id = $${values.length}
       RETURNING *`,
      values,
    )
    return res.rows[0] ?? null
  }

  /** Deletes a template; true when a row was actually removed. */
  async delete(id: string): Promise<boolean> {
    const db = this.requireDb()
    const res = await db.query<SelectTemplate>(
      `DELETE FROM "template" WHERE id = $1 RETURNING *`,
      [id],
    )
    return res.rows.length > 0
  }
}

View File

@@ -0,0 +1,277 @@
import { backOff } from 'exponential-backoff'
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'
import { minimatch } from 'minimatch'
import { App, Notice, TFile } from 'obsidian'
import pLimit from 'p-limit'
import { IndexProgress } from '../../../components/chat-view/QueryProgress'
import {
LLMAPIKeyInvalidException,
LLMAPIKeyNotSetException,
LLMBaseUrlNotSetException,
LLMRateLimitExceededException,
} from '../../../core/llm/exception'
import { InsertVector, SelectVector } from '../../../database/schema'
import { EmbeddingModel } from '../../../types/embedding'
import { openSettingsModalWithError } from '../../../utils/open-settings-modal'
import { DBManager } from '../../database-manager'
import { VectorRepository } from './vector-repository'
/**
 * Coordinates vault embedding/indexing: splits markdown files into chunks,
 * embeds the chunks through the selected EmbeddingModel (with retry and
 * concurrency limiting), stores vectors via VectorRepository, and answers
 * similarity-search queries.
 */
export class VectorManager {
  private app: App
  private repository: VectorRepository
  private dbManager: DBManager

  constructor(app: App, dbManager: DBManager) {
    this.app = app
    this.dbManager = dbManager
    this.repository = new VectorRepository(app, dbManager.getPgClient())
  }

  /**
   * Delegates a similarity search to the repository. Returns matching
   * chunks (without their embeddings) together with a `similarity` score,
   * filtered by `minSimilarity` and the optional file/folder scope.
   */
  async performSimilaritySearch(
    queryVector: number[],
    embeddingModel: EmbeddingModel,
    options: {
      minSimilarity: number
      limit: number
      scope?: {
        files: string[]
        folders: string[]
      }
    },
  ): Promise<
    (Omit<SelectVector, 'embedding'> & {
      similarity: number
    })[]
  > {
    return await this.repository.performSimilaritySearch(
      queryVector,
      embeddingModel,
      options,
    )
  }

  /**
   * (Re)builds the vector index for the vault.
   *
   * With `reindexAll` the whole table for the model is cleared and every
   * matching file is re-embedded; otherwise only new/changed files are
   * processed (and vectors of deleted files are pruned first). Embedding
   * runs with up to 50 concurrent requests (pLimit) and exponential
   * backoff per chunk; results are inserted in batches of 100. LLM
   * configuration errors surface via the settings modal / Notice instead
   * of being rethrown. The database file is saved in all cases.
   */
  async updateVaultIndex(
    embeddingModel: EmbeddingModel,
    options: {
      chunkSize: number
      excludePatterns: string[]
      includePatterns: string[]
      reindexAll?: boolean
    },
    updateProgress?: (indexProgress: IndexProgress) => void,
  ): Promise<void> {
    let filesToIndex: TFile[]
    if (options.reindexAll) {
      filesToIndex = await this.getFilesToIndex({
        embeddingModel: embeddingModel,
        excludePatterns: options.excludePatterns,
        includePatterns: options.includePatterns,
        reindexAll: true,
      })
      await this.repository.clearAllVectors(embeddingModel)
    } else {
      // Prune vectors of files that no longer exist, then drop the stale
      // vectors of the files we are about to re-embed
      await this.deleteVectorsForDeletedFiles(embeddingModel)
      filesToIndex = await this.getFilesToIndex({
        embeddingModel: embeddingModel,
        excludePatterns: options.excludePatterns,
        includePatterns: options.includePatterns,
      })
      await this.repository.deleteVectorsForMultipleFiles(
        filesToIndex.map((file) => file.path),
        embeddingModel,
      )
    }
    if (filesToIndex.length === 0) {
      return
    }
    const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
      'markdown',
      {
        chunkSize: options.chunkSize,
        // TODO: Use token-based chunking after migrating to WebAssembly-based tiktoken
        // Current token counting method is too slow for practical use
        // lengthFunction: async (text) => {
        //   return await tokenCount(text)
        // },
      },
    )
    // Split every file into chunks, remembering the source line span of
    // each chunk in its metadata
    const contentChunks: InsertVector[] = (
      await Promise.all(
        filesToIndex.map(async (file) => {
          const fileContent = await this.app.vault.cachedRead(file)
          const fileDocuments = await textSplitter.createDocuments([
            fileContent,
          ])
          return fileDocuments.map((chunk): InsertVector => {
            return {
              path: file.path,
              mtime: file.stat.mtime,
              content: chunk.pageContent,
              metadata: {
                startLine: chunk.metadata.loc.lines.from as number,
                endLine: chunk.metadata.loc.lines.to as number,
              },
            }
          })
        }),
      )
    ).flat()
    updateProgress?.({
      completedChunks: 0,
      totalChunks: contentChunks.length,
      totalFiles: filesToIndex.length,
    })
    // Shared mutable progress state across all concurrent embedding tasks
    const embeddingProgress = { completed: 0, inserted: 0 }
    const embeddingChunks: InsertVector[] = []
    const batchSize = 100
    const limit = pLimit(50)
    // First failure aborts all still-pending tasks
    const abortController = new AbortController()
    const tasks = contentChunks.map((chunk) =>
      limit(async () => {
        if (abortController.signal.aborted) {
          throw new Error('Operation was aborted')
        }
        try {
          await backOff(
            async () => {
              const embedding = await embeddingModel.getEmbedding(chunk.content)
              const embeddedChunk = {
                path: chunk.path,
                mtime: chunk.mtime,
                content: chunk.content,
                embedding,
                metadata: chunk.metadata,
              }
              embeddingChunks.push(embeddedChunk)
              embeddingProgress.completed++
              updateProgress?.({
                completedChunks: embeddingProgress.completed,
                totalChunks: contentChunks.length,
                totalFiles: filesToIndex.length,
              })
              // Insert vectors in batches: flush once a full batch has
              // accumulated, or when the final chunk has been embedded.
              // NOTE(review): `inserted` is only advanced after the await
              // below, so two tasks finishing during the insert could both
              // see the condition true and insert the same slice; the
              // final partial batch also bumps `inserted` by a full
              // batchSize (harmless, as it is only used as a slice start).
              // Confirm whether duplicate inserts can occur under load.
              if (
                embeddingChunks.length >=
                  embeddingProgress.inserted + batchSize ||
                embeddingChunks.length === contentChunks.length
              ) {
                await this.repository.insertVectors(
                  embeddingChunks.slice(
                    embeddingProgress.inserted,
                    embeddingProgress.inserted + batchSize,
                  ),
                  embeddingModel,
                )
                embeddingProgress.inserted += batchSize
              }
            },
            {
              numOfAttempts: 5,
              startingDelay: 1000,
              timeMultiple: 1.5,
              jitter: 'full',
            },
          )
        } catch (error) {
          // Give up on the whole run once any chunk exhausts its retries
          abortController.abort()
          throw error
        }
      }),
    )
    try {
      await Promise.all(tasks)
    } catch (error) {
      // Configuration problems get surfaced to the user; anything else is
      // unexpected and rethrown
      if (
        error instanceof LLMAPIKeyNotSetException ||
        error instanceof LLMAPIKeyInvalidException ||
        error instanceof LLMBaseUrlNotSetException
      ) {
        openSettingsModalWithError(this.app, (error as Error).message)
      } else if (error instanceof LLMRateLimitExceededException) {
        new Notice(error.message)
      } else {
        console.error('Error embedding chunks:', error)
        throw error
      }
    } finally {
      // Persist whatever made it into the database, even on failure
      await this.dbManager.save()
    }
  }

  /** Prunes vectors whose source file no longer exists in the vault. */
  private async deleteVectorsForDeletedFiles(embeddingModel: EmbeddingModel) {
    const indexedFilePaths =
      await this.repository.getIndexedFilePaths(embeddingModel)
    for (const filePath of indexedFilePaths) {
      if (!this.app.vault.getAbstractFileByPath(filePath)) {
        await this.repository.deleteVectorsForMultipleFiles(
          [filePath],
          embeddingModel,
        )
      }
    }
  }

  /**
   * Selects the markdown files to (re)index: applies exclude then include
   * glob patterns; unless `reindexAll`, keeps only files that are new
   * (non-empty and not yet indexed) or whose mtime is newer than their
   * stored vectors.
   */
  private async getFilesToIndex({
    embeddingModel,
    excludePatterns,
    includePatterns,
    reindexAll,
  }: {
    embeddingModel: EmbeddingModel
    excludePatterns: string[]
    includePatterns: string[]
    reindexAll?: boolean
  }): Promise<TFile[]> {
    let filesToIndex = this.app.vault.getMarkdownFiles()
    filesToIndex = filesToIndex.filter((file) => {
      return !excludePatterns.some((pattern) => minimatch(file.path, pattern))
    })
    if (includePatterns.length > 0) {
      filesToIndex = filesToIndex.filter((file) => {
        return includePatterns.some((pattern) => minimatch(file.path, pattern))
      })
    }
    if (reindexAll) {
      return filesToIndex
    }
    // Check for updated or new files
    filesToIndex = await Promise.all(
      filesToIndex.map(async (file) => {
        const fileChunks = await this.repository.getVectorsByFilePath(
          file.path,
          embeddingModel,
        )
        if (fileChunks.length === 0) {
          // File is not indexed, so we need to index it
          const fileContent = await this.app.vault.cachedRead(file)
          if (fileContent.length === 0) {
            // Ignore empty files
            return null
          }
          return file
        }
        const outOfDate = file.stat.mtime > fileChunks[0].mtime
        if (outOfDate) {
          // File has changed, so we need to re-index it
          return file
        }
        return null
      }),
    ).then((files) => files.filter(Boolean) as TFile[])
    return filesToIndex
  }
}

View File

@@ -0,0 +1,180 @@
import { PGliteInterface } from '@electric-sql/pglite'
import { App } from 'obsidian'
import { EmbeddingModel } from '../../../types/embedding'
import { DatabaseNotInitializedException } from '../../exception'
import { InsertVector, SelectVector, vectorTables } from '../../schema'
/**
 * Raw SQL data-access layer for the per-dimension embedding tables
 * (embeddings_<dimension>). Table names come from the schema's
 * `vectorTables` map and are interpolated as quoted identifiers; all
 * values go through query parameters.
 */
export class VectorRepository {
  private app: App
  private db: PGliteInterface | null

  constructor(app: App, pgClient: PGliteInterface | null) {
    this.app = app
    this.db = pgClient
  }

  /** Resolves the table name (e.g. "embeddings_1536") for a model. */
  private getTableName(embeddingModel: EmbeddingModel): string {
    const tableDefinition = vectorTables[embeddingModel.dimension]
    if (!tableDefinition) {
      throw new Error(
        `No table definition found for model: ${embeddingModel.id}`,
      )
    }
    return tableDefinition.name
  }

  /** Distinct file paths that currently have vectors for this model. */
  async getIndexedFilePaths(embeddingModel: EmbeddingModel): Promise<string[]> {
    if (!this.db) {
      throw new DatabaseNotInitializedException()
    }
    const tableName = this.getTableName(embeddingModel)
    const result = await this.db.query<{ path: string }>(
      `SELECT DISTINCT path FROM "${tableName}"`,
    )
    return result.rows.map((row: { path: string }) => row.path)
  }

  /** All vector rows stored for one file. */
  async getVectorsByFilePath(
    filePath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<SelectVector[]> {
    if (!this.db) {
      throw new DatabaseNotInitializedException()
    }
    const tableName = this.getTableName(embeddingModel)
    const result = await this.db.query<SelectVector>(
      `SELECT * FROM "${tableName}" WHERE path = $1`,
      [filePath],
    )
    return result.rows
  }

  /** Deletes every vector belonging to one file. */
  async deleteVectorsForSingleFile(
    filePath: string,
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    if (!this.db) {
      throw new DatabaseNotInitializedException()
    }
    const tableName = this.getTableName(embeddingModel)
    await this.db.query(`DELETE FROM "${tableName}" WHERE path = $1`, [
      filePath,
    ])
  }

  /** Deletes every vector belonging to any of the given files. */
  async deleteVectorsForMultipleFiles(
    filePaths: string[],
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    if (!this.db) {
      throw new DatabaseNotInitializedException()
    }
    const tableName = this.getTableName(embeddingModel)
    await this.db.query(`DELETE FROM "${tableName}" WHERE path = ANY($1)`, [
      filePaths,
    ])
  }

  /** Empties the model's vector table. */
  async clearAllVectors(embeddingModel: EmbeddingModel): Promise<void> {
    if (!this.db) {
      throw new DatabaseNotInitializedException()
    }
    const tableName = this.getTableName(embeddingModel)
    await this.db.query(`DELETE FROM "${tableName}"`)
  }

  /**
   * Bulk-inserts vectors with a single multi-row INSERT (5 placeholders
   * per row). Embeddings are sent in pgvector's text input format
   * ('[x,y,...]').
   */
  async insertVectors(
    data: InsertVector[],
    embeddingModel: EmbeddingModel,
  ): Promise<void> {
    if (!this.db) {
      throw new DatabaseNotInitializedException()
    }
    // An empty batch would produce `VALUES ` with no tuples — invalid SQL
    if (data.length === 0) {
      return
    }
    const tableName = this.getTableName(embeddingModel)
    // Build the placeholder tuples for the multi-row insert
    const values = data
      .map((_vector, index) => {
        const offset = index * 5
        return `($${offset + 1}, $${offset + 2}, $${offset + 3}, $${offset + 4}, $${offset + 5})`
      })
      .join(',')
    const params = data.flatMap((vector) => [
      vector.path,
      vector.mtime,
      vector.content,
      `[${vector.embedding.join(',')}]`, // pgvector text input format
      vector.metadata,
    ])
    await this.db.query(
      `INSERT INTO "${tableName}" (path, mtime, content, embedding, metadata)
       VALUES ${values}`,
      params,
    )
  }

  /**
   * Cosine-similarity search. `1 - (embedding <=> $1)` converts pgvector's
   * cosine distance into a similarity score; rows at or below
   * `minSimilarity` are excluded. The optional scope restricts results to
   * specific file paths and/or folder prefixes (combined with OR).
   */
  async performSimilaritySearch(
    queryVector: number[],
    embeddingModel: EmbeddingModel,
    options: {
      minSimilarity: number
      limit: number
      scope?: {
        files: string[]
        folders: string[]
      }
    },
  ): Promise<
    (Omit<SelectVector, 'embedding'> & {
      similarity: number
    })[]
  > {
    if (!this.db) {
      throw new DatabaseNotInitializedException()
    }
    const tableName = this.getTableName(embeddingModel)
    let scopeCondition = ''
    // $1 = query vector, $2 = min similarity, $3 = limit; scope params follow
    const params: unknown[] = [
      `[${queryVector.join(',')}]`,
      options.minSimilarity,
      options.limit,
    ]
    let paramIndex = 4
    if (options.scope) {
      const conditions: string[] = []
      if (options.scope.files.length > 0) {
        conditions.push(`path = ANY($${paramIndex})`)
        params.push(options.scope.files)
        paramIndex++
      }
      if (options.scope.folders.length > 0) {
        const folderConditions = options.scope.folders.map((folder, idx) => {
          params.push(`${folder}/%`)
          return `path LIKE $${paramIndex + idx}`
        })
        conditions.push(`(${folderConditions.join(' OR ')})`)
        paramIndex += options.scope.folders.length
      }
      if (conditions.length > 0) {
        scopeCondition = `AND (${conditions.join(' OR ')})`
      }
    }
    const query = `
      SELECT
        id, path, mtime, content, metadata,
        1 - (embedding <=> $1::vector) as similarity
      FROM "${tableName}"
      WHERE 1 - (embedding <=> $1::vector) > $2
      ${scopeCondition}
      ORDER BY similarity DESC
      LIMIT $3
    `
    type SearchResult = Omit<SelectVector, 'embedding'> & { similarity: number }
    const result = await this.db.query<SearchResult>(query, params)
    return result.rows
  }
}

7
src/database/pglite-resources.d.ts vendored Normal file
View File

@@ -0,0 +1,7 @@
/**
 * Shape of the generated `pglite-resources` bundle: the PGlite runtime
 * assets embedded as base64 strings (decoded in
 * DBManager.loadPGliteResources).
 */
export interface PgliteResources {
  /** PGlite WebAssembly binary, base64-encoded. */
  wasmBase64: string;
  /** PGlite filesystem bundle, base64-encoded. */
  dataBase64: string;
  /** pgvector extension bundle, base64-encoded. */
  vectorBase64: string;
}
/** The embedded resources (actual values live in the generated module). */
export const pgliteResources: PgliteResources;

File diff suppressed because one or more lines are too long

156
src/database/schema.ts Normal file
View File

@@ -0,0 +1,156 @@
import { SerializedLexicalNode } from 'lexical'
import { SUPPORT_EMBEDDING_SIMENTION } from '../constants'
import { EmbeddingModelId } from '../types/embedding'
// PostgreSQL column types
/** Declarative description of a single PostgreSQL column. */
interface ColumnDefinition {
  type: string
  notNull?: boolean
  primaryKey?: boolean
  defaultRandom?: boolean
  unique?: boolean
  defaultNow?: boolean
  dimensions?: number
}

/** Declarative description of a table plus its optional indexes. */
interface TableDefinition {
  name: string
  columns: Record<string, ColumnDefinition>
  indices?: Record<
    string,
    {
      type: string
      columns: string[]
      options?: string
    }
  >
}

/* Vector Table */
/**
 * Builds the table definition for one embedding dimension
 * (named "embeddings_<dimension>"). An HNSW index is only declared for
 * dimensions up to 2000 (presumably pgvector's HNSW dimension limit —
 * TODO confirm).
 */
const createVectorTable = (dimension: number): TableDefinition => {
  const definition: TableDefinition = {
    name: `embeddings_${dimension}`,
    columns: {
      id: { type: 'SERIAL', primaryKey: true },
      path: { type: 'TEXT', notNull: true },
      mtime: { type: 'BIGINT', notNull: true },
      content: { type: 'TEXT', notNull: true },
      embedding: { type: 'VECTOR', dimensions: dimension },
      metadata: { type: 'JSONB', notNull: true },
    },
  }
  if (dimension <= 2000) {
    definition.indices = {
      [`embeddingIndex_${dimension}`]: {
        type: 'HNSW',
        columns: ['embedding'],
        options: 'vector_cosine_ops',
      },
    }
  }
  return definition
}
// One vector-table definition per supported embedding dimension,
// keyed by dimension.
export const vectorTables: Record<number, TableDefinition> =
  SUPPORT_EMBEDDING_SIMENTION.reduce(
    (tables: Record<number, TableDefinition>, dimension) => ({
      ...tables,
      [dimension]: createVectorTable(dimension),
    }),
    {},
  )
// Type definitions for vector table
/** One stored embedding row: a chunk of a markdown file plus its vector. */
export interface VectorRecord {
  id: number
  path: string
  mtime: number
  content: string
  embedding: number[]
  metadata: VectorMetaData
}
export type SelectVector = VectorRecord
// Inserts omit the auto-generated serial id
export type InsertVector = Omit<VectorRecord, 'id'>
/** Line span of the chunk within its source file. */
export type VectorMetaData = {
  startLine: number
  endLine: number
}
// // Export individual vector tables for reference
// export const vectorTable0 = vectorTables[EMBEDDING_MODEL_OPTIONS[0].id]
// export const vectorTable1 = vectorTables[EMBEDDING_MODEL_OPTIONS[1].id]
// export const vectorTable2 = vectorTables[EMBEDDING_MODEL_OPTIONS[2].id]
// export const vectorTable3 = vectorTables[EMBEDDING_MODEL_OPTIONS[3].id]
// export const vectorTable4 = vectorTables[EMBEDDING_MODEL_OPTIONS[4].id]
// export const vectorTable5 = vectorTables[EMBEDDING_MODEL_OPTIONS[5].id]
/* Template Table */
/** Serialized Lexical editor nodes that make up a template body. */
export type TemplateContent = {
  nodes: SerializedLexicalNode[]
}
/** One row of the "template" table. */
export interface TemplateRecord {
  id: string
  name: string
  content: TemplateContent
  createdAt: Date
  updatedAt: Date
}
export type SelectTemplate = TemplateRecord
// id and timestamps are generated by column defaults on insert
export type InsertTemplate = Omit<TemplateRecord, 'id' | 'createdAt' | 'updatedAt'>
/** Declarative definition of the "template" table. */
export const templateTable: TableDefinition = {
  name: 'template',
  columns: {
    id: { type: 'UUID', primaryKey: true, defaultRandom: true },
    name: { type: 'TEXT', notNull: true, unique: true },
    content: { type: 'JSONB', notNull: true },
    createdAt: { type: 'TIMESTAMP', notNull: true, defaultNow: true },
    updatedAt: { type: 'TIMESTAMP', notNull: true, defaultNow: true }
  }
}
/** One row of the "conversations" table. */
export interface Conversation {
  id: string // uuid
  title: string
  createdAt: Date
  updatedAt: Date
}
/**
 * One row of the "messages" table. Structured payloads (prompt content,
 * metadata, mentionables, search results) are stored as serialized text
 * columns; ConversationManager handles (de)serialization.
 */
export interface Message {
  id: string // uuid
  conversationId: string // uuid
  role: 'user' | 'assistant'
  content: string | null
  promptContent?: string | null
  metadata?: string | null
  mentionables?: string | null
  similaritySearchResults?: string | null
  createdAt: Date
}
// Timestamps default to now() when omitted on insert
export type InsertConversation = {
  id: string
  title: string
  createdAt?: Date
  updatedAt?: Date
}
export type SelectConversation = Conversation
// createdAt defaults to now() when omitted on insert
export type InsertMessage = {
  id: string
  conversationId: string
  role: 'user' | 'assistant'
  content: string | null
  promptContent?: string | null
  metadata?: string | null
  mentionables?: string | null
  similaritySearchResults?: string | null
  createdAt?: Date
}
export type SelectMessage = Message

118
src/database/sql.ts Normal file
View File

@@ -0,0 +1,118 @@
/** One named SQL migration: a human-readable description plus its DDL. */
export interface SqlMigration {
  description: string;
  sql: string;
}
/**
 * All schema migrations, executed in declaration order by
 * DBManager.migrateDatabase. Every statement is idempotent
 * (CREATE ... IF NOT EXISTS), so re-running a migration is safe.
 *
 * NOTE(review): each `sql` blob contains multiple statements separated by
 * single newlines — a split on '\n\n' will NOT separate them; confirm they
 * are executed through a multi-statement API (e.g. PGlite `exec`).
 */
export const migrations: Record<string, SqlMigration> = {
  // Embedding tables: one per supported vector dimension, plus HNSW
  // indexes for cosine similarity search.
  vector: {
    description: "Creates vector tables and indexes for different models",
    sql: `
    -- Enable required extensions
    CREATE EXTENSION IF NOT EXISTS vector;
    -- Create vector tables for different models
    CREATE TABLE IF NOT EXISTS "embeddings_1536" (
      "id" serial PRIMARY KEY NOT NULL,
      "path" text NOT NULL,
      "mtime" bigint NOT NULL,
      "content" text NOT NULL,
      "embedding" vector(1536),
      "metadata" jsonb NOT NULL
    );
    CREATE TABLE IF NOT EXISTS "embeddings_1024" (
      "id" serial PRIMARY KEY NOT NULL,
      "path" text NOT NULL,
      "mtime" bigint NOT NULL,
      "content" text NOT NULL,
      "embedding" vector(1024),
      "metadata" jsonb NOT NULL
    );
    CREATE TABLE IF NOT EXISTS "embeddings_768" (
      "id" serial PRIMARY KEY NOT NULL,
      "path" text NOT NULL,
      "mtime" bigint NOT NULL,
      "content" text NOT NULL,
      "embedding" vector(768),
      "metadata" jsonb NOT NULL
    );
    CREATE TABLE IF NOT EXISTS "embeddings_512" (
      "id" serial PRIMARY KEY NOT NULL,
      "path" text NOT NULL,
      "mtime" bigint NOT NULL,
      "content" text NOT NULL,
      "embedding" vector(512),
      "metadata" jsonb NOT NULL
    );
    CREATE TABLE IF NOT EXISTS "embeddings_384" (
      "id" serial PRIMARY KEY NOT NULL,
      "path" text NOT NULL,
      "mtime" bigint NOT NULL,
      "content" text NOT NULL,
      "embedding" vector(384),
      "metadata" jsonb NOT NULL
    );
    -- Create HNSW indexes for vector similarity search
    CREATE INDEX IF NOT EXISTS "embeddingIndex_1536"
    ON "embeddings_1536"
    USING hnsw ("embedding" vector_cosine_ops);
    CREATE INDEX IF NOT EXISTS "embeddingIndex_1024"
    ON "embeddings_1024"
    USING hnsw ("embedding" vector_cosine_ops);
    CREATE INDEX IF NOT EXISTS "embeddingIndex_768"
    ON "embeddings_768"
    USING hnsw ("embedding" vector_cosine_ops);
    CREATE INDEX IF NOT EXISTS "embeddingIndex_512"
    ON "embeddings_512"
    USING hnsw ("embedding" vector_cosine_ops);
    CREATE INDEX IF NOT EXISTS "embeddingIndex_384"
    ON "embeddings_384"
    USING hnsw ("embedding" vector_cosine_ops);
    `
  },
  // Prompt templates: uuid primary key, unique name, JSONB body.
  template: {
    description: "Creates template table with UUID support",
    sql: `
    -- Create template table
    CREATE TABLE IF NOT EXISTS "template" (
      "id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
      "name" text NOT NULL,
      "content" jsonb NOT NULL,
      "created_at" timestamp DEFAULT now() NOT NULL,
      "updated_at" timestamp DEFAULT now() NOT NULL,
      CONSTRAINT "template_name_unique" UNIQUE("name")
    );
    `
  },
  // Chat history: conversations plus their messages; deleting a
  // conversation cascades to its messages.
  conversation: {
    description: "Creates conversations and messages tables",
    sql: `
    CREATE TABLE IF NOT EXISTS "conversations" (
      "id" uuid PRIMARY KEY NOT NULL,
      "title" text NOT NULL,
      "created_at" timestamp DEFAULT now() NOT NULL,
      "updated_at" timestamp DEFAULT now() NOT NULL
    );
    CREATE TABLE IF NOT EXISTS "messages" (
      "id" uuid PRIMARY KEY NOT NULL,
      "conversation_id" uuid NOT NULL REFERENCES "conversations"("id") ON DELETE CASCADE,
      "role" text NOT NULL,
      "content" text,
      "prompt_content" text,
      "metadata" text,
      "mentionables" text,
      "similarity_search_results" text,
      "created_at" timestamp DEFAULT now() NOT NULL
    );
    `
  }
};