This commit is contained in:
duanfuxiang
2025-01-05 11:51:39 +08:00
commit 0c7ee142cb
215 changed files with 20611 additions and 0 deletions

View File

@@ -0,0 +1,162 @@
import { SerializedEditorState } from 'lexical'
import { App } from 'obsidian'
import { editorStateToPlainText } from '../../../components/chat-view/chat-input/utils/editor-state-to-plain-text'
import { ChatAssistantMessage, ChatConversationMeta, ChatMessage, ChatUserMessage } from '../../../types/chat'
import { ContentPart } from '../../../types/llm/request'
import { Mentionable, SerializedMentionable } from '../../../types/mentionable'
import { deserializeMentionable, serializeMentionable } from '../../../utils/mentionable'
import { DBManager } from '../../database-manager'
import { InsertMessage } from '../../schema'
import { ConversationRepository } from './conversation-repository'
export class ConversationManager {
private app: App
private repository: ConversationRepository
private dbManager: DBManager
constructor(app: App, dbManager: DBManager) {
this.app = app
this.dbManager = dbManager
const db = dbManager.getPgClient()
if (!db) throw new Error('Database not initialized')
this.repository = new ConversationRepository(app, db)
}
async createConversation(id: string, title = 'New chat'): Promise<void> {
const conversation = {
id,
title,
createdAt: new Date(),
updatedAt: new Date(),
}
await this.repository.create(conversation)
await this.dbManager.save()
}
async saveConversation(id: string, messages: ChatMessage[]): Promise<void> {
const conversation = await this.repository.findById(id)
if (!conversation) {
let title = 'New chat'
if (messages.length > 0 && messages[0].role === 'user') {
const query = editorStateToPlainText(messages[0].content)
if (query.length > 20) {
title = `${query.slice(0, 20)}...`
} else {
title = query
}
}
await this.createConversation(id, title)
}
// Delete existing messages
await this.repository.deleteAllMessagesFromConversation(id)
// Insert new messages
for (const message of messages) {
const insertMessage = this.serializeMessage(message, id)
await this.repository.createMessage(insertMessage)
}
// Update conversation timestamp
await this.repository.update(id, { updatedAt: new Date() })
await this.dbManager.save()
}
async findConversation(id: string): Promise<ChatMessage[] | null> {
const conversation = await this.repository.findById(id)
if (!conversation) {
return null
}
const messages = await this.repository.findMessagesByConversationId(id)
return messages.map(msg => this.deserializeMessage(msg))
}
async deleteConversation(id: string): Promise<void> {
await this.repository.delete(id)
await this.dbManager.save()
}
getAllConversations(callback: (conversations: ChatConversationMeta[]) => void): void {
const db = this.dbManager.getPgClient()
db?.live.query('SELECT * FROM conversations ORDER BY updated_at', [], (results) => {
callback(results.rows.map(conv => ({
id: conv.id,
title: conv.title,
schemaVersion: 2,
createdAt: conv.createdAt instanceof Date ? conv.createdAt.getTime() : conv.createdAt,
updatedAt: conv.updatedAt instanceof Date ? conv.updatedAt.getTime() : conv.updatedAt,
})))
})
}
async updateConversationTitle(id: string, title: string): Promise<void> {
await this.repository.update(id, { title })
await this.dbManager.save()
}
private serializeMessage(message: ChatMessage, conversationId: string): InsertMessage {
const base = {
id: message.id,
conversationId,
role: message.role,
createdAt: new Date(),
}
if (message.role === 'user') {
const userMessage: ChatUserMessage = message
return {
...base,
content: userMessage.content ? JSON.stringify(userMessage.content) : null,
promptContent: userMessage.promptContent
? typeof userMessage.promptContent === 'string'
? userMessage.promptContent
: JSON.stringify(userMessage.promptContent)
: null,
mentionables: JSON.stringify(userMessage.mentionables.map(serializeMentionable)),
similaritySearchResults: userMessage.similaritySearchResults
? JSON.stringify(userMessage.similaritySearchResults)
: null,
}
} else {
const assistantMessage: ChatAssistantMessage = message
return {
...base,
content: assistantMessage.content,
metadata: assistantMessage.metadata ? JSON.stringify(assistantMessage.metadata) : null,
}
}
}
private deserializeMessage(message: InsertMessage): ChatMessage {
if (message.role === 'user') {
return {
id: message.id,
role: 'user',
content: message.content ? JSON.parse(message.content) as SerializedEditorState : null,
promptContent: message.promptContent
? message.promptContent.startsWith('{')
? JSON.parse(message.promptContent) as ContentPart[]
: message.promptContent
: null,
mentionables: message.mentionables
? (JSON.parse(message.mentionables) as SerializedMentionable[])
.map(m => deserializeMentionable(m, this.app))
.filter((m: Mentionable | null): m is Mentionable => m !== null)
: [],
similaritySearchResults: message.similaritySearchResults
? JSON.parse(message.similaritySearchResults)
: undefined,
}
} else {
return {
id: message.id,
role: 'assistant',
content: message.content || '',
metadata: message.metadata ? JSON.parse(message.metadata) : undefined,
}
}
}
}

View File

@@ -0,0 +1,131 @@
import { PGliteInterface } from '@electric-sql/pglite'
import { App } from 'obsidian'
import {
InsertConversation,
InsertMessage,
SelectConversation,
SelectMessage,
} from '../../schema'
type QueryResult<T> = {
rows: T[]
}
export class ConversationRepository {
private app: App
private db: PGliteInterface
constructor(app: App, db: PGliteInterface) {
this.app = app
this.db = db
}
async create(conversation: InsertConversation): Promise<SelectConversation> {
const result = await this.db.query<SelectConversation>(
`INSERT INTO conversations (id, title, created_at, updated_at)
VALUES ($1, $2, $3, $4)
RETURNING *`,
[
conversation.id,
conversation.title,
conversation.createdAt || new Date(),
conversation.updatedAt || new Date()
]
) as QueryResult<SelectConversation>
return result.rows[0]
}
async createMessage(message: InsertMessage): Promise<SelectMessage> {
const result = await this.db.query<SelectMessage>(
`INSERT INTO messages (
id, conversation_id, role, content,
prompt_content, metadata, mentionables,
similarity_search_results, created_at
)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
RETURNING *`,
[
message.id,
message.conversationId,
message.role,
message.content,
message.promptContent,
message.metadata,
message.mentionables,
message.similaritySearchResults,
message.createdAt || new Date()
]
) as QueryResult<SelectMessage>
return result.rows[0]
}
async findById(id: string): Promise<SelectConversation | undefined> {
const result = await this.db.query<SelectConversation>(
`SELECT * FROM conversations WHERE id = $1 LIMIT 1`,
[id]
) as QueryResult<SelectConversation>
return result.rows[0]
}
async findMessagesByConversationId(conversationId: string): Promise<SelectMessage[]> {
const result = await this.db.query<SelectMessage>(
`SELECT * FROM messages
WHERE conversation_id = $1
ORDER BY created_at`,
[conversationId]
) as QueryResult<SelectMessage>
return result.rows
}
async findAll(): Promise<SelectConversation[]> {
const result = await this.db.query<SelectConversation>(
`SELECT * FROM conversations ORDER BY updated_at DESC`
) as QueryResult<SelectConversation>
return result.rows
}
async update(id: string, data: Partial<InsertConversation>): Promise<SelectConversation> {
const setClauses: string[] = []
const values: any[] = []
let paramIndex = 1
if (data.title !== undefined) {
setClauses.push(`title = $${paramIndex}`)
values.push(data.title)
paramIndex++
}
// Always update updated_at
setClauses.push(`updated_at = $${paramIndex}`)
values.push(new Date())
paramIndex++
// Add id as the last parameter
values.push(id)
const result = await this.db.query<SelectConversation>(
`UPDATE conversations
SET ${setClauses.join(', ')}
WHERE id = $${paramIndex}
RETURNING *`,
values
) as QueryResult<SelectConversation>
return result.rows[0]
}
async delete(id: string): Promise<boolean> {
const result = await this.db.query<SelectConversation>(
`DELETE FROM conversations WHERE id = $1 RETURNING *`,
[id]
) as QueryResult<SelectConversation>
return result.rows.length > 0
}
async deleteAllMessagesFromConversation(conversationId: string): Promise<void> {
await this.db.query(
`DELETE FROM messages WHERE conversation_id = $1`,
[conversationId]
)
}
}

View File

@@ -0,0 +1,51 @@
import fuzzysort from 'fuzzysort'
import { App } from 'obsidian'
import { DBManager } from '../../database-manager'
import { DuplicateTemplateException } from '../../exception'
import { InsertTemplate, SelectTemplate } from '../../schema'
import { TemplateRepository } from './template-repository'
export class TemplateManager {
private app: App
private repository: TemplateRepository
private dbManager: DBManager
constructor(app: App, dbManager: DBManager) {
this.app = app
this.dbManager = dbManager
this.repository = new TemplateRepository(app, dbManager.getPgClient())
}
async createTemplate(template: InsertTemplate): Promise<SelectTemplate> {
const existingTemplate = await this.repository.findByName(template.name)
if (existingTemplate) {
throw new DuplicateTemplateException(template.name)
}
const created = await this.repository.create(template)
await this.dbManager.save()
return created
}
async findAllTemplates(): Promise<SelectTemplate[]> {
return await this.repository.findAll()
}
async searchTemplates(query: string): Promise<SelectTemplate[]> {
const templates = await this.findAllTemplates()
const results = fuzzysort.go(query, templates, {
keys: ['name'],
threshold: 0.2,
limit: 20,
all: true,
})
return results.map((result) => result.obj)
}
async deleteTemplate(id: string): Promise<boolean> {
const deleted = await this.repository.delete(id)
await this.dbManager.save()
return deleted
}
}

View File

@@ -0,0 +1,98 @@
import { PGliteInterface } from '@electric-sql/pglite'
import { App } from 'obsidian'
import { DatabaseNotInitializedException } from '../../exception'
import { type InsertTemplate, type SelectTemplate } from '../../schema'
export class TemplateRepository {
private app: App
private db: PGliteInterface | null
constructor(app: App, pgClient: PGliteInterface | null) {
this.app = app
this.db = pgClient
}
async create(template: InsertTemplate): Promise<SelectTemplate> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const result = await this.db.query<SelectTemplate>(
`INSERT INTO "template" (name, content)
VALUES ($1, $2)
RETURNING *`,
[template.name, template.content]
)
return result.rows[0]
}
async findAll(): Promise<SelectTemplate[]> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const result = await this.db.query<SelectTemplate>(
`SELECT * FROM "template"`
)
return result.rows
}
async findByName(name: string): Promise<SelectTemplate | null> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const result = await this.db.query<SelectTemplate>(
`SELECT * FROM "template" WHERE name = $1`,
[name]
)
return result.rows[0] ?? null
}
async update(
id: string,
template: Partial<InsertTemplate>,
): Promise<SelectTemplate | null> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const setClauses: string[] = []
const params: any[] = []
let paramIndex = 1
if (template.name !== undefined) {
setClauses.push(`name = $${paramIndex}`)
params.push(template.name)
paramIndex++
}
if (template.content !== undefined) {
setClauses.push(`content = $${paramIndex}`)
params.push(template.content)
paramIndex++
}
setClauses.push(`updated_at = now()`)
params.push(id)
const result = await this.db.query<SelectTemplate>(
`UPDATE "template"
SET ${setClauses.join(', ')}
WHERE id = $${paramIndex}
RETURNING *`,
params
)
return result.rows[0] ?? null
}
async delete(id: string): Promise<boolean> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const result = await this.db.query<SelectTemplate>(
`DELETE FROM "template" WHERE id = $1 RETURNING *`,
[id]
)
return result.rows.length > 0
}
}

View File

@@ -0,0 +1,277 @@
import { backOff } from 'exponential-backoff'
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'
import { minimatch } from 'minimatch'
import { App, Notice, TFile } from 'obsidian'
import pLimit from 'p-limit'
import { IndexProgress } from '../../../components/chat-view/QueryProgress'
import {
LLMAPIKeyInvalidException,
LLMAPIKeyNotSetException,
LLMBaseUrlNotSetException,
LLMRateLimitExceededException,
} from '../../../core/llm/exception'
import { InsertVector, SelectVector } from '../../../database/schema'
import { EmbeddingModel } from '../../../types/embedding'
import { openSettingsModalWithError } from '../../../utils/open-settings-modal'
import { DBManager } from '../../database-manager'
import { VectorRepository } from './vector-repository'
export class VectorManager {
private app: App
private repository: VectorRepository
private dbManager: DBManager
constructor(app: App, dbManager: DBManager) {
this.app = app
this.dbManager = dbManager
this.repository = new VectorRepository(app, dbManager.getPgClient())
}
async performSimilaritySearch(
queryVector: number[],
embeddingModel: EmbeddingModel,
options: {
minSimilarity: number
limit: number
scope?: {
files: string[]
folders: string[]
}
},
): Promise<
(Omit<SelectVector, 'embedding'> & {
similarity: number
})[]
> {
return await this.repository.performSimilaritySearch(
queryVector,
embeddingModel,
options,
)
}
async updateVaultIndex(
embeddingModel: EmbeddingModel,
options: {
chunkSize: number
excludePatterns: string[]
includePatterns: string[]
reindexAll?: boolean
},
updateProgress?: (indexProgress: IndexProgress) => void,
): Promise<void> {
let filesToIndex: TFile[]
if (options.reindexAll) {
filesToIndex = await this.getFilesToIndex({
embeddingModel: embeddingModel,
excludePatterns: options.excludePatterns,
includePatterns: options.includePatterns,
reindexAll: true,
})
await this.repository.clearAllVectors(embeddingModel)
} else {
await this.deleteVectorsForDeletedFiles(embeddingModel)
filesToIndex = await this.getFilesToIndex({
embeddingModel: embeddingModel,
excludePatterns: options.excludePatterns,
includePatterns: options.includePatterns,
})
await this.repository.deleteVectorsForMultipleFiles(
filesToIndex.map((file) => file.path),
embeddingModel,
)
}
if (filesToIndex.length === 0) {
return
}
const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
'markdown',
{
chunkSize: options.chunkSize,
// TODO: Use token-based chunking after migrating to WebAssembly-based tiktoken
// Current token counting method is too slow for practical use
// lengthFunction: async (text) => {
// return await tokenCount(text)
// },
},
)
const contentChunks: InsertVector[] = (
await Promise.all(
filesToIndex.map(async (file) => {
const fileContent = await this.app.vault.cachedRead(file)
const fileDocuments = await textSplitter.createDocuments([
fileContent,
])
return fileDocuments.map((chunk): InsertVector => {
return {
path: file.path,
mtime: file.stat.mtime,
content: chunk.pageContent,
metadata: {
startLine: chunk.metadata.loc.lines.from as number,
endLine: chunk.metadata.loc.lines.to as number,
},
}
})
}),
)
).flat()
updateProgress?.({
completedChunks: 0,
totalChunks: contentChunks.length,
totalFiles: filesToIndex.length,
})
const embeddingProgress = { completed: 0, inserted: 0 }
const embeddingChunks: InsertVector[] = []
const batchSize = 100
const limit = pLimit(50)
const abortController = new AbortController()
const tasks = contentChunks.map((chunk) =>
limit(async () => {
if (abortController.signal.aborted) {
throw new Error('Operation was aborted')
}
try {
await backOff(
async () => {
const embedding = await embeddingModel.getEmbedding(chunk.content)
const embeddedChunk = {
path: chunk.path,
mtime: chunk.mtime,
content: chunk.content,
embedding,
metadata: chunk.metadata,
}
embeddingChunks.push(embeddedChunk)
embeddingProgress.completed++
updateProgress?.({
completedChunks: embeddingProgress.completed,
totalChunks: contentChunks.length,
totalFiles: filesToIndex.length,
})
// Insert vectors in batches
if (
embeddingChunks.length >=
embeddingProgress.inserted + batchSize ||
embeddingChunks.length === contentChunks.length
) {
await this.repository.insertVectors(
embeddingChunks.slice(
embeddingProgress.inserted,
embeddingProgress.inserted + batchSize,
),
embeddingModel,
)
embeddingProgress.inserted += batchSize
}
},
{
numOfAttempts: 5,
startingDelay: 1000,
timeMultiple: 1.5,
jitter: 'full',
},
)
} catch (error) {
abortController.abort()
throw error
}
}),
)
try {
await Promise.all(tasks)
} catch (error) {
if (
error instanceof LLMAPIKeyNotSetException ||
error instanceof LLMAPIKeyInvalidException ||
error instanceof LLMBaseUrlNotSetException
) {
openSettingsModalWithError(this.app, (error as Error).message)
} else if (error instanceof LLMRateLimitExceededException) {
new Notice(error.message)
} else {
console.error('Error embedding chunks:', error)
throw error
}
} finally {
await this.dbManager.save()
}
}
private async deleteVectorsForDeletedFiles(embeddingModel: EmbeddingModel) {
const indexedFilePaths =
await this.repository.getIndexedFilePaths(embeddingModel)
for (const filePath of indexedFilePaths) {
if (!this.app.vault.getAbstractFileByPath(filePath)) {
await this.repository.deleteVectorsForMultipleFiles(
[filePath],
embeddingModel,
)
}
}
}
private async getFilesToIndex({
embeddingModel,
excludePatterns,
includePatterns,
reindexAll,
}: {
embeddingModel: EmbeddingModel
excludePatterns: string[]
includePatterns: string[]
reindexAll?: boolean
}): Promise<TFile[]> {
let filesToIndex = this.app.vault.getMarkdownFiles()
filesToIndex = filesToIndex.filter((file) => {
return !excludePatterns.some((pattern) => minimatch(file.path, pattern))
})
if (includePatterns.length > 0) {
filesToIndex = filesToIndex.filter((file) => {
return includePatterns.some((pattern) => minimatch(file.path, pattern))
})
}
if (reindexAll) {
return filesToIndex
}
// Check for updated or new files
filesToIndex = await Promise.all(
filesToIndex.map(async (file) => {
const fileChunks = await this.repository.getVectorsByFilePath(
file.path,
embeddingModel,
)
if (fileChunks.length === 0) {
// File is not indexed, so we need to index it
const fileContent = await this.app.vault.cachedRead(file)
if (fileContent.length === 0) {
// Ignore empty files
return null
}
return file
}
const outOfDate = file.stat.mtime > fileChunks[0].mtime
if (outOfDate) {
// File has changed, so we need to re-index it
return file
}
return null
}),
).then((files) => files.filter(Boolean) as TFile[])
return filesToIndex
}
}

View File

@@ -0,0 +1,180 @@
import { PGliteInterface } from '@electric-sql/pglite'
import { App } from 'obsidian'
import { EmbeddingModel } from '../../../types/embedding'
import { DatabaseNotInitializedException } from '../../exception'
import { InsertVector, SelectVector, vectorTables } from '../../schema'
export class VectorRepository {
private app: App
private db: PGliteInterface | null
constructor(app: App, pgClient: PGliteInterface | null) {
this.app = app
this.db = pgClient
}
private getTableName(embeddingModel: EmbeddingModel): string {
const tableDefinition = vectorTables[embeddingModel.dimension]
if (!tableDefinition) {
throw new Error(`No table definition found for model: ${embeddingModel.id}`)
}
return tableDefinition.name
}
async getIndexedFilePaths(embeddingModel: EmbeddingModel): Promise<string[]> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const tableName = this.getTableName(embeddingModel)
const result = await this.db.query<{ path: string }>(
`SELECT DISTINCT path FROM "${tableName}"`
)
return result.rows.map((row: { path: string }) => row.path)
}
async getVectorsByFilePath(
filePath: string,
embeddingModel: EmbeddingModel,
): Promise<SelectVector[]> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const tableName = this.getTableName(embeddingModel)
const result = await this.db.query<SelectVector>(
`SELECT * FROM "${tableName}" WHERE path = $1`,
[filePath]
)
return result.rows
}
async deleteVectorsForSingleFile(
filePath: string,
embeddingModel: EmbeddingModel,
): Promise<void> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const tableName = this.getTableName(embeddingModel)
await this.db.query(
`DELETE FROM "${tableName}" WHERE path = $1`,
[filePath]
)
}
async deleteVectorsForMultipleFiles(
filePaths: string[],
embeddingModel: EmbeddingModel,
): Promise<void> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const tableName = this.getTableName(embeddingModel)
await this.db.query(
`DELETE FROM "${tableName}" WHERE path = ANY($1)`,
[filePaths]
)
}
async clearAllVectors(embeddingModel: EmbeddingModel): Promise<void> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const tableName = this.getTableName(embeddingModel)
await this.db.query(`DELETE FROM "${tableName}"`)
}
async insertVectors(
data: InsertVector[],
embeddingModel: EmbeddingModel,
): Promise<void> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const tableName = this.getTableName(embeddingModel)
// 构建批量插入的 SQL
const values = data.map((vector, index) => {
const offset = index * 5
return `($${offset + 1}, $${offset + 2}, $${offset + 3}, $${offset + 4}, $${offset + 5})`
}).join(',')
const params = data.flatMap(vector => [
vector.path,
vector.mtime,
vector.content,
`[${vector.embedding.join(',')}]`, // 转换为PostgreSQL vector格式
vector.metadata
])
await this.db.query(
`INSERT INTO "${tableName}" (path, mtime, content, embedding, metadata)
VALUES ${values}`,
params
)
}
async performSimilaritySearch(
queryVector: number[],
embeddingModel: EmbeddingModel,
options: {
minSimilarity: number
limit: number
scope?: {
files: string[]
folders: string[]
}
},
): Promise<
(Omit<SelectVector, 'embedding'> & {
similarity: number
})[]
> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const tableName = this.getTableName(embeddingModel)
let scopeCondition = ''
const params: any[] = [`[${queryVector.join(',')}]`, options.minSimilarity, options.limit]
let paramIndex = 4
if (options.scope) {
const conditions: string[] = []
if (options.scope.files.length > 0) {
conditions.push(`path = ANY($${paramIndex})`)
params.push(options.scope.files)
paramIndex++
}
if (options.scope.folders.length > 0) {
const folderConditions = options.scope.folders.map((folder, idx) => {
params.push(`${folder}/%`)
return `path LIKE $${paramIndex + idx}`
})
conditions.push(`(${folderConditions.join(' OR ')})`)
paramIndex += options.scope.folders.length
}
if (conditions.length > 0) {
scopeCondition = `AND (${conditions.join(' OR ')})`
}
}
const query = `
SELECT
id, path, mtime, content, metadata,
1 - (embedding <=> $1::vector) as similarity
FROM "${tableName}"
WHERE 1 - (embedding <=> $1::vector) > $2
${scopeCondition}
ORDER BY similarity DESC
LIMIT $3
`
type SearchResult = Omit<SelectVector, 'embedding'> & { similarity: number }
const result = await this.db.query<SearchResult>(query, params)
return result.rows
}
}