use web worker to run pglite

This commit is contained in:
duanfuxiang
2025-03-19 21:01:32 +08:00
parent 76288377c3
commit 679d7142eb
25 changed files with 985 additions and 461 deletions

View File

@@ -14,7 +14,6 @@ import {
} from 'react'
import { v4 as uuidv4 } from 'uuid'
import { ModeSelect } from './chat-input/ModeSelect'
import { ApplyViewState } from '../../ApplyView'
import { APPLY_VIEW_TYPE } from '../../constants'
import { useApp } from '../../contexts/AppContext'
@@ -53,6 +52,7 @@ const readFileContent = (filePath: string): string => {
return `Content of file: ${filePath}`;
}
import { ModeSelect } from './chat-input/ModeSelect'
import PromptInputWithActions, { ChatUserInputRef } from './chat-input/PromptInputWithActions'
import { editorStateToPlainText } from './chat-input/utils/editor-state-to-plain-text'
import { ChatHistory } from './ChatHistory'

View File

@@ -1,16 +1,15 @@
import * as Tooltip from '@radix-ui/react-tooltip';
import { Check, CircleCheckBig, CircleHelp, CopyIcon, FilePlus2 } from 'lucide-react';
import { ComponentPropsWithoutRef, useState } from 'react';
import { ReactNode, useState } from 'react';
import ReactMarkdown from 'react-markdown';
import rehypeRaw from 'rehype-raw';
import { useApp } from 'src/contexts/AppContext';
function CopyButton({ message }: { message: string }) {
const [copied, setCopied] = useState(false)
const handleCopy = async () => {
await navigator.clipboard.writeText(message.trim())
await navigator.clipboard.writeText(message)
setCopied(true)
setTimeout(() => {
setCopied(false)
@@ -49,8 +48,6 @@ function CreateNewFileButton({ message }: { message: string }) {
const handleCreate = async () => {
const firstLine = message.split('\n')[0].trim().replace(/[\\\/:]/g, '');
const filename = firstLine.slice(0, 200) + (firstLine.length > 200 ? '...' : '') || 'untitled';
console.log('filename', filename)
console.log('message', message)
await app.vault.create(`/${filename}.md`, message)
setCreated(true)
setTimeout(() => {
@@ -68,7 +65,7 @@ function CreateNewFileButton({ message }: { message: string }) {
className="infio-chat-message-actions-icon--copied"
/>
) : (
<FilePlus2 onClick={handleCreate} size={12} />
<FilePlus2 onClick={handleCreate} size={12} />
)}
</button>
</Tooltip.Trigger>
@@ -82,58 +79,70 @@ function CreateNewFileButton({ message }: { message: string }) {
)
}
const MarkdownWithIcons = ({ markdownContent, className }: { markdownContent: string, className?: string }) => {
// Preprocess the markdown content: convert <icon> tags into a format ReactMarkdown can handle
const processedContent = markdownContent.replace(
/<icon\s+name=['"]([^'"]+)['"]\s+size=\{(\d+)\}(\s+className=['"]([^'"]+)['"])?[^>]*\/>/g,
(_, name, size, __, className) =>
`<span data-icon="${name}" data-size="${size}" ${className ? `class="${className}"` : ''}></span>`
);
type IconType = 'ask_followup_question' | 'attempt_completion';
const rawContent = markdownContent.replace(
/<icon\s+name=['"]([^'"]+)['"]\s+size=\{(\d+)\}(\s+className=['"]([^'"]+)['"])?[^>]*\/>/g,
() => ``
).trim();
interface MarkdownWithIconsProps {
markdownContent: string;
finish: boolean
className?: string;
iconName?: IconType;
iconSize?: number;
iconClassName?: string;
}
const components = {
span: (props: ComponentPropsWithoutRef<'span'> & {
'data-icon'?: string;
'data-size'?: string;
}) => {
if (props['data-icon']) {
const name = props['data-icon'];
const size = props['data-size'] ? Number(props['data-size']) : 16;
const className = props.className || '';
const MarkdownWithIcons = ({
markdownContent,
finish,
className,
iconName,
iconSize = 14,
iconClassName = "infio-markdown-icon"
}: MarkdownWithIconsProps) => {
// Handle icon rendering directly without string manipulation
const renderIcon = (): ReactNode => {
if (!iconName) return null;
switch (name) {
case 'ask_followup_question':
return <CircleHelp size={size} className={className} />;
case 'attempt_completion':
return <CircleCheckBig size={size} className={className} />;
default:
return null;
}
}
return <span {...props} />;
},
switch (iconName) {
case 'ask_followup_question':
return <CircleHelp size={iconSize} className={iconClassName} />;
case 'attempt_completion':
return <CircleCheckBig size={iconSize} className={iconClassName} />;
default:
return null;
}
};
const renderTitle = (): ReactNode => {
if (!iconName) return null;
switch (iconName) {
case 'ask_followup_question':
return 'Ask Followup Question:';
case 'attempt_completion':
return 'Task Completion';
default:
return null;
}
};
// Component for markdown content
return (
<>
<ReactMarkdown
className={`${className}`}
components={components}
rehypePlugins={[rehypeRaw]}
>
{processedContent}
</ReactMarkdown>
{processedContent &&
<div className={`${className}`}>
<span>{iconName && renderIcon()} {renderTitle()}</span>
<ReactMarkdown
className={`${className}`}
rehypePlugins={[rehypeRaw]}
>
{markdownContent}
</ReactMarkdown>
</div>
{markdownContent && finish &&
<div className="infio-chat-message-actions">
<CopyButton message={rawContent} />
<CreateNewFileButton message={rawContent} />
<CopyButton message={markdownContent} />
<CreateNewFileButton message={markdownContent} />
</div>}
</>
);
};

View File

@@ -123,16 +123,22 @@ function ReactMarkdown({
<MarkdownWithIcons
key={"attempt-completion-" + index}
className="infio-markdown infio-attempt-completion"
markdownContent={
`<icon name='attempt_completion' size={14} className="infio-markdown-icon" />
${block.result && block.result.trimStart()}`} />
markdownContent={block.result}
finish={block.finish}
iconName="attempt_completion"
iconSize={14}
iconClassName="infio-markdown-icon"
/>
) : block.type === 'ask_followup_question' ? (
<MarkdownWithIcons
key={"ask-followup-question-" + index}
className="infio-markdown infio-followup-question"
markdownContent={
`<icon name='ask_followup_question' size={14} className="infio-markdown-icon" />
${block.question && block.question.trimStart()}`} />
markdownContent={block.question}
finish={block.finish}
iconName="ask_followup_question"
iconSize={14}
iconClassName="infio-markdown-icon"
/>
) : block.type === 'switch_mode' ? (
<MarkdownSwitchModeBlock
key={"switch-mode-" + index}

View File

@@ -60,6 +60,7 @@ export class GeminiProvider implements BaseLLMProvider {
: undefined
try {
console.log(request)
const model = this.client.getGenerativeModel({
model: request.model,
generationConfig: {

View File

@@ -28,12 +28,9 @@ function getDeepResearchCapabilitiesSection(): string {
CAPABILITIES
- You have access to tools that let you search the web using internet search engines like Google to find relevant information on current events, facts, data, and other online content.
- Using search_web, you can simulate a human research process: first searching with relevant keywords to obtain initial results (containing URLs, titles, and content snippets).
- You should evaluate the relevance and reliability of each search result based on its title and content snippet, then select the most relevant URLs for deeper investigation.
- Use fetch_urls_content to retrieve complete webpage content from selected URLs to gain detailed information.
- You can conduct multiple rounds of searches and content retrieval (maximum 3 rounds), optimizing your search keywords in each round based on previously gathered information, just as a human would perform deep research.
- Synthesize all collected information to answer the user's questions comprehensively, accurately, and in a well-structured manner, citing information sources when appropriate.
`
- Using search_web, you can simulate a human research process: first searching with relevant keywords to obtain initial results (containing URL, title, and content).
- Use fetch_urls_content to retrieve complete webpage content from URLs to gain detailed information beyond the limited snippets provided by search_web.
- Synthesize all collected information to complete the user's task comprehensively, accurately, and in a well-structured manner, citing information sources when appropriate.`
}
export function getCapabilitiesSection(
@@ -45,4 +42,4 @@ export function getCapabilitiesSection(
return getDeepResearchCapabilitiesSection();
}
return getObsidianCapabilitiesSection(cwd, searchWebTool);
}
}
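
For illustration, the capabilities text above describes a search -> fetch -> synthesize loop. One round of it could look like the tool calls below; the <search_web> format matches the tool definition later in this diff, while the <fetch_urls_content> parameter name and URL are hypothetical, since that tool's exact schema is not part of this commit.

<search_web>
<query>pglite run in web worker indexeddb persistence</query>
</search_web>

<fetch_urls_content>
<!-- the <urls> parameter name below is hypothetical -->
<urls>https://example.com/pglite-worker-guide</urls>
</fetch_urls_content>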

View File

@@ -16,15 +16,17 @@ Usage:
<query>Your search query here</query>
</search_web>
Examples:
Example 1:
<search_web>
<query>capital of France population statistics 2023</query>
</search_web>
Example 2:
<search_web>
<query>"renewable energy" growth statistics Europe</query>
</search_web>
Example 3:
<search_web>
<query>react vs angular vs vue.js comparison</query>
</search_web>`

View File

@@ -1,4 +1,4 @@
import { App } from 'obsidian'
import { App, TFile } from 'obsidian'
import { QueryProgressState } from '../../components/chat-view/QueryProgress'
import { DBManager } from '../../database/database-manager'
@@ -13,7 +13,8 @@ export class RAGEngine {
private app: App
private settings: InfioSettings
private vectorManager: VectorManager
private embeddingModel: EmbeddingModel | null = null
private embeddingModel: EmbeddingModel | null = null
private initialized = false
constructor(
app: App,
@@ -23,7 +24,7 @@ export class RAGEngine {
this.app = app
this.settings = settings
this.vectorManager = dbManager.getVectorManager()
this.embeddingModel = getEmbeddingModel(settings)
this.embeddingModel = getEmbeddingModel(settings)
}
setSettings(settings: InfioSettings) {
@@ -34,16 +35,14 @@ export class RAGEngine {
// TODO: Implement automatic vault re-indexing when settings are changed.
// Currently, users must manually re-index the vault.
async updateVaultIndex(
options: { reindexAll: boolean } = {
reindexAll: false,
},
options: { reindexAll: boolean },
onQueryProgressChange?: (queryProgress: QueryProgressState) => void,
): Promise<void> {
if (!this.embeddingModel) {
throw new Error('Embedding model is not set')
): Promise<void> {
if (!this.embeddingModel) {
throw new Error('Embedding model is not set')
}
await this.vectorManager.updateVaultIndex(
this.embeddingModel,
this.embeddingModel,
{
chunkSize: this.settings.ragOptions.chunkSize,
excludePatterns: this.settings.ragOptions.excludePatterns,
@@ -57,7 +56,23 @@ export class RAGEngine {
})
},
)
}
this.initialized = true
}
async updateFileIndex(file: TFile) {
await this.vectorManager.UpdateFileVectorIndex(
this.embeddingModel,
this.settings.ragOptions.chunkSize,
file,
)
}
async deleteFileIndex(file: TFile) {
await this.vectorManager.DeleteFileVectorIndex(
this.embeddingModel,
file,
)
}
async processQuery({
query,
@@ -78,13 +93,19 @@ export class RAGEngine {
if (!this.embeddingModel) {
throw new Error('Embedding model is not set')
}
// TODO: Decide the vault index update strategy.
// Current approach: Update on every query.
await this.updateVaultIndex({ reindexAll: false }, onQueryProgressChange)
if (!this.initialized) {
await this.updateVaultIndex({ reindexAll: false }, onQueryProgressChange)
}
const queryEmbedding = await this.getQueryEmbedding(query)
onQueryProgressChange?.({
type: 'querying',
})
})
console.log('query, ', {
minSimilarity: this.settings.ragOptions.minSimilarity,
limit: this.settings.ragOptions.limit,
scope,
})
const queryResult = await this.vectorManager.performSimilaritySearch(
queryEmbedding,
this.embeddingModel,
@@ -93,7 +114,8 @@ export class RAGEngine {
limit: this.settings.ragOptions.limit,
scope,
},
)
)
console.log('queryResult', queryResult)
onQueryProgressChange?.({
type: 'querying-done',
queryResult,

View File

@@ -1,38 +1,33 @@
import { PGlite } from '@electric-sql/pglite'
// @ts-expect-error
import { type PGliteWithLive, live } from '@electric-sql/pglite/live'
import { App, normalizePath } from 'obsidian'
import { type PGliteWithLive } from '@electric-sql/pglite/live'
import { App } from 'obsidian'
import { PGLITE_DB_PATH } from '../constants'
// import { PGLITE_DB_PATH } from '../constants'
import { createAndInitDb } from '../pgworker'
import { ConversationManager } from './modules/conversation/conversation-manager'
import { TemplateManager } from './modules/template/template-manager'
import { VectorManager } from './modules/vector/vector-manager'
import { pgliteResources } from './pglite-resources'
import { migrations } from './sql'
// import { pgliteResources } from './pglite-resources'
// import { migrations } from './sql'
export class DBManager {
private app: App
private dbPath: string
// private app: App
// private dbPath: string
private db: PGliteWithLive | null = null
// private db: PgliteDatabase | null = null
private vectorManager: VectorManager
private templateManager: TemplateManager
private conversationManager: ConversationManager
constructor(app: App, dbPath: string) {
constructor(app: App) {
this.app = app
this.dbPath = dbPath
// this.dbPath = dbPath
}
static async create(app: App): Promise<DBManager> {
const dbManager = new DBManager(app, normalizePath(PGLITE_DB_PATH))
await dbManager.loadExistingDatabase()
if (!dbManager.db) {
await dbManager.createNewDatabase()
}
await dbManager.migrateDatabase()
await dbManager.save()
const dbManager = new DBManager(app)
dbManager.db = await createAndInitDb()
dbManager.vectorManager = new VectorManager(app, dbManager)
dbManager.templateManager = new TemplateManager(app, dbManager)
@@ -57,81 +52,70 @@ export class DBManager {
return this.conversationManager
}
private async createNewDatabase() {
const { fsBundle, wasmModule, vectorExtensionBundlePath } =
await this.loadPGliteResources()
this.db = await PGlite.create({
fsBundle: fsBundle,
wasmModule: wasmModule,
extensions: {
vector: vectorExtensionBundlePath,
live,
},
})
}
// private async createNewDatabase() {
// const { fsBundle, wasmModule, vectorExtensionBundlePath } =
// await this.loadPGliteResources()
// this.db = await PGlite.create({
// fsBundle: fsBundle,
// wasmModule: wasmModule,
// extensions: {
// vector: vectorExtensionBundlePath,
// live,
// },
// })
// }
private async loadExistingDatabase() {
try {
const databaseFileExists = await this.app.vault.adapter.exists(
this.dbPath,
)
if (!databaseFileExists) {
return null
}
const fileBuffer = await this.app.vault.adapter.readBinary(this.dbPath)
const fileBlob = new Blob([fileBuffer], { type: 'application/x-gzip' })
const { fsBundle, wasmModule, vectorExtensionBundlePath } =
await this.loadPGliteResources()
this.db = await PGlite.create({
loadDataDir: fileBlob,
fsBundle: fsBundle,
wasmModule: wasmModule,
extensions: {
vector: vectorExtensionBundlePath,
live
},
})
// return drizzle(this.pgClient)
} catch (error) {
console.error('Error loading database:', error)
console.log(this.dbPath)
return null
}
}
// private async loadExistingDatabase() {
// try {
// const databaseFileExists = await this.app.vault.adapter.exists(
// this.dbPath,
// )
// if (!databaseFileExists) {
// return null
// }
// const fileBuffer = await this.app.vault.adapter.readBinary(this.dbPath)
// const fileBlob = new Blob([fileBuffer], { type: 'application/x-gzip' })
// const { fsBundle, wasmModule, vectorExtensionBundlePath } =
// await this.loadPGliteResources()
// this.db = await PGlite.create({
// loadDataDir: fileBlob,
// fsBundle: fsBundle,
// wasmModule: wasmModule,
// extensions: {
// vector: vectorExtensionBundlePath,
// live
// },
// })
// // return drizzle(this.pgClient)
// } catch (error) {
// console.error('Error loading database:', error)
// console.log(this.dbPath)
// return null
// }
// }
private async migrateDatabase(): Promise<void> {
if (!this.db) {
throw new Error('Database client not initialized');
}
// private async migrateDatabase(): Promise<void> {
// if (!this.db) {
// throw new Error('Database client not initialized');
// }
try {
// Execute SQL migrations
for (const [_key, migration] of Object.entries(migrations)) {
// Split SQL into individual commands and execute them one by one
const commands = migration.sql.split('\n\n').filter(cmd => cmd.trim());
for (const command of commands) {
await this.db.query(command);
}
}
} catch (error) {
console.error('Error executing SQL migrations:', error);
throw error;
}
}
// try {
// // Execute SQL migrations
// for (const [_key, migration] of Object.entries(migrations)) {
// // Split SQL into individual commands and execute them one by one
// const commands = migration.sql.split('\n\n').filter(cmd => cmd.trim());
// for (const command of commands) {
// await this.db.query(command);
// }
// }
// } catch (error) {
// console.error('Error executing SQL migrations:', error);
// throw error;
// }
// }
async save(): Promise<void> {
if (!this.db) {
return
}
try {
const blob: Blob = await this.db.dumpDataDir('gzip')
await this.app.vault.adapter.writeBinary(
this.dbPath,
Buffer.from(await blob.arrayBuffer()),
)
} catch (error) {
console.error('Error saving database:', error)
}
console.log("need remove")
}
async cleanup() {
@@ -139,37 +123,37 @@ export class DBManager {
this.db = null
}
private async loadPGliteResources(): Promise<{
fsBundle: Blob
wasmModule: WebAssembly.Module
vectorExtensionBundlePath: URL
}> {
try {
// Convert base64 to binary data
const wasmBinary = Buffer.from(pgliteResources.wasmBase64, 'base64')
const dataBinary = Buffer.from(pgliteResources.dataBase64, 'base64')
const vectorBinary = Buffer.from(pgliteResources.vectorBase64, 'base64')
// private async loadPGliteResources(): Promise<{
// fsBundle: Blob
// wasmModule: WebAssembly.Module
// vectorExtensionBundlePath: URL
// }> {
// try {
// // Convert base64 to binary data
// const wasmBinary = Buffer.from(pgliteResources.wasmBase64, 'base64')
// const dataBinary = Buffer.from(pgliteResources.dataBase64, 'base64')
// const vectorBinary = Buffer.from(pgliteResources.vectorBase64, 'base64')
// Create blobs from binary data
const fsBundle = new Blob([dataBinary], {
type: 'application/octet-stream',
})
const wasmModule = await WebAssembly.compile(wasmBinary)
// // Create blobs from binary data
// const fsBundle = new Blob([dataBinary], {
// type: 'application/octet-stream',
// })
// const wasmModule = await WebAssembly.compile(wasmBinary)
// Create a blob URL for the vector extension
const vectorBlob = new Blob([vectorBinary], {
type: 'application/gzip',
})
const vectorExtensionBundlePath = URL.createObjectURL(vectorBlob)
// // Create a blob URL for the vector extension
// const vectorBlob = new Blob([vectorBinary], {
// type: 'application/gzip',
// })
// const vectorExtensionBundlePath = URL.createObjectURL(vectorBlob)
return {
fsBundle,
wasmModule,
vectorExtensionBundlePath: new URL(vectorExtensionBundlePath),
}
} catch (error) {
console.error('Error loading PGlite resources:', error)
throw error
}
}
// return {
// fsBundle,
// wasmModule,
// vectorExtensionBundlePath: new URL(vectorExtensionBundlePath),
// }
// } catch (error) {
// console.error('Error loading PGlite resources:', error)
// throw error
// }
// }
}

View File

@@ -30,7 +30,6 @@ export class ConversationManager {
updatedAt: new Date(),
}
await this.repository.create(conversation)
await this.dbManager.save()
}
async saveConversation(id: string, messages: ChatMessage[]): Promise<void> {
@@ -59,7 +58,6 @@ export class ConversationManager {
// Update conversation timestamp
await this.repository.update(id, { updatedAt: new Date() })
await this.dbManager.save()
}
async findConversation(id: string): Promise<ChatMessage[] | null> {
@@ -74,7 +72,6 @@ export class ConversationManager {
async deleteConversation(id: string): Promise<void> {
await this.repository.delete(id)
await this.dbManager.save()
}
getAllConversations(callback: (conversations: ChatConversationMeta[]) => void): void {
@@ -92,7 +89,6 @@ export class ConversationManager {
async updateConversationTitle(id: string, title: string): Promise<void> {
await this.repository.update(id, { title })
await this.dbManager.save()
}
// convert ChatMessage to InsertMessage

View File

@@ -24,7 +24,6 @@ export class TemplateManager {
throw new DuplicateTemplateException(template.name)
}
const created = await this.repository.create(template)
await this.dbManager.save()
return created
}
@@ -45,7 +44,6 @@ export class TemplateManager {
async deleteTemplate(id: string): Promise<boolean> {
const deleted = await this.repository.delete(id)
await this.dbManager.save()
return deleted
}
}

View File

@@ -6,10 +6,10 @@ import pLimit from 'p-limit'
import { IndexProgress } from '../../../components/chat-view/QueryProgress'
import {
LLMAPIKeyInvalidException,
LLMAPIKeyNotSetException,
LLMBaseUrlNotSetException,
LLMRateLimitExceededException,
LLMAPIKeyInvalidException,
LLMAPIKeyNotSetException,
LLMBaseUrlNotSetException,
LLMRateLimitExceededException,
} from '../../../core/llm/exception'
import { InsertVector, SelectVector } from '../../../database/schema'
import { EmbeddingModel } from '../../../types/embedding'
@@ -19,260 +19,353 @@ import { DBManager } from '../../database-manager'
import { VectorRepository } from './vector-repository'
export class VectorManager {
private app: App
private repository: VectorRepository
private dbManager: DBManager
private app: App
private repository: VectorRepository
private dbManager: DBManager
constructor(app: App, dbManager: DBManager) {
this.app = app
this.dbManager = dbManager
this.repository = new VectorRepository(app, dbManager.getPgClient())
}
constructor(app: App, dbManager: DBManager) {
this.app = app
this.dbManager = dbManager
this.repository = new VectorRepository(app, dbManager.getPgClient())
}
async performSimilaritySearch(
queryVector: number[],
embeddingModel: EmbeddingModel,
options: {
minSimilarity: number
limit: number
scope?: {
files: string[]
folders: string[]
}
},
): Promise<
(Omit<SelectVector, 'embedding'> & {
similarity: number
})[]
> {
return await this.repository.performSimilaritySearch(
queryVector,
embeddingModel,
options,
)
}
async performSimilaritySearch(
queryVector: number[],
embeddingModel: EmbeddingModel,
options: {
minSimilarity: number
limit: number
scope?: {
files: string[]
folders: string[]
}
},
): Promise<
(Omit<SelectVector, 'embedding'> & {
similarity: number
})[]
> {
return await this.repository.performSimilaritySearch(
queryVector,
embeddingModel,
options,
)
}
async updateVaultIndex(
embeddingModel: EmbeddingModel,
options: {
chunkSize: number
excludePatterns: string[]
includePatterns: string[]
reindexAll?: boolean
},
updateProgress?: (indexProgress: IndexProgress) => void,
): Promise<void> {
let filesToIndex: TFile[]
if (options.reindexAll) {
filesToIndex = await this.getFilesToIndex({
embeddingModel: embeddingModel,
excludePatterns: options.excludePatterns,
includePatterns: options.includePatterns,
reindexAll: true,
})
await this.repository.clearAllVectors(embeddingModel)
} else {
await this.deleteVectorsForDeletedFiles(embeddingModel)
filesToIndex = await this.getFilesToIndex({
embeddingModel: embeddingModel,
excludePatterns: options.excludePatterns,
includePatterns: options.includePatterns,
})
await this.repository.deleteVectorsForMultipleFiles(
filesToIndex.map((file) => file.path),
embeddingModel,
)
}
async updateVaultIndex(
embeddingModel: EmbeddingModel,
options: {
chunkSize: number
excludePatterns: string[]
includePatterns: string[]
reindexAll?: boolean
},
updateProgress?: (indexProgress: IndexProgress) => void,
): Promise<void> {
let filesToIndex: TFile[]
if (options.reindexAll) {
filesToIndex = await this.getFilesToIndex({
embeddingModel: embeddingModel,
excludePatterns: options.excludePatterns,
includePatterns: options.includePatterns,
reindexAll: true,
})
await this.repository.clearAllVectors(embeddingModel)
} else {
await this.cleanVectorsForDeletedFiles(embeddingModel)
filesToIndex = await this.getFilesToIndex({
embeddingModel: embeddingModel,
excludePatterns: options.excludePatterns,
includePatterns: options.includePatterns,
})
await this.repository.deleteVectorsForMultipleFiles(
filesToIndex.map((file) => file.path),
embeddingModel,
)
}
if (filesToIndex.length === 0) {
return
}
if (filesToIndex.length === 0) {
return
}
const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
'markdown',
{
chunkSize: options.chunkSize,
// TODO: Use token-based chunking after migrating to WebAssembly-based tiktoken
// Current token counting method is too slow for practical use
// lengthFunction: async (text) => {
// return await tokenCount(text)
// },
},
)
const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
'markdown',
{
chunkSize: options.chunkSize,
// TODO: Use token-based chunking after migrating to WebAssembly-based tiktoken
// Current token counting method is too slow for practical use
// lengthFunction: async (text) => {
// return await tokenCount(text)
// },
},
)
const contentChunks: InsertVector[] = (
await Promise.all(
filesToIndex.map(async (file) => {
const fileContent = await this.app.vault.cachedRead(file)
const fileDocuments = await textSplitter.createDocuments([
fileContent,
])
return fileDocuments.map((chunk): InsertVector => {
return {
path: file.path,
mtime: file.stat.mtime,
const contentChunks: InsertVector[] = (
await Promise.all(
filesToIndex.map(async (file) => {
const fileContent = await this.app.vault.cachedRead(file)
const fileDocuments = await textSplitter.createDocuments([
fileContent,
])
return fileDocuments.map((chunk): InsertVector => {
return {
path: file.path,
mtime: file.stat.mtime,
content: chunk.pageContent,
embedding: [],
metadata: {
startLine: chunk.metadata.loc.lines.from as number,
endLine: chunk.metadata.loc.lines.to as number,
},
}
})
}),
)
).flat()
metadata: {
startLine: Number(chunk.metadata.loc.lines.from),
endLine: Number(chunk.metadata.loc.lines.to),
},
}
})
}),
)
).flat()
updateProgress?.({
completedChunks: 0,
totalChunks: contentChunks.length,
totalFiles: filesToIndex.length,
})
updateProgress?.({
completedChunks: 0,
totalChunks: contentChunks.length,
totalFiles: filesToIndex.length,
})
const embeddingProgress = { completed: 0, inserted: 0 }
const embeddingChunks: InsertVector[] = []
const batchSize = 100
const limit = pLimit(50)
const abortController = new AbortController()
const tasks = contentChunks.map((chunk) =>
limit(async () => {
if (abortController.signal.aborted) {
throw new Error('Operation was aborted')
}
try {
await backOff(
async () => {
const embedding = await embeddingModel.getEmbedding(chunk.content)
const embeddedChunk = {
path: chunk.path,
mtime: chunk.mtime,
content: chunk.content,
embedding,
metadata: chunk.metadata,
}
embeddingChunks.push(embeddedChunk)
embeddingProgress.completed++
updateProgress?.({
completedChunks: embeddingProgress.completed,
totalChunks: contentChunks.length,
totalFiles: filesToIndex.length,
})
const embeddingProgress = { completed: 0 }
const embeddingChunks: InsertVector[] = []
const batchSize = 100
const limit = pLimit(50)
const abortController = new AbortController()
const tasks = contentChunks.map((chunk) =>
limit(async () => {
if (abortController.signal.aborted) {
throw new Error('Operation was aborted')
}
try {
await backOff(
async () => {
const embedding = await embeddingModel.getEmbedding(chunk.content)
const embeddedChunk = {
path: chunk.path,
mtime: chunk.mtime,
content: chunk.content,
embedding,
metadata: chunk.metadata,
}
embeddingChunks.push(embeddedChunk)
embeddingProgress.completed++
updateProgress?.({
completedChunks: embeddingProgress.completed,
totalChunks: contentChunks.length,
totalFiles: filesToIndex.length,
})
},
{
numOfAttempts: 5,
startingDelay: 1000,
timeMultiple: 1.5,
jitter: 'full',
},
)
} catch (error) {
abortController.abort()
throw error
}
}),
)
// Insert vectors in batches
if (
embeddingChunks.length >=
embeddingProgress.inserted + batchSize ||
embeddingChunks.length === contentChunks.length
) {
await this.repository.insertVectors(
embeddingChunks.slice(
embeddingProgress.inserted,
embeddingProgress.inserted + batchSize,
),
embeddingModel,
)
embeddingProgress.inserted += batchSize
}
},
{
numOfAttempts: 5,
startingDelay: 1000,
timeMultiple: 1.5,
jitter: 'full',
},
)
} catch (error) {
abortController.abort()
throw error
}
}),
)
try {
await Promise.all(tasks)
try {
await Promise.all(tasks)
} catch (error) {
if (
error instanceof LLMAPIKeyNotSetException ||
error instanceof LLMAPIKeyInvalidException ||
error instanceof LLMBaseUrlNotSetException
) {
openSettingsModalWithError(this.app, (error as Error).message)
} else if (error instanceof LLMRateLimitExceededException) {
new Notice(error.message)
} else {
console.error('Error embedding chunks:', error)
throw error
}
} finally {
await this.dbManager.save()
}
}
// all embedding generated, batch insert
if (embeddingChunks.length > 0) {
// batch insert all vectors
let inserted = 0
while (inserted < embeddingChunks.length) {
const chunksToInsert = embeddingChunks.slice(
inserted,
Math.min(inserted + batchSize, embeddingChunks.length)
)
await this.repository.insertVectors(chunksToInsert, embeddingModel)
inserted += chunksToInsert.length
}
}
} catch (error) {
if (
error instanceof LLMAPIKeyNotSetException ||
error instanceof LLMAPIKeyInvalidException ||
error instanceof LLMBaseUrlNotSetException
) {
openSettingsModalWithError(this.app, error.message)
} else if (error instanceof LLMRateLimitExceededException) {
new Notice(error.message)
} else {
console.error('Error embedding chunks:', error)
throw error
}
}
}
private async deleteVectorsForDeletedFiles(embeddingModel: EmbeddingModel) {
const indexedFilePaths =
await this.repository.getIndexedFilePaths(embeddingModel)
for (const filePath of indexedFilePaths) {
if (!this.app.vault.getAbstractFileByPath(filePath)) {
await this.repository.deleteVectorsForMultipleFiles(
[filePath],
embeddingModel,
)
}
}
}
async UpdateFileVectorIndex(
embeddingModel: EmbeddingModel,
chunkSize: number,
file: TFile
) {
private async getFilesToIndex({
embeddingModel,
excludePatterns,
includePatterns,
reindexAll,
}: {
embeddingModel: EmbeddingModel
excludePatterns: string[]
includePatterns: string[]
reindexAll?: boolean
}): Promise<TFile[]> {
let filesToIndex = this.app.vault.getMarkdownFiles()
// Delete existing vectors for the files
await this.repository.deleteVectorsForSingleFile(
file.path,
embeddingModel,
)
filesToIndex = filesToIndex.filter((file) => {
return !excludePatterns.some((pattern) => minimatch(file.path, pattern))
})
// Embed the files
const textSplitter = RecursiveCharacterTextSplitter.fromLanguage(
'markdown',
{
chunkSize,
},
)
const fileContent = await this.app.vault.cachedRead(file)
const fileDocuments = await textSplitter.createDocuments([
fileContent,
])
if (includePatterns.length > 0) {
filesToIndex = filesToIndex.filter((file) => {
return includePatterns.some((pattern) => minimatch(file.path, pattern))
})
}
const contentChunks: InsertVector[] = fileDocuments.map((chunk): InsertVector => {
return {
path: file.path,
mtime: file.stat.mtime,
content: chunk.pageContent,
embedding: [],
metadata: {
startLine: Number(chunk.metadata.loc.lines.from),
endLine: Number(chunk.metadata.loc.lines.to),
},
}
})
if (reindexAll) {
return filesToIndex
}
const embeddingChunks: InsertVector[] = []
const limit = pLimit(50)
const abortController = new AbortController()
const tasks = contentChunks.map((chunk) =>
limit(async () => {
if (abortController.signal.aborted) {
throw new Error('Operation was aborted')
}
try {
await backOff(
async () => {
const embedding = await embeddingModel.getEmbedding(chunk.content)
const embeddedChunk = {
path: chunk.path,
mtime: chunk.mtime,
content: chunk.content,
embedding,
metadata: chunk.metadata,
}
embeddingChunks.push(embeddedChunk)
},
{
numOfAttempts: 5,
startingDelay: 1000,
timeMultiple: 1.5,
jitter: 'full',
},
)
} catch (error) {
abortController.abort()
throw error
}
}),
)
// Check for updated or new files
filesToIndex = await Promise.all(
filesToIndex.map(async (file) => {
const fileChunks = await this.repository.getVectorsByFilePath(
file.path,
embeddingModel,
)
if (fileChunks.length === 0) {
// File is not indexed, so we need to index it
const fileContent = await this.app.vault.cachedRead(file)
if (fileContent.length === 0) {
// Ignore empty files
return null
}
return file
}
const outOfDate = file.stat.mtime > fileChunks[0].mtime
if (outOfDate) {
// File has changed, so we need to re-index it
return file
}
return null
}),
).then((files) => files.filter(Boolean))
try {
await Promise.all(tasks)
return filesToIndex
}
// all embedding generated, batch insert
if (embeddingChunks.length > 0) {
const batchSize = 100
let inserted = 0
while (inserted < embeddingChunks.length) {
const chunksToInsert = embeddingChunks.slice(inserted, Math.min(inserted + batchSize, embeddingChunks.length))
await this.repository.insertVectors(chunksToInsert, embeddingModel)
inserted += chunksToInsert.length
}
}
} catch (error) {
console.error('Error embedding chunks:', error)
}
}
async DeleteFileVectorIndex(
embeddingModel: EmbeddingModel,
file: TFile
) {
await this.repository.deleteVectorsForSingleFile(file.path, embeddingModel)
}
private async cleanVectorsForDeletedFiles(
embeddingModel: EmbeddingModel,
) {
const indexedFilePaths = await this.repository.getAllIndexedFilePaths(embeddingModel)
const needToDelete = indexedFilePaths.filter(filePath => !this.app.vault.getAbstractFileByPath(filePath))
if (needToDelete.length > 0) {
await this.repository.deleteVectorsForMultipleFiles(
needToDelete,
embeddingModel,
)
}
}
private async getFilesToIndex({
embeddingModel,
excludePatterns,
includePatterns,
reindexAll,
}: {
embeddingModel: EmbeddingModel
excludePatterns: string[]
includePatterns: string[]
reindexAll?: boolean
}): Promise<TFile[]> {
let filesToIndex = this.app.vault.getMarkdownFiles()
filesToIndex = filesToIndex.filter((file) => {
return !excludePatterns.some((pattern) => minimatch(file.path, pattern))
})
if (includePatterns.length > 0) {
filesToIndex = filesToIndex.filter((file) => {
return includePatterns.some((pattern) => minimatch(file.path, pattern))
})
}
if (reindexAll) {
return filesToIndex
}
// Check for updated or new files
filesToIndex = await Promise.all(
filesToIndex.map(async (file) => {
const fileChunks = await this.repository.getVectorsByFilePath(
file.path,
embeddingModel,
)
if (fileChunks.length === 0) {
// File is not indexed, so we need to index it
const fileContent = await this.app.vault.cachedRead(file)
if (fileContent.length === 0) {
// Ignore empty files
return null
}
return file
}
const outOfDate = file.stat.mtime > fileChunks[0].mtime
if (outOfDate) {
// File has changed, so we need to re-index it
return file
}
return null
}),
).then((files) => files.filter(Boolean))
return filesToIndex
}
}

View File

@@ -22,7 +22,7 @@ export class VectorRepository {
return tableDefinition.name
}
async getIndexedFilePaths(embeddingModel: EmbeddingModel): Promise<string[]> {
async getAllIndexedFilePaths(embeddingModel: EmbeddingModel): Promise<string[]> {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
@@ -80,7 +80,7 @@ export class VectorRepository {
if (!this.db) {
throw new DatabaseNotInitializedException()
}
const tableName = this.getTableName(embeddingModel)
const tableName = this.getTableName(embeddingModel)
await this.db.query(`DELETE FROM "${tableName}"`)
}
@@ -160,7 +160,11 @@ export class VectorRepository {
if (conditions.length > 0) {
scopeCondition = `AND (${conditions.join(' OR ')})`
}
}
}
const queryVectorLength = `SELECT count(1) FROM "${tableName}"`;
const queryVectorLengthResult = await this.db.query(queryVectorLength)
console.log('queryVectorLengthResult, ', queryVectorLengthResult)
const query = `
SELECT

View File

@@ -1,5 +1,6 @@
// @ts-nocheck
import { EditorView } from '@codemirror/view'
// import { PGlite } from '@electric-sql/pglite'
import { Editor, MarkdownView, Notice, Plugin, TFile } from 'obsidian'
import { ApplyView } from './ApplyView'
@@ -25,8 +26,8 @@ import {
InfioSettings,
parseInfioSettings,
} from './types/settings'
import './utils/path'
import { getMentionableBlockData } from './utils/obsidian'
import './utils/path'
// Remember to rename these classes and interfaces!
export default class InfioPlugin extends Plugin {
@@ -41,7 +42,7 @@ export default class InfioPlugin extends Plugin {
inlineEdit: InlineEdit | null = null
private dbManagerInitPromise: Promise<DBManager> | null = null
private ragEngineInitPromise: Promise<RAGEngine> | null = null
// private pg: PGlite | null = null
async onload() {
await this.loadSettings()
@@ -49,6 +50,9 @@ export default class InfioPlugin extends Plugin {
this.settingTab = new InfioSettingTab(this.app, this)
this.addSettingTab(this.settingTab)
// create and init pglite db
// this.pg = await createAndInitDb()
// This creates an icon in the left ribbon.
this.addRibbonIcon('wand-sparkles', 'Open infio copilot', () =>
this.openChatView(),
@@ -120,6 +124,17 @@ export default class InfioPlugin extends Plugin {
this.app.metadataCache.on("changed", (file: TFile) => {
if (file) {
eventListener.handleFileChange(file);
console.log("file changed: filename: ", file.name);
this.ragEngine?.updateFileIndex(file);
}
})
);
this.registerEvent(
this.app.metadataCache.on("deleted", (file: TFile) => {
if (file) {
console.log("file deleted: filename: ", file.name)
this.ragEngine?.deleteFileIndex(file);
}
})
);
@@ -322,7 +337,7 @@ export default class InfioPlugin extends Plugin {
}
onunload() {
this.dbManager?.cleanup()
// this.dbManager?.cleanup()
this.dbManager = null
}

src/pgworker/index.ts Normal file
View File

@@ -0,0 +1,19 @@
import { live } from '@electric-sql/pglite/live';
import { PGliteWorker } from '@electric-sql/pglite/worker';
import PGWorker from './pglite.worker';
export const createAndInitDb = async () => {
const worker = new PGWorker();
const pg = await PGliteWorker.create(
worker,
{
extensions: {
live,
},
},
)
console.log('PGlite DB created')
return pg
}
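
Because PGliteWorker proxies the same query surface as a local PGlite instance, callers such as VectorRepository can keep using db.query(...) unchanged. A minimal usage sketch (the table name is a placeholder):

import { createAndInitDb } from './pgworker'

const demo = async () => {
  const db = await createAndInitDb()
  // The query executes inside the worker; the main thread only awaits the proxied result.
  const { rows } = await db.query('SELECT count(1) AS n FROM some_table')
  console.log(rows)
}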

View File

@@ -0,0 +1,78 @@
// @ts-nocheck
import { PGlite } from '@electric-sql/pglite'
import { PGliteWorkerOptions, worker } from '@electric-sql/pglite/worker'
import { pgliteResources } from '../database/pglite-resources'
import { migrations } from '../database/sql'
export { }
const loadPGliteResources = async (): Promise<{
fsBundle: Blob
wasmModule: WebAssembly.Module
vectorExtensionBundlePath: URL
}> => {
try {
// Convert base64 to binary data
const wasmBinary = Buffer.from(pgliteResources.wasmBase64, 'base64')
const dataBinary = Buffer.from(pgliteResources.dataBase64, 'base64')
const vectorBinary = Buffer.from(pgliteResources.vectorBase64, 'base64')
// Create blobs from binary data
const fsBundle = new Blob([dataBinary], {
type: 'application/octet-stream',
})
const wasmModule = await WebAssembly.compile(wasmBinary)
// Create a blob URL for the vector extension
const vectorBlob = new Blob([vectorBinary], {
type: 'application/gzip',
})
const vectorExtensionBundlePath = URL.createObjectURL(vectorBlob)
return {
fsBundle,
wasmModule,
vectorExtensionBundlePath: new URL(vectorExtensionBundlePath),
}
} catch (error) {
console.error('Error loading PGlite resources:', error)
throw error
}
}
worker({
async init(options: PGliteWorkerOptions) {
let db: PGlite;
try {
const { fsBundle, wasmModule, vectorExtensionBundlePath } =
await loadPGliteResources()
db = await PGlite.create('idb://infio-db', {
relaxedDurability: true,
fsBundle: fsBundle,
wasmModule: wasmModule,
...options,
extensions: {
...options.extensions,
vector: vectorExtensionBundlePath,
},
})
} catch (error) {
console.error('Error creating PGlite instance:', error)
throw error
}
// Execute SQL migrations
for (const [_key, migration] of Object.entries(migrations)) {
// Split SQL into individual commands and execute them one by one
const commands = migration.sql.split('\n\n').filter(cmd => cmd.trim());
for (const command of commands) {
await db.exec(command);
}
}
return db
},
})
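
The migration loop above splits each migration's SQL on blank lines ('\n\n') and executes the statements one at a time, so every statement in the migrations map must be separated by an empty line. A sketch of the shape this code expects (the key, table, and statements below are made up for illustration):

// src/database/sql.ts -- shape inferred from how `migrations` is consumed above
export const migrations = {
  '0001_example': {
    sql: `CREATE TABLE IF NOT EXISTS example (id serial PRIMARY KEY, body text);

CREATE INDEX IF NOT EXISTS example_body_idx ON example (body);`,
  },
}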

src/pgworker/worker.d.ts vendored Normal file
View File

@@ -0,0 +1,4 @@
declare module 'pglite.worker' {
const WorkerFactory: new () => Worker;
export default WorkerFactory;
}

View File

@@ -234,7 +234,7 @@ export const ComboBoxComponent: React.FC<ComboBoxComponentProps> = ({
<Popover.Root modal={false} open={isOpen} onOpenChange={setIsOpen}>
<Popover.Trigger asChild>
<div className="infio-llm-setting-item-control">
<span className="infio-llm-setting-model-id">[{modelProvider}]{modelId}</span>
<span className="infio-llm-setting-model-id">[{modelProvider}]&nbsp;{modelId}</span>
</div>
</Popover.Trigger>
<Popover.Content

View File

@@ -1,7 +1,5 @@
import { LLMModel } from './llm/model'
import { EmbeddingModelInfo } from '../utils/api'
export type EmbeddingModelId =
| 'text-embedding-3-small'
| 'text-embedding-004'

View File

@@ -10,6 +10,7 @@ export enum ApiProvider {
Groq = "Groq",
Ollama = "Ollama",
OpenAICompatible = "OpenAICompatible",
TransformersJs = "TransformersJs",
}
export type LLMModel = {

View File

@@ -29,6 +29,7 @@ export type ParsedMsgBlock =
} | {
type: 'attempt_completion'
result: string
finish: boolean
} | {
type: 'search_and_replace'
path: string
@@ -44,7 +45,8 @@ export type ParsedMsgBlock =
finish: boolean
} | {
type: 'ask_followup_question'
question: string
question: string,
finish: boolean
} | {
type: 'list_files'
path: string
@@ -402,6 +404,7 @@ export function parseMsgBlocks(
parsedResult.push({
type: 'attempt_completion',
result,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'ask_followup_question') {
@@ -425,6 +428,7 @@ export function parseMsgBlocks(
parsedResult.push({
type: 'ask_followup_question',
question,
finish: node.sourceCodeLocation.endTag !== undefined
})
lastEndOffset = endOffset
} else if (node.nodeName === 'switch_mode') {
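
The new finish flag is derived from the parser's source locations: while a response is still streaming, an element's closing tag has not arrived yet, so sourceCodeLocation.endTag is undefined. A minimal sketch of that check, assuming parse5 (which the sourceCodeLocation / nodeName fields suggest) with source locations enabled:

import { parseFragment } from 'parse5'

// True once the element's closing tag is present in the (possibly partial) source.
const isBlockFinished = (html: string, tagName: string): boolean => {
  const fragment = parseFragment(html, { sourceCodeLocationInfo: true })
  for (const node of fragment.childNodes) {
    if (node.nodeName === tagName) {
      const loc = node.sourceCodeLocation as { endTag?: unknown } | null | undefined
      return loc?.endTag !== undefined
    }
  }
  return false
}

// isBlockFinished('<attempt_completion>partial', 'attempt_completion') -> false
// isBlockFinished('<attempt_completion>done</attempt_completion>', 'attempt_completion') -> true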

View File

@@ -372,7 +372,7 @@ export class PromptGenerator {
? await getFileOrFolderContent(currentFile.file, this.app.vault)
: undefined
const currentFileContentPrompt = isNewChat && currentFileContent
const currentFileContentPrompt = isNewChat && currentFileContent && this.settings.mode !== 'research'
? `<current_file_content path="${currentFile.file.path}">\n${currentFileContent}\n</current_file_content>`
: undefined

View File

@@ -20,8 +20,6 @@ export async function webSearch(query: string, serperApiKey: string): Promise<st
return new Promise((resolve, reject) => {
const url = `${SERPER_BASE_URL}?q=${encodeURIComponent(query)}&engine=google&api_key=${serperApiKey}&num=20`;
console.log(url)
https.get(url, (res: any) => {
let data = '';
@@ -31,7 +29,6 @@ export async function webSearch(query: string, serperApiKey: string): Promise<st
res.on('end', () => {
try {
console.log(data)
let parsedData: SearchResponse;
try {
parsedData = JSON.parse(data);
@@ -90,8 +87,6 @@ export async function fetchUrlsContent(urls: string[], apiKey: string): Promise<
}
});
console.log('fetchUrlsContent', results);
Promise.all(results).then((texts) => {
resolve(texts.join('\n\n'));
}).catch((error) => {
@@ -123,8 +118,6 @@ function fetchJina(url: string, apiKey: string): Promise<string> {
});
res.on('end', () => {
console.log(data);
try {
// check if there is an error response
const response = JSON.parse(data);