This commit is contained in:
duanfuxiang
2025-01-05 11:51:39 +08:00
commit 0c7ee142cb
215 changed files with 20611 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
// Barrel of re-exports: the few-shot example type and numeric limits come from ./shared,
// the schemas, defaults and settings types come from ./v1/v1.
export type { FewShotExample } from "./shared";
// The V1 defaults are re-exported under the version-neutral name DEFAULT_SETTINGS.
export { DEFAULT_AUTOCOMPLETE_SETTINGS as DEFAULT_SETTINGS, settingsSchema, pluginDataSchema } from "./v1/v1";
export type { Settings, Trigger, PluginData } from "./v1/v1";
// Lower/upper bounds used by the settings schemas.
export {
MAX_DELAY, MAX_FREQUENCY_PENALTY,
MAX_MAX_CHAR_LIMIT,
MAX_MAX_TOKENS, MAX_PRESENCE_PENALTY, MAX_TEMPERATURE, MAX_TOP_P,
MIN_DELAY, MIN_FREQUENCY_PENALTY,
MIN_MAX_CHAR_LIMIT,
MIN_MAX_TOKENS, MIN_PRESENCE_PENALTY, MIN_TEMPERATURE, MIN_TOP_P
} from "./shared";

View File

@@ -0,0 +1,73 @@
import { cloneDeep, get, has, set } from "lodash";
import { findEqualPaths, isRegexValid } from "../../utils/auto-complete";
import {
DEFAULT_SETTINGS as DEFAULT_SETTINGS_V0,
Settings as SettingsV0,
Trigger,
settingsSchema as settingsSchemaV0
} from "./v0/v0";
import {
DEFAULT_AUTOCOMPLETE_SETTINGS as DEFAULT_SETTINGS_V1,
Settings as SettingsV1,
settingsSchema as settingsSchemaV1
} from "./v1/v1";
/**
 * Converts a settings object in the V0 format into the V1 format.
 *
 * The input is deep-cloned first, so the caller's object is not mutated.
 *
 * @param settings - Settings in the V0 format.
 * @returns The value produced by `settingsSchemaV1.parse` for the migrated object.
 * @throws Whatever `settingsSchemaV1.parse` throws when the migrated object does
 *   not satisfy the V1 schema.
 */
export function migrateFromV0ToV1(settings: SettingsV0): SettingsV1 {
    // The clone is reshaped step by step between two schema versions, so it is
    // intentionally untyped. (A plain `// eslint-disable` line comment is not a
    // valid ESLint directive; `eslint-disable-next-line` is.)
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const updatedSettings: any = cloneDeep(settings);

    // Paths that findEqualPaths reports as equal to the V0 defaults are
    // overwritten with the corresponding V1 defaults.
    migrateDefaultSettings(updatedSettings, DEFAULT_SETTINGS_V0, DEFAULT_SETTINGS_V1);

    // Regex triggers get a trailing '$' appended when they do not already end with one.
    updatedSettings.triggers.forEach((trigger: Trigger) => {
        if (trigger.type === 'regex' && !trigger.value.endsWith('$')) {
            trigger.value += '$';
        }
    });

    // Drop triggers with an empty value and regex triggers that fail isRegexValid.
    // This runs after the '$' step above, so an originally empty regex trigger is
    // judged on the value "$".
    updatedSettings.triggers = updatedSettings.triggers.filter(
        (trigger: Trigger) =>
            trigger.value.length > 0
            && (trigger.type !== 'regex' || isRegexValid(trigger.value))
    );

    // Add the 'version' property with the value '1'
    updatedSettings.version = '1';

    // Fall back to the V1 default when the stored removal regex is rejected by isRegexValid.
    if (!isRegexValid(updatedSettings.chainOfThoughRemovalRegex)) {
        updatedSettings.chainOfThoughRemovalRegex = DEFAULT_SETTINGS_V1.chainOfThoughRemovalRegex;
    }

    // These fields are always taken from the V1 defaults.
    updatedSettings.ignoredFilePatterns = DEFAULT_SETTINGS_V1.ignoredFilePatterns;
    updatedSettings.ignoredTags = DEFAULT_SETTINGS_V1.ignoredTags;
    updatedSettings.cacheSuggestions = DEFAULT_SETTINGS_V1.cacheSuggestions;
    updatedSettings.ollamaApiSettings = DEFAULT_SETTINGS_V1.ollamaApiSettings;
    updatedSettings.debugMode = DEFAULT_SETTINGS_V1.debugMode;

    // Parsing the updated settings to ensure they match the SettingsV1 schema
    return settingsSchemaV1.parse(updatedSettings);
}
/**
 * Overwrites, in place, every path of `setting` that `findEqualPaths` reports
 * as equal to `previousDefault` with the value found at the same path in
 * `currentDefault`. Paths that do not exist in `currentDefault` are left untouched.
 *
 * @param setting - Settings object to update; it is mutated.
 * @param previousDefault - Defaults of the version being migrated from.
 * @param currentDefault - Defaults of the version being migrated to.
 */
function migrateDefaultSettings(setting: any, previousDefault: any, currentDefault: any): void {
    for (const path of findEqualPaths(setting, previousDefault)) {
        if (has(currentDefault, path)) {
            set(setting, path, get(currentDefault, path));
        }
    }
}
/**
 * Reports whether `settings` passes validation against the V0 settings schema.
 *
 * @param settings - Candidate settings object.
 * @returns `true` when `settingsSchemaV0.safeParse` succeeds, otherwise `false`.
 */
export const isSettingsV0 = (settings: object): boolean =>
    settingsSchemaV0.safeParse(settings).success;
/**
 * Reports whether `settings` passes validation against the V1 settings schema.
 *
 * @param settings - Candidate settings object.
 * @returns `true` when `settingsSchemaV1.safeParse` succeeds, otherwise `false`.
 */
export const isSettingsV1 = (settings: object): boolean =>
    settingsSchemaV1.safeParse(settings).success;

View File

@@ -0,0 +1,57 @@
import { z } from "zod";
// Inclusive lower/upper bounds referenced by the zod settings schemas.

// Bounds for the `delay` setting.
export const MIN_DELAY = 0;
export const MAX_DELAY = 2000;
// Bounds for the prefix/suffix character limits.
export const MIN_MAX_CHAR_LIMIT = 100;
export const MAX_MAX_CHAR_LIMIT = 10000;
// Bounds for the `max_tokens` model option.
export const MIN_MAX_TOKENS = 128;
export const MAX_MAX_TOKENS = 8192;
// Bounds for the `temperature` model option.
export const MIN_TEMPERATURE = 0;
export const MAX_TEMPERATURE = 1;
// Bounds for the `top_p` model option.
export const MIN_TOP_P = 0;
export const MAX_TOP_P = 1;
// Bounds for the `frequency_penalty` model option.
export const MIN_FREQUENCY_PENALTY = 0;
export const MAX_FREQUENCY_PENALTY = 2;
// Bounds for the `presence_penalty` model option.
export const MIN_PRESENCE_PENALTY = 0;
export const MAX_PRESENCE_PENALTY = 2;
// Schema for the Azure OpenAI connection settings. Unknown keys are rejected (`strict`).
export const azureOAIApiSettingsSchema = z.object({
key: z.string(),
// Either a valid URL or the empty string (a string of max length 0).
url: z.string().url().or(z.string().max(0)),
}).strict();
// Schema for the OpenAI connection settings. Unlike the Azure schema, `url` must
// always be a valid URL. Unknown keys are rejected (`strict`).
export const openAIApiSettingsSchema = z.object({
key: z.string(),
url: z.string().url(),
model: z.string(),
}).strict();
// Schema for the Ollama connection settings: a URL and a model name, no API key field.
// Unknown keys are rejected (`strict`).
export const ollamaApiSettingsSchema = z.object({
url: z.string().url(),
model: z.string(),
}).strict();
/**
 * Schema for the sampling options sent with a completion request.
 *
 * Every bound is inclusive and taken from the MIN_* / MAX_* constants, so the
 * validated range and the error message cannot drift apart. Unknown keys are
 * rejected (`strict`).
 */
export const modelOptionsSchema = z.object({
    temperature: z.number()
        .min(MIN_TEMPERATURE, { message: `Temperature must be at least ${MIN_TEMPERATURE}` })
        .max(MAX_TEMPERATURE, { message: `Temperature must be at most ${MAX_TEMPERATURE}` }),
    top_p: z.number()
        // `.min` is inclusive, so the message says "at least" rather than "greater than".
        .min(MIN_TOP_P, { message: `top_p must be at least ${MIN_TOP_P}` })
        .max(MAX_TOP_P, { message: `top_p must be at most ${MAX_TOP_P}` }),
    frequency_penalty: z.number()
        .min(MIN_FREQUENCY_PENALTY, { message: `Frequency penalty must be at least ${MIN_FREQUENCY_PENALTY}` })
        .max(MAX_FREQUENCY_PENALTY, { message: `Frequency penalty must be at most ${MAX_FREQUENCY_PENALTY}` }),
    presence_penalty: z.number()
        .min(MIN_PRESENCE_PENALTY, { message: `Presence penalty must be at least ${MIN_PRESENCE_PENALTY}` })
        .max(MAX_PRESENCE_PENALTY, { message: `Presence penalty must be at most ${MAX_PRESENCE_PENALTY}` }),
    max_tokens: z.number().int()
        .min(MIN_MAX_TOKENS, { message: `max_tokens must be at least ${MIN_MAX_TOKENS}` })
        .max(MAX_MAX_TOKENS, { message: `max_tokens must be at most ${MAX_MAX_TOKENS}` }),
}).strict();
// Schema for one few-shot example: the context it applies to, the input text and
// the expected answer. Unknown keys are rejected (`strict`).
export const fewShotExampleSchema = z.object({
// TODO: figure out how to make this compatible with the context enum and its namespace.
context: z.enum(["Text", "Heading", "BlockQuotes", "UnorderedList", "NumberedList", "CodeBlock", "MathBlock", "TaskList"]),
input: z.string().min(3, { message: "The Input must be at least 3 characters long" }),
answer: z.string().min(3, { message: "The Answer must be at least 3 characters long" }),
}).strict();
// Static type derived from the schema above.
export type FewShotExample = z.infer<typeof fewShotExampleSchema>;

View File

@@ -0,0 +1,18 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the BlockQuotes context: the mask is the body of a Markdown quote line.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const example: FewShotExample = {
context: Context.BlockQuotes,
input: `# Matthew effect
The Matthew effect is named after the following New Testament verse:
> <mask/>
According to Malcolm Gladwell, the Matthew effect means that more successful people are most likely given special attention and opportunities leading to further success. `,
answer: `THOUGHT: The <mask/> is located inside a Markdown quote block, so I should write a quote. The text after <mask/> is about the Matthew effect. The text before <mask/> say the quote is from the New Testament. So my answer should contain something from the New Testament that is about the Matthew effect.
ANSWER: For unto everyone that hath shall be given, and he shall have abundance.
> But from him that hath not shall taken away even that which he hath.
> Matthew 25:29`,
};
export default example;

View File

@@ -0,0 +1,23 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the CodeBlock context: the mask is the body of a JavaScript function.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const messages: FewShotExample = {
context: Context.CodeBlock,
input: `# debounce
A debounce function makes sure that a function is only triggered once per user input. This is useful for event based triggers. You can implement in javascript like this:
\`\`\`javascript
function debounce(func, timeout = 300){
<mask/>
}
\`\`\`
`,
answer: `THOUGHT: The <mask/> is located in JavaScript code block. The text before the <mask/> is describes what a debounce function does, and it defines the function signature. So the answer should not include the function signature to avoid duplication. The <mask/> is inside this function, so the answer should finish the implementation of the function. There is some indentation before the <mask/>, so the answer should be indented as well.
ANSWER:let timer;
return (...args) => {
clearTimeout(timer);
timer = setTimeout(() => { func.apply(this, args); }, timeout);
};`,
};
export default messages;

View File

@@ -0,0 +1,19 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the CodeBlock context: the mask is the parameter list of a Python function.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const messages: FewShotExample = {
context: Context.CodeBlock,
input: `\`\`\`python
def fibonacci(<mask/>) -> int:
\tif n == 0 or n == 1:
\t\treturn n
\telse:
\t\treturn fibonacci(n-1) + fibonacci(n-2)
\`\`\`
`,
answer: `THOUGHT: The <mask/> is located in Python code block. Based on the location of </mask> I should write the function's arguments and types. Based on the text after </mask>, I see that the function uses a variable n with type int, which is not yet defined.
ANSWER: n: int`,
};
export default messages;

View File

@@ -0,0 +1,16 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the Heading context: the mask is an entire, still empty, heading.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const example: FewShotExample = {
context: Context.Heading,
input: `# <mask/>
A neuron is considered dead if it does not activate for any of the training instance in the training dataset. Because it never activates it will never have a gradient due to the chain rule so it also cannot change anymore. The dead ReLU problem can have due to a wide variety of reasons, such as:
1. Poorly initialized weights.
2. Extremely high learning rates during training.
`,
answer: `THOUGHT: <mask/> is located inside a Markdown headings, so I should write a title. There is no text in the header yet. The text after <mask/> is about the dead ReLU problem, so the title should reflect this.
ANSWER: The dead ReLU problem`,
};
export default example;

View File

@@ -0,0 +1,13 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the Heading context: the mask completes a partially written heading.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const example: FewShotExample = {
context: Context.Heading,
input: `# The Softmax <mask/>
The softmax function transforms a vector into a probability distribution such that the sum of the vector is equal to 1.`,
answer: `THOUGHT: <mask/> is located inside a Markdown headings. The header already contains the text "The Softmax" contains so my answer should be coherent with that. The text after <mask/> is about the softmax function, so the title should reflect this.
ANSWER: function`,
};
export default example;

View File

@@ -0,0 +1,15 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the MathBlock context: the mask sits inside an inline `$...$` formula.
// String.raw keeps the LaTeX backslashes literal.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const messages: FewShotExample = {
context: Context.MathBlock,
input: String.raw`# Logarithm definition
A logarithm is the power to which a base must be raised to yield a given number. For example $2^3 =8$; therefore, 3 is the logarithm of 8 to base 2, or in other words $<mask/>$.3 =
`,
answer: String.raw`THOUGHT: The <mask/> is located inline math block. The text before the mask is about logarithm. The text is giving an example but the math notation still needs to be completed. So my answer should be the latex formula for this example.
ANSWER: 3 = \log_2(8)`,
};
export default messages;

View File

@@ -0,0 +1,19 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the MathBlock context: the mask sits inside a multi-line `$$` block.
// String.raw keeps the LaTeX backslashes literal.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const messages: FewShotExample = {
context: Context.MathBlock,
input: String.raw`# Sample mean
The sample mean, or sometime called average, is defined as:
$$
sample\_mean(x) = <mask/>
$$
The average value has the property that 50% of the weighted* value will be above and below it. This weighted property can make it more sensitive to outliers than the median.
`,
answer: String.raw`THOUGHT: The <mask/> is located in math block. Based on the text before and after the mask my answer should be the latex formula for the sample mean.
ANSWER: \frac{1}{n} \sum_i^n x_i`,
};
export default messages;

View File

@@ -0,0 +1,20 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the NumberedList context: the mask is the last item and the answer
// continues the list with further numbered items.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const example: FewShotExample = {
context: Context.NumberedList,
input: `# Binary search
Binary is a sorting O(log(n)) sorting algorithm. It works as follows:
1. Ensure you have a sorted array.
2. Check the middle element in the list:
3. Return the index if this is the item you are looking for.
4. <mask/>
`,
answer: `THOUGHT: THOUGHT: The <mask/> is located inside a Markdown list. The current number 4 and it is already written so I should not be in the answer. The text before the <mask/> is bout the binary search algorithm and steps in the algorithm but a few steps are missing. There is no text after the <mask/>. So my answer should the remaining steps of the algorithm.
ANSWER: Go to step 2 with the remaining right half if the item is smaller than the target.
5. Go to step 2 with the remaining left half if the item is larger than the target.
6. If there are no more elements to check, the return indicates that the item is not in the list.`,
};
export default example;

View File

@@ -0,0 +1,26 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the TaskList context: the mask is a sub-task whose checkbox syntax
// is already present; the answer also adds further sub-tasks.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const example: FewShotExample = {
context: Context.TaskList,
input: ` # Prepare for conference
Before going to a conference, there are a few things to do:
- [ ] Finish presentation
- [ ] Write outline
- [ ] Create slides
- [ ] Practice presentation
- [ ] Book flights
- [ ] Reserve hotel
- [ ] Pack suitcase
- [ ] <mask/>
- [ ] Arrange transportation to airport
`,
answer: `THOUGHT: The <mask/> is located inside a Markdown subtask list. The tasks before and after the <mask/> are about preparing and traveling to a conference. The parent task is about packing a suitcase. The syntax for the subtask is already there, so I don't need to write it. So my answer should finish that subtask. Additionally, the answer can also contain additional subtasks that are not already there.
ANSWER:Clothes
- [ ] Toiletries
- [ ] Travel documents
`,
};
export default example;

View File

@@ -0,0 +1,20 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the TaskList context: the mask follows a finished task, so the
// answer appends new tasks including their checkbox syntax.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const example: FewShotExample = {
context: Context.TaskList,
input: ` # Write blog post about Obsidian
For my Obsidian blog post, I need to do the following:
- [ ] Research about Obsidian.
- [ ] Create an outline for the blog.<mask/>
`,
answer: `THOUGHT: The <mask/> is located inside a Markdown task list. The tasks before the <mask/> are about writing a blog about Obsidian. There is no text after the <mask/>. The task before the <mask/> looks already finished. So my answer should contain additional tasks that are not already there and should be help to complete this blog post.
ANSWER:
- [ ] Gather relevant visual aids.
- [ ] Write the first draft
- [ ] Review and edit the first draft
`,
};
export default example;

View File

@@ -0,0 +1,21 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the Text context: the mask is at the end of a paragraph that is
// followed by a similarly structured section.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const example: FewShotExample = {
context: Context.Text,
input: `# Locality-sensitive hashing (LSH)
Locality-sensitive hashing (LSH) is an algorithm that hashes similar items into the same buckets with high probability.
## Potential problems
### Collision (AND)
This happens when distant points are hashed into the same bucket. <mask/>
### Split (OR)
Nearby points are hashed into different buckets. This problem can be solved by using multiple hash tables instead of one. Points are candidates neighbors if they are a candidate in any of the hash tables. As a result the false negative rate reduces significantly, while the false positives rate only increase slightly.
`,
answer: `THOUGHT: The <mask/> is located in a paragraph at the end of sentence, so the answer should offer a completion of this paragraph. The paragraph is in English so the answer must be in this language. The text be the <mask/> is about Locality-sensitive hashing and its potential problems, so my answer should reflect that. The text after the <mask/> is a similiar paragraph about the second problem of Locality-sensitive hashing. So my answer should be about the split problem problem, so my answer should have a similar structure as this paragraph.
ANSWER:This problem can be solved by having multiple projection/hashes per table, where points are candidates neighbors if they occur in all query bins. As a result the false positive rate reduces significantly, while the false negative rate only increase slightly.`,
};
export default example;

View File

@@ -0,0 +1,13 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the Text context: the mask is in the middle of a paragraph, between two sentences.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const example: FewShotExample = {
context: Context.Text,
input: `# Digitizing sound waves
Typical sound waves are complex and consist of multiple waves each with their amplitude, frequency, and phase. When we digitize a sound, we sample the amplitude, the difference compared to the base level, at fixed intervals. This gives a sequence of digital values that can be used to approximate the original sound wave by recreating the pressure changes over time. <mask/> The sampling rate must be chosen correctly, else we cannot represent specific frequencies, or we introduce unintended distortions.`,
answer: `THOUGHT: The paragraph is in English so the answer must be in this language. The text before the <mask/> is about digitizing sound waves. The text after the <mask/> mentions the sampling rate, but it never explains what it is. So the answer should be a sentence that explains what the sampling rate is writing in such a way that it fits with the text before and after the <mask/>.
ANSWER: The chosen interval is known as the sampling rate.`,
};
export default example;

View File

@@ -0,0 +1,24 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the UnorderedList context: the mask is the first item of an
// "Advantages" list whose dash is already written.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const example: FewShotExample = {
context: Context.UnorderedList,
input: `# Relu activation function
The ReLU activation function is a relatively simple non-linear function:
$$
ReLU(x) = max(0, x)
$$
Advantages:
- <mask/>
Disadvantages:
- Dead ReLU problem, whereby specific activation will only output zeros and thus will not have any gradients. This can be computationally wasteful since we still need matrix multiplication.
- Range $[0, \\infty]$ so Exploding Gradients can still be a problem.
`,
answer: `THOUGHT: The <mask/> is located inside a Markdown list. The dash is already there, so I don't need to write it. The text before the <mask/> is about the ReLU activation function, and it looks like this list is about the advantages of the relu function. The text after the <mask/> is about the disadvantages of the ReLU function. So my answer should finish the advantage item and maybe suggest a new advantage.
ANSWER: Computational cheap activations and gradients.
- Vanishing gradient problem is rare, assuming correct weight initialization.`,
};
export default example;

View File

@@ -0,0 +1,18 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../shared";
// Few-shot example for the UnorderedList context: the mask follows a finished item, and
// the answer inserts the list items missing between it and the items after the mask.
// NOTE(review): looks like part of the V0 defaults that the migration compares against —
// keep the strings unchanged unless confirmed otherwise.
const example: FewShotExample = {
context: Context.UnorderedList,
input: `# SOLID
SOLID is a set of design principles from Robert C. Martin. It consists of the following sub-principles:
- S: Single Responsibility Principle: A class should have only one reason to change, meaning it should only have one job or responsibility.<mask/>
- I: Interface Segregation Principle: Clients should not be forced to depend on interfaces they do not use.
- D: Dependency Inversion Principle: High-level modules should not depend on low-level modules, but both should depend on abstractions.
`,
answer: `THOUGHT: The <mask/> is located inside a Markdown list. The text before the list is about SOLID. The list item before the mask is about Single Responsibility Principle. The list items Interface Segregation and Dependency Inversion principles. So the Open-Closed and Liskov Substitution principles are missing. So my answer should contain these list items.
ANSWER:
- O: Open-Closed Principle: Software entities should be open for extension, but closed for modification.
- L: Liskov Substitution Principle: Subtypes must be substitutable for their base types without altering the correctness of the program.`,
};
export default example;

View File

@@ -0,0 +1,144 @@
import { z } from "zod";
import { azureOAIApiSettingsSchema, fewShotExampleSchema, MAX_DELAY, MAX_MAX_CHAR_LIMIT, MIN_DELAY, MIN_MAX_CHAR_LIMIT, modelOptionsSchema, openAIApiSettingsSchema } from "../shared";
import block_qoute_example from "./few-shot-examples/block-qoute-example";
import codeblock_function_completion from "./few-shot-examples/codeblock-function-completion";
import codeblock_function_parameters from "./few-shot-examples/codeblock-function-parameters";
import header_example from "./few-shot-examples/header-example";
import header_example_relu from "./few-shot-examples/header-example-relu";
import math_block_inline from "./few-shot-examples/math-block-inline";
import math_block_multi_line from "./few-shot-examples/math-block-multi-line";
import numbered_list_example from "./few-shot-examples/numbered-list-example";
import sub_task_list_example from "./few-shot-examples/sub-task-list-example";
import task_list_example from "./few-shot-examples/task-list-example";
import text_completion_end from "./few-shot-examples/text-completion-end";
import text_completion_middle from "./few-shot-examples/text-completion-middle";
import unordered_list_pro_and_con_list from "./few-shot-examples/unordered-list-pro-and-con-list";
import unordered_list_solid from "./few-shot-examples/unordered-list-solid";
// Schema for a single trigger: `type` says whether `value` is a literal string or a
// regex source. Unknown keys are rejected (`strict`).
export const triggerSchema = z.object({
type: z.enum(['string', 'regex']),
value: z.string(),
}).strict();
/**
 * Zod schema for the complete settings object of this settings version.
 *
 * Numeric bounds come from the shared MIN_* / MAX_* constants and are inclusive.
 * Unknown keys are rejected (`strict`).
 */
export const settingsSchema = z.object({
    enabled: z.boolean(),
    advancedMode: z.boolean(),
    apiProvider: z.enum(['azure', 'openai']),
    azureOAIApiSettings: azureOAIApiSettingsSchema,
    openAIApiSettings: openAIApiSettingsSchema,
    triggers: z.array(triggerSchema),
    delay: z.number().int()
        .min(MIN_DELAY, { message: `Delay must be between ${MIN_DELAY}ms and ${MAX_DELAY}ms` })
        .max(MAX_DELAY, { message: `Delay must be between ${MIN_DELAY}ms and ${MAX_DELAY}ms` }),
    modelOptions: modelOptionsSchema,
    systemMessage: z.string().min(3, { message: "System message must be at least 3 characters long" }),
    fewShotExamples: z.array(fewShotExampleSchema),
    userMessageTemplate: z.string().min(3, { message: "User message template must be at least 3 characters long" }),
    chainOfThoughRemovalRegex: z.string(),
    dontIncludeDataviews: z.boolean(),
    // TODO: see if we can replace this with tokens in the future.
    maxPrefixCharLimit: z.number().int()
        .min(MIN_MAX_CHAR_LIMIT, { message: `Max prefix char limit must be at least ${MIN_MAX_CHAR_LIMIT}` })
        .max(MAX_MAX_CHAR_LIMIT, { message: `Max prefix char limit must be at most ${MAX_MAX_CHAR_LIMIT}` }),
    // The messages name the suffix limit; they previously repeated the prefix wording.
    maxSuffixCharLimit: z.number().int()
        .min(MIN_MAX_CHAR_LIMIT, { message: `Max suffix char limit must be at least ${MIN_MAX_CHAR_LIMIT}` })
        .max(MAX_MAX_CHAR_LIMIT, { message: `Max suffix char limit must be at most ${MAX_MAX_CHAR_LIMIT}` }),
    removeDuplicateMathBlockIndicator: z.boolean(),
    removeDuplicateCodeBlockIndicator: z.boolean()
}).strict();
// Schema for the persisted plugin data: a single `settings` key holding the settings
// object. Unknown keys are rejected (`strict`).
export const pluginDataSchema = z.object({
settings: settingsSchema,
}).strict();
// Default values for every field of `settingsSchema`.
// NOTE(review): the migration code imports a V0 `DEFAULT_SETTINGS` and compares stored
// settings against it; if this is that object, changing any value here changes which
// stored values count as "unchanged defaults" — confirm before editing.
export const DEFAULT_SETTINGS: Settings = {
// General settings
enabled: true,
advancedMode: false,
apiProvider: "openai",
// API settings
azureOAIApiSettings: {
key: "",
url: "",
},
openAIApiSettings: {
key: "",
url: "https://api.openai.com/v1/chat/completions",
model: "gpt-3.5-turbo",
},
// Trigger settings
triggers: [
{ type: "string", value: "# " },
{ type: "string", value: ". " },
{ type: "string", value: ": " },
{ type: "string", value: ", " },
{ type: "string", value: "! " },
{ type: "string", value: "? " },
{ type: "string", value: "`" },
{ type: "string", value: "' " },
{ type: "string", value: "= " },
{ type: "string", value: "$ " },
{ type: "string", value: "\n" },
// bullet list
{ type: "regex", value: "[\\t ]*(\\-|\\*)[\\t ]+$" },
// numbered list
{ type: "regex", value: "[\\t ]*[0-9A-Za-z]+\\.[\\t ]+$" },
// "$$" followed by a new line and optional tabs/spaces
{ type: "regex", value: "\\$\\$\\n[\\t ]*$" },
// markdown multiline code block
{ type: "regex", value: "```[a-zA-Z0-9]*(\\n\\s*)?$" },
// task list normal, sub or numbered.
{ type: "regex", value: "\\s*(-|[0-9]+\\.) \\[.\\]\\s+$" },
],
delay: 1000,
// Request settings
modelOptions: {
temperature: 1,
top_p: 0.1,
frequency_penalty: 0.25,
presence_penalty: 0,
max_tokens: 800,
},
// Prompt settings
systemMessage: `Your job is to predict the most logical text that should be written at the location of the <mask/>.
Your answer can be either code, a single word, or multiple sentences.
Your answer must be in the same language as the text that is already there.
Your response must have the following format:
THOUGHT: here you explain your reasoning of what could be at the location of <mask/>
ANSWER: here you write the text that should be at the location of <mask/>
`,
fewShotExamples: [
block_qoute_example,
codeblock_function_completion,
codeblock_function_parameters,
header_example,
numbered_list_example,
sub_task_list_example,
task_list_example,
text_completion_end,
text_completion_middle,
unordered_list_pro_and_con_list,
unordered_list_solid,
math_block_inline,
math_block_multi_line,
header_example_relu,
// NOTE(review): the elements are plain objects, so `toString()` presumably yields
// "[object Object]" for every one of them and the comparator always returns 0; with a
// stable sort the listed order is kept. Do not "fix" the comparator without checking
// the impact on code that compares stored settings against these defaults.
].sort((a, b) => a.toString().localeCompare(b.toString())),
userMessageTemplate: "{{prefix}}<mask/>{{suffix}}",
// Matches everything up to and including "ANSWER:".
chainOfThoughRemovalRegex: `(.|\\n)*ANSWER:`,
// Preprocessing settings
dontIncludeDataviews: true,
maxPrefixCharLimit: 2000,
maxSuffixCharLimit: 2000,
// Postprocessing settings
removeDuplicateMathBlockIndicator: true,
removeDuplicateCodeBlockIndicator: true,
};
// Default plugin data: wraps DEFAULT_SETTINGS (same object reference, not a copy).
export const DEFAULT_PLUGIN_DATA: PluginData = {
settings: DEFAULT_SETTINGS,
}
// Static types derived from the schemas above. `Settings` uses the schema's input type
// (`z.input`), while the other two use the output type (`z.infer`).
export type Settings = z.input<typeof settingsSchema>;
export type Trigger = z.infer<typeof triggerSchema>;
export type PluginData = z.infer<typeof pluginDataSchema>;

View File

@@ -0,0 +1,18 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
// Few-shot example for the BlockQuotes context, with the answer written in the
// THOUGHT / LANGUAGE / ANSWER format: the mask is the body of a Markdown quote line.
const example: FewShotExample = {
context: Context.BlockQuotes,
input: `# Matthew effect
In the book Outliers, Malcolm Gladwell popularized the term Matthew effect.
This effect is named after the following New Testament verse:
> <mask/>
According to Malcolm Gladwell, the Matthew effect means that more successful people are most likely given special attention and opportunities leading to further success.`,
answer: `THOUGHT: The answer is a biblical verse from Matthew's effect, related to accumulating advantage; Gladwell mentions it in Outliers.
LANGUAGE: English
ANSWER: For unto everyone that hath shall be given, and he shall have abundance. But from him that hath not shall taken away even that which he hath.
> Matthew 25:29`,
};
export default example;

View File

@@ -0,0 +1,23 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
// Few-shot example for the CodeBlock context, with the answer written in the
// THOUGHT / LANGUAGE / ANSWER format: the mask is the body of a JavaScript function.
const messages: FewShotExample = {
context: Context.CodeBlock,
input: `# debounce
A debounce function makes sure that a function is only triggered once per user input. This is useful for event based triggers. You can implement in javascript like this:
\`\`\`javascript
function debounce(func, timeout = 300){
<mask/>
}
\`\`\`
`,
answer: `THOUGHT: This should include debounce logic, clearTimeout, setTimeout, prevent rapid calls, and function wrapper.
LANGUAGE: JavaScript
ANSWER:let timer;
return (...args) => {
clearTimeout(timer);
timer = setTimeout(() => { func.apply(this, args); }, timeout);
};`,
};
export default messages;

View File

@@ -0,0 +1,19 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
// Few-shot example for the CodeBlock context, with the answer written in the
// THOUGHT / LANGUAGE / ANSWER format: the mask is the parameter list of a Python function.
const messages: FewShotExample = {
context: Context.CodeBlock,
input: `\`\`\`python
def fibonacci(<mask/>) -> int:
\tif n == 0 or n == 1:
\t\treturn n
\telse:
\t\treturn fibonacci(n-1) + fibonacci(n-2)
\`\`\`
`,
answer: `THOUGHT: This function finds the nth Fibonacci number. The 'n' arg of type int is missing. Based on the location of <mask/>, the answer must be function arguments.
LANGUAGE: Python
ANSWER: n: int`,
};
export default messages;

View File

@@ -0,0 +1,16 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
// Few-shot example for the Heading context, with the answer written in the
// THOUGHT / LANGUAGE / ANSWER format: the mask is an entire, still empty, heading.
const example: FewShotExample = {
context: Context.Heading,
input: `# <mask/>
A neuron is considered dead if it does not activate for any of the training instance in the training dataset. Because it never activates it will never have a gradient due to the chain rule so it also cannot change anymore. The dead ReLU problem can have due to a wide variety of reasons, such as:
1. Poorly initialized weights.
2. Extremely high learning rates during training.
`,
answer: `THOUGHT: The paragraph discusses the ReLU activation function, and dead neurons never activate. This problem is named the "Dead ReLU problem".
LANGUAGE: English
ANSWER: The dead ReLU problem`,
};
export default example;

View File

@@ -0,0 +1,13 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
// Few-shot example for the Heading context, with the answer written in the
// THOUGHT / LANGUAGE / ANSWER format: the mask is a single missing word in the middle of a heading.
const example: FewShotExample = {
context: Context.Heading,
input: `# The <mask/> function
The softmax function transforms a vector into a probability distribution such that the sum of the vector is equal to 1.`,
answer: `THOUGHT: The paragraph describes the softmax function and converts the vector to probability distributions; the title already contains "The" and "function". The answer must add the missing word to the title.
LANGUAGE: English
ANSWER: Softmax`,
};
export default example;

View File

@@ -0,0 +1,13 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the MathBlock context: the mask sits inside an inline
 * `$...$` formula. `String.raw` keeps the LaTeX backslashes literal.
 *
 * The answer starts with the `THOUGHT:` label so that it follows the same
 * THOUGHT / LANGUAGE / ANSWER layout as the sibling examples.
 */
const messages: FewShotExample = {
    context: Context.MathBlock,
    input: String.raw`# Logarithm definition
A logarithm is the power to which a base must be raised to yield a given number. For example, $2^3 =8$; therefore, $3$ is the logarithm of $8$ to base $2$, or in other words $ <mask/>$`,
    answer: String.raw`THOUGHT: The text close to the <mask/> is about the definition of the log and logarithm of 8 to base 2. The answer is an inline formula for base 2 of 8 equals 3.
LANGUAGE: LaTeX, English
ANSWER: 3 = \log_2(8)`,
};
export default messages;

View File

@@ -0,0 +1,19 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
// Few-shot example for the MathBlock context, with the answer written in the
// THOUGHT / LANGUAGE / ANSWER format: the mask sits inside a multi-line `$$` block.
// String.raw keeps the LaTeX backslashes literal.
const messages: FewShotExample = {
context: Context.MathBlock,
input: String.raw`# Sample mean
The sample mean, or sometime called average, is defined as:
$$
sample\_mean(x) = <mask/>
$$
The average value has the property that 50% of the weighted* value will be above and below it. This weighted property can make it more sensitive to outliers than the median.
`,
answer: String.raw`THOUGHT: The text is about sample mean; the math block needs LaTeX for the sum of observations divided by the number of observations.
LANGUAGE: LaTeX, English
ANSWER: \frac{1}{n} \sum_i^n x_i`,
};
export default messages;

View File

@@ -0,0 +1,20 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
// Few-shot example for the NumberedList context, with the answer written in the
// THOUGHT / LANGUAGE / ANSWER format: the mask is one missing step in the middle of the list.
const example: FewShotExample = {
context: Context.NumberedList,
input: `# Binary search
Binary is a sorting O(log(n)) searching algorithm. It works as follows:
1. Ensure you have a sorted array.
2. Check the middle element in the list:
3. Return the index if this is the item you are looking for.
4. <mask/>
5. Go to step 2 with the remaining left half if the item is larger than the target.
6. If there are no more elements to check, the return indicates that the item is not in the list.
`,
answer: `THOUGHT: The list contains steps of the binary search algorithm. It is missing the decision to split right if item < target.
LANGUAGE: English
ANSWER: Go to step 2 with the remaining right half if the item is smaller than the target.`,
};
export default example;

View File

@@ -0,0 +1,23 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the `Context.TaskList` context.
 *
 * The `<mask/>` is an unchecked task that follows "Pack suitcase" in a
 * conference-preparation checklist; the expected answer is only the task
 * text, because the "- [ ] " marker is already present in the input.
 */
const conferenceSubtaskExample: FewShotExample = {
    context: Context.TaskList,
    input: `# Prepare for conference
Before going to a conference, there are a few things to do:
- [ ] Finish presentation
- [ ] Write outline
- [ ] Create slides
- [ ] Practice presentation
- [ ] Book flights
- [ ] Reserve hotel
- [ ] Pack suitcase
- [ ] <mask/>
- [ ] Arrange transportation to airport
`,
    answer: `THOUGHT: The answer must be a subtask of 'Pack suitcase'; typical subtasks: 'Clothes,' 'Toiletries,' 'Travel documents'; ' - [ ] ' already there.
LANGUAGE: English
ANSWER: Clothes`,
};

export default conferenceSubtaskExample;

View File

@@ -0,0 +1,19 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the `Context.TaskList` context.
 *
 * The `<mask/>` is a task in the middle of a blog-post checklist, between
 * gathering visual aids and reviewing the first draft; the expected answer
 * is the missing "write the first draft" step.
 */
const blogPostTaskExample: FewShotExample = {
    context: Context.TaskList,
    input: ` # Write blog post about Obsidian
For my Obsidian blog post, I need to do the following:
- [ ] Research about Obsidian.
- [ ] Create an outline for the blog.
- [ ] Gather relevant visual aids.
- [ ] <mask/>
- [ ] Review and edit the first draft
`,
    answer: `THOUGHT: The <mask/> is in the middle of a task sequence between gathering visuals before editing 1st draft. The 1st draft is missing and fits the sequence.
LANGUAGE: English
ANSWER: Write the first draft`,
};

export default blogPostTaskExample;

View File

@@ -0,0 +1,20 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the `Context.Text` context.
 *
 * The `<mask/>` comes at the end of a paragraph about hash collisions in
 * locality-sensitive hashing; the expected answer is one full follow-up
 * sentence describing the mitigation.
 */
const lshCollisionSentenceExample: FewShotExample = {
    context: Context.Text,
    input: `# Locality-sensitive hashing (LSH)
Locality-sensitive hashing (LSH) is an algorithm that hashes similar items into the same buckets with high probability.
## Potential problems
### Collision (AND)
This happens when distant points are hashed into the same bucket. <mask/>
### Split (OR)
Nearby points are hashed into different buckets. This problem can be solved by using multiple hash tables instead of one. Points are candidates neighbors if they are a candidate in any of the hash tables. As a result the false negative rate reduces significantly, while the false positives rate only increase slightly.`,
    answer: `THOUGHT: The answer must be the next sentence. It must explain the strategy to mitigate a collision problem, such as having multiple projections/hashes per table.
LANGUAGE: English
ANSWER: This problem can be solved by having multiple projections/hashes per table, where points are candidates' neighbors if they occur in all query bins.`,
};

export default lshCollisionSentenceExample;

View File

@@ -0,0 +1,14 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the `Context.Text` context.
 *
 * The `<mask/>` sits inside a partially written sentence ("The ... is known
 * as the sampling rate."), so the expected answer is only the few words that
 * complete it, with no overlap with the surrounding text.
 */
const samplingRatePhraseExample: FewShotExample = {
    context: Context.Text,
    input: `# Digitizing sound waves
Typical sound waves are complex and consist of multiple waves, each with amplitude, frequency, and phase.
When we digitize a sound, we sample the amplitude, the difference compared to the base level, at fixed intervals. This gives a sequence of digital values that can be used to approximate the original sound wave by recreating the pressure changes over time. The <mask/> is known as the sampling rate. The sampling rate must be chosen correctly, or we cannot represent specific frequencies or introduce unintended distortions.`,
    answer: `THOUGHT: The text is about digitizing sound waves and the needed properties. The <mask/> is an incomplete sentence starting with 'The' and ending with 'is known as the sampling rate.', I should avoid overlap with this. The answer is a description for the property known as the 'sampling rate'.
LANGUAGE: English
ANSWER: chosen interval`,
};

export default samplingRatePhraseExample;

View File

@@ -0,0 +1,24 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the `Context.UnorderedList` context.
 *
 * The `<mask/>` is the only bullet under "Advantages:" in a note about the
 * ReLU activation function; the expected answer supplies two bullets, the
 * first without a leading "- " because that marker is already in the input.
 */
const reluAdvantagesExample: FewShotExample = {
    context: Context.UnorderedList,
    input: `# Relu activation function
The ReLU activation function is a relatively simple non-linear function:
$$
ReLU(x) = max(0, x)
$$
Advantages:
- <mask/>
Disadvantages:
- Dead ReLU problem, whereby specific activation will only output zeros and thus will not have any gradients. This can be computationally wasteful since we still need matrix multiplication.
- Range $[0, \\infty]$ so Exploding Gradients can still be a problem.
`,
    answer: `THOUGHT: Answer must be advantage of ReLU: simple, efficient, sparsity, addresses vanishing gradient to some extent, popular in practice. The "- " is already there.
LANGUAGE: English
ANSWER: Computational cheap activations and gradients.
- Vanishing gradient problem is rare, assuming correct weight initialization.`,
};

export default reluAdvantagesExample;

View File

@@ -0,0 +1,19 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the `Context.UnorderedList` context.
 *
 * The `<mask/>` replaces the "O" and "L" entries of a bullet list of the
 * SOLID principles; the expected answer fills in both, formatted like the
 * surrounding bullets.
 */
const solidPrinciplesExample: FewShotExample = {
    context: Context.UnorderedList,
    input: `# SOLID
SOLID is a set of design principles from Robert C. Martin. It consists of the following sub-principles:
- S: Single Responsibility Principle: A class should have only one reason to change, meaning it should only have one job or responsibility.
- <mask/>
- I: Interface Segregation Principle: Clients should not be forced to depend on interfaces they do not use.
- D: Dependency Inversion Principle: High-level modules should not depend on low-level modules, but both should depend on abstractions.
`,
    answer: `THOUGHT: This is a list of SOLID principles. The "Open-Closed Principle" and "Liskov Substitution Principle" are missing. I must add them in a consistent format.
LANGUAGE: English
ANSWER: O: Open-Closed Principle: Software entities should be open for extension, but closed for modification.
- L: Liskov Substitution Principle: Subtypes must be substitutable for their base types without altering the correctness of the program.`,
};

export default solidPrinciplesExample;

View File

@@ -0,0 +1,201 @@
import { z } from "zod";
import { isRegexValid, isValidIgnorePattern } from "../../../utils/auto-complete";
import {
azureOAIApiSettingsSchema, fewShotExampleSchema,
MAX_DELAY,
MAX_MAX_CHAR_LIMIT,
MIN_DELAY,
MIN_MAX_CHAR_LIMIT,
modelOptionsSchema,
ollamaApiSettingsSchema,
openAIApiSettingsSchema,
} from "../shared";
import block_qoute_example from "./few-shot-examples/block-qoute-example";
import codeblock_function_completion from "./few-shot-examples/codeblock-function-completion";
import codeblock_function_parameters from "./few-shot-examples/codeblock-function-parameters";
import header_example from "./few-shot-examples/header-example";
import header_example_relu from "./few-shot-examples/header-example-relu";
import math_block_inline from "./few-shot-examples/math-block-inline";
import math_block_multi_line from "./few-shot-examples/math-block-multiline";
import numbered_list_example from "./few-shot-examples/numbered-list-example";
import sub_task_list_example from "./few-shot-examples/subtask-list-example";
import task_list_example from "./few-shot-examples/task-list-example";
import text_completion_end from "./few-shot-examples/text-completion-end";
import text_completion_middle from "./few-shot-examples/text-completion-middle";
import unordered_list_pro_and_con_list from "./few-shot-examples/unordered-list-pro-and-con-list";
import unordered_list_solid from "./few-shot-examples/unordered-list-solid";
/**
 * Schema for a single autocomplete trigger.
 *
 * A trigger is either a literal `string` or a `regex`; its `value` must be
 * non-empty and no extra keys are allowed. Regex triggers are additionally
 * required to end with `$` and to pass `isRegexValid`; each failed check is
 * reported as its own issue on the `value` path.
 */
export const triggerSchema = z
    .object({
        type: z.enum(['string', 'regex']),
        value: z.string().min(1, { message: "Trigger value must be at least 1 character long" }),
    })
    .strict()
    .superRefine(({ type, value }, ctx) => {
        // Literal string triggers need no further validation.
        if (type !== "regex") {
            return;
        }

        // Both regex checks report against the same field, so share the issue shape.
        const reportOnValue = (message: string): void => {
            ctx.addIssue({
                code: z.ZodIssueCode.custom,
                message,
                path: ["value"],
            });
        };

        if (!value.endsWith("$")) {
            reportOnValue("Regex triggers must end with a $.");
        }
        if (!isRegexValid(value)) {
            reportOnValue(`Invalid regex: "${value}"`);
        }
    });
/**
 * Schema for version "1" of the autocomplete settings.
 *
 * The object is strict, so unknown keys are rejected. Numeric limits are
 * bounded by the MIN_ and MAX_ constants imported from "../shared", and
 * the free-text fields (`chainOfThoughRemovalRegex`, `ignoredFilePatterns`,
 * `ignoredTags`) are validated with custom refinements.
 */
export const settingsSchema = z.object({
    version: z.literal("1"),
    enabled: z.boolean(),
    advancedMode: z.boolean(),
    apiProvider: z.enum(['azure', 'openai', "ollama"]),
    azureOAIApiSettings: azureOAIApiSettingsSchema,
    openAIApiSettings: openAIApiSettingsSchema,
    ollamaApiSettings: ollamaApiSettingsSchema,
    triggers: z.array(triggerSchema),
    // The message is built from the bounds so it cannot drift from the actual limits.
    delay: z.number().int()
        .min(MIN_DELAY, { message: `Delay must be between ${MIN_DELAY}ms and ${MAX_DELAY}ms` })
        .max(MAX_DELAY, { message: `Delay must be between ${MIN_DELAY}ms and ${MAX_DELAY}ms` }),
    modelOptions: modelOptionsSchema,
    systemMessage: z.string().min(3, { message: "System message must be at least 3 characters long" }),
    fewShotExamples: z.array(fewShotExampleSchema),
    userMessageTemplate: z.string().min(3, { message: "User message template must be at least 3 characters long" }),
    chainOfThoughRemovalRegex: z.string().refine((regex) => isRegexValid(regex), { message: "Invalid regex" }),
    dontIncludeDataviews: z.boolean(),
    maxPrefixCharLimit: z.number().int()
        .min(MIN_MAX_CHAR_LIMIT, { message: `Max prefix char limit must be at least ${MIN_MAX_CHAR_LIMIT}` })
        .max(MAX_MAX_CHAR_LIMIT, { message: `Max prefix char limit must be at most ${MAX_MAX_CHAR_LIMIT}` }),
    // Same bounds as the prefix limit, but the messages must name the suffix field.
    maxSuffixCharLimit: z.number().int()
        .min(MIN_MAX_CHAR_LIMIT, { message: `Max suffix char limit must be at least ${MIN_MAX_CHAR_LIMIT}` })
        .max(MAX_MAX_CHAR_LIMIT, { message: `Max suffix char limit must be at most ${MAX_MAX_CHAR_LIMIT}` }),
    removeDuplicateMathBlockIndicator: z.boolean(),
    removeDuplicateCodeBlockIndicator: z.boolean(),
    // One pattern per line; blank lines are ignored, every other line must be a valid ignore pattern.
    ignoredFilePatterns: z.string().refine((value) => value
        .split("\n")
        .filter(s => s.trim().length > 0)
        .filter(s => !isValidIgnorePattern(s)).length === 0,
        { message: "Invalid ignore pattern" }
    ),
    // One tag per line; each refinement rejects one kind of malformed line with its own message.
    ignoredTags: z.string().refine((value) => value
        .split("\n")
        .filter(s => s.includes(" ")).length === 0, { message: "Tags cannot contain spaces" }
    ).refine((value) => value
        .split("\n")
        .filter(s => s.includes("#")).length === 0, { message: "Enter tags without the # symbol" }
    ).refine((value) => value
        .split("\n")
        .filter(s => s.includes(",")).length === 0, { message: "Enter each tag on a new line without commas" }
    ),
    cacheSuggestions: z.boolean(),
    debugMode: z.boolean(),
}).strict();
/**
 * Schema for the plugin data wrapper: a strict object whose only key,
 * `settings`, must satisfy `settingsSchema`.
 */
export const pluginDataSchema = z.object({ settings: settingsSchema }).strict();
/**
 * Default values for the autocomplete settings.
 *
 * NOTE(review): this object uses `autocompleteEnabled` and has no `version`
 * key, whereas `settingsSchema` in this file declares `enabled` and
 * `version`. Confirm with the callers whether these defaults are meant to
 * validate against that schema.
 */
export const DEFAULT_AUTOCOMPLETE_SETTINGS = {
    // version: "1",
    // General settings
    autocompleteEnabled: true,
    advancedMode: false,
    apiProvider: "openai",
    // API settings
    azureOAIApiSettings: {
        key: "",
        url: "https://YOUR_AOI_SERVICE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions",
    },
    openAIApiSettings: {
        key: "",
        url: "https://api.openai.com/v1/chat/completions",
        model: "gpt-3.5-turbo",
    },
    ollamaApiSettings: {
        url: "http://localhost:11434/api/chat",
        model: "",
    },
    // Trigger settings
    triggers: [
        { type: "string", value: "# " },
        { type: "string", value: ". " },
        { type: "string", value: ": " },
        { type: "string", value: ", " },
        { type: "string", value: "! " },
        { type: "string", value: "? " },
        { type: "string", value: "`" },
        { type: "string", value: "' " },
        { type: "string", value: "= " },
        { type: "string", value: "$ " },
        { type: "string", value: "> " },
        { type: "string", value: "\n" },
        // bullet list
        { type: "regex", value: "[\\t ]*(\\-|\\*)[\\t ]+$" },
        // numbered list
        { type: "regex", value: "[\\t ]*[0-9A-Za-z]+\\.[\\t ]+$" },
        // new line with spaces
        { type: "regex", value: "\\$\\$\\n[\\t ]*$" },
        // markdown multiline code block
        { type: "regex", value: "```[a-zA-Z0-9]*(\\n\\s*)?$" },
        // task list normal, sub or numbered.
        { type: "regex", value: "\\s*(-|[0-9]+\\.) \\[.\\]\\s+$" },
    ],
    delay: 500,
    // Request settings
    modelOptions: {
        temperature: 1,
        top_p: 0.1,
        frequency_penalty: 0.25,
        presence_penalty: 0,
        max_tokens: 800,
    },
    // Prompt settings
    systemMessage: `Your job is to predict the most logical text that should be written at the location of the <mask/>.
Your answer can be either code, a single word, or multiple sentences.
If the <mask/> is in the middle of a partial sentence, your answer should only be the 1 or 2 words fixes the sentence and not the entire sentence.
You are not allowed to have any overlapping text directly surrounding the <mask/>.
Your answer must be in the same language as the text directly surrounding the <mask/>.
Your response must have the following format:
THOUGHT: here, you reason about the answer; use the 80/20 principle to be brief.
LANGUAGE: here, you write the language of your answer, e.g. English, Python, Dutch, etc.
ANSWER: here, you write the text that should be at the location of <mask/>
`,
    // The examples are kept in the order listed here. An earlier
    // `.sort((a, b) => a.toString().localeCompare(b.toString()))` was dropped:
    // every example is a plain object that stringifies to "[object Object]",
    // so the comparator always returned 0 and never reordered anything.
    fewShotExamples: [
        block_qoute_example,
        codeblock_function_completion,
        codeblock_function_parameters,
        header_example,
        numbered_list_example,
        sub_task_list_example,
        task_list_example,
        text_completion_end,
        text_completion_middle,
        unordered_list_pro_and_con_list,
        unordered_list_solid,
        math_block_inline,
        math_block_multi_line,
        header_example_relu,
    ],
    userMessageTemplate: "{{prefix}}<mask/>{{suffix}}",
    chainOfThoughRemovalRegex: `(.|\\n)*ANSWER:`,
    // Preprocessing settings
    dontIncludeDataviews: true,
    maxPrefixCharLimit: 4000,
    maxSuffixCharLimit: 4000,
    // Postprocessing settings
    removeDuplicateMathBlockIndicator: true,
    removeDuplicateCodeBlockIndicator: true,
    ignoredFilePatterns: "**/secret/**\n",
    ignoredTags: "",
    cacheSuggestions: true,
    debugMode: false,
};
/** A single autocomplete trigger, as produced by parsing with `triggerSchema`. */
export type Trigger = z.infer<typeof triggerSchema>;
/**
 * Autocomplete settings as accepted by `settingsSchema`. This is the schema's
 * input type (`z.input`), whereas the sibling aliases use the parsed output
 * type (`z.infer`).
 */
export type Settings = z.input<typeof settingsSchema>;
/** Plugin data wrapper, as produced by parsing with `pluginDataSchema`. */
export type PluginData = z.infer<typeof pluginDataSchema>;