This commit is contained in:
duanfuxiang
2025-01-05 11:51:39 +08:00
commit 0c7ee142cb
215 changed files with 20611 additions and 0 deletions

View File

@@ -0,0 +1,18 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the block-quote context.
 *
 * The mask sits on a quoted line (`> <mask/>`); the expected answer supplies
 * the quoted verse followed by a second quoted line with its attribution.
 * The verse text previously read "shall taken away"; the missing "be" is
 * restored so the example does not teach the model a garbled quotation.
 */
const example: FewShotExample = {
  context: Context.BlockQuotes,
  input: `# Matthew effect
In the book Outliers, Malcolm Gladwell popularized the term Matthew effect.
This effect is named after the following New Testament verse:
> <mask/>
According to Malcolm Gladwell, the Matthew effect means that more successful people are most likely given special attention and opportunities leading to further success.`,
  answer: `THOUGHT: The answer is a biblical verse from Matthew's effect, related to accumulating advantage; Gladwell mentions it in Outliers.
LANGUAGE: English
ANSWER: For unto everyone that hath shall be given, and he shall have abundance. But from him that hath not shall be taken away even that which he hath.
> Matthew 25:29`,
};
export default example;

View File

@@ -0,0 +1,23 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the code-block context.
 *
 * The mask is the body of a JavaScript `debounce` function inside a fenced
 * code block; the expected answer is the multi-line function body.
 */
const messages: FewShotExample = {
context: Context.CodeBlock,
input: `# debounce
A debounce function makes sure that a function is only triggered once per user input. This is useful for event based triggers. You can implement in javascript like this:
\`\`\`javascript
function debounce(func, timeout = 300){
<mask/>
}
\`\`\`
`,
answer: `THOUGHT: This should include debounce logic, clearTimeout, setTimeout, prevent rapid calls, and function wrapper.
LANGUAGE: JavaScript
ANSWER:let timer;
return (...args) => {
clearTimeout(timer);
timer = setTimeout(() => { func.apply(this, args); }, timeout);
};`,
};
export default messages;

View File

@@ -0,0 +1,19 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the code-block context.
 *
 * The mask replaces the parameter list of a Python function signature; the
 * expected answer is only the missing parameter (`n: int`), not the whole line.
 */
const messages: FewShotExample = {
context: Context.CodeBlock,
input: `\`\`\`python
def fibonacci(<mask/>) -> int:
\tif n == 0 or n == 1:
\t\treturn n
\telse:
\t\treturn fibonacci(n-1) + fibonacci(n-2)
\`\`\`
`,
answer: `THOUGHT: This function finds the nth Fibonacci number. The 'n' arg of type int is missing. Based on the location of <mask/>, the answer must be function arguments.
LANGUAGE: Python
ANSWER: n: int`,
};
export default messages;

View File

@@ -0,0 +1,16 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the heading context.
 *
 * The entire heading text is masked (`# <mask/>`); the expected answer is a
 * title derived from the paragraph that follows.
 */
const example: FewShotExample = {
context: Context.Heading,
input: `# <mask/>
A neuron is considered dead if it does not activate for any of the training instance in the training dataset. Because it never activates it will never have a gradient due to the chain rule so it also cannot change anymore. The dead ReLU problem can have due to a wide variety of reasons, such as:
1. Poorly initialized weights.
2. Extremely high learning rates during training.
`,
answer: `THOUGHT: The paragraph discusses the ReLU activation function, and dead neurons never activate. This problem is named the "Dead ReLU problem".
LANGUAGE: English
ANSWER: The dead ReLU problem`,
};
export default example;

View File

@@ -0,0 +1,13 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the heading context.
 *
 * Only part of the heading is masked (`# The <mask/> function`); the expected
 * answer is the single missing word and must not repeat the surrounding text.
 */
const example: FewShotExample = {
context: Context.Heading,
input: `# The <mask/> function
The softmax function transforms a vector into a probability distribution such that the sum of the vector is equal to 1.`,
answer: `THOUGHT: The paragraph describes the softmax function and converts the vector to probability distributions; the title already contains "The" and "function". The answer must add the missing word to the title.
LANGUAGE: English
ANSWER: Softmax`,
};
export default example;

View File

@@ -0,0 +1,13 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the math-block context (inline `$ ... $` formula).
 *
 * The mask sits inside an inline math span; the expected answer is the LaTeX
 * for the formula only. `String.raw` keeps the backslashes in the LaTeX
 * literal.
 *
 * The answer now starts with the `THOUGHT:` label, matching the
 * THOUGHT / LANGUAGE / ANSWER response format that the other few-shot
 * examples follow; previously the label was missing from this example.
 */
const messages: FewShotExample = {
  context: Context.MathBlock,
  input: String.raw`# Logarithm definition
A logarithm is the power to which a base must be raised to yield a given number. For example, $2^3 =8$; therefore, $3$ is the logarithm of $8$ to base $2$, or in other words $ <mask/>$`,
  answer: String.raw`THOUGHT: The text close to the <mask/> is about the definition of the log and logarithm of 8 to base 2. The answer is an inline formula for base 2 of 8 equals 3.
LANGUAGE: LaTeX, English
ANSWER: 3 = \log_2(8)`,
};
export default messages;

View File

@@ -0,0 +1,19 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the math-block context (display `$$ ... $$` block).
 *
 * The mask is the right-hand side of an equation inside a display math block;
 * the expected answer is the LaTeX expression. `String.raw` keeps the
 * backslashes in the LaTeX literal.
 */
const messages: FewShotExample = {
context: Context.MathBlock,
input: String.raw`# Sample mean
The sample mean, or sometime called average, is defined as:
$$
sample\_mean(x) = <mask/>
$$
The average value has the property that 50% of the weighted* value will be above and below it. This weighted property can make it more sensitive to outliers than the median.
`,
answer: String.raw`THOUGHT: The text is about sample mean; the math block needs LaTeX for the sum of observations divided by the number of observations.
LANGUAGE: LaTeX, English
ANSWER: \frac{1}{n} \sum_i^n x_i`,
};
export default messages;

View File

@@ -0,0 +1,20 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the numbered-list context.
 *
 * One step in the middle of a numbered list is masked (`4. <mask/>`); the
 * expected answer is the text of that step only, without the list number.
 */
const example: FewShotExample = {
context: Context.NumberedList,
input: `# Binary search
Binary is a sorting O(log(n)) searching algorithm. It works as follows:
1. Ensure you have a sorted array.
2. Check the middle element in the list:
3. Return the index if this is the item you are looking for.
4. <mask/>
5. Go to step 2 with the remaining left half if the item is larger than the target.
6. If there are no more elements to check, the return indicates that the item is not in the list.
`,
answer: `THOUGHT: The list contains steps of the binary search algorithm. It is missing the decision to split right if item < target.
LANGUAGE: English
ANSWER: Go to step 2 with the remaining right half if the item is smaller than the target.`,
};
export default example;

View File

@@ -0,0 +1,23 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the task-list context (subtask case).
 *
 * The mask is a task item directly after "Pack suitcase"; the expected answer
 * is a single subtask label, without repeating the `- [ ] ` prefix that is
 * already present in the input.
 */
const example: FewShotExample = {
context: Context.TaskList,
input: `# Prepare for conference
Before going to a conference, there are a few things to do:
- [ ] Finish presentation
- [ ] Write outline
- [ ] Create slides
- [ ] Practice presentation
- [ ] Book flights
- [ ] Reserve hotel
- [ ] Pack suitcase
- [ ] <mask/>
- [ ] Arrange transportation to airport
`,
answer: `THOUGHT: The answer must be a subtask of 'Pack suitcase'; typical subtasks: 'Clothes,' 'Toiletries,' 'Travel documents'; ' - [ ] ' already there.
LANGUAGE: English
ANSWER: Clothes`,
};
export default example;

View File

@@ -0,0 +1,19 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the task-list context (flat list case).
 *
 * The mask is one task in the middle of an ordered sequence of tasks; the
 * expected answer is the task that logically fits between its neighbours.
 */
const example: FewShotExample = {
context: Context.TaskList,
input: ` # Write blog post about Obsidian
For my Obsidian blog post, I need to do the following:
- [ ] Research about Obsidian.
- [ ] Create an outline for the blog.
- [ ] Gather relevant visual aids.
- [ ] <mask/>
- [ ] Review and edit the first draft
`,
answer: `THOUGHT: The <mask/> is in the middle of a task sequence between gathering visuals before editing 1st draft. The 1st draft is missing and fits the sequence.
LANGUAGE: English
ANSWER: Write the first draft`,
};
export default example;

View File

@@ -0,0 +1,20 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the plain-text context (mask at the end of a paragraph).
 *
 * The mask follows a complete sentence, so the expected answer is a full
 * follow-up sentence rather than a fragment.
 */
const example: FewShotExample = {
context: Context.Text,
input: `# Locality-sensitive hashing (LSH)
Locality-sensitive hashing (LSH) is an algorithm that hashes similar items into the same buckets with high probability.
## Potential problems
### Collision (AND)
This happens when distant points are hashed into the same bucket. <mask/>
### Split (OR)
Nearby points are hashed into different buckets. This problem can be solved by using multiple hash tables instead of one. Points are candidates neighbors if they are a candidate in any of the hash tables. As a result the false negative rate reduces significantly, while the false positives rate only increase slightly.`,
answer: `THOUGHT: The answer must be the next sentence. It must explain the strategy to mitigate a collision problem, such as having multiple projections/hashes per table.
LANGUAGE: English
ANSWER: This problem can be solved by having multiple projections/hashes per table, where points are candidates' neighbors if they occur in all query bins.`,
};
export default example;

View File

@@ -0,0 +1,14 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the plain-text context (mask in the middle of a sentence).
 *
 * The mask sits between "The" and "is known as the sampling rate."; the
 * expected answer is only the few words that complete the sentence, with no
 * overlap with the surrounding text.
 */
const example: FewShotExample = {
context: Context.Text,
input: `# Digitizing sound waves
Typical sound waves are complex and consist of multiple waves, each with amplitude, frequency, and phase.
When we digitize a sound, we sample the amplitude, the difference compared to the base level, at fixed intervals. This gives a sequence of digital values that can be used to approximate the original sound wave by recreating the pressure changes over time. The <mask/> is known as the sampling rate. The sampling rate must be chosen correctly, or we cannot represent specific frequencies or introduce unintended distortions.`,
answer: `THOUGHT: The text is about digitizing sound waves and the needed properties. The <mask/> is an incomplete sentence starting with 'The' and ending with 'is known as the sampling rate.', I should avoid overlap with this. The answer is a description for the property known as the 'sampling rate'.
LANGUAGE: English
ANSWER: chosen interval`,
};
export default example;

View File

@@ -0,0 +1,24 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the unordered-list context (advantages/disadvantages).
 *
 * The mask is the only bullet under "Advantages:"; the expected answer fills
 * that bullet and adds a second one, omitting the leading "- " of the first
 * bullet because it is already present in the input.
 * The input is a regular (non-raw) template literal, so `\\infty` yields a
 * single backslash at runtime.
 */
const example: FewShotExample = {
context: Context.UnorderedList,
input: `# Relu activation function
The ReLU activation function is a relatively simple non-linear function:
$$
ReLU(x) = max(0, x)
$$
Advantages:
- <mask/>
Disadvantages:
- Dead ReLU problem, whereby specific activation will only output zeros and thus will not have any gradients. This can be computationally wasteful since we still need matrix multiplication.
- Range $[0, \\infty]$ so Exploding Gradients can still be a problem.
`,
answer: `THOUGHT: Answer must be advantage of ReLU: simple, efficient, sparsity, addresses vanishing gradient to some extent, popular in practice. The "- " is already there.
LANGUAGE: English
ANSWER: Computational cheap activations and gradients.
- Vanishing gradient problem is rare, assuming correct weight initialization.`,
};
export default example;

View File

@@ -0,0 +1,19 @@
import Context from "../../../../core/autocomplete/context-detection";
import { FewShotExample } from "../../index";
/**
 * Few-shot example for the unordered-list context (missing items in a series).
 *
 * A single masked bullet stands in for two missing entries (O and L of
 * SOLID); the expected answer supplies both, formatted like the neighbouring
 * bullets, and omits the leading "- " of the first one.
 */
const example: FewShotExample = {
context: Context.UnorderedList,
input: `# SOLID
SOLID is a set of design principles from Robert C. Martin. It consists of the following sub-principles:
- S: Single Responsibility Principle: A class should have only one reason to change, meaning it should only have one job or responsibility.
- <mask/>
- I: Interface Segregation Principle: Clients should not be forced to depend on interfaces they do not use.
- D: Dependency Inversion Principle: High-level modules should not depend on low-level modules, but both should depend on abstractions.
`,
answer: `THOUGHT: This is a list of SOLID principles. The "Open-Closed Principle" and "Liskov Substitution Principle" are missing. I must add them in a consistent format.
LANGUAGE: English
ANSWER: O: Open-Closed Principle: Software entities should be open for extension, but closed for modification.
- L: Liskov Substitution Principle: Subtypes must be substitutable for their base types without altering the correctness of the program.`,
};
export default example;

View File

@@ -0,0 +1,201 @@
import { z } from "zod";
import { isRegexValid, isValidIgnorePattern } from "../../../utils/auto-complete";
import {
azureOAIApiSettingsSchema, fewShotExampleSchema,
MAX_DELAY,
MAX_MAX_CHAR_LIMIT,
MIN_DELAY,
MIN_MAX_CHAR_LIMIT,
modelOptionsSchema,
ollamaApiSettingsSchema,
openAIApiSettingsSchema,
} from "../shared";
import block_qoute_example from "./few-shot-examples/block-qoute-example";
import codeblock_function_completion from "./few-shot-examples/codeblock-function-completion";
import codeblock_function_parameters from "./few-shot-examples/codeblock-function-parameters";
import header_example from "./few-shot-examples/header-example";
import header_example_relu from "./few-shot-examples/header-example-relu";
import math_block_inline from "./few-shot-examples/math-block-inline";
import math_block_multi_line from "./few-shot-examples/math-block-multiline";
import numbered_list_example from "./few-shot-examples/numbered-list-example";
import sub_task_list_example from "./few-shot-examples/subtask-list-example";
import task_list_example from "./few-shot-examples/task-list-example";
import text_completion_end from "./few-shot-examples/text-completion-end";
import text_completion_middle from "./few-shot-examples/text-completion-middle";
import unordered_list_pro_and_con_list from "./few-shot-examples/unordered-list-pro-and-con-list";
import unordered_list_solid from "./few-shot-examples/unordered-list-solid";
/**
 * Schema for a single completion trigger.
 *
 * A trigger is a strict object with a `type` ("string" or "regex") and a
 * non-empty `value`. Regex triggers get two extra checks, both reported on
 * the `value` path: the pattern must end with `$`, and it must be accepted by
 * `isRegexValid`.
 */
export const triggerSchema = z
  .object({
    type: z.enum(["string", "regex"]),
    value: z.string().min(1, { message: "Trigger value must be at least 1 character long" }),
  })
  .strict()
  .superRefine(({ type, value }, ctx) => {
    // Plain string triggers need no further validation.
    if (type !== "regex") {
      return;
    }

    // Collect messages first so both problems can be reported together,
    // in the same order as before (anchor check, then validity check).
    const problems: string[] = [];
    if (!value.endsWith("$")) {
      problems.push("Regex triggers must end with a $.");
    }
    if (!isRegexValid(value)) {
      problems.push(`Invalid regex: "${value}"`);
    }

    for (const message of problems) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message,
        path: ["value"],
      });
    }
  });
/**
 * Schema for the full auto-complete settings object.
 *
 * The object is strict (unknown keys are rejected). Besides simple type
 * checks it enforces:
 * - `delay` within `[MIN_DELAY, MAX_DELAY]`;
 * - `maxPrefixCharLimit` / `maxSuffixCharLimit` within
 *   `[MIN_MAX_CHAR_LIMIT, MAX_MAX_CHAR_LIMIT]`;
 * - `chainOfThoughRemovalRegex` accepted by `isRegexValid`;
 * - `ignoredFilePatterns`: newline-separated, every non-blank line accepted
 *   by `isValidIgnorePattern`;
 * - `ignoredTags`: newline-separated, no line may contain a space, `#` or `,`.
 *
 * Fixes relative to the previous version: the `maxSuffixCharLimit` error
 * messages no longer say "prefix", and the `delay` message is derived from
 * `MIN_DELAY` / `MAX_DELAY` instead of hard-coded numbers.
 */
export const settingsSchema = z.object({
  version: z.literal("1"),
  enabled: z.boolean(),
  advancedMode: z.boolean(),
  apiProvider: z.enum(['azure', 'openai', "ollama"]),
  azureOAIApiSettings: azureOAIApiSettingsSchema,
  openAIApiSettings: openAIApiSettingsSchema,
  ollamaApiSettings: ollamaApiSettingsSchema,
  triggers: z.array(triggerSchema),
  delay: z.number().int()
    .min(MIN_DELAY, { message: `Delay must be between ${MIN_DELAY}ms and ${MAX_DELAY}ms` })
    .max(MAX_DELAY, { message: `Delay must be between ${MIN_DELAY}ms and ${MAX_DELAY}ms` }),
  modelOptions: modelOptionsSchema,
  systemMessage: z.string().min(3, { message: "System message must be at least 3 characters long" }),
  fewShotExamples: z.array(fewShotExampleSchema),
  userMessageTemplate: z.string().min(3, { message: "User message template must be at least 3 characters long" }),
  chainOfThoughRemovalRegex: z.string().refine((regex) => isRegexValid(regex), { message: "Invalid regex" }),
  dontIncludeDataviews: z.boolean(),
  maxPrefixCharLimit: z.number().int()
    .min(MIN_MAX_CHAR_LIMIT, { message: `Max prefix char limit must be at least ${MIN_MAX_CHAR_LIMIT}` })
    .max(MAX_MAX_CHAR_LIMIT, { message: `Max prefix char limit must be at most ${MAX_MAX_CHAR_LIMIT}` }),
  maxSuffixCharLimit: z.number().int()
    .min(MIN_MAX_CHAR_LIMIT, { message: `Max suffix char limit must be at least ${MIN_MAX_CHAR_LIMIT}` })
    .max(MAX_MAX_CHAR_LIMIT, { message: `Max suffix char limit must be at most ${MAX_MAX_CHAR_LIMIT}` }),
  removeDuplicateMathBlockIndicator: z.boolean(),
  removeDuplicateCodeBlockIndicator: z.boolean(),
  // One pattern per line; blank lines are ignored, every other line must be valid.
  ignoredFilePatterns: z.string().refine(
    (value) => value
      .split("\n")
      .filter((line) => line.trim().length > 0)
      .every((line) => isValidIgnorePattern(line)),
    { message: "Invalid ignore pattern" }
  ),
  // One tag per line; each refinement reports its own message.
  ignoredTags: z.string()
    .refine(
      (value) => value.split("\n").every((line) => !line.includes(" ")),
      { message: "Tags cannot contain spaces" }
    )
    .refine(
      (value) => value.split("\n").every((line) => !line.includes("#")),
      { message: "Enter tags without the # symbol" }
    )
    .refine(
      (value) => value.split("\n").every((line) => !line.includes(",")),
      { message: "Enter each tag on a new line without commas" }
    ),
  cacheSuggestions: z.boolean(),
  debugMode: z.boolean(),
}).strict();
// Shape of the plugin data wrapper: a single `settings` entry validated by `settingsSchema`.
const pluginDataShape = {
  settings: settingsSchema,
};

/** Strict schema for the plugin data object; any key other than `settings` is rejected. */
export const pluginDataSchema = z.object(pluginDataShape).strict();
/**
 * Default values for the auto-complete settings.
 *
 * NOTE(review): `version` is commented out and this object uses
 * `autocompleteEnabled` where `settingsSchema` declares `enabled`, so as
 * written it would not pass `settingsSchema` (which is strict and requires
 * `version: "1"`). Confirm whether this object is meant to be validated by
 * that schema or is consumed through a different settings type.
 */
export const DEFAULT_AUTOCOMPLETE_SETTINGS = {
  // version: "1",

  // General settings
  autocompleteEnabled: true,
  advancedMode: false,
  apiProvider: "openai",

  // API settings
  azureOAIApiSettings: {
    key: "",
    url: "https://YOUR_AOI_SERVICE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions",
  },
  openAIApiSettings: {
    key: "",
    url: "https://api.openai.com/v1/chat/completions",
    model: "gpt-3.5-turbo",
  },
  ollamaApiSettings: {
    url: "http://localhost:11434/api/chat",
    model: "",
  },

  // Trigger settings
  triggers: [
    { type: "string", value: "# " },
    { type: "string", value: ". " },
    { type: "string", value: ": " },
    { type: "string", value: ", " },
    { type: "string", value: "! " },
    { type: "string", value: "? " },
    { type: "string", value: "`" },
    { type: "string", value: "' " },
    { type: "string", value: "= " },
    { type: "string", value: "$ " },
    { type: "string", value: "> " },
    { type: "string", value: "\n" },
    // bullet list
    { type: "regex", value: "[\\t ]*(\\-|\\*)[\\t ]+$" },
    // numbered list
    { type: "regex", value: "[\\t ]*[0-9A-Za-z]+\\.[\\t ]+$" },
    // `$$` followed by a new line and optional tabs/spaces
    { type: "regex", value: "\\$\\$\\n[\\t ]*$" },
    // markdown multiline code block
    { type: "regex", value: "```[a-zA-Z0-9]*(\\n\\s*)?$" },
    // task list normal, sub or numbered.
    { type: "regex", value: "\\s*(-|[0-9]+\\.) \\[.\\]\\s+$" },
  ],
  delay: 500,

  // Request settings
  modelOptions: {
    temperature: 1,
    top_p: 0.1,
    frequency_penalty: 0.25,
    presence_penalty: 0,
    max_tokens: 800,
  },

  // Prompt settings
  systemMessage: `Your job is to predict the most logical text that should be written at the location of the <mask/>.
Your answer can be either code, a single word, or multiple sentences.
If the <mask/> is in the middle of a partial sentence, your answer should only be the 1 or 2 words fixes the sentence and not the entire sentence.
You are not allowed to have any overlapping text directly surrounding the <mask/>.
Your answer must be in the same language as the text directly surrounding the <mask/>.
Your response must have the following format:
THOUGHT: here, you reason about the answer; use the 80/20 principle to be brief.
LANGUAGE: here, you write the language of your answer, e.g. English, Python, Dutch, etc.
ANSWER: here, you write the text that should be at the location of <mask/>
`,
  // The examples are kept in exactly the order listed here. A previous
  // `.sort()` compared `toString()` results, which is "[object Object]" for
  // every plain object, so its comparator always returned 0 and it never
  // reordered anything; it was removed to make the effective order explicit.
  fewShotExamples: [
    block_qoute_example,
    codeblock_function_completion,
    codeblock_function_parameters,
    header_example,
    numbered_list_example,
    sub_task_list_example,
    task_list_example,
    text_completion_end,
    text_completion_middle,
    unordered_list_pro_and_con_list,
    unordered_list_solid,
    math_block_inline,
    math_block_multi_line,
    header_example_relu,
  ],
  userMessageTemplate: "{{prefix}}<mask/>{{suffix}}",
  // Matches everything up to and including the last "ANSWER:" label.
  chainOfThoughRemovalRegex: `(.|\\n)*ANSWER:`,

  // Preprocessing settings
  dontIncludeDataviews: true,
  maxPrefixCharLimit: 4000,
  maxSuffixCharLimit: 4000,

  // Postprocessing settings
  removeDuplicateMathBlockIndicator: true,
  removeDuplicateCodeBlockIndicator: true,
  ignoredFilePatterns: "**/secret/**\n",
  ignoredTags: "",
  cacheSuggestions: true,
  debugMode: false,
};
/** A single trigger entry, as inferred from `triggerSchema`. */
export type Trigger = z.infer<typeof triggerSchema>;
/** Input type of `settingsSchema` (`z.input`, i.e. the shape accepted before parsing). */
export type Settings = z.input<typeof settingsSchema>;
/** Plugin data object, as inferred from `pluginDataSchema`. */
export type PluginData = z.infer<typeof pluginDataSchema>;