import { INFIO_BASE_URL, OPENROUTER_BASE_URL } from '../constants' import { ApiProvider } from '../types/llm/model' import { InfioSettings } from '../types/settings' export interface ModelInfo { maxTokens?: number contextWindow?: number supportsImages?: boolean supportsComputerUse?: boolean supportsPromptCache: boolean // this value is hardcoded for now inputPrice?: number outputPrice?: number cacheWritesPrice?: number cacheReadsPrice?: number description?: string reasoningEffort?: string, thinking?: boolean maxThinkingTokens?: number supportsReasoningBudget?: boolean requiredReasoningBudget?: boolean tiers?: readonly { readonly contextWindow: number, readonly inputPrice: number, readonly outputPrice: number, readonly cacheReadsPrice: number, }[] } export interface EmbeddingModelInfo { dimensions: number description?: string } // Anthropic // https://docs.anthropic.com/en/docs/about-claude/models export type AnthropicModelId = keyof typeof anthropicModels export const anthropicDefaultModelId: AnthropicModelId = "claude-sonnet-4-20250514" export const anthropicDefaultInsightModelId: AnthropicModelId = "claude-sonnet-4-20250514" export const anthropicDefaultAutoCompleteModelId: AnthropicModelId = "claude-3-5-haiku-20241022" export const anthropicDefaultEmbeddingModelId: AnthropicModelId = null // this is not supported embedding model export const anthropicModels = { "claude-sonnet-4-20250514": { maxTokens: 64_000, // Overridden to 8k if `enableReasoningEffort` is false. contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, supportsPromptCache: true, inputPrice: 3.0, // $3 per million input tokens outputPrice: 15.0, // $15 per million output tokens cacheWritesPrice: 3.75, // $3.75 per million tokens cacheReadsPrice: 0.3, // $0.30 per million tokens supportsReasoningBudget: true, }, "claude-opus-4-20250514": { maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false. contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, supportsPromptCache: true, inputPrice: 15.0, // $15 per million input tokens outputPrice: 75.0, // $75 per million output tokens cacheWritesPrice: 18.75, // $18.75 per million tokens cacheReadsPrice: 1.5, // $1.50 per million tokens supportsReasoningBudget: true, }, "claude-3-7-sonnet-20250219:thinking": { maxTokens: 128_000, // Unlocked by passing `beta` flag to the model. Otherwise, it's 64k. contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, supportsPromptCache: true, inputPrice: 3.0, // $3 per million input tokens outputPrice: 15.0, // $15 per million output tokens cacheWritesPrice: 3.75, // $3.75 per million tokens cacheReadsPrice: 0.3, // $0.30 per million tokens supportsReasoningBudget: true, requiredReasoningBudget: true, }, "claude-3-7-sonnet-20250219": { maxTokens: 8192, // Since we already have a `:thinking` virtual model we aren't setting `supportsReasoningBudget: true` here. contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, supportsPromptCache: true, inputPrice: 3.0, // $3 per million input tokens outputPrice: 15.0, // $15 per million output tokens cacheWritesPrice: 3.75, // $3.75 per million tokens cacheReadsPrice: 0.3, // $0.30 per million tokens }, "claude-3-5-sonnet-20241022": { maxTokens: 8192, contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, supportsPromptCache: true, inputPrice: 3.0, // $3 per million input tokens outputPrice: 15.0, // $15 per million output tokens cacheWritesPrice: 3.75, // $3.75 per million tokens cacheReadsPrice: 0.3, // $0.30 per million tokens }, "claude-3-5-haiku-20241022": { maxTokens: 8192, contextWindow: 200_000, supportsImages: false, supportsPromptCache: true, inputPrice: 1.0, outputPrice: 5.0, cacheWritesPrice: 1.25, cacheReadsPrice: 0.1, }, "claude-3-opus-20240229": { maxTokens: 4096, contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, inputPrice: 15.0, outputPrice: 75.0, cacheWritesPrice: 18.75, cacheReadsPrice: 1.5, }, "claude-3-haiku-20240307": { maxTokens: 4096, contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, inputPrice: 0.25, outputPrice: 1.25, cacheWritesPrice: 0.3, cacheReadsPrice: 0.03, }, } as const satisfies Record // as const assertion makes the object // Infio export const infioDefaultModelId = "gemini/gemini-2.5-pro-preview-06-05" // for chat export const infioDefaultInsightModelId = "deepseek/deepseek-v3" // for insight export const infioDefaultAutoCompleteModelId = "groq/llama-3.3-70b-versatile" // for auto complete export const infioDefaultEmbeddingModelId = "openai/text-embedding-3-small" // for embedding export const infioDefaultModelInfo: ModelInfo = { maxTokens: 8192, contextWindow: 65_536, supportsImages: false, supportsComputerUse: true, supportsPromptCache: true, inputPrice: 0.272, outputPrice: 1.088, cacheWritesPrice: 0.14, cacheReadsPrice: 0.014, } let infioModelsCache: Record | null = null; async function fetchInfioModels(apiKey?: string): Promise> { if (infioModelsCache) { return infioModelsCache; } try { const headers: Record = { 'Content-Type': 'application/json' }; // 添加Authorization请求头,使用Bearer格式,如果有API密钥的话 if (apiKey) { headers['Authorization'] = `Bearer ${apiKey}`; } const response = await fetch(INFIO_BASE_URL + "/model_group/info", { method: 'GET', headers: headers }); const data = await response.json(); const models: Record = {}; if (data?.data) { for (const model of data.data) { models[model.model_group] = { maxTokens: model.max_output_tokens, contextWindow: model.max_input_tokens, supportsImages: false, supportsPromptCache: false, inputPrice: model.input_cost_per_token ? model.input_cost_per_token * 1000000 : 0, outputPrice: model.output_cost_per_token ? model.output_cost_per_token * 1000000 : 0, }; } } infioModelsCache = models; return models; } catch (error) { console.error('Failed to fetch Infio models:', error); // 如果出错,返回默认模型 return { [infioDefaultModelId]: infioDefaultModelInfo }; } } export const infioEmbeddingModels = { "openai/text-embedding-3-small": { dimensions: 1536, description: "Increased performance over 2nd generation ada embedding model" }, "gemini/gemini-embedding-exp-03-07": { dimensions: 1024, description: "Most capable 2nd generation embedding model, replacing 16 first generation models" }, "deepseek/embedding-large-text": { dimensions: 1024, description: "Most capable embedding model for both English and non-English tasks" }, "deepseek/embedding-text": { dimensions: 512, description: "Most capable embedding model for both English and non-English tasks" } } as const satisfies Record // OpenRouter // https://openrouter.ai/models?order=newest&supported_parameters=tools export const openRouterDefaultModelId = "google/gemini-2.5-pro-preview" // for chat export const openRouterDefaultInsightModelId = "deepseek/deepseek-chat-v3-0324" // for insight export const openRouterDefaultAutoCompleteModelId = "google/gemini-2.5-flash-preview-05-20" // for auto complete export const openRouterDefaultEmbeddingModelId = null // this is not supported embedding model export const openRouterDefaultModelInfo: ModelInfo = { maxTokens: 8192, contextWindow: 200_000, supportsImages: true, supportsComputerUse: true, supportsPromptCache: true, inputPrice: 3.0, outputPrice: 15.0, cacheWritesPrice: 3.75, cacheReadsPrice: 0.3, description: "The new Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: New Sonnet scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal", } let openRouterModelsCache: Record | null = null; async function fetchOpenRouterModels(): Promise> { if (openRouterModelsCache) { return openRouterModelsCache; } try { const response = await fetch(OPENROUTER_BASE_URL + "/models"); const data = await response.json(); const models: Record = {}; if (data?.data) { for (const model of data.data) { models[model.id] = { maxTokens: model.top_provider?.max_completion_tokens ?? model.context_length, contextWindow: model.context_length, supportsImages: model.architecture?.modality?.includes("image") ?? false, supportsPromptCache: false, inputPrice: model.pricing?.prompt ?? 0, outputPrice: model.pricing?.completion ?? 0, description: model.description, }; } } openRouterModelsCache = models; return models; } catch (error) { console.error('Failed to fetch OpenRouter models:', error); return { [openRouterDefaultModelId]: openRouterDefaultModelInfo }; } } // Gemini // https://ai.google.dev/gemini-api/docs/models/gemini export type GeminiModelId = keyof typeof geminiModels export const geminiDefaultModelId: GeminiModelId = "gemini-2.5-pro-preview-05-06" export const geminiDefaultInsightModelId: GeminiModelId = "gemini-2.5-flash-preview-05-20" export const geminiDefaultAutoCompleteModelId: GeminiModelId = "gemini-2.5-flash-preview-05-20" export const geminiDefaultEmbeddingModelId: keyof typeof geminiEmbeddingModels = "text-embedding-004" export const geminiModels = { "gemini-2.5-flash-preview-05-20:thinking": { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, inputPrice: 0.15, outputPrice: 3.5, cacheReadsPrice: 0.0375, cacheWritesPrice: 1.0, maxThinkingTokens: 24_576, supportsReasoningBudget: true, requiredReasoningBudget: true, }, "gemini-2.5-flash-preview-05-20": { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, inputPrice: 0.15, outputPrice: 0.6, cacheReadsPrice: 0.0375, cacheWritesPrice: 1.0, }, "gemini-2.5-flash-preview-04-17:thinking": { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: false, inputPrice: 0.15, outputPrice: 3.5, maxThinkingTokens: 24_576, supportsReasoningBudget: true, requiredReasoningBudget: true, }, "gemini-2.5-flash-preview-04-17": { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: false, inputPrice: 0.15, outputPrice: 0.6, }, "gemini-2.5-pro-exp-03-25": { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "gemini-2.5-pro-preview-03-25": { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. outputPrice: 15, cacheReadsPrice: 0.625, cacheWritesPrice: 4.5, tiers: [ { contextWindow: 200_000, inputPrice: 1.25, outputPrice: 10, cacheReadsPrice: 0.31, }, { contextWindow: Infinity, inputPrice: 2.5, outputPrice: 15, cacheReadsPrice: 0.625, }, ], }, "gemini-2.5-pro-preview-05-06": { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. outputPrice: 15, cacheReadsPrice: 0.625, cacheWritesPrice: 4.5, tiers: [ { contextWindow: 200_000, inputPrice: 1.25, outputPrice: 10, cacheReadsPrice: 0.31, }, { contextWindow: Infinity, inputPrice: 2.5, outputPrice: 15, cacheReadsPrice: 0.625, }, ], }, "gemini-2.0-flash-001": { maxTokens: 8192, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, inputPrice: 0.1, outputPrice: 0.4, cacheReadsPrice: 0.025, cacheWritesPrice: 1.0, }, "gemini-2.0-flash-lite-preview-02-05": { maxTokens: 8192, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "gemini-2.0-pro-exp-02-05": { maxTokens: 8192, contextWindow: 2_097_152, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "gemini-2.0-flash-thinking-exp-01-21": { maxTokens: 65_536, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "gemini-2.0-flash-thinking-exp-1219": { maxTokens: 8192, contextWindow: 32_767, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "gemini-2.0-flash-exp": { maxTokens: 8192, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "gemini-1.5-flash-002": { maxTokens: 8192, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: true, inputPrice: 0.15, // This is the pricing for prompts above 128k tokens. outputPrice: 0.6, cacheReadsPrice: 0.0375, cacheWritesPrice: 1.0, tiers: [ { contextWindow: 128_000, inputPrice: 0.075, outputPrice: 0.3, cacheReadsPrice: 0.01875, }, { contextWindow: Infinity, inputPrice: 0.15, outputPrice: 0.6, cacheReadsPrice: 0.0375, }, ], }, "gemini-1.5-flash-exp-0827": { maxTokens: 8192, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "gemini-1.5-flash-8b-exp-0827": { maxTokens: 8192, contextWindow: 1_048_576, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "gemini-1.5-pro-002": { maxTokens: 8192, contextWindow: 2_097_152, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "gemini-1.5-pro-exp-0827": { maxTokens: 8192, contextWindow: 2_097_152, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "gemini-exp-1206": { maxTokens: 8192, contextWindow: 2_097_152, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, } as const satisfies Record export const geminiEmbeddingModels = { "text-embedding-004": { dimensions: 768, description: "The text-embedding-004 model can generate advanced embeddings for words, phrases, and sentences. The resulting embeddings can then be used for tasks such as semantic search, text classification, clustering, and more." } } as const satisfies Record // OpenAI Native // https://openai.com/api/pricing/ export type OpenAiNativeModelId = keyof typeof openAiNativeModels export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o" export const openAiNativeDefaultInsightModelId: OpenAiNativeModelId = "gpt-4o-mini" export const openAiNativeDefaultAutoCompleteModelId: OpenAiNativeModelId = "gpt-4o-mini" export const openAiNativeDefaultEmbeddingModelId: keyof typeof openAINativeEmbeddingModels = "text-embedding-3-small" export const openAiNativeModels = { // don't support tool use yet "o3-mini": { maxTokens: 100_000, contextWindow: 200_000, supportsImages: false, supportsPromptCache: true, inputPrice: 1.1, outputPrice: 4.4, reasoningEffort: "medium", }, "o3-mini-high": { maxTokens: 100_000, contextWindow: 200_000, supportsImages: false, supportsPromptCache: true, inputPrice: 1.1, outputPrice: 4.4, reasoningEffort: "high", }, "o3-mini-low": { maxTokens: 100_000, contextWindow: 200_000, supportsImages: false, supportsPromptCache: true, inputPrice: 1.1, outputPrice: 4.4, reasoningEffort: "low", }, "o1-pro": { maxTokens: 100_000, contextWindow: 200_000, supportsImages: true, supportsPromptCache: false, inputPrice: 150, outputPrice: 600, }, o1: { maxTokens: 100_000, contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, inputPrice: 15, outputPrice: 60, }, "o1-preview": { maxTokens: 32_768, contextWindow: 128_000, supportsImages: true, supportsPromptCache: true, inputPrice: 15, outputPrice: 60, }, "o1-mini": { maxTokens: 65_536, contextWindow: 128_000, supportsImages: true, supportsPromptCache: true, inputPrice: 1.1, outputPrice: 4.4, }, "gpt-4.5-preview": { maxTokens: 16_384, contextWindow: 128_000, supportsImages: true, supportsPromptCache: true, inputPrice: 75, outputPrice: 150, }, "gpt-4o": { maxTokens: 16_384, contextWindow: 128_000, supportsImages: true, supportsPromptCache: true, inputPrice: 2.5, outputPrice: 10, }, "gpt-4o-mini": { maxTokens: 16_384, contextWindow: 128_000, supportsImages: true, supportsPromptCache: true, inputPrice: 0.15, outputPrice: 0.6, }, } as const satisfies Record export const openAINativeEmbeddingModels = { "text-embedding-3-small": { dimensions: 1536, description: "Increased performance over 2nd generation ada embedding model" }, "text-embedding-3-large": { dimensions: 3072, description: "Most capable embedding model for both English and non-English tasks" }, "text-embedding-ada-002": { dimensions: 1536, description: "Most capable 2nd generation embedding model, replacing 16 first generation models" } } as const satisfies Record // DeepSeek // https://api-docs.deepseek.com/quick_start/pricing export type DeepSeekModelId = keyof typeof deepSeekModels export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat" export const deepSeekDefaultInsightModelId: DeepSeekModelId = "deepseek-chat" export const deepSeekDefaultAutoCompleteModelId: DeepSeekModelId = "deepseek-chat" export const deepSeekDefaultEmbeddingModelId = null // this is not supported embedding model export const deepSeekModels = { "deepseek-chat": { maxTokens: 8_000, contextWindow: 64_000, supportsImages: false, supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it inputPrice: 0.272, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this) outputPrice: 1.088, cacheWritesPrice: 0.14, cacheReadsPrice: 0.014, }, "deepseek-reasoner": { maxTokens: 8_000, contextWindow: 64_000, supportsImages: false, supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this) outputPrice: 2.19, cacheWritesPrice: 0.55, cacheReadsPrice: 0.14, }, } as const satisfies Record // Qwen // https://help.aliyun.com/zh/model-studio/getting-started/ export type QwenModelId = keyof typeof qwenModels export const qwenDefaultModelId: QwenModelId = "qwen3-235b-a22b" export const qwenDefaultInsightModelId: QwenModelId = "qwen3-32b" export const qwenDefaultAutoCompleteModelId: QwenModelId = "qwen3-32b" export const qwenDefaultEmbeddingModelId: keyof typeof qwenEmbeddingModels = "text-embedding-v3" export const qwenModels = { "qwen3-235b-a22b": { maxTokens: 129_024, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 0.002, outputPrice: 0.006, cacheWritesPrice: 0.002, cacheReadsPrice: 0.006, }, "qwen3-32b": { maxTokens: 129_024, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 0.002, outputPrice: 0.006, }, "qwen3-30b-a3b": { maxTokens: 129_024, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 0.002, outputPrice: 0.006, }, "qwen3-14b": { maxTokens: 129_024, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 0.002, outputPrice: 0.006, }, "qwen3-8b": { maxTokens: 129_024, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 0.002, outputPrice: 0.006, }, "qwen2.5-coder-32b-instruct": { maxTokens: 8_192, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 0.002, outputPrice: 0.006, cacheWritesPrice: 0.002, cacheReadsPrice: 0.006, }, "qwen2.5-coder-14b-instruct": { maxTokens: 8_192, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 0.002, outputPrice: 0.006, cacheWritesPrice: 0.002, cacheReadsPrice: 0.006, }, "qwen2.5-coder-7b-instruct": { maxTokens: 8_192, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 0.001, outputPrice: 0.002, cacheWritesPrice: 0.001, cacheReadsPrice: 0.002, }, "qwen2.5-coder-3b-instruct": { maxTokens: 8_192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.0, outputPrice: 0.0, cacheWritesPrice: 0.0, cacheReadsPrice: 0.0, }, "qwen2.5-coder-1.5b-instruct": { maxTokens: 8_192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.0, outputPrice: 0.0, cacheWritesPrice: 0.0, cacheReadsPrice: 0.0, }, "qwen2.5-coder-0.5b-instruct": { maxTokens: 8_192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.0, outputPrice: 0.0, cacheWritesPrice: 0.0, cacheReadsPrice: 0.0, }, "qwen-coder-plus-latest": { maxTokens: 129_024, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 3.5, outputPrice: 7, cacheWritesPrice: 3.5, cacheReadsPrice: 7, }, "qwen-plus-latest": { maxTokens: 129_024, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 0.8, outputPrice: 2, cacheWritesPrice: 0.8, cacheReadsPrice: 0.2, }, "qwen-turbo-latest": { maxTokens: 1_000_000, contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: false, inputPrice: 0.8, outputPrice: 2, cacheWritesPrice: 0.8, cacheReadsPrice: 2, }, "qwen-max-latest": { maxTokens: 30_720, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 2.4, outputPrice: 9.6, cacheWritesPrice: 2.4, cacheReadsPrice: 9.6, }, "qwq-plus-latest": { maxTokens: 8_192, contextWindow: 131_071, supportsImages: false, supportsPromptCache: false, inputPrice: 0.0, outputPrice: 0.0, cacheWritesPrice: 0.0, cacheReadsPrice: 0.0, }, "qwq-plus": { maxTokens: 8_192, contextWindow: 131_071, supportsImages: false, supportsPromptCache: false, inputPrice: 0.0, outputPrice: 0.0, cacheWritesPrice: 0.0, cacheReadsPrice: 0.0, }, "qwen-coder-plus": { maxTokens: 129_024, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 3.5, outputPrice: 7, cacheWritesPrice: 3.5, cacheReadsPrice: 7, }, "qwen-plus": { maxTokens: 129_024, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, inputPrice: 0.8, outputPrice: 2, cacheWritesPrice: 0.8, cacheReadsPrice: 0.2, }, "qwen-turbo": { maxTokens: 1_000_000, contextWindow: 1_000_000, supportsImages: false, supportsPromptCache: false, inputPrice: 0.3, outputPrice: 0.6, cacheWritesPrice: 0.3, cacheReadsPrice: 0.6, }, "qwen-max": { maxTokens: 30_720, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 2.4, outputPrice: 9.6, cacheWritesPrice: 2.4, cacheReadsPrice: 9.6, }, "deepseek-v3": { maxTokens: 8_000, contextWindow: 64_000, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0.28, cacheWritesPrice: 0.14, cacheReadsPrice: 0.014, }, "deepseek-r1": { maxTokens: 8_000, contextWindow: 64_000, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 2.19, cacheWritesPrice: 0.55, cacheReadsPrice: 0.14, }, "qwen-vl-max": { maxTokens: 30_720, contextWindow: 32_768, supportsImages: true, supportsPromptCache: false, inputPrice: 3, outputPrice: 9, cacheWritesPrice: 3, cacheReadsPrice: 9, }, "qwen-vl-max-latest": { maxTokens: 129_024, contextWindow: 131_072, supportsImages: true, supportsPromptCache: false, inputPrice: 3, outputPrice: 9, cacheWritesPrice: 3, cacheReadsPrice: 9, }, "qwen-vl-plus": { maxTokens: 6_000, contextWindow: 8_000, supportsImages: true, supportsPromptCache: false, inputPrice: 1.5, outputPrice: 4.5, cacheWritesPrice: 1.5, cacheReadsPrice: 4.5, }, "qwen-vl-plus-latest": { maxTokens: 129_024, contextWindow: 131_072, supportsImages: true, supportsPromptCache: false, inputPrice: 1.5, outputPrice: 4.5, cacheWritesPrice: 1.5, cacheReadsPrice: 4.5, }, } as const satisfies Record export const qwenEmbeddingModels = { "text-embedding-v4": { dimensions: 1024, description: "支持50+主流语种,包括中文、英语、西班牙语、法语、葡萄牙语、印尼语、日语、韩语、德语、俄罗斯语等。最大行数20,单行最大处理8,192 Token。支持可选维度:1,024(默认)、768或512。单价:0.0007元/千Token。免费额度:50万Token(有效期180天)。" }, "text-embedding-v3": { dimensions: 1024, description: "支持50+主流语种,包括中文、英语、西班牙语、法语、葡萄牙语、印尼语、日语、韩语、德语、俄罗斯语等。最大行数20,单行最大处理8,192 Token。支持可选维度:1,024(默认)、768或512。单价:0.0007元/千Token。免费额度:50万Token(有效期180天)。" }, "text-embedding-v2": { dimensions: 1536, description: "支持多种语言,包括中文、英语、西班牙语、法语、葡萄牙语、印尼语、日语、韩语、德语、俄罗斯语。最大行数25,单行最大处理2,048 Token。" }, "text-embedding-v1": { dimensions: 1536, description: "支持中文、英语、西班牙语、法语、葡萄牙语、印尼语。" }, "text-embedding-async-v2": { dimensions: 1536, description: "异步处理大规模文本。支持中文、英语、西班牙语、法语、葡萄牙语、印尼语、日语、韩语、德语、俄罗斯语。" }, "text-embedding-async-v1": { dimensions: 1536, description: "异步处理大规模文本。支持中文、英语、西班牙语、法语、葡萄牙语、印尼语。" } } as const satisfies Record // bytedance volcengine //https://api.volcengine.com/api-docs/view/overview // SiliconFlow // https://docs.siliconflow.cn/ export type SiliconFlowModelId = keyof typeof siliconFlowModels export const siliconFlowDefaultModelId: SiliconFlowModelId = "deepseek-ai/DeepSeek-V3" export const siliconFlowDefaultInsightModelId: SiliconFlowModelId = "deepseek-ai/DeepSeek-V3" export const siliconFlowDefaultAutoCompleteModelId: SiliconFlowModelId = "deepseek-ai/DeepSeek-V3" export const siliconFlowDefaultEmbeddingModelId: keyof typeof siliconFlowEmbeddingModels = "BAAI/bge-m3" export const siliconFlowModels = { "01-ai/Yi-1.5-9B-Chat-16K": { maxTokens: 8192, contextWindow: 16_384, supportsImages: false, supportsPromptCache: false, inputPrice: 0.5, outputPrice: 1.0, }, "01-ai/Yi-1.5-34B-Chat-16K": { maxTokens: 8192, contextWindow: 16_384, supportsImages: false, supportsPromptCache: false, inputPrice: 1.0, outputPrice: 2.0, }, "google/gemma-2-9b-it": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.5, outputPrice: 1.0, }, "google/gemma-2-27b-it": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 1.0, outputPrice: 2.0, }, "Pro/google/gemma-2-9b-it": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.5, outputPrice: 1.0, }, "meta-llama/Meta-Llama-3.1-8B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.5, outputPrice: 1.0, }, "Pro/meta-llama/Meta-Llama-3.1-8B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.5, outputPrice: 1.0, }, "meta-llama/Meta-Llama-3.1-70B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 2.0, outputPrice: 4.0, }, "meta-llama/Meta-Llama-3.1-405B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 5.0, outputPrice: 10.0, }, "internlm/internlm2_5-20b-chat": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.8, outputPrice: 1.6, }, "Qwen/Qwen2.5-72B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 2.0, outputPrice: 4.0, }, "Qwen/Qwen2.5-7B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.4, outputPrice: 0.8, }, "Qwen/Qwen2.5-14B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.6, outputPrice: 1.2, }, "Qwen/Qwen2.5-32B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 1.0, outputPrice: 2.0, }, "Qwen/Qwen2.5-Coder-7B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.4, outputPrice: 0.8, }, "Qwen/Qwen2.5-VL-32B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: true, supportsPromptCache: false, inputPrice: 0.4, outputPrice: 0.8, }, "Qwen/Qwen2.5-VL-72B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: true, supportsPromptCache: false, inputPrice: 0.4, outputPrice: 0.8, }, "TeleAI/TeleChat2": { maxTokens: 4096, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.3, outputPrice: 0.6, }, "Pro/Qwen/Qwen2.5-7B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.4, outputPrice: 0.8, }, "Qwen/Qwen2.5-72B-Instruct-128K": { maxTokens: 8192, contextWindow: 128_000, supportsImages: false, supportsPromptCache: false, inputPrice: 2.0, outputPrice: 4.0, }, "Qwen/Qwen2-VL-72B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: true, supportsPromptCache: false, inputPrice: 2.5, outputPrice: 5.0, }, "OpenGVLab/InternVL2-26B": { maxTokens: 8192, contextWindow: 32_768, supportsImages: true, supportsPromptCache: false, inputPrice: 1.0, outputPrice: 2.0, }, "Pro/BAAI/bge-m3": { maxTokens: 4096, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.3, outputPrice: 0.6, }, "Pro/OpenGVLab/InternVL2-8B": { maxTokens: 8192, contextWindow: 32_768, supportsImages: true, supportsPromptCache: false, inputPrice: 0.5, outputPrice: 1.0, }, "Vendor-A/Qwen/Qwen2.5-72B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 2.0, outputPrice: 4.0, }, "Pro/Qwen/Qwen2-VL-7B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: true, supportsPromptCache: false, inputPrice: 0.5, outputPrice: 1.0, }, "LoRA/Qwen/Qwen2.5-7B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.4, outputPrice: 0.8, }, "Pro/Qwen/Qwen2.5-Coder-7B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.4, outputPrice: 0.8, }, "LoRA/Qwen/Qwen2.5-72B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 2.0, outputPrice: 4.0, }, "Qwen/Qwen2.5-Coder-32B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 1.0, outputPrice: 2.0, }, "Pro/BAAI/bge-reranker-v2-m3": { maxTokens: 4096, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.3, outputPrice: 0.6, }, "Qwen/QwQ-32B": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 1.0, outputPrice: 2.0, }, "Qwen/QwQ-32B-Preview": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 1.0, outputPrice: 2.0, }, "AIDC-AI/Marco-o1": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.5, outputPrice: 1.0, }, "LoRA/Qwen/Qwen2.5-14B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.6, outputPrice: 1.2, }, "LoRA/Qwen/Qwen2.5-32B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 1.0, outputPrice: 2.0, }, "meta-llama/Llama-3.3-70B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 2.0, outputPrice: 4.0, }, "LoRA/meta-llama/Meta-Llama-3.1-8B-Instruct": { maxTokens: 8192, contextWindow: 32_768, supportsImages: false, supportsPromptCache: false, inputPrice: 0.5, outputPrice: 1.0, }, "deepseek-ai/deepseek-vl2": { maxTokens: 8192, contextWindow: 32_768, supportsImages: true, supportsPromptCache: false, inputPrice: 0.5, outputPrice: 1.0, }, "Qwen/QVQ-72B-Preview": { maxTokens: 8192, contextWindow: 32_768, supportsImages: true, supportsPromptCache: false, inputPrice: 2.5, outputPrice: 5.0, }, "deepseek-ai/DeepSeek-V3": { maxTokens: 8000, contextWindow: 64000, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0.28, cacheWritesPrice: 0.14, cacheReadsPrice: 0.014, }, "deepseek-ai/DeepSeek-R1": { maxTokens: 8000, contextWindow: 64000, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 2.19, cacheWritesPrice: 0.55, cacheReadsPrice: 0.14, }, "Pro/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": { maxTokens: 4096, contextWindow: 16_384, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0.2, cacheWritesPrice: 0.1, cacheReadsPrice: 0.01, }, "Pro/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": { maxTokens: 8000, contextWindow: 32_768, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0.4, cacheWritesPrice: 0.2, cacheReadsPrice: 0.02, }, "Pro/deepseek-ai/DeepSeek-R1-Distill-Llama-8B": { maxTokens: 8000, contextWindow: 32_768, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0.4, cacheWritesPrice: 0.2, cacheReadsPrice: 0.02, }, "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": { maxTokens: 8000, contextWindow: 32_768, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0.6, cacheWritesPrice: 0.3, cacheReadsPrice: 0.03, }, "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { maxTokens: 8000, contextWindow: 32_768, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 1.0, cacheWritesPrice: 0.5, cacheReadsPrice: 0.05, }, "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { maxTokens: 8000, contextWindow: 32_768, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 2.0, cacheWritesPrice: 1.0, cacheReadsPrice: 0.1, }, "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": { maxTokens: 4096, contextWindow: 16_384, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0.2, cacheWritesPrice: 0.1, cacheReadsPrice: 0.01, }, "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": { maxTokens: 8000, contextWindow: 32_768, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0.4, cacheWritesPrice: 0.2, cacheReadsPrice: 0.02, }, "deepseek-ai/DeepSeek-R1-Distill-Llama-8B": { maxTokens: 8000, contextWindow: 32_768, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0.4, cacheWritesPrice: 0.2, cacheReadsPrice: 0.02, }, "Pro/deepseek-ai/DeepSeek-R1": { maxTokens: 8000, contextWindow: 64000, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 2.19, cacheWritesPrice: 0.55, cacheReadsPrice: 0.14, }, "Pro/deepseek-ai/DeepSeek-V3": { maxTokens: 8000, contextWindow: 64000, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0.28, cacheWritesPrice: 0.14, cacheReadsPrice: 0.014, } } as const satisfies Record export const siliconFlowEmbeddingModels = { "BAAI/bge-m3": { dimensions: 1024, description: "BGE-M3 是一个多功能、多语言、多粒度的文本嵌入模型。它支持三种常见的检索功能:密集检索、多向量检索和稀疏检索。该模型可以处理超过100种语言,并且能够处理从短句到长达8192个词元的长文档等不同粒度的输入。BGE-M3在多语言和跨语言检索任务中表现出色,在 MIRACL 和 MKQA 等基准测试中取得了领先结果。它还具有处理长文档检索的能力,在 MLDR 和 NarritiveQA 等数据集上展现了优秀性能" }, "netease-youdao/bce-embedding-base_v1": { dimensions: 768, description: "bce-embedding-base_v1 是由网易有道开发的双语和跨语言嵌入模型。该模型在中英文语义表示和检索任务中表现出色,尤其擅长跨语言场景。它是为检索增强生成(RAG)系统优化的,可以直接应用于教育、医疗、法律等多个领域。该模型不需要特定指令即可使用,能够高效地生成语义向量,为语义搜索和问答系统提供关键支持" }, "BAAI/bge-large-zh-v1.5": { dimensions: 1024, description: "BAAI/bge-large-zh-v1.5 是一个大型中文文本嵌入模型,是 BGE (BAAI General Embedding) 系列的一部分。该模型在 C-MTEB 基准测试中表现出色,在 31 个数据集上的平均得分为 64.53,在检索、语义相似度、文本对分类等多个任务中都取得了优异成绩。它支持最大 512 个 token 的输入长度,适用于各种中文自然语言处理任务,如文本检索、语义相似度计算等" }, "BAAI/bge-large-en-v1.5": { dimensions: 1024, description: "BAAI/bge-large-en-v1.5 是一个大型英文文本嵌入模型,是 BGE (BAAI General Embedding) 系列的一部分。它在 MTEB 基准测试中取得了优异的表现,在 56 个数据集上的平均得分为 64.23,在检索、聚类、文本对分类等多个任务中表现出色。该模型支持最大 512 个 token 的输入长度,适用于各种自然语言处理任务,如文本检索、语义相似度计算等" }, "Pro/BAAI/bge-m3": { dimensions: 1024, description: "BGE-M3 是一个多功能、多语言、多粒度的文本嵌入模型。它支持三种常见的检索功能:密集检索、多向量检索和稀疏检索。该模型可以处理超过100种语言,并且能够处理从短句到长达8192个词元的长文档等不同粒度的输入。BGE-M3在多语言和跨语言检索任务中表现出色,在 MIRACL 和 MKQA 等基准测试中取得了领先结果。它还具有处理长文档检索的能力,在 MLDR 和 NarritiveQA 等数据集上展现了优秀性能" } } as const satisfies Record // Groq // https://console.groq.com/docs/overview export type GroqModelId = keyof typeof groqModels export const groqDefaultModelId: GroqModelId = "llama-3.3-70b-versatile" export const groqDefaultInsightModelId: GroqModelId = "llama-3.3-70b-versatile" export const groqDefaultAutoCompleteModelId: GroqModelId = "llama-3.3-70b-versatile" export const groqDefaultEmbeddingModelId = null // this is not supported embedding model export const groqModels = { "meta-llama/llama-4-scout-17b-16e-instruct": { maxTokens: 8192, contextWindow: 131072, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "meta-llama/llama-4-maverick-17b-128e-instruct": { maxTokens: 8192, contextWindow: 131072, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "qwen-qwq-32b": { maxTokens: 8192, contextWindow: 131072, supportsImages: false, supportsPromptCache: false, inputPrice: 0, }, "llama-3.2-1b-preview": { maxTokens: 4096, contextWindow: 8192, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "llama-3.1-8b-instant": { maxTokens: 4096, contextWindow: 131072, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "mixtral-8x7b-32768": { maxTokens: 4096, contextWindow: 32768, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "llama-3.3-70b-versatile": { maxTokens: 4096, contextWindow: 32768, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "llama-guard-3-8b": { maxTokens: 4096, contextWindow: 8192, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "llama3-70b-8192": { maxTokens: 4096, contextWindow: 8192, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "llama-3.3-70b-specdec": { maxTokens: 4096, contextWindow: 8192, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "llama3-8b-8192": { maxTokens: 4096, contextWindow: 8192, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "llama-3.2-11b-vision-preview": { maxTokens: 4096, contextWindow: 8192, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "deepseek-r1-distill-llama-70b": { maxTokens: 4096, contextWindow: 131072, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "llama-3.2-3b-preview": { maxTokens: 4096, contextWindow: 8192, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "qwen-2.5-coder-32b": { maxTokens: 4096, contextWindow: 131072, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "gemma2-9b-it": { maxTokens: 4096, contextWindow: 8192, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "deepseek-r1-distill-qwen-32b": { maxTokens: 4096, contextWindow: 131072, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "qwen-2.5-32b": { maxTokens: 4096, contextWindow: 131072, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, "llama-3.2-90b-vision-preview": { maxTokens: 4096, contextWindow: 8192, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, }, } as const satisfies Record // Grok // https://docs.x.ai/docs/models export type GrokModelId = keyof typeof grokModels export const grokDefaultModelId: GrokModelId = "grok-3" export const grokDefaultInsightModelId: GrokModelId = "grok-3-mini" export const grokDefaultAutoCompleteModelId: GrokModelId = "grok-3-mini-fast" export const grokDefaultEmbeddingModelId = null // this is not supported embedding model export const grokModels = { "grok-3": { maxTokens: 8192, contextWindow: 131072, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0, }, "grok-3-fast": { maxTokens: 8192, contextWindow: 131072, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0, }, "grok-3-mini": { maxTokens: 8192, contextWindow: 131072, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0, }, "grok-3-mini-fast": { maxTokens: 8192, contextWindow: 131072, supportsImages: false, supportsPromptCache: true, inputPrice: 0, outputPrice: 0, }, "grok-2-latest": { maxTokens: 8192, contextWindow: 131072, supportsImages: true, supportsPromptCache: true, inputPrice: 0, outputPrice: 0, }, "grok-2": { maxTokens: 8192, contextWindow: 131072, supportsImages: true, supportsPromptCache: true, inputPrice: 0, outputPrice: 0, } } as const satisfies Record // LocalProvider (本地嵌入模型) export const localProviderDefaultModelId = null // this is not supported for chat/autocomplete export const localProviderDefaultInsightModelId = null // this is not supported for insight export const localProviderDefaultAutoCompleteModelId = null // this is not supported for chat/autocomplete export const localProviderDefaultEmbeddingModelId: keyof typeof localProviderEmbeddingModels = "TaylorAI/bge-micro-v2" export const localProviderEmbeddingModels = { 'TaylorAI/bge-micro-v2': { dimensions: 384, description: 'BGE-micro-v2 (本地,512令牌,384维)' }, 'Xenova/all-MiniLM-L6-v2': { dimensions: 384, description: 'All-MiniLM-L6-v2 (推荐,轻量级)' }, 'Xenova/bge-small-en-v1.5': { dimensions: 384, description: 'BGE-small-en-v1.5' }, 'Xenova/bge-base-en-v1.5': { dimensions: 768, description: 'BGE-base-en-v1.5 (更高质量)' }, 'Xenova/jina-embeddings-v2-base-zh': { dimensions: 768, description: 'Jina-v2-base-zh (中英双语)' }, 'Xenova/jina-embeddings-v2-small-en': { dimensions: 512, description: 'Jina-v2-small-en' }, 'Xenova/multilingual-e5-small': { dimensions: 384, description: 'E5-small (多语言)' }, 'Xenova/multilingual-e5-base': { dimensions: 768, description: 'E5-base (多语言,更高质量)' }, 'Xenova/gte-small': { dimensions: 384, description: 'GTE-small' }, 'Xenova/e5-small-v2': { dimensions: 384, description: 'E5-small-v2' }, 'Xenova/e5-base-v2': { dimensions: 768, description: 'E5-base-v2 (更高质量)' }, 'Snowflake/snowflake-arctic-embed-xs': { dimensions: 384, description: 'Snowflake Arctic Embed XS (本地,512令牌,384维)' }, 'Snowflake/snowflake-arctic-embed-s': { dimensions: 384, description: 'Snowflake Arctic Embed Small (本地,512令牌,384维)' }, 'Snowflake/snowflake-arctic-embed-m': { dimensions: 768, description: 'Snowflake Arctic Embed Medium (本地,512令牌,768维)' }, 'TaylorAI/gte-tiny': { dimensions: 384, description: 'GTE-tiny (本地,512令牌,384维)' }, 'Mihaiii/Ivysaur': { dimensions: 384, description: 'Ivysaur (本地,512令牌,384维)' }, 'andersonbcdefg/bge-small-4096': { dimensions: 384, description: 'BGE-small-4K (本地,4096令牌,384维)' }, 'nomic-ai/nomic-embed-text-v1.5': { dimensions: 768, description: 'Nomic-embed-text-v1.5 (本地,2048令牌,768维)' }, 'nomic-ai/nomic-embed-text-v1': { dimensions: 768, description: 'Nomic-embed-text (本地,2048令牌,768维)' } } as const satisfies Record /// helper functions // get all providers, used for the provider dropdown export const GetAllProviders = (): ApiProvider[] => { return [ ApiProvider.Infio, ApiProvider.OpenRouter, ApiProvider.Anthropic, ApiProvider.OpenAI, ApiProvider.Google, ApiProvider.Grok, ApiProvider.AlibabaQwen, ApiProvider.SiliconFlow, ApiProvider.Deepseek, ApiProvider.Groq, ApiProvider.Ollama, ApiProvider.OpenAICompatible, ApiProvider.LocalProvider, ] } export const GetEmbeddingProviders = (): ApiProvider[] => { return [ ApiProvider.Infio, ApiProvider.OpenAI, ApiProvider.Google, ApiProvider.AlibabaQwen, ApiProvider.SiliconFlow, ApiProvider.OpenAICompatible, ApiProvider.Ollama, ApiProvider.LocalProvider, ] } // Get all models for a provider export const GetProviderModels = async (provider: ApiProvider, settings?: InfioSettings): Promise> => { switch (provider) { case ApiProvider.Infio: { const apiKey = settings?.infioProvider?.apiKey return await fetchInfioModels(apiKey) } case ApiProvider.OpenRouter: return await fetchOpenRouterModels() case ApiProvider.OpenAI: return openAiNativeModels case ApiProvider.AlibabaQwen: return qwenModels case ApiProvider.SiliconFlow: return siliconFlowModels case ApiProvider.Anthropic: return anthropicModels case ApiProvider.Deepseek: return deepSeekModels case ApiProvider.Google: return geminiModels case ApiProvider.Groq: return groqModels case ApiProvider.Grok: return grokModels case ApiProvider.Ollama: return {} case ApiProvider.OpenAICompatible: return {} case ApiProvider.LocalProvider: return {} default: return {} } } // Get all models for a provider with settings (needed for providers that require API keys) export const GetProviderModelsWithSettings = async (provider: ApiProvider, settings?: InfioSettings): Promise> => { switch (provider) { case ApiProvider.Infio: { const apiKey = settings?.infioProvider?.apiKey return await fetchInfioModels(apiKey) } case ApiProvider.OpenRouter: return await fetchOpenRouterModels() case ApiProvider.OpenAI: return openAiNativeModels case ApiProvider.AlibabaQwen: return qwenModels case ApiProvider.SiliconFlow: return siliconFlowModels case ApiProvider.Anthropic: return anthropicModels case ApiProvider.Deepseek: return deepSeekModels case ApiProvider.Google: return geminiModels case ApiProvider.Groq: return groqModels case ApiProvider.Grok: return grokModels case ApiProvider.Ollama: return {} case ApiProvider.OpenAICompatible: return {} case ApiProvider.LocalProvider: return {} // LocalProvider only supports embedding models default: return {} } } // Get all model ids for a provider export const GetProviderModelIds = async (provider: ApiProvider, settings?: InfioSettings): Promise => { const models = await GetProviderModels(provider, settings) return Object.keys(models) } /// Embedding models // Get all embedding models for a provider export const GetEmbeddingProviderModels = (provider: ApiProvider): Record => { switch (provider) { case ApiProvider.Infio: return infioEmbeddingModels case ApiProvider.Google: return geminiEmbeddingModels case ApiProvider.SiliconFlow: return siliconFlowEmbeddingModels case ApiProvider.OpenAI: return openAINativeEmbeddingModels; case ApiProvider.AlibabaQwen: return qwenEmbeddingModels; case ApiProvider.LocalProvider: return localProviderEmbeddingModels; default: return {} } } // Get all embedding model ids for a provider export const GetEmbeddingProviderModelIds = (provider: ApiProvider): string[] => { return Object.keys(GetEmbeddingProviderModels(provider)) } // Get embedding model info for a provider and model id export const GetEmbeddingModelInfo = (provider: ApiProvider, modelId: string): EmbeddingModelInfo | undefined => { const models = GetEmbeddingProviderModels(provider) return models[modelId] } // Get default model id for a provider export const GetDefaultModelId = (provider: ApiProvider): { chat: string, insight: string, autoComplete: string, embedding: string } => { switch (provider) { case ApiProvider.Infio: return { "chat": infioDefaultModelId, "insight": infioDefaultInsightModelId, "autoComplete": infioDefaultAutoCompleteModelId, "embedding": infioDefaultEmbeddingModelId, } case ApiProvider.OpenRouter: return { "chat": openRouterDefaultModelId, "insight": openRouterDefaultInsightModelId, "autoComplete": openRouterDefaultAutoCompleteModelId, "embedding": openRouterDefaultEmbeddingModelId, } case ApiProvider.Anthropic: return { "chat": anthropicDefaultModelId, "insight": anthropicDefaultInsightModelId, "autoComplete": anthropicDefaultAutoCompleteModelId, "embedding": anthropicDefaultEmbeddingModelId, } case ApiProvider.OpenAI: return { "chat": openAiNativeDefaultModelId, "insight": openAiNativeDefaultInsightModelId, "autoComplete": openAiNativeDefaultAutoCompleteModelId, "embedding": openAiNativeDefaultEmbeddingModelId, } case ApiProvider.Deepseek: return { "chat": deepSeekDefaultModelId, "insight": deepSeekDefaultInsightModelId, "autoComplete": deepSeekDefaultAutoCompleteModelId, "embedding": deepSeekDefaultEmbeddingModelId, } case ApiProvider.Google: return { "chat": geminiDefaultModelId, "insight": geminiDefaultInsightModelId, "autoComplete": geminiDefaultAutoCompleteModelId, "embedding": geminiDefaultEmbeddingModelId, } case ApiProvider.AlibabaQwen: return { "chat": qwenDefaultModelId, "insight": qwenDefaultInsightModelId, "autoComplete": qwenDefaultAutoCompleteModelId, "embedding": qwenDefaultEmbeddingModelId, } case ApiProvider.SiliconFlow: return { "chat": siliconFlowDefaultModelId, "insight": siliconFlowDefaultInsightModelId, "autoComplete": siliconFlowDefaultAutoCompleteModelId, "embedding": siliconFlowDefaultEmbeddingModelId, } case ApiProvider.Groq: return { "chat": groqDefaultModelId, "insight": groqDefaultInsightModelId, "autoComplete": groqDefaultAutoCompleteModelId, "embedding": groqDefaultEmbeddingModelId, } case ApiProvider.Grok: return { "chat": grokDefaultModelId, "insight": grokDefaultInsightModelId, "autoComplete": grokDefaultAutoCompleteModelId, "embedding": grokDefaultEmbeddingModelId, } case ApiProvider.Ollama: return { "chat": null, // user-configured "insight": null, // user-configured "autoComplete": null, // user-configured "embedding": null, // not supported } case ApiProvider.OpenAICompatible: return { "chat": null, // user-configured "insight": null, // user-configured "autoComplete": null, // user-configured "embedding": null, // user-configured } case ApiProvider.LocalProvider: return { "chat": localProviderDefaultModelId, "insight": localProviderDefaultInsightModelId, "autoComplete": localProviderDefaultAutoCompleteModelId, "embedding": localProviderDefaultEmbeddingModelId, } default: return { "chat": null, "insight": null, "autoComplete": null, "embedding": null, } } }