feat: mix search weight (#4170)
* feat: mix search weight * feat: svg render
This commit is contained in:
1
packages/global/core/app/type.d.ts
vendored
1
packages/global/core/app/type.d.ts
vendored
@@ -75,6 +75,7 @@ export type AppDatasetSearchParamsType = {
|
||||
searchMode: `${DatasetSearchModeEnum}`;
|
||||
limit?: number; // limit max tokens
|
||||
similarity?: number;
|
||||
embeddingWeight?: number; // embedding weight, fullText weight = 1 - embeddingWeight
|
||||
|
||||
usingReRank?: boolean;
|
||||
rerankModel?: string;
|
||||
|
||||
@@ -108,6 +108,10 @@ export const appWorkflow2Form = ({
|
||||
defaultAppForm.dataset.searchMode =
|
||||
findInputValueByKey(node.inputs, NodeInputKeyEnum.datasetSearchMode) ||
|
||||
DatasetSearchModeEnum.embedding;
|
||||
defaultAppForm.dataset.embeddingWeight = findInputValueByKey(
|
||||
node.inputs,
|
||||
NodeInputKeyEnum.datasetSearchEmbeddingWeight
|
||||
);
|
||||
// Rerank
|
||||
defaultAppForm.dataset.usingReRank = !!findInputValueByKey(
|
||||
node.inputs,
|
||||
|
||||
@@ -185,7 +185,7 @@ export enum SearchScoreTypeEnum {
|
||||
}
|
||||
export const SearchScoreTypeMap = {
|
||||
[SearchScoreTypeEnum.embedding]: {
|
||||
label: i18nT('common:core.dataset.search.score.embedding'),
|
||||
label: i18nT('common:core.dataset.search.mode.embedding'),
|
||||
desc: i18nT('common:core.dataset.search.score.embedding desc'),
|
||||
showScore: true
|
||||
},
|
||||
|
||||
@@ -154,9 +154,12 @@ export enum NodeInputKeyEnum {
|
||||
datasetSimilarity = 'similarity',
|
||||
datasetMaxTokens = 'limit',
|
||||
datasetSearchMode = 'searchMode',
|
||||
datasetSearchEmbeddingWeight = 'embeddingWeight',
|
||||
|
||||
datasetSearchUsingReRank = 'usingReRank',
|
||||
datasetSearchRerankWeight = 'rerankWeight',
|
||||
datasetSearchRerankModel = 'rerankModel',
|
||||
|
||||
datasetSearchUsingExtensionQuery = 'datasetSearchUsingExtensionQuery',
|
||||
datasetSearchExtensionModel = 'datasetSearchExtensionModel',
|
||||
datasetSearchExtensionBg = 'datasetSearchExtensionBg',
|
||||
|
||||
@@ -133,6 +133,9 @@ export type DispatchNodeResponseType = {
|
||||
similarity?: number;
|
||||
limit?: number;
|
||||
searchMode?: `${DatasetSearchModeEnum}`;
|
||||
embeddingWeight?: number;
|
||||
rerankModel?: string;
|
||||
rerankWeight?: number;
|
||||
searchUsingReRank?: boolean;
|
||||
queryExtensionResult?: {
|
||||
model: string;
|
||||
|
||||
@@ -64,6 +64,13 @@ export const DatasetSearchModule: FlowNodeTemplateType = {
|
||||
valueType: WorkflowIOValueTypeEnum.string,
|
||||
value: DatasetSearchModeEnum.embedding
|
||||
},
|
||||
{
|
||||
key: NodeInputKeyEnum.datasetSearchEmbeddingWeight,
|
||||
renderTypeList: [FlowNodeInputTypeEnum.hidden],
|
||||
label: '',
|
||||
valueType: WorkflowIOValueTypeEnum.number,
|
||||
value: 0.5
|
||||
},
|
||||
// Rerank
|
||||
{
|
||||
key: NodeInputKeyEnum.datasetSearchUsingReRank,
|
||||
|
||||
@@ -40,6 +40,7 @@ export type SearchDatasetDataProps = {
|
||||
[NodeInputKeyEnum.datasetSimilarity]?: number; // min distance
|
||||
[NodeInputKeyEnum.datasetMaxTokens]: number; // max Token limit
|
||||
[NodeInputKeyEnum.datasetSearchMode]?: `${DatasetSearchModeEnum}`;
|
||||
[NodeInputKeyEnum.datasetSearchEmbeddingWeight]?: number;
|
||||
|
||||
[NodeInputKeyEnum.datasetSearchUsingReRank]?: boolean;
|
||||
[NodeInputKeyEnum.datasetSearchRerankModel]?: RerankModelItemType;
|
||||
@@ -161,6 +162,7 @@ export async function searchDatasetData(
|
||||
similarity = 0,
|
||||
limit: maxTokens,
|
||||
searchMode = DatasetSearchModeEnum.embedding,
|
||||
embeddingWeight = 0.5,
|
||||
usingReRank = false,
|
||||
rerankModel,
|
||||
rerankWeight = 0.5,
|
||||
@@ -731,16 +733,20 @@ export async function searchDatasetData(
|
||||
})();
|
||||
|
||||
// embedding recall and fullText recall rrf concat
|
||||
const baseK = 120;
|
||||
const embK = Math.round(baseK * (1 - embeddingWeight)); // k value for the embedding recall results
|
||||
const fullTextK = Math.round(baseK * embeddingWeight); // k value for the full-text recall results
|
||||
|
||||
const rrfSearchResult = datasetSearchResultConcat([
|
||||
{ k: 60, list: embeddingRecallResults },
|
||||
{ k: 60, list: fullTextRecallResults }
|
||||
{ k: embK, list: embeddingRecallResults },
|
||||
{ k: fullTextK, list: fullTextRecallResults }
|
||||
]);
|
||||
const rrfConcatResults = (() => {
|
||||
if (reRankResults.length === 0) return rrfSearchResult;
|
||||
if (rerankWeight === 1) return reRankResults;
|
||||
|
||||
const baseK = 30;
|
||||
const searchK = Math.round(baseK / (1 - rerankWeight)); // 搜索结果的 k 值
|
||||
const rerankK = Math.round(baseK / rerankWeight); // rerank 结果的 k 值
|
||||
const searchK = Math.round(baseK * rerankWeight); // 搜索结果的 k 值
|
||||
const rerankK = Math.round(baseK * (1 - rerankWeight)); // rerank 结果的 k 值
|
||||
|
||||
return datasetSearchResultConcat([
|
||||
{ k: searchK, list: rrfSearchResult },
|
||||
|
||||
@@ -22,8 +22,9 @@ type DatasetSearchProps = ModuleDispatchProps<{
|
||||
[NodeInputKeyEnum.datasetSelectList]: SelectedDatasetType;
|
||||
[NodeInputKeyEnum.datasetSimilarity]: number;
|
||||
[NodeInputKeyEnum.datasetMaxTokens]: number;
|
||||
[NodeInputKeyEnum.datasetSearchMode]: `${DatasetSearchModeEnum}`;
|
||||
[NodeInputKeyEnum.userChatInput]?: string;
|
||||
[NodeInputKeyEnum.datasetSearchMode]: `${DatasetSearchModeEnum}`;
|
||||
[NodeInputKeyEnum.datasetSearchEmbeddingWeight]?: number;
|
||||
|
||||
[NodeInputKeyEnum.datasetSearchUsingReRank]: boolean;
|
||||
[NodeInputKeyEnum.datasetSearchRerankModel]?: string;
|
||||
@@ -57,11 +58,11 @@ export async function dispatchDatasetSearch(
|
||||
datasets = [],
|
||||
similarity,
|
||||
limit = 1500,
|
||||
searchMode,
|
||||
userChatInput = '',
|
||||
authTmbId = false,
|
||||
collectionFilterMatch,
|
||||
|
||||
searchMode,
|
||||
embeddingWeight,
|
||||
usingReRank,
|
||||
rerankModel,
|
||||
rerankWeight,
|
||||
@@ -129,6 +130,7 @@ export async function dispatchDatasetSearch(
|
||||
limit,
|
||||
datasetIds,
|
||||
searchMode,
|
||||
embeddingWeight,
|
||||
usingReRank: usingReRank && (await checkTeamReRankPermission(teamId)),
|
||||
rerankModel: getRerankModel(rerankModel),
|
||||
rerankWeight,
|
||||
@@ -228,6 +230,9 @@ export async function dispatchDatasetSearch(
|
||||
similarity: usingSimilarityFilter ? similarity : undefined,
|
||||
limit,
|
||||
searchMode,
|
||||
embeddingWeight: searchMode === DatasetSearchModeEnum.mixedRecall ? embeddingWeight : undefined,
|
||||
rerankModel: usingReRank ? getRerankModel(rerankModel)?.name : undefined,
|
||||
rerankWeight: usingReRank ? rerankWeight : undefined,
|
||||
searchUsingReRank: searchUsingReRank,
|
||||
quoteList: searchRes,
|
||||
queryExtensionResult,
|
||||
|
||||
@@ -429,6 +429,7 @@ export const iconPaths = {
|
||||
'price/bg': () => import('./icons/price/bg.svg'),
|
||||
'price/right': () => import('./icons/price/right.svg'),
|
||||
save: () => import('./icons/save.svg'),
|
||||
sliderTag: () => import('./icons/sliderTag.svg'),
|
||||
stop: () => import('./icons/stop.svg'),
|
||||
'support/account/laf': () => import('./icons/support/account/laf.svg'),
|
||||
'support/account/loginoutLight': () => import('./icons/support/account/loginoutLight.svg'),
|
||||
|
||||
3
packages/web/components/common/Icon/icons/sliderTag.svg
Normal file
3
packages/web/components/common/Icon/icons/sliderTag.svg
Normal file
@@ -0,0 +1,3 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 11 11" >
|
||||
<path d="M5.04123 0.144501L9.47821 4.82132C9.83075 5.19292 10.0273 5.68562 10.0273 6.19784V8.65565C10.0273 9.76022 9.13185 10.6557 8.02728 10.6557H2.05518C0.950606 10.6557 0.0551758 9.76022 0.0551758 8.65565V6.19785C0.0551758 5.68562 0.251705 5.19292 0.604247 4.82132L5.04123 0.144501Z" fill="#3370FF"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 375 B |
@@ -120,7 +120,6 @@
|
||||
"publish_success": "Publish Successful",
|
||||
"question_guide_tip": "After the conversation, 3 guiding questions will be generated for you.",
|
||||
"reasoning_response": "Output thinking",
|
||||
"rerank_weight": "Rearrange weights",
|
||||
"response_format": "Response format",
|
||||
"saved_success": "Saved successfully! \nTo use this version externally, click Save and Publish",
|
||||
"search_app": "Search apps",
|
||||
|
||||
@@ -49,6 +49,7 @@
|
||||
"response.child total points": "Sub-workflow point consumption",
|
||||
"response.dataset_concat_length": "Combined total",
|
||||
"response.node_inputs": "Node Inputs",
|
||||
"response_hybrid_weight": "Embedding : Full text = {{emb}} : {{text}}",
|
||||
"select": "Select",
|
||||
"select_file": "Upload File",
|
||||
"select_file_img": "Upload file / image",
|
||||
|
||||
@@ -1025,6 +1025,7 @@
|
||||
"question_feedback": "Work order",
|
||||
"read_quote": "View citations",
|
||||
"required": "Required",
|
||||
"rerank_weight": "Rearrange weights",
|
||||
"resume_failed": "Resume Failed",
|
||||
"select_reference_variable": "Select Reference Variable",
|
||||
"share_link": "Share Link",
|
||||
|
||||
@@ -120,7 +120,6 @@
|
||||
"publish_success": "发布成功",
|
||||
"question_guide_tip": "对话结束后,会为你生成 3 个引导性问题。",
|
||||
"reasoning_response": "输出思考",
|
||||
"rerank_weight": "重排权重",
|
||||
"response_format": "回复格式",
|
||||
"saved_success": "保存成功!如需在外部使用该版本,请点击“保存并发布”",
|
||||
"search_app": "搜索应用",
|
||||
|
||||
@@ -49,6 +49,7 @@
|
||||
"response.child total points": "子工作流积分消耗",
|
||||
"response.dataset_concat_length": "合并后总数",
|
||||
"response.node_inputs": "节点输入",
|
||||
"response_hybrid_weight": "语义检索 : 全文检索 = {{emb}} : {{text}}",
|
||||
"select": "选择",
|
||||
"select_file": "上传文件",
|
||||
"select_file_img": "上传文件/图片",
|
||||
|
||||
@@ -623,7 +623,6 @@
|
||||
"core.dataset.search.mode.fullTextRecall desc": "使用传统的全文检索,适合查找一些关键词和主谓语特殊的数据",
|
||||
"core.dataset.search.mode.mixedRecall": "混合检索",
|
||||
"core.dataset.search.mode.mixedRecall desc": "使用向量检索与全文检索的综合结果返回,使用 RRF 算法进行排序。",
|
||||
"core.dataset.search.score.embedding": "语义检索",
|
||||
"core.dataset.search.score.embedding desc": "通过计算向量之间的距离获取得分,范围为 0~1。",
|
||||
"core.dataset.search.score.fullText": "全文检索",
|
||||
"core.dataset.search.score.fullText desc": "计算相同关键词的得分,范围为 0~无穷。",
|
||||
@@ -1029,6 +1028,7 @@
|
||||
"question_feedback": "工单咨询",
|
||||
"read_quote": "查看引用",
|
||||
"required": "必须",
|
||||
"rerank_weight": "重排权重",
|
||||
"resume_failed": "恢复失败",
|
||||
"select_reference_variable": "选择引用变量",
|
||||
"share_link": "分享链接",
|
||||
|
||||
@@ -120,7 +120,6 @@
|
||||
"publish_success": "發布成功",
|
||||
"question_guide_tip": "對話結束後,會為你產生 3 個引導性問題。",
|
||||
"reasoning_response": "輸出思考",
|
||||
"rerank_weight": "重排權重",
|
||||
"response_format": "回复格式",
|
||||
"saved_success": "保存成功!\n如需在外部使用該版本,請點擊“儲存並發布”",
|
||||
"search_app": "搜尋應用程式",
|
||||
|
||||
@@ -48,6 +48,7 @@
|
||||
"response.child total points": "子工作流程點數消耗",
|
||||
"response.dataset_concat_length": "合併總數",
|
||||
"response.node_inputs": "節點輸入",
|
||||
"response_hybrid_weight": "語義檢索 : 全文檢索 = {{emb}} : {{text}}",
|
||||
"select": "選取",
|
||||
"select_file": "上傳檔案",
|
||||
"select_file_img": "上傳檔案 / 圖片",
|
||||
|
||||
@@ -1024,6 +1024,7 @@
|
||||
"question_feedback": "工單諮詢",
|
||||
"read_quote": "查看引用",
|
||||
"required": "必填",
|
||||
"rerank_weight": "重排權重",
|
||||
"resume_failed": "恢復失敗",
|
||||
"select_reference_variable": "選擇引用變數",
|
||||
"share_link": "分享連結",
|
||||
|
||||
Reference in New Issue
Block a user