feat: mix search weight (#4170)

* feat: mix search weight

* feat: svg render
This commit is contained in:
Archer
2025-03-14 18:31:37 +08:00
committed by GitHub
parent 6aa5e2c200
commit 93f7747904
29 changed files with 179 additions and 31 deletions

View File

@@ -75,6 +75,7 @@ export type AppDatasetSearchParamsType = {
searchMode: `${DatasetSearchModeEnum}`;
limit?: number; // limit max tokens
similarity?: number;
embeddingWeight?: number; // embedding weight, fullText weight = 1 - embeddingWeight
usingReRank?: boolean;
rerankModel?: string;

View File

@@ -108,6 +108,10 @@ export const appWorkflow2Form = ({
defaultAppForm.dataset.searchMode =
findInputValueByKey(node.inputs, NodeInputKeyEnum.datasetSearchMode) ||
DatasetSearchModeEnum.embedding;
defaultAppForm.dataset.embeddingWeight = findInputValueByKey(
node.inputs,
NodeInputKeyEnum.datasetSearchEmbeddingWeight
);
// Rerank
defaultAppForm.dataset.usingReRank = !!findInputValueByKey(
node.inputs,

View File

@@ -185,7 +185,7 @@ export enum SearchScoreTypeEnum {
}
export const SearchScoreTypeMap = {
[SearchScoreTypeEnum.embedding]: {
label: i18nT('common:core.dataset.search.score.embedding'),
label: i18nT('common:core.dataset.search.mode.embedding'),
desc: i18nT('common:core.dataset.search.score.embedding desc'),
showScore: true
},

View File

@@ -154,9 +154,12 @@ export enum NodeInputKeyEnum {
datasetSimilarity = 'similarity',
datasetMaxTokens = 'limit',
datasetSearchMode = 'searchMode',
datasetSearchEmbeddingWeight = 'embeddingWeight',
datasetSearchUsingReRank = 'usingReRank',
datasetSearchRerankWeight = 'rerankWeight',
datasetSearchRerankModel = 'rerankModel',
datasetSearchUsingExtensionQuery = 'datasetSearchUsingExtensionQuery',
datasetSearchExtensionModel = 'datasetSearchExtensionModel',
datasetSearchExtensionBg = 'datasetSearchExtensionBg',

View File

@@ -133,6 +133,9 @@ export type DispatchNodeResponseType = {
similarity?: number;
limit?: number;
searchMode?: `${DatasetSearchModeEnum}`;
embeddingWeight?: number;
rerankModel?: string;
rerankWeight?: number;
searchUsingReRank?: boolean;
queryExtensionResult?: {
model: string;

View File

@@ -64,6 +64,13 @@ export const DatasetSearchModule: FlowNodeTemplateType = {
valueType: WorkflowIOValueTypeEnum.string,
value: DatasetSearchModeEnum.embedding
},
{
key: NodeInputKeyEnum.datasetSearchEmbeddingWeight,
renderTypeList: [FlowNodeInputTypeEnum.hidden],
label: '',
valueType: WorkflowIOValueTypeEnum.number,
value: 0.5
},
// Rerank
{
key: NodeInputKeyEnum.datasetSearchUsingReRank,

View File

@@ -40,6 +40,7 @@ export type SearchDatasetDataProps = {
[NodeInputKeyEnum.datasetSimilarity]?: number; // min distance
[NodeInputKeyEnum.datasetMaxTokens]: number; // max Token limit
[NodeInputKeyEnum.datasetSearchMode]?: `${DatasetSearchModeEnum}`;
[NodeInputKeyEnum.datasetSearchEmbeddingWeight]?: number;
[NodeInputKeyEnum.datasetSearchUsingReRank]?: boolean;
[NodeInputKeyEnum.datasetSearchRerankModel]?: RerankModelItemType;
@@ -161,6 +162,7 @@ export async function searchDatasetData(
similarity = 0,
limit: maxTokens,
searchMode = DatasetSearchModeEnum.embedding,
embeddingWeight = 0.5,
usingReRank = false,
rerankModel,
rerankWeight = 0.5,
@@ -731,16 +733,20 @@ export async function searchDatasetData(
})();
// embedding recall and fullText recall rrf concat
const baseK = 120;
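const embK = Math.round(baseK * (1 - embeddingWeight)); // RRF k for the embedding recall list (shrinks as embeddingWeight grows)
const fullTextK = Math.round(baseK * embeddingWeight); // RRF k for the full-text recall list (shrinks as embeddingWeight falls)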
const rrfSearchResult = datasetSearchResultConcat([
{ k: 60, list: embeddingRecallResults },
{ k: 60, list: fullTextRecallResults }
{ k: embK, list: embeddingRecallResults },
{ k: fullTextK, list: fullTextRecallResults }
]);
const rrfConcatResults = (() => {
if (reRankResults.length === 0) return rrfSearchResult;
if (rerankWeight === 1) return reRankResults;
const baseK = 30;
const searchK = Math.round(baseK / (1 - rerankWeight)); // 搜索结果的 k 值
const rerankK = Math.round(baseK / rerankWeight); // rerank 结果的 k 值
const searchK = Math.round(baseK * rerankWeight); // 搜索结果的 k 值
const rerankK = Math.round(baseK * (1 - rerankWeight)); // rerank 结果的 k 值
return datasetSearchResultConcat([
{ k: searchK, list: rrfSearchResult },

View File

@@ -22,8 +22,9 @@ type DatasetSearchProps = ModuleDispatchProps<{
[NodeInputKeyEnum.datasetSelectList]: SelectedDatasetType;
[NodeInputKeyEnum.datasetSimilarity]: number;
[NodeInputKeyEnum.datasetMaxTokens]: number;
[NodeInputKeyEnum.datasetSearchMode]: `${DatasetSearchModeEnum}`;
[NodeInputKeyEnum.userChatInput]?: string;
[NodeInputKeyEnum.datasetSearchMode]: `${DatasetSearchModeEnum}`;
[NodeInputKeyEnum.datasetSearchEmbeddingWeight]?: number;
[NodeInputKeyEnum.datasetSearchUsingReRank]: boolean;
[NodeInputKeyEnum.datasetSearchRerankModel]?: string;
@@ -57,11 +58,11 @@ export async function dispatchDatasetSearch(
datasets = [],
similarity,
limit = 1500,
searchMode,
userChatInput = '',
authTmbId = false,
collectionFilterMatch,
searchMode,
embeddingWeight,
usingReRank,
rerankModel,
rerankWeight,
@@ -129,6 +130,7 @@ export async function dispatchDatasetSearch(
limit,
datasetIds,
searchMode,
embeddingWeight,
usingReRank: usingReRank && (await checkTeamReRankPermission(teamId)),
rerankModel: getRerankModel(rerankModel),
rerankWeight,
@@ -228,6 +230,9 @@ export async function dispatchDatasetSearch(
similarity: usingSimilarityFilter ? similarity : undefined,
limit,
searchMode,
embeddingWeight: searchMode === DatasetSearchModeEnum.mixedRecall ? embeddingWeight : undefined,
rerankModel: usingReRank ? getRerankModel(rerankModel)?.name : undefined,
rerankWeight: usingReRank ? rerankWeight : undefined,
searchUsingReRank: searchUsingReRank,
quoteList: searchRes,
queryExtensionResult,

View File

@@ -429,6 +429,7 @@ export const iconPaths = {
'price/bg': () => import('./icons/price/bg.svg'),
'price/right': () => import('./icons/price/right.svg'),
save: () => import('./icons/save.svg'),
sliderTag: () => import('./icons/sliderTag.svg'),
stop: () => import('./icons/stop.svg'),
'support/account/laf': () => import('./icons/support/account/laf.svg'),
'support/account/loginoutLight': () => import('./icons/support/account/loginoutLight.svg'),

View File

@@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 11 11" >
<!-- Slider tag icon: a single closed path on an 11x11 viewBox, filled #3370FF.
     The outline has its apex at the top centre and rounded corners along the bottom edge. -->
<path d="M5.04123 0.144501L9.47821 4.82132C9.83075 5.19292 10.0273 5.68562 10.0273 6.19784V8.65565C10.0273 9.76022 9.13185 10.6557 8.02728 10.6557H2.05518C0.950606 10.6557 0.0551758 9.76022 0.0551758 8.65565V6.19785C0.0551758 5.68562 0.251705 5.19292 0.604247 4.82132L5.04123 0.144501Z" fill="#3370FF"/>
</svg>

After

Width:  |  Height:  |  Size: 375 B

View File

@@ -120,7 +120,6 @@
"publish_success": "Publish Successful",
"question_guide_tip": "After the conversation, 3 guiding questions will be generated for you.",
"reasoning_response": "Output thinking",
"rerank_weight": "Rearrange weights",
"response_format": "Response format",
"saved_success": "Saved successfully! \nTo use this version externally, click Save and Publish",
"search_app": "Search apps",

View File

@@ -49,6 +49,7 @@
"response.child total points": "Sub-workflow point consumption",
"response.dataset_concat_length": "Combined total",
"response.node_inputs": "Node Inputs",
"response_hybrid_weight": "Embedding : Full text = {{emb}} : {{text}}",
"select": "Select",
"select_file": "Upload File",
"select_file_img": "Upload file / image",

View File

@@ -1025,6 +1025,7 @@
"question_feedback": "Work order",
"read_quote": "View citations",
"required": "Required",
"rerank_weight": "Rerank weight",
"resume_failed": "Resume Failed",
"select_reference_variable": "Select Reference Variable",
"share_link": "Share Link",

View File

@@ -120,7 +120,6 @@
"publish_success": "发布成功",
"question_guide_tip": "对话结束后,会为你生成 3 个引导性问题。",
"reasoning_response": "输出思考",
"rerank_weight": "重排权重",
"response_format": "回复格式",
"saved_success": "保存成功!如需在外部使用该版本,请点击“保存并发布”",
"search_app": "搜索应用",

View File

@@ -49,6 +49,7 @@
"response.child total points": "子工作流积分消耗",
"response.dataset_concat_length": "合并后总数",
"response.node_inputs": "节点输入",
"response_hybrid_weight": "语义检索 : 全文检索 = {{emb}} : {{text}}",
"select": "选择",
"select_file": "上传文件",
"select_file_img": "上传文件/图片",

View File

@@ -623,7 +623,6 @@
"core.dataset.search.mode.fullTextRecall desc": "使用传统的全文检索,适合查找一些关键词和主谓语特殊的数据",
"core.dataset.search.mode.mixedRecall": "混合检索",
"core.dataset.search.mode.mixedRecall desc": "使用向量检索与全文检索的综合结果返回,使用 RRF 算法进行排序。",
"core.dataset.search.score.embedding": "语义检索",
"core.dataset.search.score.embedding desc": "通过计算向量之间的距离获取得分,范围为 0~1。",
"core.dataset.search.score.fullText": "全文检索",
"core.dataset.search.score.fullText desc": "计算相同关键词的得分,范围为 0~无穷。",
@@ -1029,6 +1028,7 @@
"question_feedback": "工单咨询",
"read_quote": "查看引用",
"required": "必须",
"rerank_weight": "重排权重",
"resume_failed": "恢复失败",
"select_reference_variable": "选择引用变量",
"share_link": "分享链接",

View File

@@ -120,7 +120,6 @@
"publish_success": "發布成功",
"question_guide_tip": "對話結束後,會為你產生 3 個引導性問題。",
"reasoning_response": "輸出思考",
"rerank_weight": "重排權重",
"response_format": "回覆格式",
"saved_success": "保存成功!\n如需在外部使用該版本請點擊“儲存並發布”",
"search_app": "搜尋應用程式",

View File

@@ -48,6 +48,7 @@
"response.child total points": "子工作流程點數消耗",
"response.dataset_concat_length": "合併總數",
"response.node_inputs": "節點輸入",
"response_hybrid_weight": "語義檢索 : 全文檢索 = {{emb}} : {{text}}",
"select": "選取",
"select_file": "上傳檔案",
"select_file_img": "上傳檔案 / 圖片",

View File

@@ -1024,6 +1024,7 @@
"question_feedback": "工單諮詢",
"read_quote": "查看引用",
"required": "必填",
"rerank_weight": "重排權重",
"resume_failed": "恢復失敗",
"select_reference_variable": "選擇引用變數",
"share_link": "分享連結",