feat: mix search weight (#4170)

* feat: mix search weight

* feat: svg render
This commit is contained in:
Archer
2025-03-14 18:31:37 +08:00
committed by archer
parent a534b839d7
commit fa0a8dd2da
29 changed files with 179 additions and 31 deletions

View File

@@ -40,6 +40,7 @@ export type SearchDatasetDataProps = {
[NodeInputKeyEnum.datasetSimilarity]?: number; // min distance
[NodeInputKeyEnum.datasetMaxTokens]: number; // max Token limit
[NodeInputKeyEnum.datasetSearchMode]?: `${DatasetSearchModeEnum}`;
[NodeInputKeyEnum.datasetSearchEmbeddingWeight]?: number;
[NodeInputKeyEnum.datasetSearchUsingReRank]?: boolean;
[NodeInputKeyEnum.datasetSearchRerankModel]?: RerankModelItemType;
@@ -161,6 +162,7 @@ export async function searchDatasetData(
similarity = 0,
limit: maxTokens,
searchMode = DatasetSearchModeEnum.embedding,
embeddingWeight = 0.5,
usingReRank = false,
rerankModel,
rerankWeight = 0.5,
@@ -731,16 +733,20 @@ export async function searchDatasetData(
})();
// embedding recall and fullText recall rrf concat
const baseK = 120;
const embK = Math.round(baseK * (1 - embeddingWeight)); // 搜索结果的 k 值
const fullTextK = Math.round(baseK * embeddingWeight); // rerank 结果的 k 值
const rrfSearchResult = datasetSearchResultConcat([
{ k: 60, list: embeddingRecallResults },
{ k: 60, list: fullTextRecallResults }
{ k: embK, list: embeddingRecallResults },
{ k: fullTextK, list: fullTextRecallResults }
]);
const rrfConcatResults = (() => {
if (reRankResults.length === 0) return rrfSearchResult;
if (rerankWeight === 1) return reRankResults;
const baseK = 30;
const searchK = Math.round(baseK / (1 - rerankWeight)); // 搜索结果的 k 值
const rerankK = Math.round(baseK / rerankWeight); // rerank 结果的 k 值
const searchK = Math.round(baseK * rerankWeight); // 搜索结果的 k 值
const rerankK = Math.round(baseK * (1 - rerankWeight)); // rerank 结果的 k 值
return datasetSearchResultConcat([
{ k: searchK, list: rrfSearchResult },

View File

@@ -22,8 +22,9 @@ type DatasetSearchProps = ModuleDispatchProps<{
[NodeInputKeyEnum.datasetSelectList]: SelectedDatasetType;
[NodeInputKeyEnum.datasetSimilarity]: number;
[NodeInputKeyEnum.datasetMaxTokens]: number;
[NodeInputKeyEnum.datasetSearchMode]: `${DatasetSearchModeEnum}`;
[NodeInputKeyEnum.userChatInput]?: string;
[NodeInputKeyEnum.datasetSearchMode]: `${DatasetSearchModeEnum}`;
[NodeInputKeyEnum.datasetSearchEmbeddingWeight]?: number;
[NodeInputKeyEnum.datasetSearchUsingReRank]: boolean;
[NodeInputKeyEnum.datasetSearchRerankModel]?: string;
@@ -57,11 +58,11 @@ export async function dispatchDatasetSearch(
datasets = [],
similarity,
limit = 1500,
searchMode,
userChatInput = '',
authTmbId = false,
collectionFilterMatch,
searchMode,
embeddingWeight,
usingReRank,
rerankModel,
rerankWeight,
@@ -129,6 +130,7 @@ export async function dispatchDatasetSearch(
limit,
datasetIds,
searchMode,
embeddingWeight,
usingReRank: usingReRank && (await checkTeamReRankPermission(teamId)),
rerankModel: getRerankModel(rerankModel),
rerankWeight,
@@ -228,6 +230,9 @@ export async function dispatchDatasetSearch(
similarity: usingSimilarityFilter ? similarity : undefined,
limit,
searchMode,
embeddingWeight: searchMode === DatasetSearchModeEnum.mixedRecall ? embeddingWeight : undefined,
rerankModel: usingReRank ? getRerankModel(rerankModel)?.name : undefined,
rerankWeight: usingReRank ? rerankWeight : undefined,
searchUsingReRank: searchUsingReRank,
quoteList: searchRes,
queryExtensionResult,