feat: mix search weight (#4170)
* feat: mix search weight * feat: svg render
This commit is contained in:
@@ -40,6 +40,7 @@ export type SearchDatasetDataProps = {
|
||||
[NodeInputKeyEnum.datasetSimilarity]?: number; // min distance
|
||||
[NodeInputKeyEnum.datasetMaxTokens]: number; // max Token limit
|
||||
[NodeInputKeyEnum.datasetSearchMode]?: `${DatasetSearchModeEnum}`;
|
||||
[NodeInputKeyEnum.datasetSearchEmbeddingWeight]?: number;
|
||||
|
||||
[NodeInputKeyEnum.datasetSearchUsingReRank]?: boolean;
|
||||
[NodeInputKeyEnum.datasetSearchRerankModel]?: RerankModelItemType;
|
||||
@@ -161,6 +162,7 @@ export async function searchDatasetData(
|
||||
similarity = 0,
|
||||
limit: maxTokens,
|
||||
searchMode = DatasetSearchModeEnum.embedding,
|
||||
embeddingWeight = 0.5,
|
||||
usingReRank = false,
|
||||
rerankModel,
|
||||
rerankWeight = 0.5,
|
||||
@@ -731,16 +733,20 @@ export async function searchDatasetData(
|
||||
})();
|
||||
|
||||
// embedding recall and fullText recall rrf concat
|
||||
const baseK = 120;
|
||||
const embK = Math.round(baseK * (1 - embeddingWeight)); // RRF k value applied to the embedding recall results
|
||||
const fullTextK = Math.round(baseK * embeddingWeight); // RRF k value applied to the full-text recall results
|
||||
|
||||
const rrfSearchResult = datasetSearchResultConcat([
|
||||
{ k: 60, list: embeddingRecallResults },
|
||||
{ k: 60, list: fullTextRecallResults }
|
||||
{ k: embK, list: embeddingRecallResults },
|
||||
{ k: fullTextK, list: fullTextRecallResults }
|
||||
]);
|
||||
const rrfConcatResults = (() => {
|
||||
if (reRankResults.length === 0) return rrfSearchResult;
|
||||
if (rerankWeight === 1) return reRankResults;
|
||||
|
||||
const baseK = 30;
|
||||
const searchK = Math.round(baseK / (1 - rerankWeight)); // 搜索结果的 k 值
|
||||
const rerankK = Math.round(baseK / rerankWeight); // rerank 结果的 k 值
|
||||
const searchK = Math.round(baseK * rerankWeight); // 搜索结果的 k 值
|
||||
const rerankK = Math.round(baseK * (1 - rerankWeight)); // rerank 结果的 k 值
|
||||
|
||||
return datasetSearchResultConcat([
|
||||
{ k: searchK, list: rrfSearchResult },
|
||||
|
||||
@@ -22,8 +22,9 @@ type DatasetSearchProps = ModuleDispatchProps<{
|
||||
[NodeInputKeyEnum.datasetSelectList]: SelectedDatasetType;
|
||||
[NodeInputKeyEnum.datasetSimilarity]: number;
|
||||
[NodeInputKeyEnum.datasetMaxTokens]: number;
|
||||
[NodeInputKeyEnum.datasetSearchMode]: `${DatasetSearchModeEnum}`;
|
||||
[NodeInputKeyEnum.userChatInput]?: string;
|
||||
[NodeInputKeyEnum.datasetSearchMode]: `${DatasetSearchModeEnum}`;
|
||||
[NodeInputKeyEnum.datasetSearchEmbeddingWeight]?: number;
|
||||
|
||||
[NodeInputKeyEnum.datasetSearchUsingReRank]: boolean;
|
||||
[NodeInputKeyEnum.datasetSearchRerankModel]?: string;
|
||||
@@ -57,11 +58,11 @@ export async function dispatchDatasetSearch(
|
||||
datasets = [],
|
||||
similarity,
|
||||
limit = 1500,
|
||||
searchMode,
|
||||
userChatInput = '',
|
||||
authTmbId = false,
|
||||
collectionFilterMatch,
|
||||
|
||||
searchMode,
|
||||
embeddingWeight,
|
||||
usingReRank,
|
||||
rerankModel,
|
||||
rerankWeight,
|
||||
@@ -129,6 +130,7 @@ export async function dispatchDatasetSearch(
|
||||
limit,
|
||||
datasetIds,
|
||||
searchMode,
|
||||
embeddingWeight,
|
||||
usingReRank: usingReRank && (await checkTeamReRankPermission(teamId)),
|
||||
rerankModel: getRerankModel(rerankModel),
|
||||
rerankWeight,
|
||||
@@ -228,6 +230,9 @@ export async function dispatchDatasetSearch(
|
||||
similarity: usingSimilarityFilter ? similarity : undefined,
|
||||
limit,
|
||||
searchMode,
|
||||
embeddingWeight: searchMode === DatasetSearchModeEnum.mixedRecall ? embeddingWeight : undefined,
|
||||
rerankModel: usingReRank ? getRerankModel(rerankModel)?.name : undefined,
|
||||
rerankWeight: usingReRank ? rerankWeight : undefined,
|
||||
searchUsingReRank: searchUsingReRank,
|
||||
quoteList: searchRes,
|
||||
queryExtensionResult,
|
||||
|
||||
Reference in New Issue
Block a user