This commit is contained in:
gggaaallleee
2025-02-28 19:00:58 +08:00
committed by GitHub
parent cf0aaa1091
commit f7b2a57ca3
29 changed files with 7469 additions and 0 deletions

View File

@@ -0,0 +1,55 @@
import axios from 'axios';
import { URL } from 'url';
import dotenv from 'dotenv';
// Load variables from a local .env file (when one exists) into process.env,
// so that BLACKLIST can be supplied through that file before it is read below.
dotenv.config();
/**
 * Parse the raw BLACKLIST environment value into a clean list of domains.
 *
 * The value is expected to be a JSON array of domain strings, e.g.
 * `["example.com", "spam.org"]`. A missing, malformed or non-array value
 * yields an empty list (with a warning) instead of throwing while the module
 * is being imported. Entries are trimmed and lower-cased so they can be
 * compared against `URL.hostname`, which is always lower-case; non-string and
 * empty entries are dropped (an empty entry would otherwise match every host).
 *
 * @param raw - The raw environment value, or `undefined` when it is not set.
 * @returns The normalized domain list; empty when nothing usable was supplied.
 */
const parseBlacklist = (raw: string | undefined): string[] => {
  if (!raw) {
    return [];
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    console.warn('Ignoring BLACKLIST: value is not valid JSON.', error);
    return [];
  }
  if (!Array.isArray(parsed)) {
    console.warn('Ignoring BLACKLIST: expected a JSON array of domain strings.');
    return [];
  }
  return parsed
    .filter((entry): entry is string => typeof entry === 'string')
    .map((entry) => entry.trim().toLowerCase())
    .filter((entry) => entry.length > 0);
};

// Domains whose search hits must be discarded, configured through the BLACKLIST env variable.
const blacklistDomains: string[] = parseBlacklist(process.env.BLACKLIST);
/**
 * Decide whether a search hit must be dropped because of its hostname.
 *
 * A blacklist entry matches the domain itself and any of its subdomains
 * (`example.com` blocks `example.com` and `a.example.com`, but not
 * `notexample.com`). Non-string and empty entries are ignored.
 */
const isBlacklistedHost = (hostname: string, blacklist: readonly unknown[]): boolean => {
  // Hard-coded exclusion: any hostname containing "zhihu" is always dropped.
  if (hostname.includes('zhihu')) {
    return true;
  }
  return blacklist.some((entry) => {
    if (typeof entry !== 'string') {
      return false;
    }
    const domain = entry.trim().toLowerCase();
    if (domain === '') {
      return false;
    }
    // Anchor the suffix on a label boundary so that unrelated hosts sharing a tail do not match.
    const suffix = domain.startsWith('.') ? domain : `.${domain}`;
    return hostname === domain || hostname.endsWith(suffix);
  });
};

/**
 * Query a search endpoint that returns JSON (`format=json`) page by page and
 * collect up to `pageCount` usable results.
 *
 * Results whose URL cannot be parsed, whose host is blacklisted, or whose URL
 * was already collected on an earlier page are skipped and do not count
 * towards `pageCount`. Paging stops when enough results were collected, when
 * a page comes back without results, or when the page cap is reached.
 *
 * @param query - The search terms.
 * @param pageCount - The number of results wanted (not a number of pages).
 * @param searchUrlBase - Endpoint URL of the search service; when empty, no request is made.
 * @param categories - Value for the `categories` query parameter; omitted from the request when empty.
 * @returns `resultUrls` in collection order and `results`, a map from URL to
 *   `{ title, url, snippet, source, crawlStatus, score }`.
 * @throws When `searchUrlBase` is not a valid URL, or when the HTTP request fails.
 */
export const fetchSearchResults = async (
  query: string,
  pageCount: number,
  searchUrlBase: string,
  categories: string
): Promise<{ resultUrls: string[]; results: Map<string, any> }> => {
  const resultUrls: string[] = [];
  const results = new Map<string, any>();

  // Without an endpoint there is nothing to query.
  if (!searchUrlBase) {
    return { resultUrls, results };
  }

  // Upper bound on the pages fetched: filtered hits do not count, so allow
  // roughly twice the pages a plain 10-per-page listing would need, plus one.
  const maxPages = Math.ceil((pageCount / 10 + 1) * 2 + 1);
  const blacklist: readonly unknown[] = Array.isArray(blacklistDomains) ? blacklistDomains : [];

  for (let pageNumber = 1; pageNumber <= maxPages && resultUrls.length < pageCount; pageNumber++) {
    // Build the query through searchParams so every value is encoded and an
    // existing query string on the base URL is preserved.
    const searchUrl = new URL(searchUrlBase);
    searchUrl.searchParams.set('q', query);
    searchUrl.searchParams.set('pageno', String(pageNumber));
    searchUrl.searchParams.set('format', 'json');
    if (categories) {
      searchUrl.searchParams.set('categories', categories);
    }

    console.log(`Fetching page ${pageNumber} from SearchXNG: ${searchUrl.toString()}`);
    const response = await axios.get(searchUrl.toString());
    const pageResults = response && response.data ? response.data.results : undefined;

    // No (more) results: stop paging.
    if (!Array.isArray(pageResults) || pageResults.length === 0) {
      break;
    }

    for (const result of pageResults) {
      if (!result || typeof result.url !== 'string') {
        continue;
      }
      let hostname: string;
      try {
        hostname = new URL(result.url).hostname;
      } catch {
        // A single malformed URL must not abort the whole search.
        continue;
      }
      if (isBlacklistedHost(hostname, blacklist) || results.has(result.url)) {
        continue;
      }

      resultUrls.push(result.url);
      results.set(result.url, {
        title: result.title,
        url: result.url,
        snippet: result.content,
        source: result.engine,
        crawlStatus: 'Pending',
        score: result.score
      });

      if (resultUrls.length >= pageCount) {
        break;
      }
    }
  }

  return { resultUrls, results };
};