fix: add order:true to all create transactions (#3948)

This commit is contained in:
Finley Ge
2025-03-03 11:37:51 +08:00
committed by GitHub
parent 113e8f711f
commit 4bc7f21182
30 changed files with 425 additions and 352 deletions

View File

@@ -1,4 +1,3 @@
import NodeCache from 'node-cache';
import { MongoClient } from 'mongodb';
import crypto from 'crypto';
@@ -19,10 +18,15 @@ const connectToMongo = async () => {
/**
 * Creates a TTL index on the `updatedAt` field of the collection named by
 * `collectionName`.
 *
 * The expiry is read from the EXPIRE_AFTER_SECONDS environment variable and
 * defaults to 9000 seconds when the variable is unset, empty, not a number,
 * or negative. Any failure (connecting or creating the index) is logged and
 * not rethrown, so callers never see a rejection from this function.
 */
const createTTLIndex = async () => {
  const defaultExpireAfterSeconds = 9000;
  try {
    const db = await connectToMongo();

    // Always parse as base 10; without a radix a value such as "0x10" would be read as hex.
    const parsedSeconds = parseInt(process.env.EXPIRE_AFTER_SECONDS || '', 10);
    const hasValidSeconds = !Number.isNaN(parsedSeconds) && parsedSeconds >= 0;
    if (process.env.EXPIRE_AFTER_SECONDS && !hasValidSeconds) {
      console.warn(
        `Invalid EXPIRE_AFTER_SECONDS value, falling back to ${defaultExpireAfterSeconds}`
      );
    }
    const expireAfterSeconds = hasValidSeconds ? parsedSeconds : defaultExpireAfterSeconds;

    // Issue the index creation exactly once.
    await db.collection(collectionName).createIndex({ updatedAt: 1 }, { expireAfterSeconds });
    console.log('TTL index created successfully');
  } catch (error) {
    console.error('Error creating TTL index:', error);
  }
};
@@ -53,11 +57,7 @@ const savePageToCache = async (url: string, content: string) => {
try {
const db = await connectToMongo();
await db.collection(collectionName).updateOne(
{ url },
{ $set: page },
{ upsert: true }
); // 更新持久化缓存
await db.collection(collectionName).updateOne({ url }, { $set: page }, { upsert: true }); // 更新持久化缓存
} catch (error) {
console.error('Error saving page to cache:', error);
throw error;
@@ -74,4 +74,4 @@ process.on('SIGINT', async () => {
});
// Create the TTL index once when the application starts.
createTTLIndex();

View File

@@ -13,12 +13,19 @@ interface CachedPage {
updatedAt: Date;
}
export const performDeepSearch = async (clusterInstance: Cluster, resultUrls: string[], results: Map<string, any>, strategies: any[], detectWebsites: string[], pageCount: number) => {
export const performDeepSearch = async (
clusterInstance: Cluster,
resultUrls: string[],
results: Map<string, any>,
strategies: any[],
detectWebsites: string[],
pageCount: number
) => {
const tasks = [];
await clusterInstance.task(async ({ page, data: { searchUrl } }) => {
try {
const cachedPage = await getCachedPage(searchUrl) as CachedPage | null;
const cachedPage = (await getCachedPage(searchUrl)) as CachedPage | null;
if (cachedPage) {
const result = results.get(searchUrl);
if (result) {
@@ -29,18 +36,25 @@ export const performDeepSearch = async (clusterInstance: Cluster, resultUrls: st
}
} catch (error) {
console.error(`从缓存获取页面 ${searchUrl} 时发生错误:`, error);
results.set(searchUrl, { url: searchUrl, error: (error as Error).message, crawlStatus: 'Failed' });
results.set(searchUrl, {
url: searchUrl,
error: (error as Error).message,
crawlStatus: 'Failed'
});
return;
}
try {
const response = await fetch(searchUrl, {
headers: {
'User-Agent': new UserAgent({ deviceCategory: 'desktop', platform: 'Linux x86_64' }).toString(),
'Referer': 'https://www.google.com/',
'User-Agent': new UserAgent({
deviceCategory: 'desktop',
platform: 'Linux x86_64'
}).toString(),
Referer: 'https://www.google.com/',
'Accept-Language': 'en-US,en;q=0.9',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Connection': 'keep-alive',
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
Connection: 'keep-alive',
'Cache-Control': 'no-cache'
}
});
@@ -66,7 +80,7 @@ export const performDeepSearch = async (clusterInstance: Cluster, resultUrls: st
}
try {
if (detectWebsites.some(website => searchUrl.includes(website))) {
if (detectWebsites.some((website) => searchUrl.includes(website))) {
await setupPage(page);
} else {
const userAgent = new UserAgent({ deviceCategory: 'desktop', platform: 'Linux x86_64' });
@@ -118,7 +132,11 @@ export const performDeepSearch = async (clusterInstance: Cluster, resultUrls: st
await updateCacheAsync(searchUrl, cleanedContent || '');
} catch (error) {
results.set(searchUrl, { url: searchUrl, error: (error as Error).message, crawlStatus: 'Failed' });
results.set(searchUrl, {
url: searchUrl,
error: (error as Error).message,
crawlStatus: 'Failed'
});
} finally {
await page.close().catch(() => {});
}
@@ -137,4 +155,4 @@ export const performDeepSearch = async (clusterInstance: Cluster, resultUrls: st
await clusterInstance.close();
return Array.from(results.values()).sort((a, b) => b.score - a.score);
};
};

View File

@@ -8,7 +8,7 @@ const getRandomUserAgent = () => {
};
/**
 * Picks one platform identifier uniformly at random from a fixed list.
 *
 * @returns One of 'Win32', 'MacIntel' or 'Linux x86_64'.
 */
const getRandomPlatform = () => {
  const platforms = ['Win32', 'MacIntel', 'Linux x86_64'];
  return platforms[Math.floor(Math.random() * platforms.length)];
};
@@ -16,14 +16,16 @@ const getRandomPlatform = () => {
// Proxy pool, parsed once at module load from the VALIDATE_PROXY environment
// variable; an unset or empty variable yields an empty pool. JSON.parse throws
// on malformed input, so a bad value fails at load time.
// NOTE(review): presumably VALIDATE_PROXY holds a JSON array — confirm against deployment config.
const validateproxy = process.env.VALIDATE_PROXY ? JSON.parse(process.env.VALIDATE_PROXY) : [];

/**
 * Picks one entry of the proxy pool uniformly at random.
 *
 * @returns A random element of `validateproxy`, or `null` when the pool is empty.
 */
const getRandomProxy = () => {
  return validateproxy.length > 0
    ? validateproxy[Math.floor(Math.random() * validateproxy.length)]
    : null;
};
/**
 * Picks one language preference list uniformly at random from a fixed set.
 *
 * @returns One of the three language lists below, each ordered from most to
 * least preferred as written.
 */
const getRandomLanguages = () => {
  const languages = [
    ['zh-CN', 'zh', 'en'],
    ['en-US', 'en', 'fr'],
    ['es-ES', 'es', 'en']
  ];
  return languages[Math.floor(Math.random() * languages.length)];
};
@@ -42,30 +44,38 @@ export const setupPage = async (page: Page): Promise<void> => {
delete newProto.webdriver;
(navigator as any).__proto__ = newProto;
(window as any).chrome = {};
(window as any).chrome.app = {"InstallState":"testt", "RunningState":"estt", "getDetails":"stte", "getIsInstalled":"ttes"};
(window as any).chrome.csi = function(){};
(window as any).chrome.loadTimes = function(){};
(window as any).chrome.runtime = function(){};
(window as any).chrome.app = {
InstallState: 'testt',
RunningState: 'estt',
getDetails: 'stte',
getIsInstalled: 'ttes'
};
(window as any).chrome.csi = function () {};
(window as any).chrome.loadTimes = function () {};
(window as any).chrome.runtime = function () {};
Object.defineProperty(navigator, 'userAgent', {
get: () => getRandomUserAgent(),
get: () => getRandomUserAgent()
});
Object.defineProperty(navigator, 'platform', {
get: () => getRandomPlatform(),
get: () => getRandomPlatform()
});
Object.defineProperty(navigator, 'plugins', {
get: () => [{"description": "Shockwave Flash",
"filename": "pepflashplayer.dll",
"length": 1,
"name": "Shockwave Flash"}]
get: () => [
{
description: 'Shockwave Flash',
filename: 'pepflashplayer.dll',
length: 1,
name: 'Shockwave Flash'
}
]
});
Object.defineProperty(navigator, 'languages', {
get: () => getRandomLanguages(),
get: () => getRandomLanguages()
});
const originalQuery = (window.navigator.permissions as any).query;
(window.navigator.permissions as any).query = (parameters: any) => (
parameters.name === 'notifications' ?
Promise.resolve({ state: Notification.permission } as PermissionStatus) :
originalQuery(parameters)
);
(window.navigator.permissions as any).query = (parameters: any) =>
parameters.name === 'notifications'
? Promise.resolve({ state: Notification.permission } as PermissionStatus)
: originalQuery(parameters);
});
};
};