From 1e4b02fbf733164bfbd77d46c6e67bc7b10aa2fe Mon Sep 17 00:00:00 2001
From: NaturalSelect <2145973003@qq.com>
Date: Fri, 28 Jun 2024 23:17:02 +0800
Subject: [PATCH] feat(download): download all content support batch download
Signed-off-by: NaturalSelect <2145973003@qq.com>
---
DownloadAllContent/DownloadAllContent.user.js | 104 +++++++++++++-----
1 file changed, 79 insertions(+), 25 deletions(-)
diff --git a/DownloadAllContent/DownloadAllContent.user.js b/DownloadAllContent/DownloadAllContent.user.js
index 5b2314a1959..6ccb3e37ffa 100644
--- a/DownloadAllContent/DownloadAllContent.user.js
+++ b/DownloadAllContent/DownloadAllContent.user.js
@@ -263,6 +263,7 @@ if (window.top != window.self) {
dacSortByName:"按章节名排序",
reverse:"反选",
dacUseIframe:"使用 iframe 后台加载内容(慢速)",
+ dacBatchDownload: "分批下载 (用于绕过网站的反爬虫)",
dacSaveAsZip:"下载为 zip",
dacSetCustomRule:"修改规则",
dacAddUrl:"添加章节",
@@ -309,6 +310,7 @@ if (window.top != window.self) {
dacSortByName:"依章節名排序",
reverse:"反選",
dacUseIframe:"使用 iframe 背景載入內容(慢速)",
+ dacBatchDownload: "分批下載 (用於繞過網站的反爬蟲)",
dacSaveAsZip:"下載為 zip",
dacSetCustomRule:"修改規則",
dacAddUrl:"新增章節",
@@ -369,6 +371,7 @@ if (window.top != window.self) {
dacSortByName: "الترتيب حسب الاسم",
reverse: "عكس الاختيار",
dacUseIframe: "لتحميل المحتوى (بطيء) iframe استخدام",
+ dacBatchDownload: "Download in batches (used to bypass anti-scraping on websites)",
dacSaveAsZip: "zip حفظ كـ",
dacSetCustomRule: "تعديل القواعد",
dacAddUrl: "إضافة فصل",
@@ -413,6 +416,7 @@ if (window.top != window.self) {
dacSortByName:"Sort by name",
reverse:"Reverse selection",
dacUseIframe: "Use iframe to load content (slow)",
+ dacBatchDownload: "Download in batches (used to bypass anti-scraping on websites)",
dacSaveAsZip: "Save as zip",
dacSetCustomRule:"Modify rules",
dacAddUrl:"Add Chapter",
@@ -423,8 +427,8 @@ if (window.top != window.self) {
};
break;
}
- var firefox=navigator.userAgent.toLowerCase().indexOf('firefox')!=-1,curRequests=[],useIframe=false,iframeSandbox=false,iframeInit=false;
- var filterListContainer,txtDownContent,txtDownWords,txtDownQuit,dacLinksCon,dacUseIframe,shadowContainer;
+ var firefox=navigator.userAgent.toLowerCase().indexOf('firefox')!=-1,curRequests=[],useIframe=false,iframeSandbox=false,iframeInit=false,batchDownload=false;
+ var filterListContainer,txtDownContent,txtDownWords,txtDownQuit,dacLinksCon,dacUseIframe,dacBatchDownload,shadowContainer;
const escapeHTMLPolicy = (win.trustedTypes && win.trustedTypes.createPolicy) ? win.trustedTypes.createPolicy('dac_default', {
createHTML: (string, sink) => string
@@ -528,7 +532,7 @@ if (window.top != window.self) {
-
+
@@ -550,6 +554,7 @@ if (window.top != window.self) {
let dacFilterBg = filterListContainer.querySelector("#dacFilterBg");
let dacSaveAsZip = filterListContainer.querySelector("#dacSaveAsZip");
dacUseIframe = filterListContainer.querySelector("#dacUseIframe");
+ dacBatchDownload = filterListContainer.querySelector("#dacBatchDownload")
dacSaveAsZip.onchange = e => {
saveAsZip = dacSaveAsZip.checked;
};
@@ -658,7 +663,8 @@ if (window.top != window.self) {
dacStartDownload.onclick = e => {
let linkList = [].slice.call(dacLinksCon.querySelectorAll("input:checked+.dacLink"));
useIframe = !!dacUseIframe.checked;
- indexDownload(linkList, true);
+ batchDownload = !!dacBatchDownload.checked;
+ indexDownload(linkList, true,batchDownload);
};
dacLinksClose.onclick = e => {
filterListContainer.style.display = "none";
@@ -795,6 +801,7 @@ if (window.top != window.self) {
createLinkItem(a);
});
dacUseIframe.checked = useIframe;
+ dacBatchDownload.checked = batchDownload;
document.body.appendChild(shadowContainer);
}
@@ -918,8 +925,10 @@ if (window.top != window.self) {
charsetValid = false;
}
} else charsetValid = false;
- function indexDownload(aEles, noSort){
- if(aEles.length<1)return;
+ function indexDownload(aEles, noSort,batchDownload){
+ if(aEles.length<1) {
+ return;
+ }
initTxtDownDiv();
if(!noSort) {
if(GM_getValue("contentSort")){
@@ -958,8 +967,26 @@ if (window.top != window.self) {
}
var insertSigns=[];
// var j=0,rCats=[];
- var downIndex=0,downNum=0,downOnce=function(wait){
- if(downNum>=aEles.length)return;
+ var downIndex=0,downNum=0
+ var downOnce = function(wait,downloadCnt,downloadLimit,downloadBarrier,downloadSleepInterval){
+ if(downNum>=aEles.length) {
+ return;
+ }
+ // NOTE: if enable batch download
+ if (downloadSleepInterval != 0) {
+ if (wait != 0 && downloadLimit) {
+ // NOTE: if prev status is limited
+ // set unlimited
+ wait = 0;
+ downloadLimit = false;
+ } else if(wait == 0 && downloadCnt == downloadBarrier) {
+ downloadCnt = 0;
+ downloadLimit = true;
+ wait = downloadSleepInterval;
+ }
+ }
+
+ downloadCnt += 1;
let curIndex=downIndex;
let aTag=aEles[curIndex];
let request=(aTag, curIndex)=>{
@@ -1058,11 +1085,13 @@ if (window.top != window.self) {
}, Math.random() * 500 + validTimes * 1000);
return;
}
- if (wait) {
+ if (wait != 0) {
setTimeout(() => {
- downOnce(wait);
+ downOnce(wait,downloadCnt,downloadLimit,downloadBarrier,downloadSleepInterval);
}, wait);
- } else downOnce();
+ return;
+ }
+ downOnce(0,downloadCnt,downloadLimit,downloadBarrier,downloadSleepInterval);
},
onerror: function(e) {
console.warn("error:", e, aTag.href);
@@ -1075,11 +1104,13 @@ if (window.top != window.self) {
downIndex++;
downNum++;
processDoc(curIndex, aTag, null, ` NETWORK ERROR: ${(e.response||e.responseText)} from: ${aTag.href} `);
- if (wait) {
+ if (wait != 0) {
setTimeout(() => {
- downOnce(wait);
+ downOnce(wait,downloadCnt,downloadLimit,downloadBarrier,downloadSleepInterval);
}, wait);
- } else downOnce();
+ return
+ }
+ downOnce(0,downloadCnt,downloadLimit,downloadBarrier,downloadSleepInterval);
},
ontimeout: function(e) {
console.warn("timeout: times="+(tryTimes+1)+" url="+aTag.href);
@@ -1093,11 +1124,13 @@ if (window.top != window.self) {
downIndex++;
downNum++;
processDoc(curIndex, aTag, null, ` TIMEOUT: ${aTag.href} `);
- if (wait) {
+ if (wait != 0) {
setTimeout(() => {
- downOnce(wait);
+ downOnce(wait,downloadCnt,downloadLimit,downloadBarrier,downloadSleepInterval);
}, wait);
- } else downOnce();
+ return
+ }
+ downOnce(0,downloadCnt,downloadLimit,downloadBarrier,downloadSleepInterval);
}
});
};
@@ -1187,11 +1220,13 @@ if (window.top != window.self) {
}, 1000);
return;
}
- if (wait) {
+ if (wait != 0) {
setTimeout(() => {
- downOnce(wait);
+ downOnce(wait,downloadCnt,downloadLimit,downloadBarrier,downloadSleepInterval);
}, wait);
- } else downOnce();
+ return;
+ }
+ downOnce(0,downloadCnt,downloadLimit,downloadBarrier,downloadSleepInterval);
} catch(e) {
console.debug("Stop as cors");
}
@@ -1309,15 +1344,34 @@ if (window.top != window.self) {
if (useIframe && downThreadNum > 5) {
downThreadNum = 5;
}
+
+ // NOTE: sleep once every 100 downloads
+ const downloadCntLimit = 100;
+ // NOTE: 120 seconds
+ const downloadSleepTime = 120*1000;
+
if (downThreadNum > 0) {
for (var i = 0; i < downThreadNum; i++) {
- downOnce();
- if (downIndex >= aEles.length - 1 || downIndex >= downThreadNum - 1) break;
- else downIndex++;
+ var limit = downloadCntLimit/downThreadNum;
+ if (limit < 1) {
+ limit = 1;
+ }
+ var sleepTime = 0;
+ if (batchDownload) {
+ sleepTime = downloadSleepTime;
+ }
+ downOnce(0,0,false,limit,sleepTime);
+ if (downIndex >= aEles.length - 1 || downIndex >= downThreadNum - 1) {
+ break;
+ }
+ downIndex++;
}
} else {
- downOnce(-downThreadNum * 1000);
- if (downIndex < aEles.length - 1 && downIndex < downThreadNum - 1) downIndex++;
+ // NOTE: single thread always disable batch download
+ downOnce(-downThreadNum * 1000,0,false,downloadCntLimit,0);
+ if (downIndex < aEles.length - 1 && downIndex < downThreadNum - 1) {
+ downIndex++;
+ }
}
/*for(let i=0;i