Skip to content

Commit adb332f

Browse files
committed
Adjust WebScrapper
1 parent 3cae3be commit adb332f

1 file changed

Lines changed: 19 additions & 17 deletions

File tree

Tool/Sources/WebScrapper/WebScrapper.swift

Lines changed: 19 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,11 @@ import WebKit
66
public final class WebScrapper {
77
final class NavigationDelegate: NSObject, WKNavigationDelegate {
88
weak var scrapper: WebScrapper?
9-
10-
public nonisolated func webView(_: WKWebView, didFinish _: WKNavigation!) {
9+
10+
public nonisolated func webView(_ webView: WKWebView, didFinish _: WKNavigation!) {
1111
Task { @MainActor in
12+
let scrollToBottomScript = "window.scrollTo(0, document.body.scrollHeight);"
13+
_ = try? await webView.evaluateJavaScript(scrollToBottomScript)
1214
self.scrapper?.webViewDidFinishLoading = true
1315
}
1416
}
@@ -29,7 +31,7 @@ public final class WebScrapper {
2931

3032
var webViewDidFinishLoading = false
3133
var navigationError: (any Error)?
32-
let navigationDelegate: NavigationDelegate = NavigationDelegate()
34+
let navigationDelegate: NavigationDelegate = .init()
3335

3436
enum WebScrapperError: Error {
3537
case retry
@@ -38,15 +40,6 @@ public final class WebScrapper {
3840
public init() async {
3941
let jsonRuleList = ###"""
4042
[
41-
{
42-
"trigger": {
43-
"url-filter": ".*",
44-
"resource-type": ["style-sheet"]
45-
},
46-
"action": {
47-
"type": "block"
48-
}
49-
},
5043
{
5144
"trigger": {
5245
"url-filter": ".*",
@@ -91,9 +84,8 @@ public final class WebScrapper {
9184
configuration.defaultWebpagePreferences.preferredContentMode = .desktop
9285
configuration.defaultWebpagePreferences.allowsContentJavaScript = true
9386
configuration.websiteDataStore = .nonPersistent()
94-
configuration
95-
.applicationNameForUserAgent =
96-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15"
87+
configuration.applicationNameForUserAgent =
88+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/26.0.1 Safari/605.1.15"
9789

9890
if #available(iOS 17.0, macOS 14.0, *) {
9991
configuration.allowsInlinePredictions = false
@@ -134,8 +126,18 @@ public final class WebScrapper {
134126
retryCount += 1
135127
try await Task.sleep(nanoseconds: 100_000_000)
136128
}
137-
138-
throw CancellationError()
129+
130+
enum Error: Swift.Error, LocalizedError {
131+
case failToValidate
132+
133+
var errorDescription: String? {
134+
switch self {
135+
case .failToValidate:
136+
return "Failed to validate the HTML content within the given timeout and retry limit."
137+
}
138+
}
139+
}
140+
throw Error.failToValidate
139141
}
140142

141143
func getHTML() async throws -> String {

0 commit comments

Comments
 (0)