Manual mode for archiving (Cloudflare)

This commit is contained in:
David Eisinger
2026-04-08 01:24:16 -04:00
parent c8ca51a602
commit 94bec0f042

View File

@@ -3,6 +3,7 @@
require "uri"
require "digest"
require "time"
require "optparse"
require "nokogiri"
require "ferrum"
@@ -47,7 +48,24 @@ def text_from_html(html)
end
end
*urls = ARGV
# Command-line settings and their defaults: headless operation, and no
# explicit browser binary.
options = {
manual: false,
browser_path: nil
}
# Recognised switches:
#   --manual              sets options[:manual] to true
#   --browser-path PATH   stores PATH in options[:browser_path]
# `parse!` with no argument parses ARGV destructively, removing the switches
# it recognises and leaving the positional arguments in place.
OptionParser.new do |parser|
parser.banner = "Usage: bin/archive [--manual] [--browser-path PATH] URL [URL ...]"
parser.on("--manual", "Open a visible browser window so you can complete anti-bot challenges") do
options[:manual] = true
end
parser.on("--browser-path PATH", "Path to the browser binary to launch") do |path|
options[:browser_path] = path
end
end.parse!
# Whatever is left in ARGV after option parsing is treated as the URL list.
urls = ARGV
clipboard = ""
unless urls.any?
@@ -57,9 +75,16 @@ end
puts "references:"
# In manual mode with no --browser-path given, fall back to Brave at its
# standard macOS install location, but only if that binary actually exists.
# Otherwise browser_path stays nil.
if options[:manual] && options[:browser_path].nil?
brave_path = "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"
options[:browser_path] = brave_path if File.exist?(brave_path)
end
# Build the Ferrum browser: headless unless --manual was requested.
browser = Ferrum::Browser.new(
headless: !options[:manual],
timeout: 30,
# NOTE(review): `process_timeout:` is listed twice. This text looks like a diff
# rendered without +/- markers, so the fixed `30` is presumably the removed
# line and the conditional below the added one. If both lines were real code,
# Ruby would warn about the duplicated key and use the last value. Confirm
# against the actual file.
process_timeout: 30,
# 60 in manual mode, 30 otherwise.
process_timeout: options[:manual] ? 60 : 30,
# nil when no path was supplied or detected.
# Presumably Ferrum then locates a browser itself; verify in the Ferrum docs.
browser_path: options[:browser_path],
# Presumably passes the bare `--no-sandbox` flag to the browser; confirm
# against Ferrum's `browser_options` documentation.
browser_options: { "no-sandbox": nil }
)
page = browser.create_page
@@ -68,7 +93,15 @@ begin
urls.each do |url|
begin
page.goto(url)
page.network.wait_for_idle(timeout: 10)
# Decide how to wait before the page is captured.
# Manual mode: print instructions via `warn`, then block on `$stdin.gets`
# until the operator enters a line, so any challenge can be completed by hand
# in the browser window.
# Otherwise: wait for network activity to go idle, with a timeout of 10.
if options[:manual]
warn "Manual mode: finish any challenge in the browser window for #{url}"
warn "Press Enter here once the page is fully loaded."
$stdin.gets
warn "Capturing page..."
else
page.network.wait_for_idle(timeout: 10)
end
html = page.body.encode("UTF-8", invalid: :replace, undef: :replace, replace: "")
doc = Nokogiri::HTML(html)