Manual mode for archiving (Cloudflare)
This commit is contained in:
39
bin/archive
39
bin/archive
@@ -3,6 +3,7 @@
|
||||
require "uri"
|
||||
require "digest"
|
||||
require "time"
|
||||
require "optparse"
|
||||
require "nokogiri"
|
||||
require "ferrum"
|
||||
|
||||
@@ -47,7 +48,24 @@ def text_from_html(html)
|
||||
end
|
||||
end
|
||||
|
||||
*urls = ARGV
|
||||
options = {
|
||||
manual: false,
|
||||
browser_path: nil
|
||||
}
|
||||
|
||||
OptionParser.new do |parser|
|
||||
parser.banner = "Usage: bin/archive [--manual] [--browser-path PATH] URL [URL ...]"
|
||||
|
||||
parser.on("--manual", "Open a visible browser window so you can complete anti-bot challenges") do
|
||||
options[:manual] = true
|
||||
end
|
||||
|
||||
parser.on("--browser-path PATH", "Path to the browser binary to launch") do |path|
|
||||
options[:browser_path] = path
|
||||
end
|
||||
end.parse!
|
||||
|
||||
urls = ARGV
|
||||
clipboard = ""
|
||||
|
||||
unless urls.any?
|
||||
@@ -57,9 +75,16 @@ end
|
||||
|
||||
puts "references:"
|
||||
|
||||
if options[:manual] && options[:browser_path].nil?
|
||||
brave_path = "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"
|
||||
options[:browser_path] = brave_path if File.exist?(brave_path)
|
||||
end
|
||||
|
||||
browser = Ferrum::Browser.new(
|
||||
headless: !options[:manual],
|
||||
timeout: 30,
|
||||
process_timeout: 30,
|
||||
process_timeout: options[:manual] ? 60 : 30,
|
||||
browser_path: options[:browser_path],
|
||||
browser_options: { "no-sandbox": nil }
|
||||
)
|
||||
page = browser.create_page
|
||||
@@ -68,7 +93,15 @@ begin
|
||||
urls.each do |url|
|
||||
begin
|
||||
page.goto(url)
|
||||
page.network.wait_for_idle(timeout: 10)
|
||||
|
||||
if options[:manual]
|
||||
warn "Manual mode: finish any challenge in the browser window for #{url}"
|
||||
warn "Press Enter here once the page is fully loaded."
|
||||
$stdin.gets
|
||||
warn "Capturing page..."
|
||||
else
|
||||
page.network.wait_for_idle(timeout: 10)
|
||||
end
|
||||
|
||||
html = page.body.encode("UTF-8", invalid: :replace, undef: :replace, replace: "")
|
||||
doc = Nokogiri::HTML(html)
|
||||
|
||||
Reference in New Issue
Block a user