diff --git a/bin/archive b/bin/archive index 47c23f6..5dddd85 100755 --- a/bin/archive +++ b/bin/archive @@ -3,6 +3,7 @@ require "uri" require "digest" require "time" +require "optparse" require "nokogiri" require "ferrum" @@ -47,7 +48,24 @@ def text_from_html(html) end end -*urls = ARGV +options = { + manual: false, + browser_path: nil +} + +OptionParser.new do |parser| + parser.banner = "Usage: bin/archive [--manual] [--browser-path PATH] URL [URL ...]" + + parser.on("--manual", "Open a visible browser window so you can complete anti-bot challenges") do + options[:manual] = true + end + + parser.on("--browser-path PATH", "Path to the browser binary to launch") do |path| + options[:browser_path] = path + end +end.parse! + +urls = ARGV clipboard = "" unless urls.any? @@ -57,9 +75,16 @@ end puts "references:" +if options[:manual] && options[:browser_path].nil? + brave_path = "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser" + options[:browser_path] = brave_path if File.exist?(brave_path) +end + browser = Ferrum::Browser.new( + headless: !options[:manual], timeout: 30, - process_timeout: 30, + process_timeout: options[:manual] ? 60 : 30, + browser_path: options[:browser_path], browser_options: { "no-sandbox": nil } ) page = browser.create_page @@ -68,7 +93,15 @@ begin urls.each do |url| begin page.goto(url) - page.network.wait_for_idle(timeout: 10) + + if options[:manual] + warn "Manual mode: finish any challenge in the browser window for #{url}" + warn "Press Enter here once the page is fully loaded." + $stdin.gets + warn "Capturing page..." + else + page.network.wait_for_idle(timeout: 10) + end html = page.body.encode("UTF-8", invalid: :replace, undef: :replace, replace: "") doc = Nokogiri::HTML(html)