#!/usr/bin/env ruby
# frozen_string_literal: true

# Backs up the pages behind numbered Markdown reference links.
#
# Usage: <this script> FILE URL [URL...]
#
# For every URL given, the script:
#   1. finds the reference definition "[N]: URL" in FILE,
#   2. downloads the raw page with curl (to read its <title>) and a text
#      rendering of it with w3m,
#   3. writes the text rendering next to FILE as "<host>-<hash>.txt",
#   4. appends "[^N-backup]" after every use of "[N]" in FILE and adds a
#      matching "[^N-backup]: ..." line at the end of FILE.
#
# Exits with status 1 (after printing a message to stderr) when arguments are
# missing, a link definition cannot be found, a URL has no host, an external
# command fails, or no page title can be extracted.

require "uri"
require "digest"

# Captures the text between <title ...> and </title>; /m lets the title span
# several lines, /i accepts any tag casing.
TITLE_PATTERN = %r{<title[^>]*>(.*?)</title>}mi

# Runs an external command without going through a shell, so characters such
# as "&", ";" or "$(...)" inside a URL are passed verbatim instead of being
# interpreted. Returns the command's standard output; aborts if it fails.
def capture(*command)
  output = IO.popen(command, &:read)
  abort "Command failed (#{command.join(' ')})" unless Process.last_status.success?
  output
end

# Returns the host component of +url+, or nil when the URL cannot be parsed
# or has no host (for example when the scheme is missing).
def host_of(url)
  URI.parse(url).host
rescue URI::InvalidURIError
  nil
end

filename, *urls = ARGV
abort "Please supply a filename and url" unless filename && urls.any?

# File.dirname yields "." for a bare filename, so backups land beside the
# file rather than in the filesystem root.
directory = File.dirname(filename)
content = File.read(filename)

urls.each do |url|
  # The URL is escaped because "." and "?" are regexp metacharacters.
  # String#[] returns nil when nothing matches, so the guard below is reached.
  link_id = content[/\[(\d+)\]: #{Regexp.escape(url)}/, 1]
  abort "Link not found" unless link_id

  host = host_of(url)
  abort "Invalid url (#{url})" unless host

  page_content = capture("curl", "-s", url)
  text_content = capture("w3m", url)

  title =
    begin
      page_content[TITLE_PATTERN, 1]&.strip
    rescue => e # e.g. invalid byte sequences in the downloaded page
      abort "Title error (#{e}; #{url})"
    end
  abort "Title error (no <title> found; #{url})" unless title

  # Short, filename-safe fingerprint: the first six alphanumeric characters
  # of the base64 MD5 of URL + page text, lower-cased.
  fingerprint = Digest::MD5.base64digest(url + text_content)
                           .scan(/[a-z0-9]/i)
                           .first(6)
                           .join
                           .downcase

  link_filename = "#{host.tr('.', '-')}-#{fingerprint}.txt"
  File.write(File.join(directory, link_filename), text_content)

  # Tag every use of the reference, but not its definition "[N]: ...".
  # A lookahead is used so that no following character is consumed: this also
  # covers a reference at the very end of the file and adjacent references.
  content.gsub!(/\[#{link_id}\](?!:)/, "[#{link_id}][^#{link_id}-backup]")
  content << "\n[^#{link_id}-backup]: \"#{title}\"; backed up #{Time.now.utc}"
end

File.write(filename, content)