Continue iterating on backup script

This commit is contained in:
David Eisinger
2023-04-04 11:48:25 -04:00
parent 4f7b335bfc
commit cbea2b356d

View File

@@ -3,9 +3,9 @@
require "uri"
require "digest"
filename, url = ARGV
filename, *urls = ARGV
unless filename && url
unless filename && urls.any?
warn "Please supply a filename and url"
exit 1
end
@@ -13,6 +13,7 @@ end
path = filename.sub(File.basename(filename), "")
content = File.read(filename)
urls.each do |url|
link_id = content.scan(/\[(\d+)\]: #{url}/).first.first
unless link_id
@@ -23,10 +24,16 @@ end
page_content = `curl -s #{url}`
text_content = `w3m #{url}`
begin
title = page_content
.scan(/<title>(.*)<\/title>/)
.scan(/<title>(.*?)<\/title>/mi)
.first
.first
.strip
rescue => ex
warn "Title error (#{ex}; #{url})"
exit 1
end
hash = Digest::MD5.base64digest(url + text_content)
.scan(/[a-z0-9]/i)
@@ -39,6 +46,7 @@ link_filename = "#{URI.parse(url).host.gsub(".", "-")}-#{hash}.txt"
File.write("#{path}/#{link_filename}", text_content)
content.gsub!(/\[#{link_id}\]([^:])/, "[#{link_id}][^#{link_id}-backup]\\1")
content << "\n[^#{link_id}-backup]: \"<a href=\"#{url}\">#{title}</a>\"; <a href=\"#{link_filename}\">backed up #{Time.now.utc}</a>"
end
File.write(filename, content)
File.write(filename, "\n[^#{link_id}-backup]: "<a href=\"#{url}\">#{title}</a>"; <a href=\"#{link_filename}\">backed up #{Time.now.utc}</a>", mode: "a+")