Move web archives into dedicated directory

This commit is contained in:
David Eisinger
2023-04-29 20:47:05 -04:00
parent f5e5481eb6
commit b7b58cf3ab
8 changed files with 5 additions and 5 deletions

47
bin/archive Executable file
View File

@@ -0,0 +1,47 @@
#!/usr/bin/env ruby
require "uri"
require "digest"
require "time"
# Archives each URL given on the command line as a plain-text snapshot under
# static/archive/ and prints a YAML "references:" list describing the
# snapshots on standard output.
#
# External tools used: `curl` (raw HTML, for the <title>) and `lynx` (text
# rendering of the page, which is what gets stored).

# Runs +command+ (an argv-style list) and returns what it wrote to stdout.
# The command is executed directly rather than through a shell, so
# characters such as `&`, `?`, `;` or spaces inside a URL are passed through
# verbatim instead of being interpreted as shell syntax.
def capture(*command)
  IO.popen(command, &:read)
end

# Returns the host component of +url+, or nil when the string is not a
# parseable URL or carries no host (e.g. "example.com" without a scheme).
def host_of(url)
  host = URI.parse(url).host
  host unless host.nil? || host.empty?
rescue URI::InvalidURIError
  nil
end

# Extracts the text of the first <title> element in +html+, with runs of
# whitespace (including newlines) collapsed to single spaces.
# Raises when the document has no <title> element.
def extract_title(html)
  title = html[/<title[^>]*>(.*?)<\/title>/mi, 1]
  raise "no <title> element found" unless title

  title.strip.gsub(/\s+/, " ")
end

# Escapes +text+ for use inside a double-quoted YAML scalar: backslashes and
# double quotes would otherwise terminate or corrupt the scalar.
def yaml_escape(text)
  text.gsub(/["\\]/) { |char| "\\#{char}" }
end

# Derives a short, filename-safe identifier from the URL and page text:
# the first six alphanumeric characters of the Base64 MD5 digest, lowercased.
def short_digest(url, text)
  Digest::MD5.base64digest(url + text).delete("^A-Za-z0-9")[0, 6].downcase
end

urls = ARGV

if urls.empty?
  warn "Please supply one or more URLs"
  exit 1
end

puts "references:"

urls.each do |url|
  # Validate before fetching so a malformed URL fails fast with a clear
  # message instead of crashing after both downloads have already run.
  host = host_of(url)
  unless host
    warn "URL error (no host; #{url})"
    exit 1
  end

  page_content = capture("curl", "-s", url)
  text_content = capture("lynx", "-dump", "--display_charset=utf-8", url)

  begin
    title = yaml_escape(extract_title(page_content))
  rescue => ex
    # Also covers pages whose bytes are not valid in the current encoding,
    # which makes the regex match raise.
    warn "Title error (#{ex}; #{url})"
    exit 1
  end

  filename = "#{host.tr(".", "-")}-#{short_digest(url, text_content)}.txt"
  File.write("static/archive/#{filename}", text_content)

  # Continuation keys are indented two spaces so they belong to the same
  # YAML list item as `- title:`.
  puts <<~STR
    - title: "#{title}"
      url: #{url}
      date: #{Time.now.utc.iso8601}
      file: #{filename}
  STR
end