Move web archives into dedicated directory
This commit is contained in:
47
bin/archive
Executable file
47
bin/archive
Executable file
@@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env ruby
# frozen_string_literal: true

# Archives web pages as plain text and prints a YAML "references:" list.
#
# Usage: bin/archive URL [URL...]
#
# For each URL the raw page is fetched with curl (to read its <title>) and
# rendered to text with lynx. The text is written to
# static/archive/<host>-<hash>.txt and a YAML entry (title, url, date, file)
# is printed to stdout. Exits with status 1 when no URLs are given, when a
# URL has no host, or when a page has no <title>.

require "uri"
require "digest"
require "time"

ARCHIVE_DIR = "static/archive"

# Runs an external command and returns its standard output.
#
# The command is passed to IO.popen as an array, so it is executed directly
# rather than through a shell: characters such as "&", "?" or ";" in a URL
# reach the program verbatim instead of being interpreted.
def capture(*command)
  IO.popen(command, &:read)
end

# Returns the host component of +url+, or nil when the URL cannot be parsed
# or has no host (for example when the scheme is missing).
def host_of(url)
  URI.parse(url).host
rescue URI::InvalidURIError
  nil
end

# Returns the text of the first <title> element in +html+ with runs of
# whitespace collapsed to single spaces, or nil when there is no <title>.
# Invalid byte sequences are scrubbed first so the regexp match cannot raise.
def extract_title(html)
  html.scrub[%r{<title[^>]*>(.*?)</title>}mi, 1]&.gsub(/\s+/, " ")&.strip
end

# Wraps +text+ in double quotes, escaping the characters that would
# otherwise terminate or corrupt a double-quoted YAML scalar.
def yaml_quote(text)
  escaped = text.gsub(/["\\]/) { |char| "\\#{char}" }
  "\"#{escaped}\""
end

urls = ARGV

if urls.empty?
  warn "Please supply one or more URLs"
  exit 1
end

puts "references:"

urls.each do |url|
  # Validate before doing any network work; the host names the archive file.
  host = host_of(url)
  if host.nil?
    warn "URL error (no host; #{url})"
    exit 1
  end

  page_content = capture("curl", "-s", url)
  text_content = capture("lynx", "-dump", "--display_charset=utf-8", url)

  title = extract_title(page_content)
  if title.nil?
    warn "Title error (no <title> found; #{url})"
    exit 1
  end

  # Short content-derived suffix: the first six alphanumeric characters of
  # the base64 MD5 digest of URL + text, lowercased.
  hash = Digest::MD5.base64digest(url + text_content)
    .scan(/[a-z0-9]/i)
    .first(6)
    .join
    .downcase

  filename = "#{host.tr(".", "-")}-#{hash}.txt"

  File.write(File.join(ARCHIVE_DIR, filename), text_content)

  puts <<~STR
    - title: #{yaml_quote(title)}
      url: #{url}
      date: #{Time.now.utc.iso8601}
      file: #{filename}
  STR
end
|
||||
Reference in New Issue
Block a user