51 lines
909 B
Ruby
Executable File
51 lines
909 B
Ruby
Executable File
#!/usr/bin/env ruby
|
|
|
|
require "cgi"
|
|
require "uri"
|
|
require "digest"
|
|
require "time"
|
|
|
|
# Treat every command-line argument as a URL to archive.
urls = ARGV.dup

# Without at least one URL there is nothing to do: report the problem on
# stderr and exit with a failure status.
if urls.empty?
  warn "Please supply one or more URLs"
  exit 1
end

# Top-level key of the output; the per-URL entries are printed beneath it.
puts "references:"
|
|
|
|
# For each URL: fetch the raw HTML (for its <title>), dump a plain-text
# rendering with w3m, store that dump under static/archive/, and print one
# list entry (title, url, date, file) describing the snapshot.
urls.each do |url|
  # Run the external tools with an argument vector instead of a shell string,
  # so characters such as `&`, `?`, `;`, `$` or spaces inside the URL reach the
  # tool literally rather than being interpreted by the shell.
  page_content = IO.popen(["curl", "-s", url], &:read)
  text_content = IO.popen(
    ["w3m", "-dump", "-T", "text/html", "-o", "display_link_number=1", url],
    &:read
  )

  begin
    # First <title> element; /m lets the title span lines, /i ignores tag case.
    raw_title = page_content[%r{<title[^>]*>(.*?)</title>}mi, 1]
    raise "no <title> element found" unless raw_title

    # Decode HTML entities and collapse internal whitespace/newlines so the
    # title always fits on a single output line.
    title = CGI.unescapeHTML(raw_title).gsub(/\s+/, " ").strip
  rescue => ex
    warn "Title error (#{ex}; #{url})"
    exit 1
  end

  # Short content fingerprint: the first six alphanumeric characters of the
  # base64 MD5 of URL + text dump, lowercased. Used only to make the archive
  # filename unique per URL/content pair.
  hash = Digest::MD5.base64digest(url + text_content)
    .scan(/[a-z0-9]/i)
    .first(6)
    .join
    .downcase

  # URI#host is nil for input without a scheme/authority (e.g. "example.com"),
  # so fail with a readable message instead of a NoMethodError.
  host = URI.parse(url).host
  unless host
    warn "URL error (no host; #{url})"
    exit 1
  end

  filename = "#{host.tr(".", "-")}-#{hash}.txt"

  # NOTE: static/archive/ must already exist; File.write does not create it.
  File.write("static/archive/#{filename}", text_content)

  # Escape backslashes and double quotes so the title stays a valid
  # double-quoted scalar in the emitted entry.
  yaml_title = title.gsub(/["\\]/) { |ch| "\\#{ch}" }

  # The url/date/file keys are indented under `title` so that all four keys
  # belong to the same list item.
  puts <<~STR
    - title: "#{yaml_title}"
      url: #{url}
      date: #{Time.now.utc.iso8601}
      file: #{filename}
  STR
end
|