48 lines
847 B
Ruby
Executable File
48 lines
847 B
Ruby
Executable File
#!/usr/bin/env ruby
|
|
|
|
require "uri"
|
|
require "digest"
|
|
require "time"
|
|
|
|
# Usage: <script> PATH URL [URL ...]
# The first command-line argument is bound to `path`; every remaining
# argument is collected into `urls`.
path = ARGV.first
urls = ARGV.drop(1)

# Both a path and at least one URL are required; otherwise report the problem
# on stderr and stop with a failing exit status.
if path.nil? || urls.empty?
  warn "Please supply a path and url"
  exit 1
end

# Header line printed once, ahead of the per-URL entries.
puts "references:"
|
|
|
|
# Runs +argv+ directly (no shell) and returns everything it wrote to stdout.
#
# The array form of IO.popen execs the program with the given argument
# vector, so shell metacharacters inside a URL (`&`, `;`, `$(...)`, ...) are
# passed through literally instead of being interpreted by a shell.
#
# @param argv [Array<String>] program name followed by its arguments
# @return [String] the captured standard output
# @raise [RuntimeError] if the command exits unsuccessfully
# @raise [SystemCallError] if the program cannot be started
def capture_stdout(*argv)
  output = IO.popen(argv, &:read)
  # The block form of IO.popen waits for the child process and sets $?.
  raise "`#{argv.first}` exited with status #{$?.exitstatus}" unless $?.success?

  output
end

# Returns the stripped text of the first <title> element in +html+, or nil
# when no such element is present.
#
# @param html [String] raw page markup
# @return [String, nil]
def extract_title(html)
  html[%r{<title[^>]*>(.*?)</title>}mi, 1]&.strip
end

# Prepares +text+ for embedding between double quotes in the YAML output:
# whitespace runs (including newlines, which the multi-line title regex can
# capture) collapse to one space, and `"` and `\` are backslash-escaped.
#
# @param text [String]
# @return [String] the escaped text, without surrounding quotes
def yaml_double_quoted(text)
  text.gsub(/\s+/, " ").gsub(/["\\]/) { |char| "\\#{char}" }
end

# Builds the snapshot file name: the host with dots turned into dashes,
# followed by the first six alphanumeric characters (lower-cased) of the
# base64 MD5 digest of the URL concatenated with the dumped text.
#
# @param host [String] host component of the URL
# @param url [String] the URL as given on the command line
# @param text [String] the text dump of the page
# @return [String] a name of the form "<host>-<digest>.txt"
def snapshot_filename(host, url, text)
  digest = Digest::MD5.base64digest(url + text).delete("^a-zA-Z0-9")[0, 6].downcase
  "#{host.tr(".", "-")}-#{digest}.txt"
end

# For every URL: fetch the raw page and a text dump, save the dump under
# `path`, and print one list entry describing it. Any failure is reported on
# stderr and stops the script with exit status 1.
urls.each do |url|
  # Validate before fetching. Requiring a host also rejects arguments such as
  # "-o file" that curl or lynx would otherwise read as command-line options.
  begin
    host = URI.parse(url).host
    raise "no host component" unless host
  rescue => e
    warn "URL error (#{e}; #{url})"
    exit 1
  end

  begin
    page_content = capture_stdout("curl", "-s", url)
    text_content = capture_stdout("lynx", "-dump", "--display_charset=utf-8", url)
  rescue => e
    warn "Fetch error (#{e}; #{url})"
    exit 1
  end

  begin
    title = extract_title(page_content)
    raise "no <title> element found" unless title
  rescue => e
    # Also reached when the page bytes are not valid in the string's encoding
    # and the regex match raises.
    warn "Title error (#{e}; #{url})"
    exit 1
  end

  filename = snapshot_filename(host, url, text_content)
  File.write(File.join(path, filename), text_content)

  # Continuation keys are indented two spaces so they belong to the list item.
  puts <<~STR
    - title: "#{yaml_double_quoted(title)}"
      url: #{url}
      date: #{Time.now.utc.iso8601}
      file: #{filename}
  STR
end
|