Add elsewhere script

This commit is contained in:
David Eisinger
2024-05-09 10:07:55 -04:00
parent 2792210c3c
commit d2bf0140a6
3 changed files with 97 additions and 0 deletions

7
bin/elsewhere/Gemfile Normal file
View File

@@ -0,0 +1,7 @@
# frozen_string_literal: true
# Gem dependencies for the `elsewhere` import script that lives next to this file.
# NOTE(review): the script also does `require "pry"`, which is not declared
# here; confirm whether pry should be listed or the require dropped.
source "https://rubygems.org"
# gem "rails"
# Parses the downloaded article HTML (Nokogiri::HTML in the script).
gem "nokogiri"
# Converts the extracted HTML to Markdown (PandocRuby.convert in the script).
gem "pandoc-ruby"

View File

@@ -0,0 +1,17 @@
GEM
remote: https://rubygems.org/
specs:
nokogiri (1.16.4-arm64-darwin)
racc (~> 1.4)
pandoc-ruby (2.1.10)
racc (1.7.3)
PLATFORMS
arm64-darwin-22
DEPENDENCIES
nokogiri
pandoc-ruby
BUNDLED WITH
2.4.8

73
bin/elsewhere/elsewhere Executable file
View File

@@ -0,0 +1,73 @@
#!/usr/bin/env ruby
require "date"
require "fileutils"
require "net/http"
require "nokogiri"
require "pandoc-ruby"
require "pry"
require "uri"
# Directory under which each imported article gets its own subdirectory
# (holding its index.md and any images downloaded for it).
BASEDIR = "content/elsewhere"
# Fetches +uri+ with curl and saves the response body to +filename+.
#
# A file that already exists is treated as a cache hit and left untouched.
# curl is invoked with an argument vector rather than through a shell, so
# shell metacharacters in the URL or the path (image URLs are scraped from
# remote pages) are passed to curl literally instead of being interpreted.
#
# @param uri [URI, String] the resource to fetch
# @param filename [String] the path the response body is written to
# @return [void]
def download(uri, filename)
  if File.exist?(filename)
    puts "#{filename} already exists"
    return
  end

  puts "Downloading #{uri}..."
  # `-o` lets curl write the file itself (no shell redirection needed) and
  # `--` keeps a URL that starts with "-" from being parsed as an option.
  return if system("curl", "-s", "-o", filename, "--", uri.to_s)

  # Best effort: report the failure but let the caller carry on.
  warn "Failed to download #{uri}"
end
# Script entry point: every command-line argument is the URL of an article to
# import. For each one the page is downloaded (cached under tmp/), its
# `div.page-blocks` element is converted to Markdown, referenced images are
# downloaded next to the article, and an index.md with front matter is written
# to BASEDIR/<name>/.
urls = ARGV
if urls.empty?
  warn "Please supply one or more URLs"
  exit 1
end

# Scratch directory holding the downloaded article HTML between runs.
FileUtils.mkdir_p "tmp"

urls.each do |url|
  uri = URI.parse(url)
  name = File.basename(uri.path)
  filename = "tmp/#{name}.html"

  download(uri, filename)

  # Block form closes the file handle once Nokogiri has read the document.
  article = File.open(filename) { |html| Nokogiri::HTML(html) }

  content = article.at_css("div.page-blocks")
  timestamp = article.at_css("time")&.[]("datetime")

  # Check the page has the expected shape before creating any output, so an
  # unexpected page is skipped with a message instead of raising NoMethodError
  # halfway through and leaving a partial article directory behind.
  unless content && timestamp
    warn "Skipping #{url}: could not find div.page-blocks and time[datetime]"
    next
  end

  article_dir = "#{BASEDIR}/#{name}"
  FileUtils.mkdir_p article_dir

  md = PandocRuby.convert(content.to_html, from: :html, to: :markdown)

  # strip weird ::: directives
  md.gsub!(/^:{3}.*\n/, "")

  # strip trailing "\"
  md.gsub!(/\\$/, "")

  # Download each image next to the article and point the Markdown at the
  # local copy, dropping the trailing `{...}` attribute block.
  md.gsub!(/^!\[image\]\((.*?)\)\{.*?\}/m) do
    img_uri = URI.parse(Regexp.last_match(1))
    img_filename = File.basename(img_uri.path)
    download(img_uri, "#{article_dir}/#{img_filename}")
    "![image](#{img_filename})"
  end

  # The title goes inside a double-quoted YAML string, so backslashes must be
  # escaped as well as double quotes.
  title = article.css("title").text
                 .gsub(" | Viget", "")
                 .gsub(/[\\"]/) { |char| "\\#{char}" }
  date = Date.parse(timestamp).to_datetime.to_s

  front_matter = <<~FRONT_MATTER
    ---
    title: "#{title}"
    date: #{date}
    draft: false
    needs_review: true
    canonical_url: #{uri}
    ---

  FRONT_MATTER

  File.write("#{article_dir}/index.md", front_matter + md)
end