From db9cfa7ecd633e3da29ccef457c3f07be95c1ebb Mon Sep 17 00:00:00 2001 From: David Eisinger Date: Wed, 20 Nov 2024 18:05:39 -0500 Subject: [PATCH] finish spelling post --- .../index.md | 103 +++++++----------- 1 file changed, 40 insertions(+), 63 deletions(-) diff --git a/content/journal/spellcheck-your-hugo-site-with-cspell/index.md b/content/journal/spellcheck-your-hugo-site-with-cspell/index.md index f16bf84..e378ff7 100644 --- a/content/journal/spellcheck-your-hugo-site-with-cspell/index.md +++ b/content/journal/spellcheck-your-hugo-site-with-cspell/index.md @@ -1,20 +1,27 @@ --- title: "Spellcheck Your Hugo Site With CSpell" -date: 2024-11-20T09:49:51-05:00 +date: 2024-11-20T18:03:32-05:00 draft: false tags: - meta +references: +- title: "The Static Site Paradox | Loris Cro's Blog" + url: https://kristoff.it/blog/static-site-paradox/ + date: 2024-10-31T03:33:40Z + file: kristoff-it-edtlns.txt --- -Bla bla bla +I edit these posts pretty carefully before publishing, but I inevitably find a misspelling or two after the fact. In the spirit of continuous improvement, I decided to see what kind of automated solutions are out there for spellchecking Markdown files, and found [CSpell][1]. It works well, but its default configuration found a ton of false positives that I had to scroll past to find the actual errors. -[5]: https://cspell.org/ +[1]: https://cspell.org/ +Fortunately, it's quite configurable, and I've gotten it to where it only flags actual misspelled words. Here's how. + ### 1. Install CSpell -Assuming a modern version of Node.js (>= 18), you can use [npx][1] to download and run CSpell in a single command: +Assuming a modern version of Node.js (>= 18), you can use [npx][2] to download and run CSpell in a single command: ```sh npx cspell content/**/*.md @@ -22,7 +29,7 @@ npx cspell content/**/*.md You'll see a ton of spelling errors -- ignore them for now. 
-[1]: https://docs.npmjs.com/cli/v10/commands/npx +[2]: https://docs.npmjs.com/cli/v10/commands/npx ### 2. Add config file @@ -40,13 +47,15 @@ Next, let's create a basic config file. In the root of your site, put the follow ### 3. Add additional languages -My site (especially the stuff in [/elsewhere][2] that I've mirrored from my company's website) has code snippets that the English dictionary doesn't recognize. Fortunately, CSpell ships with a bunch of [additional dictionaries][3]. Adding `"ruby"`, `"golang"`, and `"java"` to the `"dictionaries"` array makes a bunch of misspellings go away. +My site (especially the stuff in [/elsewhere][3] that I've mirrored from my company's website) has code snippets that the English dictionary doesn't recognize. Fortunately, CSpell ships with a bunch of [additional dictionaries][4]. Adding `"ruby"`, `"golang"`, and `"java"` to the `"dictionaries"` array makes a bunch of misspellings go away. -[2]: /elsewhere -[3]: https://github.com/streetsidesoftware/cspell-dicts/tree/main/dictionaries +[3]: /elsewhere +[4]: https://github.com/streetsidesoftware/cspell-dicts/tree/main/dictionaries ### 4. Ignore front matter +This first one may or may not apply to your site, so feel free to ignore, but I see a lot of false positives in the [front matter][5] of my posts, mostly around the lists of [references][6]. To ignore the front matter section entirely, add the following to your config file (credit to [this helpful GitHub comment][7]): + ```json "patterns": [ { @@ -64,11 +73,16 @@ My site (especially the stuff in [/elsewhere][2] that I've mirrored from my comp ] ``` -[6]: https://gohugo.io/content-management/front-matter/ +Note that you'll no longer catch misspellings in post titles, so it might make sense to use a more targeted regular expression. 
+ +[5]: https://gohugo.io/content-management/front-matter/ +[6]: https://git.sr.ht/~dce/davideisinger.com/tree/main/item/content/journal/dispatch-21-november-2024/index.md?view-source#L7-11 [7]: https://github.com/streetsidesoftware/cspell/discussions/3456#discussioncomment-3438647 ### 5. Ignore proper nouns +I also see a lot of proper nouns being flagged as misspellings, so I decided to just ignore any word that begins with a capital letter. Create a new entry in the `"patterns"` array: + ```json { "name": "proper_nouns", @@ -76,6 +90,8 @@ My site (especially the stuff in [/elsewhere][2] that I've mirrored from my comp } ``` +That's any non-word character (or an underscore), followed by a capital letter, followed by one or more non-space characters. I'm sure that's not perfect, but it's good enough for my content. Add the new pattern to the `"ignoreRegExpList"`: + ```json "languageSettings": [ { @@ -90,12 +106,18 @@ My site (especially the stuff in [/elsewhere][2] that I've mirrored from my comp ### 6. Fix spelling -### 7. Create custom dictionary +Now comes the hard part: run CSpell again (`npx cspell content/**/*.md`), look at all the misspellings it finds, and fix all the ones you consider to be valid. Computers can't help us here, friend. + +### 7. Create a custom dictionary + +Now we'll add all the unrecognized words to a custom dictionary so that CSpell will stop flagging them. First, create the list of words: ```sh -npx cspell --words-only --unique content/**/*.md >> .dictionary +npx cspell --words-only --unique content/**/*.md | sort > .dictionary ``` +Then add a new `"dictionaryDefinitions"` array in your config file: + ```json "dictionaryDefinitions": [ { @@ -106,68 +128,23 @@ npx cspell --words-only --unique content/**/*.md >> .dictionary ], ``` -```json -"dictionaries": [ - "english", - "ruby", - "golang", - "exceptions" -] -``` +Finally, add `"exceptions"` to the `"dictionaries"` array. At this point, CSpell should find zero misspellings. 
To add new exceptions to the list in the future, you can run: ```sh npx cspell --words-only --unique content/**/*.md >> .dictionary sort -o .dictionary .dictionary ``` - ### 8. Add to build pipeline +With all this stuff set up, it's dead simple to add spellchecking to the build pipeline to ensure you never publish misspellings. As long as your job runner has `npx` available, you can just add a build step that runs the same `npx cspell content/**/*.md` command you've been running locally. [Here's where I do it.][8] + [8]: https://git.sr.ht/~dce/davideisinger.com/tree/main/item/.build.yml#L23-24 --- -[Here's the final `.cspell.json` config file.][4] +[Here's the final `.cspell.json` config file.][9] I'm super happy with this setup -- it's already catching misspellings in the process of writing these words. I'm reminded of [a post][10] I read a few weeks ago, about the irony of how good and simple website publishing has become for technical people, and how complex it is for the less technically inclined. Imagine trying to accomplish this same functionality in a typical CMS -- [it would not work well, if it worked at all][11]. 
-[4]: https://git.sr.ht/~dce/davideisinger.com/tree/main/item/.cspell.json - ---- - -```json -{ - "$schema": "https://raw.githubusercontent.com/streetsidesoftware/cspell/main/cspell.schema.json", - "version": "0.2", - "dictionaryDefinitions": [ - { - "name": "exceptions", - "path": ".dictionary", - "addWords": true - } - ], - "dictionaries": [ - "english", - "ruby", - "golang", - "exceptions" - ], - "patterns": [ - { - "name": "front_matter", - "pattern": "/^(-{3}|[+]{3})$(\\s|\\S)*?^\\1$/gm" - }, - { - "name": "proper_nouns", - "pattern": "/[\\W_][A-Z][\\S]+/g" - } - ], - "languageSettings": [ - { - "languageId": "markdown", - "ignoreRegExpList": [ - "front_matter", - "proper_nouns" - ] - } - ] -} -``` +[9]: https://git.sr.ht/~dce/davideisinger.com/tree/main/item/.cspell.json +[10]: https://kristoff.it/blog/static-site-paradox/ +[11]: https://wordpress.org/support/topic/garbage-170/