From bca6bbe4b7dd75ca95fa1bff23334360c77c321c Mon Sep 17 00:00:00 2001 From: jdutant <34026710+jdutant@users.noreply.github.com> Date: Fri, 12 Feb 2021 21:17:11 +0000 Subject: [PATCH] adds process-anyway option In standalone mode, setting a `wordcount` variable to `process-anyway` allows the pandoc process to continue in addition to counting words. --- wordcount/README.md | 30 +++++++++++++++++----- wordcount/wordcount.lua | 57 ++++++++++++++++++++++++----------------- 2 files changed, 58 insertions(+), 29 deletions(-) diff --git a/wordcount/README.md b/wordcount/README.md index e21f044f..800a387b 100644 --- a/wordcount/README.md +++ b/wordcount/README.md @@ -1,11 +1,29 @@ # wordcount -This filter counts the words and characters in the body of a document (omitting -metadata like titles and abstracts), including words in code. -It should be more accurate than `wc -w` or `wc -m` run directly on a -Markdown document, since `wc` will also count markup -characters, like the `#` in front of an ATX header, or -tags in HTML documents. +This filter counts the words and characters in the body of a document +(omitting metadata like titles and abstracts), including words in +code. It should be more accurate than `wc -w` or `wc -m` run directly +on a Markdown document, since `wc` will also count markup characters, +like the `#` in front of an ATX header, or tags in HTML documents. To run it, `pandoc --lua-filter wordcount.lua myfile.md`. The word count will be printed to stdout. + +If you want to process the document as well as print the word count, +set the variable `wordcount` to `process` (or `process-anyway` or `convert`). +This works only in conjunction with the standalone document option (`-s`). 
+This can be done through the command line: + +``` +pandoc -s -L wordcount.lua -M wordcount=process sample.md -o output.html +``` + +Or the document's metadata block: + +``` +--- +title: My Long Book +wordcount: process-anyway +--- +``` + diff --git a/wordcount/wordcount.lua b/wordcount/wordcount.lua index e3761429..1406f886 100644 --- a/wordcount/wordcount.lua +++ b/wordcount/wordcount.lua @@ -1,45 +1,56 @@ --- counts words in a document +-- counts words in a document -words = 0 +words = 0 characters = 0 characters_and_spaces = 0 +process_anyway = false -wordcount = { - Str = function(el) - -- we don't count a word if it's entirely punctuation: - if el.text:match("%P") then - words = words + 1 - end +wordcount = { + Str = function(el) + -- we don't count a word if it's entirely punctuation: + if el.text:match("%P") then + words = words + 1 + end characters = characters + utf8.len(el.text) characters_and_spaces = characters_and_spaces + utf8.len(el.text) - end, + end, Space = function(el) characters_and_spaces = characters_and_spaces + 1 end, - Code = function(el) - _,n = el.text:gsub("%S+","") - words = words + n + Code = function(el) + _,n = el.text:gsub("%S+","") + words = words + n text_nospace = el.text:gsub("%s", "") characters = characters + utf8.len(text_nospace) characters_and_spaces = characters_and_spaces + utf8.len(el.text) - end, + end, - CodeBlock = function(el) - _,n = el.text:gsub("%S+","") - words = words + n + CodeBlock = function(el) + _,n = el.text:gsub("%S+","") + words = words + n text_nospace = el.text:gsub("%s", "") characters = characters + utf8.len(text_nospace) characters_and_spaces = characters_and_spaces + utf8.len(el.text) - end -} + end +} + +-- set `process_anyway` from the `wordcount` metadata; YAML values arrive as MetaInlines, not plain strings, so stringify before comparing +function Meta(meta) + local mode = meta.wordcount and pandoc.utils.stringify(meta.wordcount) + if mode=="process-anyway" or mode=="process" or mode=="convert" then + process_anyway = true + end +end -function Pandoc(el) - -- skip metadata, just count 
body: - pandoc.walk_block(pandoc.Div(el.blocks), wordcount) - print(words .. " words in body") +function Pandoc(el) + -- skip metadata, just count body: + pandoc.walk_block(pandoc.Div(el.blocks), wordcount) + print(words .. " words in body") print(characters .. " characters in body") print(characters_and_spaces .. " characters in body (including spaces)") - os.exit(0) + if not process_anyway then + os.exit(0) + end end