--- /dev/null
+#!/usr/bin/awk -f
+
+# Initialise all parser state before the first input record is read.
+BEGIN {
+    # YAML front matter: currently inside it / already consumed.
+    in_yaml = yaml_done = 0
+
+    # Callout (blockquote "[!TYPE]") state.
+    in_callout = 0
+    callout_title = callout_glyph = ""
+    in_callout_para = in_callout_para_rs = 0
+
+    # List and paragraph state; indent_step is the number of indent
+    # characters that make up one nesting level.
+    current_level = 0
+    indent_step = 2
+    in_bullet_paragraph = in_paragraph = 0
+
+    # Inline font state carried across lines by fmt_inline_stream().
+    in_bold = in_ital = in_bi = in_code = 0
+
+    # Buffered pipe-table state.
+    in_table = 0
+    table_rowc = 0
+}
+
+# Return s without leading or trailing blanks, tabs, CRs and newlines.
+function trim(s) {
+    gsub(/^[ \t\r\n]+|[ \t\r\n]+$/, "", s)
+    return s
+}
+# Make s safe to place inside a double-quoted roff macro argument.
+#
+# Each '"' becomes the special character \(dq.  A backslash-escaped quote
+# must NOT be used here: in roff the sequence backslash + double quote
+# starts a comment, so the rest of the macro line would be discarded.
+# Returns the converted copy; the caller's string is not modified.
+function escape_quotes(s) {
+    # "\\\\(dq" is the awk string \\(dq, which gsub() emits as \(dq.
+    gsub(/"/, "\\\\(dq", s)
+    return s
+}
+
+# Map a callout type keyword (the TYPE in "> [!TYPE] title") to the roff
+# text used as the .IP label of the callout.
+#
+# Unrecognised types fall back to a bullet so the caller never emits
+# ".IP" with an empty label (which would turn the following indent
+# argument into the label).
+function map_glyph(type) {
+    if (type == "CHECK")   return "\\[OK]"
+    if (type == "NOTE")    return "!"
+    if (type == "TIP")     return "\\[rh]"
+    if (type == "WARNING") return "!!"
+    if (type == "INFO")    return "\\[rh]"
+    if (type == "SUCCESS") return "\\o'\\[pc]\\[ci]'"
+    if (type == "SUMMARY") return "\\[sc]"
+    if (type == "BIB")     return "\\[ps]"
+    return "\\[bu]"
+}
+
+# Print the document preamble from the global yaml[] front-matter table.
+# Only keys with a non-empty value produce output: "style" loads a macro
+# file, the footer keys append to the LF/RF strings, and the remaining
+# keys each print a macro line followed by the value.
+function emit_header(    s, keys, macros, count, idx) {
+    s = yaml["style"]
+    if (s != "") printf ".mso %s\n", s
+
+    s = yaml["left_footer"]
+    if (s != "") printf ".as LF \\v'0.5' %s\n", s
+
+    s = yaml["right_footer"]
+    if (s != "") printf ".as RF Revised %s\n", s
+
+    # Front-matter key -> macro emitted on the line before its value.
+    count = split("title author date institution", keys, " ")
+    split("TL AU ND AI", macros, " ")
+    for (idx = 1; idx <= count; idx++) {
+        s = yaml[keys[idx]]
+        if (s != "") {
+            print "." macros[idx]
+            print s
+        }
+    }
+}
+
+# Protect a text line from being read as a roff request: when s begins
+# with "." or "'", prefix it with the zero-width escape \&.
+function neutralize_leading_control(s,    lead) {
+    lead = substr(s, 1, 1)
+    if (lead == "." || lead == "'") return "\\&" s
+    return s
+}
+
+# Convert Markdown inline markers in s to roff font escapes, keeping the
+# open/closed state in the globals in_code, in_bi, in_bold and in_ital so
+# that a span may start on one call and end on a later one.
+#
+#   `      toggles \f(CW / \fP
+#   ***    toggles \fB\fI / \fR   (only opens when no other style is open)
+#   **     toggles \fB / \fR      (ignored as a pair inside *** or *)
+#   *      toggles \fI / \fR      (ignored inside *** or **)
+#
+# Inside a code span asterisks are copied literally.  The result is passed
+# through neutralize_leading_control() before being returned.
+function fmt_inline_stream(s, i, n, out, seg) {
+    out = ""
+    n = length(s)
+    i = 1
+
+    while (i <= n) {
+        seg = substr(s, i, 1)
+
+        if (seg == "`") {
+            out = out (in_code ? "\\fP" : "\\f(CW")
+            in_code = !in_code
+            i++
+            continue
+        }
+
+        if (!in_code && seg == "*") {
+            if (substr(s, i, 3) == "***") {
+                if (in_bi) {
+                    out = out "\\fR"
+                    in_bi = 0
+                } else if (!in_bold && !in_ital) {
+                    out = out "\\fB\\fI"
+                    in_bi = 1
+                }
+                # The three markers are consumed even when nothing toggled.
+                i += 3
+                continue
+            }
+            if (substr(s, i, 2) == "**" && !in_bi && !in_ital) {
+                out = out (in_bold ? "\\fR" : "\\fB")
+                in_bold = !in_bold
+                i += 2
+                continue
+            }
+            if (!in_bi && !in_bold) {
+                out = out (in_ital ? "\\fR" : "\\fI")
+                in_ital = !in_ital
+                i++
+                continue
+            }
+        }
+
+        out = out seg
+        i++
+    }
+    return neutralize_leading_control(out)
+}
+
+# Stateless counterpart of fmt_inline_stream(): convert the Markdown
+# inline markers in s to roff font escapes, with every call starting from
+# "no style open".
+#
+# The style flags are local variables (extra parameters), so calling this
+# function neither reads nor creates any global state.
+#
+#   `      toggles \f(CW / \fP
+#   ***    toggles \fB\fI / \fR   (only opens when no other style is open)
+#   **     toggles \fB / \fR      (ignored as a pair inside *** or *)
+#   *      toggles \fI / \fR      (ignored inside *** or **)
+#
+# The result is passed through neutralize_leading_control().
+function fmt_inline_once(s, i, n, out, bold, ital, bi, code, ch) {
+    bold = ital = bi = code = 0
+    out = ""
+    n = length(s)
+    i = 1
+
+    while (i <= n) {
+        ch = substr(s, i, 1)
+
+        if (ch == "`") {
+            if (!code) { out = out "\\f(CW"; code = 1 }
+            else       { out = out "\\fP";   code = 0 }
+            i++
+            continue
+        }
+
+        # Asterisks are only markup outside a code span.
+        if (!code) {
+            if (i + 2 <= n && substr(s, i, 3) == "***") {
+                if (!bi && !bold && !ital) { out = out "\\fB\\fI"; bi = 1 }
+                else if (bi)               { out = out "\\fR";     bi = 0 }
+                i += 3
+                continue
+            }
+            if (i + 1 <= n && substr(s, i, 2) == "**" && !bi && !ital) {
+                if (!bold) { out = out "\\fB"; bold = 1 }
+                else       { out = out "\\fR"; bold = 0 }
+                i += 2
+                continue
+            }
+            if (ch == "*" && !bi && !bold) {
+                if (!ital) { out = out "\\fI"; ital = 1 }
+                else       { out = out "\\fR"; ital = 0 }
+                i++
+                continue
+            }
+        }
+
+        out = out ch
+        i++
+    }
+    return neutralize_leading_control(out)
+}
+
+# Print s with Markdown links turned into .pdfhref macro lines, using the
+# stateless formatter fmt_inline_once() for the text around the links.
+#
+# Text before a link, the link macro, and any trailing text are each
+# printed on their own line so the macro starts in column 1.
+# split_one_link() returns its pieces in an array with the keys "pre",
+# "mac" and "suf"; parts is the local array that receives them.
+function emit_with_links_and_format(s,    parts) {
+    if (!has_md_link(s)) {
+        print fmt_inline_once(s)
+        return
+    }
+    while (split_one_link(s, parts)) {
+        if (length(parts["pre"])) print fmt_inline_once(parts["pre"])
+        print parts["mac"]
+        s = parts["suf"]
+    }
+    if (length(s)) print fmt_inline_once(s)
+}
+
+# Return the 1-based position of the first "[label](url)" link in s, or 0
+# when there is none.  Labels may not contain brackets and URLs may not
+# contain parentheses.
+function has_md_link(s) {
+    match(s, /\[[^][]+\]\([^()]+\)/)
+    return RSTART
+}
+
+# Locate the first Markdown link "[label](url)" in s and describe it in
+# the array out:
+#   out["pre"]  text before the link
+#   out["mac"]  the ".pdfhref W -D url "label"" macro line
+#   out["suf"]  text after the link
+# The label is formatted with fmt_inline_once() and quote-escaped; the URL
+# has surrounding whitespace removed.  Returns 1 when a link was found and
+# 0 otherwise; out is emptied in both cases.
+function split_one_link(s, out, m, label, url) {
+    delete out
+    if (!match(s, /\[([^][]+)\]\(([^)]+)\)/, m)) return 0
+
+    # None of the helpers below call match(), so RSTART/RLENGTH still
+    # describe the link when the substr() calls run.
+    label = escape_quotes(fmt_inline_once(m[1]))
+    url = trim(m[2])
+
+    out["pre"] = substr(s, 1, RSTART-1)
+    out["mac"] = ".pdfhref W -D " url " \"" label "\""
+    out["suf"] = substr(s, RSTART+RLENGTH)
+    return 1
+}
+
+# Print s with Markdown links turned into .pdfhref macro lines, formatting
+# the surrounding text with the stateful fmt_inline_stream() so font spans
+# may continue across links and lines.  Each piece is printed on its own
+# line; the macro line therefore always starts in column 1.
+function emit_with_links_and_stream_format(s, seg, parts) {
+    if (!has_md_link(s)) {
+        print fmt_inline_stream(s)
+        return
+    }
+
+    for (;;) {
+        if (!split_one_link(s, parts)) break
+        seg = parts["pre"]
+        if (length(seg)) print fmt_inline_stream(seg)
+        print parts["mac"]
+        s = parts["suf"]
+    }
+
+    # Whatever follows the last link.
+    if (length(s)) print fmt_inline_stream(s)
+}
+
+# Return 1 when line looks like a Markdown table row: optional leading
+# blanks, a "|", anything, and a closing "|" followed only by blanks.
+# Returns 0 otherwise.
+function is_pipe_row(line) {
+    if (line ~ /^[ \t]*\|.*\|[ \t]*$/) return 1
+    return 0
+}
+
+# Return 1 when line is a table header divider: a pipe row in which every
+# cell, once trimmed, is three or more dashes with an optional ":" at
+# either end.  Returns 0 otherwise.
+function is_divider_row(line, s, parts, n, i, cell) {
+    if (!is_pipe_row(line)) return 0
+
+    # Drop the outer pipes so split() yields only the cells.
+    s = line
+    sub(/^[ \t]*\|/, "", s)
+    sub(/\|[ \t]*$/, "", s)
+
+    n = split(s, parts, /\|/)
+    i = 0
+    while (++i <= n) {
+        cell = trim(parts[i])
+        if (cell !~ /^:?-{3,}:?$/) return 0
+    }
+    return 1
+}
+
+# Return the number of cells in a pipe row: one more than the number of
+# "|" separators left after the outer pipes have been removed.
+function count_table_cols(line, s) {
+    s = line
+    sub(/^[ \t]*\|/, "", s)
+    sub(/\|[ \t]*$/, "", s)
+    # Replacing each pipe with itself leaves s unchanged; only the count
+    # returned by gsub() is of interest.
+    return 1 + gsub(/\|/, "\\&", s)
+}
+
+# Split a pipe row into trimmed cell texts stored in cells[1..n] and
+# return n.  Existing entries of cells beyond n are left untouched, so
+# callers clear the array first.
+function split_table_row(line, cells, s, parts, n, i) {
+    s = line
+    sub(/^[ \t]*\|/, "", s)
+    sub(/\|[ \t]*$/, "", s)
+
+    n = split(s, parts, /\|/)
+    i = 0
+    while (++i <= n) {
+        cells[i] = trim(parts[i])
+    }
+    return n
+}
+
+# Fill align[1..ncols] with the tbl column letters implied by a Markdown
+# divider row:
+#   ":---:"  ->  "c"
+#   "---:"   ->  "r"
+#   others   ->  "l"
+# When divider_line is not a divider row every column is "l".  Columns
+# beyond the divider's own cell count are also "l".
+#
+# n is declared as a local here; without that the assignment below would
+# overwrite the caller's global n.
+function parse_alignment(divider_line, ncols, align, tmp, parts, i, cell, n) {
+    for (i = 1; i <= ncols; i++) align[i] = "l"
+    if (!is_divider_row(divider_line)) return
+
+    tmp = divider_line
+    sub(/^[ \t]*\|/, "", tmp)
+    sub(/\|[ \t]*$/, "", tmp)
+    n = split(tmp, parts, /\|/)
+
+    for (i = 1; i <= ncols; i++) {
+        # Missing divider cells behave like a plain "---".
+        cell = (i <= n) ? trim(parts[i]) : "---"
+        if (cell ~ /^:.*:$/) align[i] = "c"
+        else if (cell ~ /:$/) align[i] = "r"
+        else align[i] = "l"
+    }
+}
+
+# Build the tbl format line body from align[1..ncols]: the column letters
+# separated by single spaces, with "|." appended after the last column.
+# Returns "" when ncols is less than 1.
+function build_tbl_format_from_align(ncols, align, i, spec) {
+    spec = ""
+    i = 0
+    while (++i <= ncols) {
+        if (i == ncols) spec = spec align[i] "|."
+        else            spec = spec align[i] " "
+    }
+    return spec
+}
+
+# Prepare cell text for a tbl data line: every Markdown link is replaced
+# by its label (formatted with fmt_inline_once()), then the whole string
+# is run through fmt_inline_stream().  URLs are discarded.
+function strip_links_to_label_and_format(s, m, pre, lab, url) {
+    for (;;) {
+        if (!match(s, /\[([^][]+)\]\(([^)]+)\)/, m)) break
+        pre = substr(s, 1, RSTART-1)
+        # url is reused to hold the text that follows the link.
+        url = substr(s, RSTART+RLENGTH)
+        lab = fmt_inline_once(m[1])
+        s = pre lab url
+    }
+    return fmt_inline_stream(s)
+}
+
+# Return cells[1..n] joined with "|" (the tbl tab character chosen by the
+# table emitter).  Returns "" when n is less than 1.
+function join_cells(cells, n, i, out) {
+    if (n < 1) return ""
+    out = "" cells[1]
+    for (i = 2; i <= n; i++) {
+        out = out "|" cells[i]
+    }
+    return out
+}
+
+# Emit ".RE" for every indent that is still open -- the callout paragraph
+# block and all nested list levels -- and reset the matching state, so
+# that .RS/.RE always stay balanced in the output.
+function close_indents() {
+    if (in_callout_para_rs) {
+        print ".RE"
+        in_callout_para_rs = 0
+    }
+    while (current_level > 0) { print ".RE"; current_level-- }
+}
+
+# Emit the buffered Markdown table (table_rows[1..table_rowc]) as a tbl
+# block inside a keep, then reset the table state.
+#
+# Row 1 is the header.  Row 2 is used for column alignment when it is a
+# divider row; otherwise every column is left-aligned and row 2 is data.
+# Short rows are padded with empty cells up to the widest row.
+function flush_table(    header_line, divider_line, data_start, maxcols, cells, align, i, r, c, n) {
+    if (table_rowc < 1) { in_table = 0; return }
+
+    header_line = table_rows[1]
+    divider_line = ""
+    data_start = 2
+    if (table_rowc >= 2 && is_divider_row(table_rows[2])) {
+        divider_line = table_rows[2]
+        data_start = 3
+    }
+
+    # The table is as wide as its widest row.
+    maxcols = 0
+    for (i = 1; i <= table_rowc; i++) {
+        c = count_table_cols(table_rows[i])
+        if (c > maxcols) maxcols = c
+    }
+
+    split("", align)
+    parse_alignment(divider_line, maxcols, align)
+
+    print ".KS"
+    print ".TS"
+    print "tab(|) center;"
+    print "|" build_tbl_format_from_align(maxcols, align)
+    print "="
+
+    # Header row, set in bold.
+    split("", cells)
+    n = split_table_row(header_line, cells)
+    for (i = n + 1; i <= maxcols; i++) cells[i] = ""
+    for (i = 1; i <= maxcols; i++) {
+        cells[i] = "\\fB" strip_links_to_label_and_format(cells[i]) "\\fR"
+    }
+    print join_cells(cells, maxcols)
+    print "_"
+
+    # Data rows; stray divider rows are skipped.
+    for (r = data_start; r <= table_rowc; r++) {
+        if (is_divider_row(table_rows[r])) continue
+        split("", cells)
+        n = split_table_row(table_rows[r], cells)
+        for (i = n + 1; i <= maxcols; i++) cells[i] = ""
+        for (i = 1; i <= maxcols; i++) {
+            cells[i] = strip_links_to_label_and_format(cells[i])
+        }
+        print join_cells(cells, maxcols)
+    }
+
+    print "="
+    print ".TE"
+    print ".KE"
+
+    in_table = 0
+    table_rowc = 0
+}
+
+# Main rule: classify each input line and emit the matching roff.
+{
+    line = $0
+
+    # ================== YAML front matter ==================
+    # Only recognised when the very first line is "---".
+    if (!yaml_done && NR == 1 && line ~ /^---[ \t]*$/) { in_yaml = 1; next }
+
+    if (in_yaml) {
+        if (line ~ /^---[ \t]*$/) { in_yaml = 0; yaml_done = 1; emit_header(); next }
+        if (match(line, /^([A-Za-z0-9_-]+):[ \t]*(.*)$/, m)) {
+            key = m[1]; val = trim(m[2]); yaml[key] = val
+        }
+        next
+    }
+
+    # A line that is not a table row ends any buffered table.  Flushing
+    # here, before callout handling, keeps the table ahead of whatever the
+    # current line produces.
+    if (in_table && !is_pipe_row(line)) flush_table()
+
+    # ================== call-out handling ==================
+    # "> [!TYPE] title" opens a callout.
+    if (match(line, /^> \[!([A-Z]+)\][ \t]+(.*)$/, mh)) {
+        type = mh[1]
+        callout_title = mh[2]
+        callout_glyph = map_glyph(type)
+
+        # Close indents left open by a preceding list or callout before
+        # the state below is reset.
+        close_indents()
+
+        printf ".IP %s 3\n", callout_glyph
+        print ".B"
+        printf ".UL \"%s\"\n", escape_quotes(fmt_inline_once(callout_title))
+        print ".R"
+
+        in_callout = 1
+        current_level = 0
+        in_bullet_paragraph = 0
+        in_callout_para = 0
+        in_callout_para_rs = 0
+        next
+    }
+
+    # "> - item": bullet inside a callout; levels start at 1 so the list
+    # is indented inside the callout body.
+    if (in_callout && match(line, /^> ([ \t]*)-[ \t]+(.*)$/, mb)) {
+        indent = mb[1]
+        text = mb[2]
+
+        # Leave the paragraph block, closing the .RS that opened it.
+        if (in_callout_para_rs) {
+            print ".RE"
+            in_callout_para_rs = 0
+            in_callout_para = 0
+        }
+
+        gsub(/\t/, " ", indent)
+
+        nspaces = length(indent)
+        new_level = int(nspaces / indent_step) + 1
+        if (new_level < 1) new_level = 1
+
+        while (current_level < new_level) { print ".RS"; current_level++ }
+        while (current_level > new_level) { print ".RE"; current_level-- }
+
+        if (new_level == 1) print ".IP \\[bu] 2"
+        else if (new_level == 2) print ".IP \\[hy] 2"
+        else if (new_level == 3) print ".IP \\[pc] 2"
+        else print ".IP \\[bu] 2"
+
+        emit_with_links_and_stream_format(text)
+        in_bullet_paragraph = 1
+        in_callout_para = 0
+        next
+    }
+
+    # Continuation line of the current callout bullet.
+    if (in_callout && in_bullet_paragraph && match(line, /^> +([^-\*0-9].*)$/, cont)) {
+        emit_with_links_and_stream_format(cont[1])
+        next
+    }
+
+    # Empty "> " line: ends the current paragraph or bullet text.
+    if (in_callout && line ~ /^> *$/) {
+        if (in_callout_para_rs) {
+            print ".RE"
+            in_callout_para_rs = 0
+        }
+        in_callout_para = 0
+        in_bullet_paragraph = 0
+        next
+    }
+
+    # Plain text inside a callout.  Lines that look like "*" bullets or
+    # numbered items are not handled here and fall through.
+    if (in_callout && match(line, /^> +(.*)$/, mtxt)) {
+        if (mtxt[1] !~ /^[-\*][ \t]+/ && mtxt[1] !~ /^[0-9]+\.[ \t]+/) {
+            while (current_level > 0) { print ".RE"; current_level-- }
+            if (!in_callout_para_rs) {
+                print ".RS"
+                print ".LP"
+                in_callout_para_rs = 1
+                in_callout_para = 1
+                in_bullet_paragraph = 0
+            }
+            emit_with_links_and_stream_format(mtxt[1])
+            next
+        }
+    }
+
+    # First line that does not start with ">" ends the callout.  There is
+    # deliberately no "next": the line itself may be a heading, table row,
+    # bullet or paragraph and must still be processed below.
+    if (in_callout && line !~ /^>/) {
+        close_indents()
+        in_callout = 0
+        in_bullet_paragraph = 0
+        in_callout_para = 0
+        in_paragraph = 0
+    }
+
+    # ================== table handling (buffered) ==================
+    # Rows are collected until a non-row line (or end of input) arrives.
+    if (is_pipe_row(line)) {
+        if (!in_table) {
+            in_table = 1
+            table_rowc = 0
+        }
+        table_rows[++table_rowc] = line
+        next
+    }
+
+    # ================== Markdown classics (headings, lists, paragraphs) ==================
+    if (match(line, /^#([#]*)[ \t]+(.*)$/, mh2)) {
+        # A heading ends any open list and the running paragraph.
+        close_indents()
+        in_bullet_paragraph = 0
+        in_paragraph = 0
+
+        level = 1 + length(mh2[1])
+        printf ".HEADSTART %d\n%s\n", level, mh2[2]
+        print ".HEADEND"
+        next
+    }
+
+    # Top-level bullet list; nesting level comes from the indent width.
+    if (match(line, /^([ \t]*)-[ \t]+(.*)$/, mb)) {
+        indent = mb[1]
+        text = mb[2]
+
+        gsub(/\t/, " ", indent)
+
+        nspaces = length(indent)
+        new_level = int(nspaces / indent_step)
+        if (new_level < 0) new_level = 0
+
+        while (current_level < new_level) { print ".RS"; current_level++ }
+        while (current_level > new_level) { print ".RE"; current_level-- }
+
+        if (new_level == 0) print ".IP \\[bu] 2"
+        else if (new_level == 1) print ".IP \\[hy] 2"
+        else if (new_level == 2) print ".IP \\[pc] 2"
+        else print ".IP \\[bu] 2"
+
+        emit_with_links_and_stream_format(text)
+        in_bullet_paragraph = 1
+        next
+    }
+
+    # Indented continuation of the current bullet.
+    if (in_bullet_paragraph && match(line, /^ +([^-\*0-9].*)$/, cont)) {
+        emit_with_links_and_stream_format(cont[1])
+        next
+    }
+
+    # A completely empty line ends the running paragraph.
+    if (line == "") in_paragraph = 0
+
+    if (line !~ /^[ \t]*$/) {
+        if (in_bullet_paragraph) {
+            while (current_level > 0) { print ".RE"; current_level-- }
+            in_bullet_paragraph = 0
+        }
+        # The first line of a paragraph is preceded by the .PARA macro.
+        if (!in_paragraph) {
+            print ".PARA"
+            in_paragraph = 1
+        }
+        emit_with_links_and_stream_format(line)
+        next
+    }
+}
+
+# End of input: emit a table that ran to the last line and close every
+# indent that is still open.
+END {
+    if (in_table) flush_table()
+    close_indents()
+}