#!/usr/bin/awk -f
#
# md2ms -- convert a small Markdown dialect into groff input.
#
# Provenance (from the commit this file was extracted from):
#   Author:  Tucker Johnson
#   Date:    Thu, 22 Jan 2026 10:46:28 +0000 (-0500)
#   Subject: init
#   Commit:  273478362e78b6da8bd76f1a0d52bf20e736fc72
#   Source:  https://git.newer.systems/?a=commitdiff_plain;h=HEAD;p=md2ms.git
#
# Supported input:
#   * a leading YAML-style front-matter block delimited by "---" lines
#     (keys read: style, left_footer, right_footer, title, author, date,
#     institution)
#   * "> [!TYPE] Title" call-outs with nested "-" bullets and paragraphs
#   * pipe tables with an optional ":---:" alignment row
#   * "#" headings, "-" bullet lists, plain paragraphs
#   * inline `code`, *italic*, **bold**, ***bold italic*** and [label](url)
#
# Output uses .TL/.AU/.AI/.ND/.IP/.RS/.RE/.LP/.KS/.KE plus tbl(1) and
# .pdfhref.  .HEADSTART, .HEADEND and .PARA are not defined here --
# presumably they come from the file named by the "style" key (loaded with
# .mso); confirm against that style file.
#
# NOTE: the three-argument form of match() is a GNU awk extension, so the
# "awk" on PATH must be gawk.

BEGIN {
    in_yaml = 0; yaml_done = 0
    in_callout = 0
    callout_title = ""; callout_glyph = ""
    current_level = 0          # number of .RS currently open for list nesting
    in_bullet_paragraph = 0
    in_paragraph = 0
    indent_step = 2            # spaces of indentation per list nesting level
    in_callout_para = 0
    in_callout_para_rs = 0     # 1 while a call-out paragraph's .RS is open
    # Inline-format state for fmt_inline_stream(); deliberately global so
    # that emphasis opened on one input line can be closed on a later one.
    in_bold = 0
    in_ital = 0
    in_bi = 0
    in_code = 0
    in_table = 0
    table_rowc = 0
}

# Strip leading and trailing blanks, tabs, CRs and newlines from s.
function trim(s) {
    sub(/^[ \t\r\n]+/, "", s)
    sub(/[ \t\r\n]+$/, "", s)
    return s
}

# Make s safe to place inside a double-quoted roff macro argument.
# A literal backslash-quote must NOT be used: roff reads \" as the start of
# a comment and would drop the rest of the line.  \[dq] is the double-quote
# glyph and is safe inside arguments.
function escape_quotes(s) {
    gsub(/"/, "\\[dq]", s)
    return s
}

# Map a call-out type (the word inside "[!...]") to the glyph used as the
# .IP label.  Unknown types fall back to a bullet so that the .IP request
# always receives a label argument.
function map_glyph(type) {
    if (type == "CHECK")   return "\\[OK]"
    if (type == "NOTE")    return "!"
    if (type == "TIP")     return "\\[rh]"
    if (type == "WARNING") return "!!"
    if (type == "INFO")    return "\\[rh]"
    if (type == "SUCCESS") return "\\o'\\[pc]\\[ci]'"
    if (type == "SUMMARY") return "\\[sc]"
    if (type == "BIB")     return "\\[ps]"
    return "\\[bu]"
}

# Glyph for a list item at 1-based nesting depth: bullet, hyphen, centred
# dot, then bullet again for anything deeper.
function bullet_glyph(depth) {
    if (depth == 2) return "\\[hy]"
    if (depth == 3) return "\\[pc]"
    return "\\[bu]"
}

# Width of a bullet's leading whitespace in spaces.  A tab counts as one
# full nesting level (indent_step spaces) so tab-indented lists nest too.
function indent_width(indent,    pad) {
    pad = sprintf("%" indent_step "s", "")
    gsub(/\t/, pad, indent)
    return length(indent)
}

# Emit one .RE for every list level that is still open.
function close_list_levels() {
    while (current_level > 0) { print ".RE"; current_level-- }
}

# Emit the document header from the collected front-matter values.
# Called once, when the closing "---" of the front matter is read.
function emit_header() {
    if (yaml["style"] != "")        printf ".mso %s\n", yaml["style"]
    if (yaml["left_footer"] != "")  printf ".as LF \\v'0.5' %s\n", yaml["left_footer"]
    if (yaml["right_footer"] != "") printf ".as RF Revised %s\n", yaml["right_footer"]
    if (yaml["title"] != "")        { print ".TL"; print yaml["title"] }
    if (yaml["author"] != "")       { print ".AU"; print yaml["author"] }
    if (yaml["date"] != "")         { print ".ND"; print yaml["date"] }
    if (yaml["institution"] != "")  { print ".AI"; print yaml["institution"] }
}

# Prefix s with \& when it starts with "." or "'", so roff does not treat
# the text line as a request.
function neutralize_leading_control(s) {
    if (s ~ /^[.']/) s = "\\&" s
    return s
}

# Translate inline Markdown emphasis/code markers in s to font escapes.
# Uses the GLOBAL in_code/in_bold/in_ital/in_bi flags, so an emphasis span
# may start on one call and end on a later one.
function fmt_inline_stream(s,    i, n, out) {
    out = ""
    n = length(s)
    i = 1

    while (i <= n) {
        if (substr(s, i, 1) == "`") {
            if (!in_code) { out = out "\\f(CW"; in_code = 1 }
            else          { out = out "\\fP";   in_code = 0 }
            i++
            continue
        }

        # Inside a code span asterisks are literal.
        if (!in_code) {
            if (i+2 <= n && substr(s, i, 3) == "***") {
                if (!in_bi && !in_bold && !in_ital) { out = out "\\fB\\fI"; in_bi = 1 }
                else if (in_bi)                     { out = out "\\fR";     in_bi = 0 }
                i += 3
                continue
            }
            if (i+1 <= n && substr(s, i, 2) == "**" && !in_bi && !in_ital) {
                if (!in_bold) { out = out "\\fB"; in_bold = 1 }
                else          { out = out "\\fR"; in_bold = 0 }
                i += 2
                continue
            }
            if (substr(s, i, 1) == "*" && !in_bi && !in_bold) {
                if (!in_ital) { out = out "\\fI"; in_ital = 1 }
                else          { out = out "\\fR"; in_ital = 0 }
                i++
                continue
            }
        }

        out = out substr(s, i, 1)
        i++
    }
    return neutralize_leading_control(out)
}

# Same translation as fmt_inline_stream(), but with state that is local to
# this call: nothing carries over between calls and no globals are touched.
# Used for self-contained fragments such as link labels and titles.
function fmt_inline_once(s,    i, n, out, b, it, bi, c) {
    b = 0; it = 0; bi = 0; c = 0
    out = ""
    n = length(s)
    i = 1

    while (i <= n) {
        if (substr(s, i, 1) == "`") {
            if (!c) { out = out "\\f(CW"; c = 1 }
            else    { out = out "\\fP";   c = 0 }
            i++
            continue
        }

        if (!c) {
            if (i+2 <= n && substr(s, i, 3) == "***") {
                if (!bi && !b && !it) { out = out "\\fB\\fI"; bi = 1 }
                else if (bi)          { out = out "\\fR";     bi = 0 }
                i += 3
                continue
            }
            if (i+1 <= n && substr(s, i, 2) == "**" && !bi && !it) {
                if (!b) { out = out "\\fB"; b = 1 }
                else    { out = out "\\fR"; b = 0 }
                i += 2
                continue
            }
            if (substr(s, i, 1) == "*" && !bi && !b) {
                if (!it) { out = out "\\fI"; it = 1 }
                else     { out = out "\\fR"; it = 0 }
                i++
                continue
            }
        }

        out = out substr(s, i, 1)
        i++
    }
    return neutralize_leading_control(out)
}

# Return non-zero when s contains a Markdown link "[label](url)".
function has_md_link(s) { return match(s, /\[[^][]+\]\([^()]+\)/) }

# Split s at its first Markdown link.
# On success returns 1 and fills out["pre"] (text before the link),
# out["mac"] (a complete ".pdfhref W" request line) and out["suf"] (text
# after the link).  Returns 0, with out emptied, when s has no link.
function split_one_link(s, out,    m, label, url) {
    delete out
    if (match(s, /\[([^][]+)\]\(([^)]+)\)/, m)) {
        out["pre"] = substr(s, 1, RSTART-1)

        label = fmt_inline_once(m[1])
        url = m[2]
        sub(/^[ \t\r\n]+/, "", url)
        sub(/[ \t\r\n]+$/, "", url)

        out["mac"] = ".pdfhref W -D " url " \"" escape_quotes(label) "\""
        out["suf"] = substr(s, RSTART+RLENGTH)
        return 1
    }
    return 0
}

# Print s with links turned into .pdfhref requests, formatting each text
# fragment independently (no emphasis state is carried between fragments
# or calls).  Non-streaming counterpart of
# emit_with_links_and_stream_format().
function emit_with_links_and_format(s,    parts) {
    if (!has_md_link(s)) { print fmt_inline_once(s); return }

    while (split_one_link(s, parts)) {
        if (length(parts["pre"])) print fmt_inline_once(parts["pre"])
        print parts["mac"]
        s = parts["suf"]
    }
    if (length(s)) print fmt_inline_once(s)
}

# Print s with links turned into .pdfhref requests, formatting the text
# fragments with the streaming (cross-line) emphasis state.
function emit_with_links_and_stream_format(s,    parts) {
    if (match(s, /\[[^][]+\]\([^()]+\)/) == 0) {
        print fmt_inline_stream(s)
        return
    }

    while (split_one_link(s, parts)) {
        if (length(parts["pre"])) print fmt_inline_stream(parts["pre"])
        print parts["mac"]      # request must start in column 1, on its own line
        s = parts["suf"]
    }

    if (length(s)) print fmt_inline_stream(s)
}

# True when line looks like a table row: "|" first and last, ignoring
# surrounding blanks.
function is_pipe_row(line) {
    return line ~ /^[ \t]*\|.*\|[ \t]*$/
}

# True when line is a table row whose every cell is a divider such as
# "---", ":---", "---:" or ":---:".
function is_divider_row(line,    s, parts, n, i, cell) {
    if (!is_pipe_row(line)) return 0
    s = line
    sub(/^[ \t]*\|/, "", s)
    sub(/\|[ \t]*$/, "", s)
    n = split(s, parts, /\|/)
    for (i = 1; i <= n; i++) {
        cell = trim(parts[i])
        if (cell !~ /^:?-{3,}:?$/) return 0
    }
    return 1
}

# Number of cells in a table row (inner "|" count plus one).
function count_table_cols(line,    s) {
    s = line
    sub(/^[ \t]*\|/, "", s)
    sub(/\|[ \t]*$/, "", s)
    return gsub(/\|/, "|", s) + 1
}

# Split a table row into trimmed cells[1..n]; returns n.
function split_table_row(line, cells,    s, parts, n, i) {
    s = line
    sub(/^[ \t]*\|/, "", s)
    sub(/\|[ \t]*$/, "", s)
    n = split(s, parts, /\|/)
    for (i = 1; i <= n; i++) cells[i] = trim(parts[i])
    return n
}

# Fill align[1..ncols] with tbl column keys ("l", "c" or "r") derived from
# the divider row.  Every column defaults to "l" when divider_line is not
# a divider row or has fewer cells than ncols.
function parse_alignment(divider_line, ncols, align,    tmp, parts, i, cell, n) {
    for (i = 1; i <= ncols; i++) align[i] = "l"
    if (!is_divider_row(divider_line)) return
    tmp = divider_line
    sub(/^[ \t]*\|/, "", tmp)
    sub(/\|[ \t]*$/, "", tmp)
    n = split(tmp, parts, /\|/)
    for (i = 1; i <= ncols; i++) {
        cell = (i <= n) ? trim(parts[i]) : "---"
        if (cell ~ /^:.*:$/)  align[i] = "c"
        else if (cell ~ /:$/) align[i] = "r"
        else                  align[i] = "l"
    }
}

# Build the tbl format line body from align[1..ncols]: keys separated by
# blanks, closed with "|." (the caller prints the opening "|").
function build_tbl_format_from_align(ncols, align,    i, spec) {
    spec = ""
    for (i = 1; i <= ncols; i++) {
        spec = spec ((i == ncols) ? align[i] "|." : align[i] " ")
    }
    return spec
}

# Replace every link in s by its formatted label (the URL is dropped), then
# run the streaming inline formatter over the result.  Used for table
# cells, where a .pdfhref request line cannot be emitted.
function strip_links_to_label_and_format(s,    m, pre, lab) {
    while (match(s, /\[([^][]+)\]\(([^)]+)\)/, m)) {
        pre = substr(s, 1, RSTART-1)
        lab = fmt_inline_once(m[1])
        s = pre lab substr(s, RSTART+RLENGTH)
    }
    return fmt_inline_stream(s)
}

# Join cells[1..n] with "|", the tab character declared to tbl.
function join_cells(cells, n,    i, out) {
    out = ""
    for (i = 1; i <= n; i++) out = out ((i == 1) ? "" : "|") cells[i]
    return out
}

# Emit the buffered table (global table_rows[1..table_rowc]) as a kept,
# centred tbl block and reset the table state.  The first row is the
# header; a divider in row 2 supplies column alignment.  Short rows are
# padded with empty cells up to the widest row.
function flush_table(    header_line, divider_line, data_start, maxcols,
                         align, cells, i, r, n, c) {
    if (table_rowc < 1) { in_table = 0; return }

    header_line = table_rows[1]
    divider_line = ""
    data_start = 2

    if (table_rowc >= 2 && is_divider_row(table_rows[2])) {
        divider_line = table_rows[2]
        data_start = 3
    }

    maxcols = 0
    for (i = 1; i <= table_rowc; i++) {
        c = count_table_cols(table_rows[i])
        if (c > maxcols) maxcols = c
    }

    parse_alignment(divider_line, maxcols, align)

    print ".KS"
    print ".TS"
    print "tab(|) center;"
    printf "|"
    print build_tbl_format_from_align(maxcols, align)
    print "="

    delete cells
    n = split_table_row(header_line, cells)
    for (i = n + 1; i <= maxcols; i++) cells[i] = ""
    for (i = 1; i <= maxcols; i++) {
        cells[i] = "\\fB" strip_links_to_label_and_format(cells[i]) "\\fR"
    }
    print join_cells(cells, maxcols)
    print "_"

    for (r = data_start; r <= table_rowc; r++) {
        if (is_divider_row(table_rows[r])) continue
        delete cells
        n = split_table_row(table_rows[r], cells)
        for (i = n + 1; i <= maxcols; i++) cells[i] = ""
        for (i = 1; i <= maxcols; i++) {
            cells[i] = strip_links_to_label_and_format(cells[i])
        }
        print join_cells(cells, maxcols)
    }

    print "="
    print ".TE"
    print ".KE"

    delete table_rows
    in_table = 0
    table_rowc = 0
}

# ================== per-line driver ==================
{
    line = $0

    # ---- front matter: only recognised when "---" is the very first line
    if (!yaml_done && NR == 1 && line ~ /^---[ \t]*$/) { in_yaml = 1; next }

    if (in_yaml) {
        if (line ~ /^---[ \t]*$/) { in_yaml = 0; yaml_done = 1; emit_header(); next }
        if (match(line, /^([A-Za-z0-9_-]+):[ \t]*(.*)$/, m)) {
            key = m[1]; val = trim(m[2]); yaml[key] = val
        }
        next
    }

    # A buffered table ends at the first non-row line.  Flush it before any
    # other handler runs so the table is emitted in document order even when
    # the next line opens a call-out.
    if (in_table && !is_pipe_row(line)) flush_table()

    # ================== call-out handling ==================
    if (match(line, /^> \[!([A-Z]+)\][ \t]+(.*)$/, mh)) {
        type = mh[1]
        callout_title = mh[2]
        callout_glyph = map_glyph(type)

        printf ".IP %s 3\n", callout_glyph
        print ".B"
        printf ".UL \"%s\"\n", escape_quotes(fmt_inline_once(callout_title))
        print ".R"

        in_callout = 1
        current_level = 0
        in_bullet_paragraph = 0
        in_callout_para = 0
        in_callout_para_rs = 0
        next
    }

    # Bullet inside a call-out; nesting depth is 1-based here.
    if (in_callout && match(line, /^> ([ \t]*)-[ \t]+(.*)$/, mb)) {
        indent = mb[1]
        text = mb[2]

        if (in_callout_para_rs) {
            in_callout_para_rs = 0
            in_callout_para = 0
        }

        nspaces = indent_width(indent)
        new_level = int(nspaces / indent_step) + 1
        if (new_level < 1) new_level = 1

        while (current_level < new_level) { print ".RS"; current_level++ }
        while (current_level > new_level) { print ".RE"; current_level-- }

        printf ".IP %s 2\n", bullet_glyph(new_level)

        emit_with_links_and_stream_format(text)
        in_bullet_paragraph = 1
        in_callout_para = 0
        next
    }

    # Continuation line of a call-out bullet.
    if (in_callout && in_bullet_paragraph && match(line, /^> +([^-\*0-9].*)$/, cont)) {
        emit_with_links_and_stream_format(cont[1])
        next
    }

    # Empty quoted line: ends the current call-out paragraph or bullet.
    if (in_callout && line ~ /^> *$/) {
        if (in_callout_para_rs) {
            print ".RE"
            in_callout_para_rs = 0
        }
        in_callout_para = 0
        in_bullet_paragraph = 0
        next
    }

    # Plain text inside a call-out.  Lines that look like "*" bullets or
    # numbered items are not handled here and fall through.
    if (in_callout && match(line, /^> +(.*)$/, mtxt)) {
        if (!(mtxt[1] ~ /^[-\*][ \t]+/ || mtxt[1] ~ /^[0-9]+\.[ \t]+/)) {
            close_list_levels()
            if (!in_callout_para_rs) {
                print ".RS"
                print ".LP"
                in_callout_para_rs = 1
                in_callout_para = 1
                in_bullet_paragraph = 0
            }
            emit_with_links_and_stream_format(mtxt[1])
            next
        }
    }

    # First unquoted line closes the call-out.  No "next": the line itself
    # still has to be processed as ordinary content below.
    if (in_callout && line !~ /^>/) {
        if (in_callout_para_rs) {
            print ".RE"
            in_callout_para_rs = 0
        }
        close_list_levels()
        in_callout = 0
        in_bullet_paragraph = 0
        in_callout_para = 0
    }

    # ================== table handling (buffered) ==================
    if (is_pipe_row(line)) {
        if (!in_table) {
            in_table = 1
            table_rowc = 0
        }
        table_rows[++table_rowc] = line
        next
    }

    # ================== Markdown classics (headings, lists, paragraphs) ==================
    if (match(line, /^#([#]*)[ \t]+(.*)$/, mh2)) {
        # A heading ends any open list and any running paragraph.
        close_list_levels()
        in_bullet_paragraph = 0
        in_paragraph = 0

        level = 1 + length(mh2[1])
        printf ".HEADSTART %d\n%s\n", level, neutralize_leading_control(mh2[2])
        print ".HEADEND"
        next
    }

    # Top-level bullet list; nesting depth is 0-based here.
    if (match(line, /^([ \t]*)-[ \t]+(.*)$/, mb)) {
        indent = mb[1]
        text = mb[2]

        nspaces = indent_width(indent)
        new_level = int(nspaces / indent_step)
        if (new_level < 0) new_level = 0

        while (current_level < new_level) { print ".RS"; current_level++ }
        while (current_level > new_level) { print ".RE"; current_level-- }

        printf ".IP %s 2\n", bullet_glyph(new_level + 1)

        emit_with_links_and_stream_format(text)
        in_bullet_paragraph = 1
        next
    }

    # Indented continuation line of a bullet.
    if (in_bullet_paragraph && match(line, /^ +([^-\*0-9].*)$/, cont)) {
        emit_with_links_and_stream_format(cont[1])
        next
    }

    if (line ~ /^$/) in_paragraph = 0

    if (line !~ /^[ \t]*$/) {
        if (in_bullet_paragraph) {
            close_list_levels()
            in_bullet_paragraph = 0
        }
        if (!in_paragraph) {
            print ".PARA"
            in_paragraph = 1
        }
        emit_with_links_and_stream_format(line)
        next
    }
}

END {
    # A table that runs to end of input is still buffered: emit it.
    if (in_table) flush_table()

    if (in_callout_para_rs) {
        print ".RE"
        in_callout_para_rs = 0
    }
    # Balance every .RS that is still open, inside a call-out or not.
    close_list_levels()
}