HILEX(1) General Commands Manual HILEX(1)
hilex — syntax highlighter
hilex [-t] [-f format] [-l lexer] [-n name] [-o opts] [file]
The hilex
utility syntax highlights the
contents of file or standard input and formats it on
standard output.
The arguments are as follows:
-f
format
- Set the output format. See Output Formats. The default format is ansi.
-l
lexer
- Set the input lexer. See Input
Lexers. The default input lexer is inferred from
name or the first line of input.
-n
name
- Set the name used to infer the input lexer. The default is the final
component of file.
-o
opts
- Set output format options. opts is a comma-separated
list of options. Options for each output format are documented in
Output Formats.
-t
- Default to the text input lexer if one cannot be inferred.
ansi
- Output ANSI terminal control sequences. If standard output is a terminal
and standard input is not being read, output is piped to PAGER, with
LESS=FRX set in its environment if LESS is not already set.
html
- Output HTML
span
elements with the following classes:
- Op
- operators
- Nu
- numbers
- Ke
- keywords
- Id
- identifiers
- Ma
- macros
- Co
- comments
- St
- strings
- Es
- character escapes
- Fo
- format strings
- Su
- variable substitutions
The options are as follows:
document
- Output an HTML document containing a pre
element.
inline
- Output inline style attributes rather than classes.
pre
- Wrap the output in a pre element with the class
hilex.
style
=url
- With document, use the external stylesheet url. If unset, default styles
are included in a style element.
tab
=n
- With document, inline or pre, set the tab-size property to n.
title
=...
- With document, set the title element text. The default title is the same
as name.
irc
- Output IRC formatting codes. The options are as follows:
monospace
- Use the IRCCloud monospace formatting code.
c
- The C11 language, with minimal support for lex(1), yacc(1) and Objective-C input. Inferred for
*.[chlmy] files.
make
- BSD make(1). Inferred for
Makefile, *.mk and
*.am files.
mdoc
- The mdoc(7) language.
Inferred for *.[1-9] files and files starting with
“.Dd”.
sh
- POSIX sh(1). Since lexical
analysis of the shell command language is effectively impossible, this is
best-effort only. Inferred for *.sh,
.profile, .shrc files and
files starting with “#!/bin/sh”.
text
- Plain text. Inferred for *.txt files.
PAGER
- The command to pipe ANSI output to if standard output is a terminal. The
default is PAGER=less.
hilex.c in git
/* Copyright (C) 2020 June McEnroe <june@causal.agency>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <regex.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>
#include "hilex.h"
// Number of elements in a true array (never a pointer). The argument is
// parenthesized in both uses so any array-valued expression expands safely.
#define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]))
// Printable name of each class, indexed by the class constant itself. Each
// X(class) entry of the ENUM_CLASS list (not defined in this file; presumably
// provided by hilex.h) expands to a designated initializer whose value is the
// stringified constant name.
static const char *Class[] = {
#define X(class) [class] = #class,
ENUM_CLASS
#undef X
};
// State of the built-in plain-text lexer: the stream being read and the most
// recently read line (a buffer allocated and grown by getline).
static FILE *yyin;
static char *yytext;

// Read the next line of yyin into yytext. Every line is classed as Normal;
// None is returned once getline reports end of input or an error.
static int yylex(void) {
	static size_t cap = 0;
	if (getline(&yytext, &cap, yyin) < 0) {
		return None;
	}
	return Normal;
}

// Lexer descriptor for plain text, built from the three definitions above.
static const struct Lexer LexText = { yylex, &yyin, &yytext };
// Table of available input lexers and the rules used to infer them.
// - lexer: the lexer implementation.
// - name: the exact name accepted by parseLexer.
// - namePatt: extended regular expression matched against the input name.
// - linePatt: extended regular expression matched against the first line of
//   input, or NULL if the lexer is never inferred from content.
// matchLexer scans the table in order and returns the first match, so earlier
// entries take precedence.
static const struct {
const struct Lexer *lexer;
const char *name;
const char *namePatt;
const char *linePatt;
} Lexers[] = {
{ &LexC, "c", "[.][chlmy]$", NULL },
{ &LexMake, "make", "[.](mk|am)$|^Makefile$", NULL },
{ &LexMdoc, "mdoc", "[.][1-9]$", "^[.]Dd" },
{ &LexSh, "sh", "[.]sh$|^[.](profile|shrc)$", "^#![ ]?/bin/k?sh" },
{ &LexText, "text", "[.]txt$", NULL },
};
// Look up a lexer by its exact name. Exits with status 1 through errx when no
// entry of Lexers carries that name.
static const struct Lexer *parseLexer(const char *name) {
	size_t i = 0;
	while (i < ARRAY_LEN(Lexers) && strcmp(name, Lexers[i].name) != 0) {
		i++;
	}
	if (i == ARRAY_LEN(Lexers)) errx(1, "unknown lexer %s", name);
	return Lexers[i].lexer;
}
// Push the whole of str back onto file so that it is read again in its
// original order. Characters are pushed last-to-first because pushed-back
// characters are returned in reverse order of pushing. Exits with status 1 if
// the stream refuses a pushback. ISO C only guarantees one character of
// pushback, so longer strings rely on the C library supporting more.
static void ungets(const char *str, FILE *file) {
	for (size_t i = strlen(str); i > 0; --i) {
		// Convert through unsigned char: where char is signed, a 0xFF byte
		// would otherwise be passed as a negative int that can equal EOF,
		// which ungetc rejects.
		int ch = ungetc((unsigned char)str[i - 1], file);
		if (ch == EOF) errx(1, "cannot push back string");
	}
}
// Report whether the extended regular expression patt matches str. Patterns
// come from the constant Lexers table, so a failure to compile one is treated
// as a programming error and only asserted.
static bool lexerPattMatches(const char *patt, const char *str) {
	regex_t regex;
	int error = regcomp(&regex, patt, REG_EXTENDED | REG_NOSUB);
	assert(!error);
	error = regexec(&regex, str, 0, NULL, 0);
	regfree(&regex);
	return !error;
}

// Infer the lexer for an input. The name is tried against every namePatt
// first; only if none matches is the first line (at most 255 bytes of it)
// read from file and tried against every non-NULL linePatt. Whatever was read
// is pushed back onto file so the chosen lexer sees the input from its start.
// Returns NULL if nothing matches or if no line could be read.
static const struct Lexer *matchLexer(const char *name, FILE *file) {
	for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) {
		if (lexerPattMatches(Lexers[i].namePatt, name)) {
			return Lexers[i].lexer;
		}
	}

	char buf[256];
	char *line = fgets(buf, sizeof(buf), file);
	if (!line) return NULL;

	const struct Lexer *lexer = NULL;
	for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) {
		if (!Lexers[i].linePatt) continue;
		if (lexerPattMatches(Lexers[i].linePatt, line)) {
			lexer = Lexers[i].lexer;
			break;
		}
	}
	ungets(line, file);
	return lexer;
}
// X-macro list of output format options: X(enum constant, key string). The
// same list generates enum Option here and the OptionKeys table later in the
// file, which keeps the two in step.
#define ENUM_OPTION \
X(Document, "document") \
X(Inline, "inline") \
X(Monospace, "monospace") \
X(Pre, "pre") \
X(Style, "style") \
X(Tab, "tab") \
X(Title, "title")
// Index of each option in an opts[] array; OptionCap is the number of options.
enum Option {
#define X(option, key) option,
ENUM_OPTION
#undef X
OptionCap,
};
// Signature of a formatter's header and footer hooks, which receive the
// option table indexed by enum Option.
typedef void Header(const char *opts[]);
// Signature of a formatter's per-token callback: it receives the option
// table, the class reported by the lexer and the text of the token.
typedef void Output(const char *opts[], enum Class class, const char *text);
// Whether ANSI output should be sent through a pager; set by main when a file
// argument is given, according to whether standard output is a terminal.
static bool pager;

// Header hook of the ansi formatter. When paging is enabled, starts
// "$SHELL -c $PAGER" (defaulting to /bin/sh and less) as a child process and
// redirects this process's standard output into the child's standard input.
// LESS is set to FRX only if the user has not already set it. Any failure to
// set up the pipe or the child is fatal.
static void ansiHeader(const char *opts[]) {
	(void)opts;
	if (!pager) return;

	const char *shell = getenv("SHELL");
	const char *command = getenv("PAGER");
	if (!shell) shell = "/bin/sh";
	if (!command) command = "less";
	setenv("LESS", "FRX", 0);

	int rw[2];
	int error = pipe(rw);
	if (error) err(1, "pipe");

	pid_t pid = fork();
	if (pid < 0) err(1, "fork");
	if (!pid) {
		// Child: read from the pipe and become the pager.
		if (dup2(rw[0], STDIN_FILENO) < 0) err(127, "dup2");
		close(rw[0]);
		close(rw[1]);
		// The variadic sentinel must be a null pointer of pointer type.
		execl(shell, shell, "-c", command, (char *)NULL);
		err(127, "%s", shell);
	}

	// Parent: write to the pipe; line buffering keeps output flowing to the
	// pager as each line is formatted.
	if (dup2(rw[1], STDOUT_FILENO) < 0) err(1, "dup2");
	close(rw[0]);
	close(rw[1]);
	setlinebuf(stdout);

#ifdef __OpenBSD__
	// The pager has been started, so proc and exec are no longer needed.
	error = pledge("stdio", NULL);
	if (error) err(1, "pledge");
#endif
}
static void (const char *opts[]) {
(void)opts;
if (!pager) return;
int status;
fclose(stdout);
wait(&status);
}
// Parameter string placed between "\33[" and "m" by ansiFormat for each
// class. Classes without an entry are NULL and are printed without any
// escape sequence.
static const char *SGR[ClassCap] = {
[Keyword] = "37",
[Macro] = "32",
[Comment] = "34",
[String] = "36",
[Format] = "36;1;96",
[Subst] = "33",
};
// Print text wrapped in the escape sequence for class, or verbatim when the
// class has no SGR entry. The sequence is opened and closed again on every
// line of a multi-line token so that the output survives piping to less -R.
static void ansiFormat(const char *opts[], enum Class class, const char *text) {
	(void)opts;
	const char *sgr = SGR[class];
	if (!sgr) {
		printf("%s", text);
		return;
	}

	const char *nl = strchr(text, '\n');
	while (nl) {
		printf("\33[%sm%.*s\33[m\n", sgr, (int)(nl - text), text);
		text = nl + 1;
		nl = strchr(text, '\n');
	}
	// Remainder after the last newline, if any.
	if (*text) printf("\33[%sm%s\33[m", sgr, text);
}
// Debugging output: every token whose class is not Normal is printed as
// ClassName(text), with the text styled by ansiFormat; Normal text is printed
// unchanged.
static void
debugFormat(const char *opts[], enum Class class, const char *text) {
	if (class == Normal) {
		printf("%s", text);
		return;
	}
	printf("%s(", Class[class]);
	ansiFormat(opts, class, text);
	printf(")");
}
// Prefix written by ircFormat before text of each class: the byte \003
// followed by a decimal number (presumably an IRC color code — confirm
// against the client documentation). Classes without an entry are NULL and
// get no prefix.
static const char *IRC[ClassCap] = {
[Keyword] = "\00315",
[Macro] = "\0033",
[Comment] = "\0032",
[String] = "\00310",
[Format] = "\00311",
[Subst] = "\0037",
};
static void (const char *opts[]) {
if (opts[Monospace]) printf("\21");
}
// Return the separator to place between a numeric prefix from the IRC table
// and text: "\2\2" when text begins with a comma or a decimal digit (which
// could otherwise be read as a continuation of the number), and "" otherwise,
// including for an empty string.
static const char *stop(const char *text) {
	// <ctype.h> functions are only defined for unsigned char values and EOF,
	// so convert before classifying to stay defined for bytes >= 0x80.
	unsigned char first = (unsigned char)*text;
	return (first == ',' || isdigit(first) ? "\2\2" : "");
}
// Print text with the IRC prefix of its class. Each complete line gets the
// prefix again at its start; the trailing partial line is followed by the
// \17 byte when a prefix was written. With the monospace option, \21 is
// emitted again after every newline and after every \17.
static void ircFormat(const char *opts[], enum Class class, const char *text) {
	const char *color = IRC[class];

	const char *nl;
	while ((nl = strchr(text, '\n')) != NULL) {
		if (color) printf("%s%s", color, stop(text));
		printf("%.*s\n", (int)(nl - text), text);
		if (opts[Monospace]) printf("\21");
		text = nl + 1;
	}

	if (!*text) return;
	if (!color) {
		printf("%s", text);
		return;
	}
	printf("%s%s%s\17", color, stop(text), text);
	if (opts[Monospace]) printf("\21");
}
// Write text to standard output with the characters that are significant in
// HTML text and double-quoted attribute values replaced by character
// references: '"' becomes &quot;, '&' becomes &amp; and '<' becomes &lt;.
// All other bytes are copied through unchanged.
static void htmlEscape(const char *text) {
	while (*text) {
		// Copy the longest run that needs no escaping in one call.
		size_t len = strcspn(text, "\"&<");
		if (len) fwrite(text, len, 1, stdout);
		text += len;

		// text now points at a special character or the terminator.
		switch (*text) {
			case '"': printf("&quot;"); text++; break;
			case '&': printf("&amp;"); text++; break;
			case '<': printf("&lt;"); text++; break;
			default: break;
		}
	}
}
// CSS declarations for each class. htmlHeader writes them as rules of the
// default stylesheet and htmlFormat writes them as inline style attributes.
// Classes without an entry are NULL and get no styling.
static const char *Styles[ClassCap] = {
[Keyword] = "color: dimgray;",
[Macro] = "color: green;",
[Comment] = "color: navy;",
[String] = "color: teal;",
[Format] = "color: teal; font-weight: bold;",
[Subst] = "color: olive;",
};
// Write the CSS declarations that set the tab width to tab, once with the
// -moz- prefix and once unprefixed. The value is passed through htmlEscape.
static void styleTabSize(const char *tab) {
	static const char *const Props[] = { "-moz-tab-size: ", "; tab-size: " };
	for (size_t i = 0; i < sizeof(Props) / sizeof(Props[0]); ++i) {
		printf("%s", Props[i]);
		htmlEscape(tab);
	}
	printf(";");
}
static void (const char *opts[]) {
if (!opts[Document]) goto body;
printf("<!DOCTYPE html>\n<title>");
if (opts[Title]) htmlEscape(opts[Title]);
printf("</title>\n");
if (opts[Style]) {
printf("<link rel=\"stylesheet\" href=\"");
htmlEscape(opts[Style]);
printf("\">\n");
} else if (!opts[Inline]) {
printf("<style>\n");
if (opts[Tab]) {
printf("pre.hilex { ");
styleTabSize(opts[Tab]);
printf(" }\n");
}
for (enum Class class = 0; class < ClassCap; ++class) {
if (!Styles[class]) continue;
printf("pre.hilex .%.2s { %s }\n", Class[class], Styles[class]);
}
printf("</style>\n");
}
body:
if ((opts[Document] || opts[Pre]) && opts[Inline] && opts[Tab]) {
printf("<pre class=\"hilex\" style=\"");
styleTabSize(opts[Tab]);
printf("\">");
} else if (opts[Document] || opts[Pre]) {
printf("<pre class=\"hilex\">");
}
}
static void (const char *opts[]) {
if (opts[Document] || opts[Pre]) printf("</pre>");
if (opts[Document]) printf("\n");
}
static void htmlFormat(const char *opts[], enum Class class, const char *text) {
if (class != Normal) {
if (opts[Inline]) {
printf("<span style=\"%s\">", Styles[class] ? Styles[class] : "");
} else {
printf("<span class=\"%.2s\">", Class[class]);
}
htmlEscape(text);
printf("</span>");
} else {
htmlEscape(text);
}
}
// An output format: its name, an optional hook run before any token, the
// per-token formatting callback, and an optional hook run after the last
// token. main skips a hook that is NULL. The first entry of Formatters is
// the default format used by main.
static const struct Formatter {
const char *name;
Header *header;
Output *format;
Header *footer;
} Formatters[] = {
{ "ansi", ansiHeader, ansiFormat, ansiFooter },
{ "debug", NULL, debugFormat, NULL },
{ "html", htmlHeader, htmlFormat, htmlFooter },
{ "irc", ircHeader, ircFormat, NULL },
};
static const struct Formatter *parseFormatter(const char *name) {
for (size_t i = 0; i < ARRAY_LEN(Formatters); ++i) {
if (!strcmp(name, Formatters[i].name)) return &Formatters[i];
}
errx(1, "unknown formatter %s", name);
}
// Key strings of the output format options, indexed by enum Option and
// terminated by NULL. main passes this table to getsubopt, so the index
// returned for a key is the matching enum Option value.
static char *const OptionKeys[OptionCap + 1] = {
#define X(option, key) [option] = key,
ENUM_OPTION
#undef X
NULL,
};
// Entry point: parse the command line, pick the input and its lexer, then
// feed every token produced by the lexer through the chosen formatter.
int main(int argc, char *argv[]) {
	bool text = false;
	const char *name = NULL;
	const struct Lexer *lexer = NULL;
	const struct Formatter *formatter = &Formatters[0];
	const char *opts[OptionCap] = {0};

	int opt;
	while ((opt = getopt(argc, argv, "f:l:n:o:t")) > 0) {
		switch (opt) {
		case 'f':
			formatter = parseFormatter(optarg);
			break;
		case 'l':
			lexer = parseLexer(optarg);
			break;
		case 'n':
			name = optarg;
			break;
		case 'o':
			// Comma-separated list of key or key=value suboptions; a key
			// given without a value is recorded as the empty string.
			while (*optarg) {
				char *val;
				int key = getsubopt(&optarg, OptionKeys, &val);
				if (key < 0) errx(1, "no such option %s", val);
				opts[key] = (val ? val : "");
			}
			break;
		case 't':
			text = true;
			break;
		default:
			return 1;
		}
	}

	// Read the named file if there is one, otherwise standard input. The
	// pager is only considered when a file was named.
	const char *path = "(stdin)";
	FILE *file = stdin;
	if (optind < argc) {
		path = argv[optind];
		file = fopen(path, "r");
		if (!file) err(1, "%s", path);
		pager = isatty(STDOUT_FILENO);
	}

#ifdef __OpenBSD__
	// Keep proc and exec only if the ansi header is going to start a pager.
	const char *promises = "stdio";
	if (formatter->header == ansiHeader && pager) {
		promises = "stdio proc exec";
	}
	if (pledge(promises, NULL)) err(1, "pledge");
#endif

	// Without -n, the name is the final component of the path.
	if (!name) {
		const char *slash = strrchr(path, '/');
		name = (slash ? slash + 1 : path);
	}
	if (!opts[Title]) opts[Title] = name;

	// Lexer precedence: -l, then inference from name or first line, then
	// plain text if -t was given.
	if (!lexer) lexer = matchLexer(name, file);
	if (!lexer && text) lexer = &LexText;
	if (!lexer) errx(1, "cannot infer lexer for %s", name);
	*lexer->in = file;

	if (formatter->header) formatter->header(opts);
	enum Class class;
	while ((class = lexer->lex()) != None) {
		assert(class < ClassCap);
		formatter->format(opts, class, *lexer->text);
	}
	if (formatter->footer) formatter->footer(opts);
}