hare::lex — Hare documentation

hare::lex +x86_64 +linux

lex: lexical analysis for the Hare grammar

hare::lex provides a lexer for Hare source code. A lexer reads source text from a bufio::scanner (see init) and returns a series of Hare tokens. See the Hare specification for more details:

https://harelang.org/specification

Index

Types

type annotatefn = fn(id: []str, lex: *lexer, user: nullable *opaque) (void | error);
type flag = enum uint {
	NONE = 0,
	COMMENTS = 1 << 0, // Enables lexing comments.
};
type location = struct {
	path: str,
	line: uint,
	col: uint,
	off: io::off,
};
type ltok = enum uint {
	ATTR_FINI,
	ATTR_INIT,
	ATTR_OFFSET,
	ATTR_PACKED,
	ATTR_SYMBOL,
	ATTR_TEST,
	ATTR_THREADLOCAL,
	UNDERSCORE,
	ABORT,
	ALIGN,
	ALLOC,
	APPEND,
	AS,
	ASSERT,
	BOOL,
	BREAK,
	CASE,
	CONST,
	CONTINUE,
	DEF,
	DEFER,
	DELETE,
	DONE,
	ELSE,
	ENUM,
	EXPORT,
	F32,
	F64,
	FALSE,
	FN,
	FOR,
	FREE,
	I16,
	I32,
	I64,
	I8,
	IF,
	INSERT,
	INT,
	IS,
	LEN,
	LET,
	MATCH,
	NEVER,
	NOMEM,
	NULL,
	NULLABLE,
	OFFSET,
	OPAQUE,
	RETURN,
	RUNE,
	SIZE,
	STATIC,
	STR,
	STRUCT,
	SWITCH,
	TRUE,
	TYPE,
	U16,
	U32,
	U64,
	U8,
	UINT,
	UINTPTR,
	UNION,
	USE,
	VAARG,
	VAEND,
	VALIST,
	VASTART,
	VOID,
	YIELD,
	LAST_KEYWORD = YIELD,
	ARROW,
	BAND,
	BANDEQ,
	BNOT,
	BOR,
	BOREQ,
	BXOR,
	BXOREQ,
	COLON,
	COMMA,
	DIV,
	DIVEQ,
	DOT,
	DOUBLE_COLON,
	DOUBLE_DOT,
	ELLIPSIS,
	EQUAL,
	GT,
	GTEQ,
	LAND,
	LANDEQ,
	LBRACE,
	LBRACKET,
	LEQUAL,
	LESS,
	LESSEQ,
	LNOT,
	LOR,
	LOREQ,
	LPAREN,
	LSHIFT,
	LSHIFTEQ,
	LXOR,
	LXOREQ,
	MINUS,
	MINUSEQ,
	MODEQ,
	MODULO,
	NEQUAL,
	PLUS,
	PLUSEQ,
	QUESTION,
	RBRACE,
	RBRACKET,
	RPAREN,
	RSHIFT,
	RSHIFTEQ,
	SEMICOLON,
	TIMES,
	TIMESEQ,
	LAST_BTOK = TIMESEQ,
	LIT_U8,
	LIT_U16,
	LIT_U32,
	LIT_U64,
	LIT_UINT,
	LIT_SIZE,
	LIT_I8,
	LIT_I16,
	LIT_I32,
	LIT_I64,
	LIT_INT,
	LIT_ICONST,
	LIT_F32,
	LIT_F64,
	LIT_FCONST,
	LIT_RCONST,
	LIT_STR,
	LAST_LITERAL = LIT_STR,
	NAME,
	EOF,
};
type restore_point = struct {
	off: io::off,
	state: lexer,
};
type token = (ltok, value, location);
type value = (str | rune | u64 | f64 | void);

// Undocumented types:
type lexer = struct {
	in: *bufio::scanner,
	path: str,
	loc: (uint, uint, io::off),
	prevrloc: (uint, uint, io::off),
	// ltok::EOF when no token was unlexed
	un: token,
	prevunlocs: [2]((uint, uint, io::off), (uint, uint, io::off)),
	flags: flag,
	comment: str,
	require_int: bool,
	annotations: []([]str, *annotatefn, nullable *opaque),
};

Errors

type error = !(io::error | syntax);
type syntax = !(location, str);

Functions

fn comment(lex: *lexer) str;
fn init(in: *bufio::scanner, path: str, flags: flag = flag::NONE) lexer;
fn lex(lex: *lexer) (token | error);
fn mkloc(lex: *lexer) location;
fn prevloc(lex: *lexer) location;
fn register_annotation(lex: *lexer, id: []str, cb: *annotatefn, user: nullable *opaque) void;
fn restore(lex: *lexer, rp: *restore_point) (void | io::error);
fn save(lex: *lexer) (restore_point | io::error);
fn strerror(err: error) const str;
fn syntaxerr(loc: location, why: str) error;
fn tokstr(tok: token) const str;
fn unlex(lex: *lexer, tok: token) void;

Types

type annotatefn[permalink] [source]

type annotatefn = fn(id: []str, lex: *lexer, user: nullable *opaque) (void | error);

A function which implements an annotation. The state of the lexer at the time of the callback will be immediately following the identifier, and the callback should lex all tokens up to and including the terminating ']'.

#[example::annotation(...)]
                     ^------- start of callback
                           ^- end of callback

The id parameter is only valid for the duration of the callback's execution.

type flag[permalink] [source]

type flag = enum uint {
	NONE = 0,
	COMMENTS = 1 << 0, // Enables lexing comments.
};

Flags which apply to this lexer.

type location[permalink] [source]

type location = struct {
	path: str,
	line: uint,
	col: uint,
	off: io::off,
};

A location within a source file. The path is borrowed from the file name given to the lexer.

type ltok[permalink] [source]

type ltok = enum uint {
	ATTR_FINI,
	ATTR_INIT,
	ATTR_OFFSET,
	ATTR_PACKED,
	ATTR_SYMBOL,
	ATTR_TEST,
	ATTR_THREADLOCAL,
	UNDERSCORE,
	ABORT,
	ALIGN,
	ALLOC,
	APPEND,
	AS,
	ASSERT,
	BOOL,
	BREAK,
	CASE,
	CONST,
	CONTINUE,
	DEF,
	DEFER,
	DELETE,
	DONE,
	ELSE,
	ENUM,
	EXPORT,
	F32,
	F64,
	FALSE,
	FN,
	FOR,
	FREE,
	I16,
	I32,
	I64,
	I8,
	IF,
	INSERT,
	INT,
	IS,
	LEN,
	LET,
	MATCH,
	NEVER,
	NOMEM,
	NULL,
	NULLABLE,
	OFFSET,
	OPAQUE,
	RETURN,
	RUNE,
	SIZE,
	STATIC,
	STR,
	STRUCT,
	SWITCH,
	TRUE,
	TYPE,
	U16,
	U32,
	U64,
	U8,
	UINT,
	UINTPTR,
	UNION,
	USE,
	VAARG,
	VAEND,
	VALIST,
	VASTART,
	VOID,
	YIELD,
	LAST_KEYWORD = YIELD,
	ARROW,
	BAND,
	BANDEQ,
	BNOT,
	BOR,
	BOREQ,
	BXOR,
	BXOREQ,
	COLON,
	COMMA,
	DIV,
	DIVEQ,
	DOT,
	DOUBLE_COLON,
	DOUBLE_DOT,
	ELLIPSIS,
	EQUAL,
	GT,
	GTEQ,
	LAND,
	LANDEQ,
	LBRACE,
	LBRACKET,
	LEQUAL,
	LESS,
	LESSEQ,
	LNOT,
	LOR,
	LOREQ,
	LPAREN,
	LSHIFT,
	LSHIFTEQ,
	LXOR,
	LXOREQ,
	MINUS,
	MINUSEQ,
	MODEQ,
	MODULO,
	NEQUAL,
	PLUS,
	PLUSEQ,
	QUESTION,
	RBRACE,
	RBRACKET,
	RPAREN,
	RSHIFT,
	RSHIFTEQ,
	SEMICOLON,
	TIMES,
	TIMESEQ,
	LAST_BTOK = TIMESEQ,
	LIT_U8,
	LIT_U16,
	LIT_U32,
	LIT_U64,
	LIT_UINT,
	LIT_SIZE,
	LIT_I8,
	LIT_I16,
	LIT_I32,
	LIT_I64,
	LIT_INT,
	LIT_ICONST,
	LIT_F32,
	LIT_F64,
	LIT_FCONST,
	LIT_RCONST,
	LIT_STR,
	LAST_LITERAL = LIT_STR,
	NAME,
	EOF,
};

A lexical token class.

type restore_point[permalink] [source]

type restore_point = struct {
	off: io::off,
	state: lexer,
};

A restore point for a lexer.

type token[permalink] [source]

type token = (ltok, value, location);

A single lexical token.

When ltok is:

- a literal integer -> value is u64
- a literal float -> value is f64
- a literal rune -> value is rune
- a literal str -> value is str
- a name -> value is str
- a keyword, operator, symbol, or EOF -> value is void

type value[permalink] [source]

type value = (str | rune | u64 | f64 | void);

A token value, used for tokens such as '1337' (an integer).

type lexer[permalink] [source]

Show undocumented member

type lexer = struct {
	in: *bufio::scanner,
	path: str,
	loc: (uint, uint, io::off),
	prevrloc: (uint, uint, io::off),
	// ltok::EOF when no token was unlexed
	un: token,
	prevunlocs: [2]((uint, uint, io::off), (uint, uint, io::off)),
	flags: flag,
	comment: str,
	require_int: bool,
	annotations: []([]str, *annotatefn, nullable *opaque),
};

Errors

type error[permalink] [source]

type error = !(io::error | syntax);

All possible lexer errors.

type syntax[permalink] [source]

type syntax = !(location, str);

A syntax error.

Functions

fn comment[permalink] [source]

fn comment(lex: *lexer) str;

Returns, and resets, the current value of the comment buffer.

The empty string is returned if the buffer is unset or if flag::COMMENTS was not enabled for this lexer.

fn init[permalink] [source]

fn init(in: *bufio::scanner, path: str, flags: flag = flag::NONE) lexer;

Initializes a new lexer for the given bufio::scanner. The path is borrowed and must remain valid for the lifetime of the lexer.

fn lex[permalink] [source]

fn lex(lex: *lexer) (token | error);

Returns the next token from the lexer.

fn mkloc[permalink] [source]

fn mkloc(lex: *lexer) location;

Returns the lexer's current location.

fn prevloc[permalink] [source]

fn prevloc(lex: *lexer) location;

Returns the location of the previous rune.

fn register_annotation[permalink] [source]

fn register_annotation(lex: *lexer, id: []str, cb: *annotatefn, user: nullable *opaque) void;

Registers an annotation callback with this lexer for the given identifier. The id is borrowed for the lifetime of the lexer. Unregistered annotations are silently discarded by the lexer.

fn restore[permalink] [source]

fn restore(lex: *lexer, rp: *restore_point) (void | io::error);

Restores a lexer to a state previously recorded with save.

fn save[permalink] [source]

fn save(lex: *lexer) (restore_point | io::error);

Saves the state of a lexer, to be restored later with restore. The underlying I/O source must be seekable.

fn strerror[permalink] [source]

fn strerror(err: error) const str;

Returns a human-friendly string for a given error. The result is statically allocated.

fn syntaxerr[permalink] [source]

fn syntaxerr(loc: location, why: str) error;

Builds a lexer syntax error from a location and a reason string.

fn tokstr[permalink] [source]

fn tokstr(tok: token) const str;

Converts a token to its string representation.

fn unlex[permalink] [source]

fn unlex(lex: *lexer, tok: token) void;

Unlex a single token. The next call to lex will return this token. Only one unlex is supported at a time; you must call lex before calling unlex again.