regex — Hare documentation

regex+x86_64 +linux

The regex module provides an implementation of regular expressions which adheres closely to the POSIX Extended Regular Expressions (ERE) specification.

See https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04

This module refers to a regular expression "match" as a result. The POSIX match disambiguation rules are used; the longest of the leftmost matches is returned. This implementation computes matches in linear time.

Compiling an expression:

const re = regex::compile(`[Hh]a(rriet|ppy)`)!;
defer regex::finish(&re);

Testing an expression against a string:

assert(regex::test(&re, "Harriet is happy"));

Finding a match for an expression in a string:

const result = regex::find(&re, "Harriet is happy");
defer regex::result_free(result);
for (let i = 0z; i < len(result); i += 1) {
	fmt::printf("{} ", result[i].content)!;
};
fmt::println()!;
// -> Harriet rriet

Finding all matches for an expression in a string:

const results = regex::findall(&re, "Harriet is happy");
defer regex::result_freeall(results);
for (let i = 0z; i < len(results); i += 1) {
	for (let j = 0z; j < len(results[i]); j += 1) {
		fmt::printf("{} ", results[i][j].content)!;
	};
	fmt::println()!;
};
// -> Harriet rriet; happy ppy

Replacing matches for an expression:

const re = regex::compile(`happy`)!;
const result = regex::replace(&re, "Harriet is happy", `cute`)!;
// -> Harriet is cute

Replacing with capture group references:

const re = regex::compile(`[a-z]+-([a-z]+)-[a-z]+`)!;
const result = regex::replace(&re, "cat-dog-mouse; apple-pear-plum",
	`\1`)!;
// -> dog; pear

Index

Types

type capture = struct {
	content: str,
	start: size,
	start_bytesize: size,
	end: size,
	end_bytesize: size,
};
type result = []capture;

// Undocumented types:
type charclass = enum {
	ALNUM,
	ALPHA,
	BLANK,
	CNTRL,
	DIGIT,
	GRAPH,
	LOWER,
	PRINT,
	PUNCT,
	SPACE,
	UPPER,
	XDIGIT,
};
type charset = [](charset_lit_item | charset_range_item | charset_class_item);
type charset_class_item = *fn(c: rune) bool;
type charset_lit_item = rune;
type charset_range_item = (u32, u32);
type inst = (inst_lit | inst_any | inst_split | inst_jump | inst_skip | inst_match | inst_charset | inst_groupstart | inst_groupend | inst_repeat);
type inst_any = void;
type inst_charset = struct {
	idx: size,
	is_positive: bool,
};
type inst_groupend = void;
type inst_groupstart = void;
type inst_jump = size;
type inst_lit = rune;
type inst_match = bool;
type inst_repeat = struct {
	id: size,
	origin: size,
	min: (void | size),
	max: (void | size),
};
type inst_skip = void;
type inst_split = size;
type regex = struct {
	insts: []inst,
	charsets: []charset,
	n_reps: size,
};

Errors

type error = !str;

Functions

fn compile(expr: str) (regex | error);
fn find(re: *regex, string: str) result;
fn findall(re: *regex, string: str) []result;
fn finish(re: *regex) void;
fn rawreplace(re: *regex, string: str, targetstr: str) str;
fn rawreplacen(re: *regex, string: str, targetstr: str, n: size) str;
fn replace(re: *regex, string: str, targetstr: str) (str | error);
fn replacen(re: *regex, string: str, targetstr: str, n: size) (str | error);
fn result_free(s: result) void;
fn result_freeall(s: []result) void;
fn strerror(err: error) str;
fn test(re: *regex, string: str) bool;

Types

type capture[link]

type capture = struct {
	content: str,
	start: size,
	start_bytesize: size,
	end: size,
	end_bytesize: size,
};

A (sub)match corresponding to a regular expression's capture group.

type result[link]

type result = []capture;

The resulting match of a regex applied to a string.

The first capture corresponds to the implicit zeroth capture group, i.e. the whole expression.

The rest of the captures correspond to the rest of the capture groups, i.e. the sub-expressions.

type charclass[link]

Show undocumented member

type charclass = enum {
	ALNUM,
	ALPHA,
	BLANK,
	CNTRL,
	DIGIT,
	GRAPH,
	LOWER,
	PRINT,
	PUNCT,
	SPACE,
	UPPER,
	XDIGIT,
};

type charset[link]

Show undocumented member

type charset = [](charset_lit_item | charset_range_item | charset_class_item);

type charset_class_item[link]

Show undocumented member

type charset_class_item = *fn(c: rune) bool;

type charset_lit_item[link]

Show undocumented member

type charset_lit_item = rune;

type charset_range_item[link]

Show undocumented member

type charset_range_item = (u32, u32);

type inst[link]

Show undocumented member

type inst = (inst_lit | inst_any | inst_split | inst_jump | inst_skip | inst_match | inst_charset | inst_groupstart | inst_groupend | inst_repeat);

type inst_any[link]

Show undocumented member

type inst_any = void;

type inst_charset[link]

Show undocumented member

type inst_charset = struct {
	idx: size,
	is_positive: bool,
};

type inst_groupend[link]

Show undocumented member

type inst_groupend = void;

type inst_groupstart[link]

Show undocumented member

type inst_groupstart = void;

type inst_jump[link]

Show undocumented member

type inst_jump = size;

type inst_lit[link]

Show undocumented member

type inst_lit = rune;

type inst_match[link]

Show undocumented member

type inst_match = bool;

type inst_repeat[link]

Show undocumented member

type inst_repeat = struct {
	id: size,
	origin: size,
	min: (void | size),
	max: (void | size),
};

type inst_skip[link]

Show undocumented member

type inst_skip = void;

type inst_split[link]

Show undocumented member

type inst_split = size;

type regex[link]

Show undocumented member

type regex = struct {
	insts: []inst,
	charsets: []charset,
	n_reps: size,
};

Errors

type error[link]

type error = !str;

An error string describing a compilation error.

Functions

fn compile[link]

fn compile(expr: str) (regex | error);

Compiles a regular expression string into a regex.

fn find[link]

fn find(re: *regex, string: str) result;

Attempts to match a regex against a string and returns the longest leftmost match as a result. The caller must free the return value with result_free.

fn findall[link]

fn findall(re: *regex, string: str) []result;

Attempts to match a regex against a string and returns all non-overlapping matches as a slice of results. The caller must free the return value with result_freeall.

fn finish[link]

fn finish(re: *regex) void;

Frees resources associated with a regex.

fn rawreplace[link]

fn rawreplace(re: *regex, string: str, targetstr: str) str;

Replaces all non-overlapping matches of a regular expression against a string with 'targetstr'. 'targetstr' isn't interpreted in any special way; all backslashes are treated literally. The caller must free the return value.

fn rawreplacen[link]

fn rawreplacen(re: *regex, string: str, targetstr: str, n: size) str;

Replaces up to 'n' non-overlapping matches of a regular expression against a string with 'targetstr', in the same manner as rawreplace. The caller must free the return value.

fn replace[link]

fn replace(re: *regex, string: str, targetstr: str) (str | error);

Replaces all non-overlapping matches of a regular expression against a string with 'targetstr'.

A backslash followed by a single decimal number within 'targetstr' is replaced by the capture at that index (starting at 1), or an empty string if no such capture exists. For example, `\1` is replaced with the first capture, `\2` with the second, etc. `\0` is substituted with the entire substring that was matched. `\\` is replaced with a literal backslash. The caller must free the return value.

An error is only returned if 'targetstr' isn't formatted correctly.

fn replacen[link]

fn replacen(re: *regex, string: str, targetstr: str, n: size) (str | error);

Replaces up to 'n' non-overlapping matches of a regular expression against a string with 'targetstr', in the same manner as replace. The caller must free the return value.

fn result_free[link]

fn result_free(s: result) void;

Frees a result.

fn result_freeall[link]

fn result_freeall(s: []result) void;

Frees a slice of results.

fn strerror[link]

fn strerror(err: error) str;

Converts an error into a user-friendly string.

fn test[link]

fn test(re: *regex, string: str) bool;

Returns whether or not a regex matches any part of a given string.