regex
The regex module provides an implementation of regular expressions which adheres closely to the POSIX Extended Regular Expressions (ERE) specification.
See https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04
This module refers to a regular expression "match" as a result. The POSIX match disambiguation rules are used; the longest of the leftmost matches is returned. This implementation computes matches in linear time.
Compiling an expression:
const re = regex::compile(`[Hh]a(rriet|ppy)`)!;
defer regex::finish(&re);
Testing an expression against a string:
assert(regex::test(&re, "Harriet is happy"));
Finding a match for an expression in a string:
const result = regex::find(&re, "Harriet is happy");
defer regex::result_free(result);
for (let i = 0z; i < len(result); i += 1) {
fmt::printf("{} ", result[i].content)!;
};
fmt::println()!;
// -> Harriet rriet
Finding all matches for an expression in a string:
const results = regex::findall(&re, "Harriet is happy");
defer regex::result_freeall(results);
for (let i = 0z; i < len(results); i += 1) {
for (let j = 0z; j < len(results[i]); j += 1) {
fmt::printf("{} ", results[i][j].content)!;
};
fmt::println()!;
};
// -> Harriet rriet; happy ppy
Replacing matches for an expression:
const re = regex::compile(`happy`)!;
const result = regex::replace(&re, "Harriet is happy", `cute`)!;
// -> Harriet is cute
Replacing with capture group references:
const re = regex::compile(`[a-z]+-([a-z]+)-[a-z]+`)!;
const result = regex::replace(&re, "cat-dog-mouse; apple-pear-plum",
`\1`)!;
// -> dog; pear
Index
Types
type capture = struct {
content: str,
start: size,
start_bytesize: size,
end: size,
end_bytesize: size,
};
type result = []capture;
type charclass = enum {
ALNUM,
ALPHA,
BLANK,
CNTRL,
DIGIT,
GRAPH,
LOWER,
PRINT,
PUNCT,
SPACE,
UPPER,
XDIGIT,
};
type charset = [](charset_lit_item | charset_range_item | charset_class_item);
type charset_class_item = *fn(c: rune) bool;
type charset_lit_item = rune;
type charset_range_item = (u32, u32);
type inst = (inst_lit | inst_any | inst_split | inst_jump | inst_skip | inst_match | inst_charset | inst_groupstart | inst_groupend | inst_repeat);
type inst_any = void;
type inst_charset = struct {
idx: size,
is_positive: bool,
};
type inst_groupend = void;
type inst_groupstart = void;
type inst_jump = size;
type inst_lit = rune;
type inst_match = bool;
type inst_repeat = struct {
id: size,
origin: size,
min: (void | size),
max: (void | size),
};
type inst_skip = void;
type inst_split = size;
type regex = struct {
insts: []inst,
charsets: []charset,
n_reps: size,
};
Errors
type error = !str;
Functions
fn compile(expr: str) (regex | error);
fn find(re: *regex, string: str) result;
fn findall(re: *regex, string: str) []result;
fn finish(re: *regex) void;
fn rawreplace(re: *regex, string: str, targetstr: str) str;
fn rawreplacen(re: *regex, string: str, targetstr: str, n: size) str;
fn replace(re: *regex, string: str, targetstr: str) (str | error);
fn replacen(re: *regex, string: str, targetstr: str, n: size) (str | error);
fn result_free(s: result) void;
fn result_freeall(s: []result) void;
fn strerror(err: error) str;
fn test(re: *regex, string: str) bool;
Types
type capture
type capture = struct {
content: str,
start: size,
start_bytesize: size,
end: size,
end_bytesize: size,
};
A (sub)match corresponding to a regular expression's capture group.
type result
type result = []capture;
The resulting match of a regex applied to a string.
The first capture corresponds to the implicit zeroth capture group, i.e. the whole expression.
The rest of the captures correspond to the rest of the capture groups, i.e. the sub-expressions.
type charclass
Show undocumented member
type charclass = enum {
ALNUM,
ALPHA,
BLANK,
CNTRL,
DIGIT,
GRAPH,
LOWER,
PRINT,
PUNCT,
SPACE,
UPPER,
XDIGIT,
};
type charset
Show undocumented member
type charset = [](charset_lit_item | charset_range_item | charset_class_item);
type charset_class_item
Show undocumented member
type charset_class_item = *fn(c: rune) bool;
type charset_lit_item
Show undocumented member
type charset_lit_item = rune;
type charset_range_item
Show undocumented member
type charset_range_item = (u32, u32);
type inst
Show undocumented member
type inst = (inst_lit | inst_any | inst_split | inst_jump | inst_skip | inst_match | inst_charset | inst_groupstart | inst_groupend | inst_repeat);
type inst_any
Show undocumented member
type inst_any = void;
type inst_charset
Show undocumented member
type inst_charset = struct {
idx: size,
is_positive: bool,
};
type inst_groupend
Show undocumented member
type inst_groupend = void;
type inst_groupstart
Show undocumented member
type inst_groupstart = void;
type inst_jump
Show undocumented member
type inst_jump = size;
type inst_lit
Show undocumented member
type inst_lit = rune;
type inst_match
Show undocumented member
type inst_match = bool;
type inst_repeat
Show undocumented member
type inst_repeat = struct {
id: size,
origin: size,
min: (void | size),
max: (void | size),
};
type inst_skip
Show undocumented member
type inst_skip = void;
type inst_split
Show undocumented member
type inst_split = size;
type regex
Show undocumented member
type regex = struct {
insts: []inst,
charsets: []charset,
n_reps: size,
};
Errors
type error
type error = !str;
An error string describing a compilation error.
Functions
fn compile
fn compile(expr: str) (regex | error);
Compiles a regular expression string into a regex.
fn find
fn find(re: *regex, string: str) result;
Attempts to match a regex against a string and returns the longest leftmost match as a result. The caller must free the return value with result_free.
fn findall
fn findall(re: *regex, string: str) []result;
Attempts to match a regex against a string and returns all non-overlapping matches as a slice of results. The caller must free the return value with result_freeall.
fn finish
fn finish(re: *regex) void;
Frees resources associated with a regex.
fn rawreplace
fn rawreplace(re: *regex, string: str, targetstr: str) str;
Replaces all non-overlapping matches of a regular expression against a string with 'targetstr'. 'targetstr' isn't interpreted in any special way; all backslashes are treated literally. The caller must free the return value.
fn rawreplacen
fn rawreplacen(re: *regex, string: str, targetstr: str, n: size) str;
Replaces up to 'n' non-overlapping matches of a regular expression against a string with 'targetstr', in the same manner as rawreplace. The caller must free the return value.
fn replace
fn replace(re: *regex, string: str, targetstr: str) (str | error);
Replaces all non-overlapping matches of a regular expression against a string with 'targetstr'.
A backslash followed by a single decimal digit within 'targetstr' is replaced by the capture at that index (capture groups are numbered starting at 1), or an empty string if no such capture exists. For example, `\1` is replaced with the first capture, `\2` with the second, etc. `\0` is substituted with the entire substring that was matched. `\\` is replaced with a literal backslash. The caller must free the return value.
An error is only returned if 'targetstr' isn't formatted correctly.
fn replacen
fn replacen(re: *regex, string: str, targetstr: str, n: size) (str | error);
Replaces up to 'n' non-overlapping matches of a regular expression against a string with 'targetstr', in the same manner as replace. The caller must free the return value.
fn result_free
fn result_free(s: result) void;
Frees a result.
fn result_freeall
fn result_freeall(s: []result) void;
Frees a slice of results.
fn strerror
fn strerror(err: error) str;
Converts an error into a user-friendly string.
fn test
fn test(re: *regex, string: str) bool;
Returns whether or not a regex matches any part of a given string.