regex+x86_64 +linux
The regex module provides an implementation of regular expressions which adheres closely to the POSIX Extended Regular Expressions (ERE) specification.
See https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04
This module refers to a regular expression "match" as a result. The POSIX match disambiguation rules are used; the longest of the leftmost matches is returned. This implementation computes matches in linear time.
Compiling an expression:
const re = regex::compile(`[Hh]a(rriet|ppy)`)!; defer regex::finish(&re);
Testing an expression against a string:
assert(regex::test(&re, "Harriet is happy"));
Finding a match for an expression in a string:
const result = regex::find(&re, "Harriet is happy"); defer regex::result_free(result); for (let i = 0z; i < len(result); i += 1) { fmt::printf("{} ", result[i].content)!; }; fmt::println()!; // -> Harriet rriet
Finding all matches for an expression in a string:
const results = regex::findall(&re, "Harriet is happy"); defer regex::result_freeall(results); for (let i = 0z; i < len(results); i += 1) { for (let j = 0z; j < len(results[i]); j += 1) { fmt::printf("{} ", results[i][j].content)!; }; fmt::println()!; }; // -> Harriet rriet; happy ppy
Replacing matches for an expression:
const re = regex::compile(`happy`)!; const result = regex::replace(&re, "Harriet is happy", `cute`)!; // -> Harriet is cute
Replacing with capture group references:
const re = regex::compile(`[a-z]+-([a-z]+)-[a-z]+`)!; const result = regex::replace(&re, "cat-dog-mouse; apple-pear-plum", `\1`)!; // -> dog; pear
Index
Types
type capture = struct { content: str, start: size, start_bytesize: size, end: size, end_bytesize: size, };
type result = []capture;
// Undocumented types:
type charset = [](charset_lit_item | charset_range_item | charset_class_item);
type charset_class_item = (str, *fn(c: rune) bool);
type charset_lit_item = rune;
type charset_range_item = (u32, u32);
type inst = (inst_lit | inst_any | inst_split | inst_jump | inst_skip | inst_match | inst_charset | inst_groupstart | inst_groupend | inst_repeat);
type inst_any = void;
type inst_charset = struct { idx: size, is_positive: bool, };
type inst_groupend = void;
type inst_groupstart = size;
type inst_jump = size;
type inst_lit = rune;
type inst_match = bool;
type inst_repeat = struct { id: size, origin: size, min: (void | size), max: (void | size), };
type inst_skip = void;
type inst_split = size;
type regex = struct { insts: []inst, charsets: []charset, n_reps: size, };
Errors
type error = !str;
Functions
fn compile(expr: str) (regex | error); fn find(re: *regex, string: str) result; fn findall(re: *regex, string: str) []result; fn finish(re: *regex) void; fn rawreplace(re: *regex, string: str, targetstr: str) str; fn rawreplacen(re: *regex, string: str, targetstr: str, n: size) str; fn replace(re: *regex, string: str, targetstr: str) (str | error); fn replacen(re: *regex, string: str, targetstr: str, n: size) (str | error); fn result_free(s: result) void; fn result_freeall(s: []result) void; fn strerror(err: error) str; fn test(re: *regex, string: str) bool;
Types
type capture[link]
type capture = struct { content: str, start: size, start_bytesize: size, end: size, end_bytesize: size, };
A (sub)match corresponding to a regular expression's capture group.
type result[link]
type result = []capture;
The resulting match of a regex applied to a string.
The first capture corresponds to the implicit zeroth capture group, i.e. the whole expression.
The rest of the captures correspond to the rest of the capture groups, i.e. the sub-expressions.
type charset[link]
Show undocumented member
type charset = [](charset_lit_item | charset_range_item | charset_class_item);
type charset_class_item[link]
Show undocumented member
type charset_class_item = (str, *fn(c: rune) bool);
type charset_lit_item[link]
Show undocumented member
type charset_lit_item = rune;
type charset_range_item[link]
Show undocumented member
type charset_range_item = (u32, u32);
type inst[link]
Show undocumented member
type inst = (inst_lit | inst_any | inst_split | inst_jump | inst_skip | inst_match | inst_charset | inst_groupstart | inst_groupend | inst_repeat);
type inst_any[link]
Show undocumented member
type inst_any = void;
type inst_charset[link]
Show undocumented member
type inst_charset = struct { idx: size, is_positive: bool, };
type inst_groupend[link]
Show undocumented member
type inst_groupend = void;
type inst_groupstart[link]
Show undocumented member
type inst_groupstart = size;
type inst_jump[link]
Show undocumented member
type inst_jump = size;
type inst_lit[link]
Show undocumented member
type inst_lit = rune;
type inst_match[link]
Show undocumented member
type inst_match = bool;
type inst_repeat[link]
Show undocumented member
type inst_repeat = struct { id: size, origin: size, min: (void | size), max: (void | size), };
type inst_skip[link]
Show undocumented member
type inst_skip = void;
type inst_split[link]
Show undocumented member
type inst_split = size;
type regex[link]
Show undocumented member
type regex = struct { insts: []inst, charsets: []charset, n_reps: size, };
Errors
type error[link]
type error = !str;
An error string describing a compilation error.
Functions
fn compile[link]
fn compile(expr: str) (regex | error);
Compiles a regular expression string into a regex.
fn find[link]
fn find(re: *regex, string: str) result;
Attempts to match a regex against a string and returns the longest leftmost match as a result. The caller must free the return value with result_free.
fn findall[link]
fn findall(re: *regex, string: str) []result;
Attempts to match a regex against a string and returns all non-overlapping matches as a slice of results. The caller must free the return value with result_freeall.
fn finish[link]
fn finish(re: *regex) void;
Frees resources associated with a regex.
fn rawreplace[link]
fn rawreplace(re: *regex, string: str, targetstr: str) str;
Replaces all non-overlapping matches of a regular expression against a string with 'targetstr'. 'targetstr' isn't interpreted in any special way; all backslashes are treated literally. The caller must free the return value.
fn rawreplacen[link]
fn rawreplacen(re: *regex, string: str, targetstr: str, n: size) str;
Replaces up to 'n' non-overlapping matches of a regular expression against a string with 'targetstr', in the same manner as rawreplace. The caller must free the return value.
fn replace[link]
fn replace(re: *regex, string: str, targetstr: str) (str | error);
Replaces all non-overlapping matches of a regular expression against a string with 'targetstr'.
A backslash followed by a single decimal digit within 'targetstr' is replaced by the capture at that index (capture groups are numbered starting at 1), or an empty string if no such capture exists. For example, `\1` is replaced with the first capture, `\2` with the second, etc. `\0` is substituted with the entire substring that was matched. `\\` is replaced with a literal backslash. The caller must free the return value.
An error is only returned if 'targetstr' isn't formatted correctly.
fn replacen[link]
fn replacen(re: *regex, string: str, targetstr: str, n: size) (str | error);
Replaces up to 'n' non-overlapping matches of a regular expression against a string with 'targetstr', in the same manner as replace. The caller must free the return value.
fn result_free[link]
fn result_free(s: result) void;
Frees a result.
fn result_freeall[link]
fn result_freeall(s: []result) void;
Frees a slice of results.
fn strerror[link]
fn strerror(err: error) str;
Converts an error into a user-friendly string.
fn test[link]
fn test(re: *regex, string: str) bool;
Returns whether or not a regex matches any part of a given string.