strings
The strings module provides support functions for working with Hare strings.
Submodules
Index
Types
type end = void;
type iterator = struct {
dec: utf8::decoder,
reverse: bool,
};
type tokenizer = bytes::tokenizer;
Functions
fn byteindex(haystack: str, needle: (str | rune)) (size | void);
fn compare(a: str, b: str) int;
fn concat(strs: str...) str;
fn contains(haystack: str, needles: (str | rune)...) bool;
fn cut(in: str, delim: str) (str, str);
fn dup(s: const str) str;
fn dupall(strs: []str) []str;
fn freeall(s: []str) void;
fn fromrunes(runes: []rune) str;
fn fromutf8(in: []u8) (str | utf8::invalid);
fn fromutf8_unsafe(in: []u8) str;
fn hasprefix(in: str, prefix: (str | rune)) bool;
fn hassuffix(in: str, suff: (str | rune)) bool;
fn index(haystack: str, needle: (str | rune)) (size | void);
fn iter(src: str) iterator;
fn iterstr(iter: *iterator) str;
fn join(delim: str, strs: str...) str;
fn lpad(s: str, p: rune, maxlen: size) str;
fn ltrim(input: str, trim: rune...) str;
fn multireplace(s: str, repls: (str, str)...) str;
fn next(iter: *iterator) (rune | done);
fn next_token(s: *tokenizer) (str | done);
fn peek_token(s: *tokenizer) (str | done);
fn prev(iter: *iterator) (rune | done);
fn rbyteindex(haystack: str, needle: (str | rune)) (size | void);
fn rcut(in: str, delim: str) (str, str);
fn remaining_tokens(s: *tokenizer) str;
fn replace(s: str, needle: str, target: str) str;
fn rindex(haystack: str, needle: (str | rune)) (size | void);
fn riter(src: str) iterator;
fn rpad(s: str, p: rune, maxlen: size) str;
fn rsplitn(in: str, delim: str, n: size) []str;
fn rtokenize(s: str, delim: str) tokenizer;
fn rtrim(input: str, trim: rune...) str;
fn slice(begin: *iterator, end: *iterator) str;
fn split(in: str, delim: str) []str;
fn splitn(in: str, delim: str, n: size) []str;
fn sub(s: str, start: size, end: (size | end) = end) str;
fn tokenize(s: str, delim: str) tokenizer;
fn torunes(s: str) []rune;
fn toutf8(in: str) []u8;
fn trim(input: str, exclude: rune...) str;
fn trimprefix(input: str, trim: str) str;
fn trimsuffix(input: str, trim: str) str;
Types
type end
Show undocumented member
type end = void;
type iterator
Show undocumented member
type iterator = struct {
dec: utf8::decoder,
reverse: bool,
};
type tokenizer
Show undocumented member
type tokenizer = bytes::tokenizer;
Functions
fn byteindex
fn byteindex(haystack: str, needle: (str | rune)) (size | void);
Returns the byte-wise index of the first occurrence of 'needle' in the 'haystack', or void if not present.
fn compare
fn compare(a: str, b: str) int;
Compares two strings by their Unicode codepoint sort order. Zero is returned if the strings are equal, a negative value if a is less than b, or a positive value if a is greater than b.
If you only want to check two strings for equality, then this function isn't necessary; you can compare the strings directly with ==.
fn concat
fn concat(strs: str...) str;
Concatenates multiple strings. The caller must free the return value.
fn contains
fn contains(haystack: str, needles: (str | rune)...) bool;
Returns true if a string contains a rune or a sub-string, multiple of which can be given.
fn cut
fn cut(in: str, delim: str) (str, str);
Returns a string "cut" along the first instance of a delimiter, returning everything up to the delimiter, and everything after the delimiter, in a tuple.
strings::cut("hello=world=foobar", "=") // ("hello", "world=foobar")
strings::cut("hello world", "=") // ("hello world", "")
The return value is borrowed from the 'in' parameter. The caller must ensure that 'delim' is not an empty string.
fn dup
fn dup(s: const str) str;
Duplicates a string. Aborts on allocation failure.
fn dupall
fn dupall(strs: []str) []str;
Creates a copy of a []str slice with all the strings duplicated. The result must be freed using freeall.
fn freeall
fn freeall(s: []str) void;
Frees all the strings in a slice and the slice itself. Inverse of dupall.
fn fromrunes
fn fromrunes(runes: []rune) str;
Returns a string from a slice of runes. The caller must free the return value.
fn fromutf8
fn fromutf8(in: []u8) (str | utf8::invalid);
Converts a byte slice into a string. The return value is borrowed from the input. If the slice contains invalid UTF-8 sequences, encoding::utf8::invalid is returned instead.
fn fromutf8_unsafe
fn fromutf8_unsafe(in: []u8) str;
Converts a byte slice into a string, but does not test if it is valid UTF-8. This is faster than the safe equivalent, but if the string is not valid UTF-8 it may cause undefined behavior. The return value is borrowed from the input.
fn hasprefix
fn hasprefix(in: str, prefix: (str | rune)) bool;
Returns true if 'in' has the given prefix.
fn hassuffix
fn hassuffix(in: str, suff: (str | rune)) bool;
Returns true if 'in' has the given suffix.
fn index
fn index(haystack: str, needle: (str | rune)) (size | void);
Returns the index of the first occurrence of 'needle' in the 'haystack', or void if not present. The index returned is the rune-wise index, not the byte-wise index.
fn iter
fn iter(src: str) iterator;
Initializes a string iterator, starting at the beginning of the string. You may copy the iterator to save its state.
let iter = strings::iter("hi!");
strings::next(&iter); // 'h'
strings::next(&iter); // 'i'
// Copying the iterator copies its state:
let dup = iter;
strings::next(&iter); // '!'
strings::next(&iter); // done
strings::next(&dup); // '!'
strings::next(&dup); // done
fn iterstr
fn iterstr(iter: *iterator) str;
Returns the substring spanning from the next rune to the end of the string if the iterator was initialized with iter, or from the next rune to the beginning of the string if it was initialized with riter.
fn join
fn join(delim: str, strs: str...) str;
Joins several strings together by placing a delimiter between them. The caller must free the return value.
fn lpad
fn lpad(s: str, p: rune, maxlen: size) str;
Pads the start of a string 's' with rune 'p' until the string reaches length 'maxlen'. The caller must free the return value.
fn ltrim
fn ltrim(input: str, trim: rune...) str;
Returns a string (borrowed from given input string) after trimming off of the start of the input string the characters in the given list of runes. If no runes are given, returns the string with leading whitespace stripped off.
fn multireplace
fn multireplace(s: str, repls: (str, str)...) str;
Performs a replacement in 's' of each tuple given by 'repls'. Replacement occurs in a single pass of 's', and works like in replace, except that replacement pairs found earlier in 'repls' will take precedence over later ones. For example:
assert(multireplace("hello there", ("e", "a"), ("a", "x"), ("ell", "eww")) == "hallo thara");
assert(multireplace("hello there", ("ell", "eww"), ("e", "a")) == "hewwo thara");
The caller must free the return value.
fn next
fn next(iter: *iterator) (rune | done);
Get the next rune from an iterator, or done if there are none left.
Be aware that a rune is not the minimum lexicographical unit of language in Unicode strings. If you use these runes to construct a new string, reordering, editing, or omitting any of the runes without careful discretion may cause linguistic errors to arise. To avoid this, you may need to use a third-party Unicode module instead.
fn next_token
fn next_token(s: *tokenizer) (str | done);
Returns the next token from a tokenizer and advances the cursor.
fn peek_token
fn peek_token(s: *tokenizer) (str | done);
Returns the next token from a tokenizer without advancing the cursor.
fn prev
fn prev(iter: *iterator) (rune | done);
Get the previous rune from an iterator, or done when at the start of the string.
fn rbyteindex
fn rbyteindex(haystack: str, needle: (str | rune)) (size | void);
Returns the byte-wise index of the last occurrence of 'needle' in the 'haystack', or void if not present.
fn rcut
fn rcut(in: str, delim: str) (str, str);
Returns a string "cut" along the last instance of a delimiter, returning everything up to the delimiter, and everything after the delimiter, in a tuple.
strings::rcut("hello=world=foobar", "=") // ("hello=world", "foobar")
strings::rcut("hello world", "=") // ("hello world", "")
The return value is borrowed from the 'in' parameter. The caller must ensure that 'delim' is not an empty string.
fn remaining_tokens
fn remaining_tokens(s: *tokenizer) str;
Returns the remainder of the input string from a tokenizer ahead of the token cursor.
fn replace
fn replace(s: str, needle: str, target: str) str;
Returns a new string duplicated from 's', but with all instances of 'needle' replaced with 'target'. The caller must free the return value.
fn rindex
fn rindex(haystack: str, needle: (str | rune)) (size | void);
Returns the index of the last occurrence of 'needle' in the 'haystack', or void if not present. The index returned is the rune-wise index, not the byte-wise index.
fn riter
fn riter(src: str) iterator;
Initializes a string iterator, starting at the end of the string and moving backwards with each call to next.
fn rpad
fn rpad(s: str, p: rune, maxlen: size) str;
Pads the end of a string 's' with rune 'p' until the string reaches length 'maxlen'. The caller must free the return value.
fn rsplitn
fn rsplitn(in: str, delim: str, n: size) []str;
Splits a string into tokens delimited by 'delim', starting at the end of the string, and returning a slice of up to N tokens. The caller must free this slice. The strings within the slice are borrowed from 'in'.
The caller must ensure that 'delim' is not an empty string.
fn rtokenize
fn rtokenize(s: str, delim: str) tokenizer;
Like tokenize, but tokenizes the string in reverse, such that the first call to next_token returns the last token and the last call returns the first token.
fn rtrim
fn rtrim(input: str, trim: rune...) str;
Returns a string (borrowed from given input string) after trimming off of the end of the input string the characters in the given list of runes. If no runes are given, returns the string with trailing whitespace stripped off.
fn slice
fn slice(begin: *iterator, end: *iterator) str;
Return a substring from the position of the first iterator to the position of the second iterator. The iterators must originate from the same string and the position of the second iterator must not be before the position of the first one.
fn split
fn split(in: str, delim: str) []str;
Splits a string into tokens delimited by 'delim'. The caller must free the returned slice. The strings within the slice are borrowed from 'in'.
The caller must ensure that 'delim' is not an empty string.
fn splitn
fn splitn(in: str, delim: str, n: size) []str;
Splits a string into tokens delimited by 'delim', starting at the beginning of the string, and returning a slice of up to N tokens. The caller must free this slice. The strings within the slice are borrowed from 'in'.
The caller must ensure that 'delim' is not an empty string.
fn sub
fn sub(s: str, start: size, end: (size | end) = end) str;
Returns a substring in the range [start, end - 1], where each argument is the index of the Nth rune. If the end argument is given as end, the end of the substring is the end of the original string. The lifetime of the substring is the same as that of the original string.
Note that substringing runewise is not always the correct thing to do, and it may cause unexpected linguistic errors to arise. You may want to use a third-party Unicode module instead.
fn tokenize
fn tokenize(s: str, delim: str) tokenizer;
Tokenizes a string, returning an iterator that yields substrings separated by one or more delimiters, such that the string will be split along any of the characters found in "delim". If the string begins with or ends with a delimiter, an empty string is returned respectively as the first and last call to next_token.
Each character of the delimiter string must be an ASCII character (see ascii::valid).
The input string and delimiter string are borrowed from the caller for the lifetime of the tokenizer.
The caller must ensure that at least one delimiter is provided and that the length of the input string is less than types::I64_MAX.
const tok = strings::tokenize("Hello world!\tMy name is Harriet.", " \t");
assert(next_token(&tok) as str == "Hello");
assert(next_token(&tok) as str == "world!");
assert(next_token(&tok) as str == "My");
assert(next_token(&tok) as str == "name");
assert(next_token(&tok) as str == "is");
assert(next_token(&tok) as str == "Harriet.");
assert(next_token(&tok) is done);
fn torunes
fn torunes(s: str) []rune;
Returns a slice of runes for a string in O(n). The caller must free the return value.
fn toutf8
fn toutf8(in: str) []u8;
Converts a string to a UTF-8 byte slice. The return value is borrowed from the input.
fn trim
fn trim(input: str, exclude: rune...) str;
Returns a string (borrowed from given input string) after trimming off of both ends of the input string the characters in the given list of runes. If no runes are given, returns the string with both leading and trailing whitespace stripped off.
fn trimprefix
fn trimprefix(input: str, trim: str) str;
Returns a string (borrowed from given input string) after trimming off the given prefix. If the input string doesn't have the given prefix, it is returned unmodified.
fn trimsuffix
fn trimsuffix(input: str, trim: str) str;
Returns a string (borrowed from given input string) after trimming off the given suffix. If the input string doesn't have the given suffix, it is returned unmodified.