strings — Hare documentation

strings+x86_64 +linux

strings: basic analysis and manipulation of strings

Submodules

template

Index

Types

type end = void;

// Undocumented types:
type iterator = struct {
	dec: utf8::decoder,
	reverse: bool,
};
type tokenizer = bytes::tokenizer;

Functions

fn byteindex(haystack: str, needle: (str | rune)) (size | void);
fn bytesub(s: str, start: size, end: (size | end) = end) (str | utf8::invalid);
fn compare(a: str, b: str) int;
fn concat(strs: str...) (str | nomem);
fn contains(haystack: str, needles: (str | rune)...) bool;
fn cut(in: str, delim: str) (str, str);
fn dup(s: const str) (str | nomem);
fn dupall(strs: []str) ([]str | nomem);
fn freeall(s: []str) void;
fn fromrunes(runes: []rune) (str | nomem);
fn fromutf8(in: []u8) (str | utf8::invalid);
fn fromutf8_unsafe(in: []u8) str;
fn hasprefix(in: str, prefix: (str | rune)) bool;
fn hassuffix(in: str, suff: (str | rune)) bool;
fn index(haystack: str, needle: (str | rune)) (size | void);
fn iter(src: str) iterator;
fn iterstr(iter: *iterator) str;
fn join(delim: str, strs: str...) (str | nomem);
fn lpad(s: str, p: rune, maxlen: size) (str | nomem);
fn ltrim(input: str, trim: rune...) str;
fn multireplace(s: str, repls: (str, str)...) (str | nomem);
fn next(iter: *iterator) (rune | done);
fn next_token(s: *tokenizer) (str | done);
fn peek_token(s: *tokenizer) (str | done);
fn position(iter: *iterator) size;
fn prev(iter: *iterator) (rune | done);
fn rbyteindex(haystack: str, needle: (str | rune)) (size | void);
fn rcut(in: str, delim: str) (str, str);
fn remaining_tokens(s: *tokenizer) str;
fn replace(s: str, needle: str, target: str) (str | nomem);
fn rindex(haystack: str, needle: (str | rune)) (size | void);
fn riter(src: str) iterator;
fn rpad(s: str, p: rune, maxlen: size) (str | nomem);
fn rsplitn(in: str, delim: str, n: size) ([]str | nomem);
fn rtokenize(s: str, delim: str) tokenizer;
fn rtrim(input: str, trim: rune...) str;
fn slice(begin: *iterator, end: *iterator) str;
fn split(in: str, delim: str) ([]str | nomem);
fn splitn(in: str, delim: str, n: size) ([]str | nomem);
fn sub(s: str, start: size, end: (size | end) = end) str;
fn tokenize(s: str, delim: str) tokenizer;
fn torunes(s: str) ([]rune | nomem);
fn toutf8(in: str) []u8;
fn trim(input: str, exclude: rune...) str;
fn trimprefix(input: str, trim: str) str;
fn trimsuffix(input: str, trim: str) str;

Types

type end[permalink] [source]

type end = void;

Passed to strings::sub to make the returned substring span to the end of the original string.

type iterator[permalink] [source]

Show undocumented member

type iterator = struct {
	dec: utf8::decoder,
	reverse: bool,
};

type tokenizer[permalink] [source]

Show undocumented member

type tokenizer = bytes::tokenizer;

Functions

fn byteindex[permalink] [source]

fn byteindex(haystack: str, needle: (str | rune)) (size | void);

Returns the byte-wise index of the first occurrence of 'needle' in the 'haystack', or void if not present.

fn bytesub[permalink] [source]

fn bytesub(s: str, start: size, end: (size | end) = end) (str | utf8::invalid);

Returns a substring in the range [start, end - 1], where each argument is the index of the rune at the byte-wise index N. encoding::utf8::invalid is returned if it would cut through the middle of a codepoint. An end argument of end is the same as len(s). The lifetime of the substring is the same as that of the original string.

fn compare[permalink] [source]

fn compare(a: str, b: str) int;

Compares two strings by their Unicode codepoint sort order. Zero is returned if the strings are equal, a negative value if a is less than b, or a positive value if a is greater than b.

If you only want to check two strings for equality, then this function isn't necessary; you can compare the strings directly with ==.

fn concat[permalink] [source]

fn concat(strs: str...) (str | nomem);

Concatenates multiple strings. The caller must free the return value.

fn contains[permalink] [source]

fn contains(haystack: str, needles: (str | rune)...) bool;

Returns true if a string contains a rune or a sub-string, multiple of which can be given.

fn cut[permalink] [source]

fn cut(in: str, delim: str) (str, str);

Returns a string "cut" along the first instance of a delimiter, returning everything up to the delimiter, and everything after the delimiter, in a tuple.

strings::cut("hello=world=foobar", "=")	// ("hello", "world=foobar")
strings::cut("hello world", "=")	// ("hello world", "")

The return value is borrowed from the 'in' parameter. The caller must ensure that 'delim' is not an empty string.

fn dup[permalink] [source]

fn dup(s: const str) (str | nomem);

Duplicates a string. The result must be freed after use.

fn dupall[permalink] [source]

fn dupall(strs: []str) ([]str | nomem);

Creates a copy of a []str slice with all the strings duplicated. The result must be freed using freeall.

fn freeall[permalink] [source]

fn freeall(s: []str) void;

Frees all the strings in a slice and the slice itself. Inverse of dupall.

fn fromrunes[permalink] [source]

fn fromrunes(runes: []rune) (str | nomem);

Returns a string from a slice of runes. The caller must free the return value.

fn fromutf8[permalink] [source]

fn fromutf8(in: []u8) (str | utf8::invalid);

Converts a byte slice into a string. The return value is borrowed from the input. If the slice contains invalid UTF-8 sequences, encoding::utf8::invalid is returned instead.

fn fromutf8_unsafe[permalink] [source]

fn fromutf8_unsafe(in: []u8) str;

Converts a byte slice into a string, but does not test if it is valid UTF-8. This is faster than the safe equivalent, but if the string is not valid UTF-8 it may cause undefined behavior. The return value is borrowed from the input.

fn hasprefix[permalink] [source]

fn hasprefix(in: str, prefix: (str | rune)) bool;

Returns true if 'in' has the given prefix.

fn hassuffix[permalink] [source]

fn hassuffix(in: str, suff: (str | rune)) bool;

Returns true if 'in' has the given suffix.

fn index[permalink] [source]

fn index(haystack: str, needle: (str | rune)) (size | void);

Returns the index of the first occurrence of 'needle' in the 'haystack', or void if not present. The index returned is the rune-wise index, not the byte-wise index.

fn iter[permalink] [source]

fn iter(src: str) iterator;

Initializes a string iterator, starting at the beginning of the string. You may copy the iterator to save its state.

let iter = strings::iter("hi!");
strings::next(&iter);	// 'h'
strings::next(&iter);	// 'i'

// Copying the iterator copies its state:
let dup = iter;
strings::next(&iter);	// '!'
strings::next(&iter);	// done
strings::next(&dup);	// '!'
strings::next(&dup);	// done

fn iterstr[permalink] [source]

fn iterstr(iter: *iterator) str;

Returns a substring from the next rune to the end of the string if initialized with iter, or the beginning of the string if initialized with riter.

fn join[permalink] [source]

fn join(delim: str, strs: str...) (str | nomem);

Joins several strings together by placing a delimiter between them. The caller must free the return value.

fn lpad[permalink] [source]

fn lpad(s: str, p: rune, maxlen: size) (str | nomem);

Pads the start of a string 's' with rune 'p' until the string reaches length 'maxlen'. The caller must free the return value.

fn ltrim[permalink] [source]

fn ltrim(input: str, trim: rune...) str;

Returns a string (borrowed from given input string) after trimming off of the start of the input string the characters in the given list of runes. If no runes are given, returns the string with leading whitespace stripped off.

fn multireplace[permalink] [source]

fn multireplace(s: str, repls: (str, str)...) (str | nomem);

Performs a replacement in 's' of each tuple given by 'repls'. Replacement occurs in a single pass of 's', and works like in replace, except that replacement pairs found earlier in 'repls' will take precedence over later ones. For example:

assert(multireplace("hello there", ("e", "a"), ("a", "x"), ("ell", "eww")) == "hallo thara");
assert(multireplace("hello there", ("ell", "eww"), ("e", "a")) == "hewwo thara");

The caller must free the return value.

fn next[permalink] [source]

fn next(iter: *iterator) (rune | done);

Gets the next rune from an iterator, or done if there are none left.

Be aware that a rune is not the minimum lexicographical unit of language in Unicode strings. If you use these runes to construct a new string, reordering, editing, or omitting any of the runes without careful discretion may cause linguistic errors to arise. To avoid this, you may need to use a third-party Unicode module instead.

fn next_token[permalink] [source]

fn next_token(s: *tokenizer) (str | done);

Returns the next token from a tokenizer and advances the cursor.

fn peek_token[permalink] [source]

fn peek_token(s: *tokenizer) (str | done);

Returns the next token from a tokenizer without advancing the cursor.

fn position[permalink] [source]

fn position(iter: *iterator) size;

Returns the byte-wise position of the iterator. When possible, it's generally considered more idiomatic to use other functions in this module, such as iterstr and slice.

fn prev[permalink] [source]

fn prev(iter: *iterator) (rune | done);

Gets the previous rune from an iterator, or done when at the start of the string.

fn rbyteindex[permalink] [source]

fn rbyteindex(haystack: str, needle: (str | rune)) (size | void);

Returns the byte-wise index of the last occurrence of 'needle' in the 'haystack', or void if not present.

fn rcut[permalink] [source]

fn rcut(in: str, delim: str) (str, str);

Returns a string "cut" along the last instance of a delimiter, returning everything up to the delimiter, and everything after the delimiter, in a tuple.

strings::rcut("hello=world=foobar", "=")	// ("hello=world", "foobar")
strings::rcut("hello world", "=")	// ("hello world", "")

The return value is borrowed from the 'in' parameter. The caller must ensure that 'delim' is not an empty string.

fn remaining_tokens[permalink] [source]

fn remaining_tokens(s: *tokenizer) str;

Returns the remainder of the input string from a tokenizer ahead of the token cursor.

fn replace[permalink] [source]

fn replace(s: str, needle: str, target: str) (str | nomem);

Returns a new string duplicated from 's', but with all instances of 'needle' replaced with 'target'. The caller must free the return value.

fn rindex[permalink] [source]

fn rindex(haystack: str, needle: (str | rune)) (size | void);

Returns the index of the last occurrence of 'needle' in the 'haystack', or void if not present. The index returned is the rune-wise index, not the byte-wise index.

fn riter[permalink] [source]

fn riter(src: str) iterator;

Initializes a string iterator, starting at the end of the string and moving backwards with each call to next.

fn rpad[permalink] [source]

fn rpad(s: str, p: rune, maxlen: size) (str | nomem);

Pads the end of a string 's' with rune 'p' until the string reaches length 'maxlen'. The caller must free the return value.

fn rsplitn[permalink] [source]

fn rsplitn(in: str, delim: str, n: size) ([]str | nomem);

Splits a string into tokens delimited by 'delim', starting at the end of the string, and returning a slice of up to N tokens. The caller must free this slice. The strings within the slice are borrowed from 'in'.

The caller must ensure that 'delim' is not an empty string.

fn rtokenize[permalink] [source]

fn rtokenize(s: str, delim: str) tokenizer;

Like tokenize, but tokenizes the string in reverse, such that the first call to next_token returns the last token and the last call returns the first token.

fn rtrim[permalink] [source]

fn rtrim(input: str, trim: rune...) str;

Returns a string (borrowed from given input string) after trimming off of the end of the input string the characters in the given list of runes. If no runes are given, returns the string with trailing whitespace stripped off.

fn slice[permalink] [source]

fn slice(begin: *iterator, end: *iterator) str;

Returns a substring from the position of the first iterator to the position of the second iterator. The iterators must originate from the same string and the position of the second iterator must not be before the position of the first one.

fn split[permalink] [source]

fn split(in: str, delim: str) ([]str | nomem);

Splits a string into tokens delimited by any number of ASCII characters in 'delim' (see tokenize for details).

strings::split("hello world", " ");
// ["hello", "world"]
strings::split("hello;world,foo:bar", ",:;");
// ["hello", "world", "foo", "bar"]

The caller must free the returned slice. The strings within the slice are borrowed from 'in'.

The caller must ensure that 'delim' is not an empty string.

fn splitn[permalink] [source]

fn splitn(in: str, delim: str, n: size) ([]str | nomem);

Splits a string into tokens delimited by 'delim', starting at the beginning of the string, and returning a slice of up to N tokens. The caller must free this slice. The strings within the slice are borrowed from 'in'.

The caller must ensure that 'delim' is not an empty string. The caller must free the result after use.

fn sub[permalink] [source]

fn sub(s: str, start: size, end: (size | end) = end) str;

Returns a substring in the range [start, end - 1], where each argument is the index of the Nth rune. If the end argument is given as end, the end of the substring is the end of the original string. The lifetime of the substring is the same as that of the original string.

Note that substringing rune-wise is not always the correct thing to do, and it may cause unexpected linguistic errors to arise. You may want to use a third-party Unicode module instead.

fn tokenize[permalink] [source]

fn tokenize(s: str, delim: str) tokenizer;

Tokenizes a string, returning an iterator that yields substrings separated by one or more delimiters, such that the string will be split along any of the characters found in "delim". If the string begins or ends with a delimiter, an empty string is returned by the first or last call to next_token, respectively.

Each character of the delimiter string must be an ASCII character (see ascii::valid).

The input string and delimiter string are borrowed from the caller for the lifetime of the tokenizer.

The caller must ensure that at least one delimiter is provided and that the length of the input string is less than types::I64_MAX.

const tok = strings::tokenize("Hello world!\tMy name is Harriet.", " \t");
assert(next_token(&tok) as str == "Hello");
assert(next_token(&tok) as str == "world!");
assert(next_token(&tok) as str == "My");
assert(next_token(&tok) as str == "name");
assert(next_token(&tok) as str == "is");
assert(next_token(&tok) as str == "Harriet.");
assert(next_token(&tok) is done);

fn torunes[permalink] [source]

fn torunes(s: str) ([]rune | nomem);

Returns a slice of runes for a string in O(n). The caller must free the return value.

fn toutf8[permalink] [source]

fn toutf8(in: str) []u8;

Converts a string to a UTF-8 byte slice. The return value is borrowed from the input.

fn trim[permalink] [source]

fn trim(input: str, exclude: rune...) str;

Returns a string (borrowed from given input string) after trimming off of both ends of the input string the characters in the given list of runes. If no runes are given, returns the string with both leading and trailing whitespace stripped off.

fn trimprefix[permalink] [source]

fn trimprefix(input: str, trim: str) str;

Returns a string (borrowed from given input string) after trimming off the given prefix. If the input string doesn't have the given prefix, it is returned unmodified.

fn trimsuffix[permalink] [source]

fn trimsuffix(input: str, trim: str) str;

Returns a string (borrowed from given input string) after trimming off the given suffix. If the input string doesn't have the given suffix, it is returned unmodified.