tango.text.Text

License:
BSD style:

Version:
Initial release: December 2005

author:
Kris



Text is a class for managing and manipulating Unicode character arrays.

Text maintains a current "selection", controlled via the select() and search() methods. Each of append(), prepend(), replace() and remove() operates with respect to the selection.

The search() methods also operate with respect to the current selection, providing a means of iterating across matched patterns. To set a selection across the entire content, use the select() method with no arguments.

Indexes and lengths of content always count code units, not code points. This is similar to traditional ASCII string handling, yet indexing is rarely used in practice due to the selection idiom: substring indexing is generally implied as opposed to manipulated directly. This allows for a more streamlined model with regard to utf-surrogates.

Strings support a range of functionality, from insert and removal to utf encoding and decoding. There is also an immutable subset called TextView, intended to simplify life in a multi-threaded environment. However, TextView must expose the raw content as needed and thus immutability depends to an extent upon so-called "honour" of a callee. D does not enable immutability enforcement at this time, but this class will be modified to support such a feature when it arrives - via the slice() method.

The class is templated for use with char[], wchar[], and dchar[], and should migrate across encodings seamlessly. In particular, all functions in tango.text.Util are compatible with Text content in any of the supported encodings. In future, this class will become a principal gateway to the extensive ICU unicode library.

Note that several common text operations can be constructed through combining tango.text.Text with tango.text.Util e.g. lines of text can be processed thusly:
auto source = new Text!(char)("one\ntwo\nthree");

foreach (line; Util.lines(source.slice()))
         // do something with line


Speaking a bit like Yoda might be accomplished as follows:
auto dst = new Text!(char);

foreach (element; Util.delims ("all cows eat grass", " "))
         dst.prepend (element);


Below is an overview of the API and class hierarchy:
class Text(T) : TextView!(T)
{
        // set or reset the content
        Text set (T[] chars, bool mutable=true);
        Text set (const(TextView) other, bool mutable=true);

        // retrieve currently selected text
        T[] selection ();

        // set and retrieve current selection point
        Text point (size_t index);
        size_t point ();

        // mark a selection
        Text select (int start=0, int length=int.max);

        // return an iterator to move the selection around.
        // Also exposes "replace all" functionality
        Search search (T chr);
        Search search (T[] pattern);

        // format arguments behind current selection
        Text format (T[] format, ...);

        // append behind current selection
        Text append (T[] text);
        Text append (const(TextView) other);
        Text append (T chr, int count=1);
        Text append (InputStream source);

        // transcode behind current selection
        Text encode (char[]);
        Text encode (wchar[]);
        Text encode (dchar[]);

        // insert before current selection
        Text prepend (T[] text);
        Text prepend (const(TextView) other);
        Text prepend (T chr, int count=1);

        // replace current selection
        Text replace (T chr);
        Text replace (T[] text);
        Text replace (const(TextView) other);

        // remove current selection
        Text remove ();

        // clear content
        Text clear ();

        // trim leading and trailing whitespace
        Text trim ();

        // trim leading and trailing chr instances
        Text strip (T chr);

        // truncate at point, or current selection
        Text truncate (int point = int.max);

        // reserve some space for inserts/additions
        Text reserve (int extra);

        // write content to stream
        Text write (OutputStream sink);
}

class TextView(T) : UniText
{
        // hash content
        hash_t toHash ();

        // return length of content
        size_t length ();

        // compare content
        bool equals  (T[] text);
        bool equals  (const(TextView) other);
        bool ends    (T[] text);
        bool ends    (const(TextView) other);
        bool starts  (T[] text);
        bool starts  (const(TextView) other);
        int compare  (T[] text);
        int compare  (const(TextView) other);
        int opEquals (Object other);
        int opCmp    (Object other);

        // copy content
        T[] copy (T[] dst);

        // return content
        T[] slice ();

        // return data type
        typeinfo encoding ();

        // replace the comparison algorithm
        Comparator comparator (Comparator other);
}

class UniText
{
        // convert content
        abstract char[]  toString   (char[]  dst = null);
        abstract wchar[] toString16 (wchar[] dst = null);
        abstract dchar[] toString32 (dchar[] dst = null);
}

struct Search
{
        // select prior instance
        bool prev();

        // select next instance
        bool next();

        // return instance count
        size_t count();

        // contains instance?
        bool within();

        // replace all with char
        void replace(T);

        // replace all with text (null == remove all)
        void replace(T[]);
}


class Text(T): TextView!(T);
The mutable Text class actually implements the full API, whereas the superclasses are purely abstract (could be interfaces instead).

struct Search(T);
Search Iterator

Search opCall(Text text, const(T)[] match);
Construct a Search instance

bool prev();
Search backward, starting at the character prior to the selection point

bool next();
Search forward, starting just after the currently selected text

bool within();
Returns true if there is a match within the associated text

size_t count();
Returns number of matches within the associated text

void replace(T chr);
Replace all matches with the given character

void replace(const(T)[] sub = null);
Replace all matches with the given substitution

bool locate(Call call, const(T)[] content, size_t from);
locate pattern index and select as appropriate

struct Span;
Selection span

Deprecated:
use point() instead

size_t begin;
index of selection point

size_t length;
length of selection

this(size_t space = 0);
Create an empty Text with the specified available space

Note:
A character like 'a' will be implicitly converted to uint and thus will be accepted for this constructor, making it appear like you can initialize a Text instance with a single character, something which is not supported.

this(T[] content, bool copy);
Create a Text upon the provided content. If said content is immutable (read-only) then you might consider setting the 'copy' parameter to false. Doing so will avoid allocating heap-space for the content until it is modified via Text methods. This can be useful when wrapping an array "temporarily" with a stack-based Text

this(TextViewT other, bool copy = true);
Create a Text via the content of another. If said content is immutable (read-only) then you might consider setting the 'copy' parameter to false. Doing so will avoid allocating heap-space for the content until it is modified via Text methods. This can be useful when wrapping an array temporarily with a stack-based Text

Text set(T[] chars, bool copy);
Set the content to the provided array. Parameter 'copy' specifies whether the given array is likely to change. If not, the array is aliased until such time as it is altered via this class. This can be useful when wrapping an array "temporarily" with a stack-based Text.

Also resets the current selection to null

Text set(TextViewT other, bool copy = true);
Replace the content of this Text. If the new content is immutable (read-only) then you might consider setting the 'copy' parameter to false. Doing so will avoid allocating heap-space for the content until it is modified via one of these methods. This can be useful when wrapping an array "temporarily" with a stack-based Text.

Also resets the current selection to null

Text select(size_t start = 0, size_t length = (int).max);
Explicitly set the current selection to the given start and length. Values are pinned to the content extents

const(T)[] selection();
Return the currently selected content

Span span();
Return the index and length of the current selection

Deprecated:
use point() instead

size_t point();
Return the current selection point

Text point(size_t index);
Set the current selection point, and reset the selection length

Search!(T) search(const(T)[] match);
Return a search iterator for a given pattern. The iterator sets the current text selection as appropriate. For example:
auto t = new Text ("hello world");
auto s = t.search ("world");

assert (s.next);
assert (t.selection() == "world");


Replacing patterns operates in a similar fashion:
auto t = new Text ("hello world");
auto s = t.search ("world");

// replace all instances of "world" with "everyone"
assert (s.replace ("everyone"));
assert (s.count is 0);


bool select(T c);
Find and select the next occurrence of a BMP code point in a string. Returns true if found, false otherwise

Deprecated:
use search() instead

bool select(const(TextViewT) other);
Find and select the next substring occurrence. Returns true if found, false otherwise

Deprecated:
use search() instead

bool select(const(T)[] chars);
Find and select the next substring occurrence. Returns true if found, false otherwise

Deprecated:
use search() instead

bool selectPrior(T c);
Find and select a prior occurrence of a BMP code point in a string. Returns true if found, false otherwise

Deprecated:
use search() instead

bool selectPrior(const(TextViewT) other);
Find and select a prior substring occurrence. Returns true if found, false otherwise

Deprecated:
use search() instead

bool selectPrior(const(T)[] chars);
Find and select a prior substring occurrence. Returns true if found, false otherwise

Deprecated:
use search() instead

Text format(const(T)[] format, ...);
Append formatted content to this Text

Text append(const(TextViewT) other);
Append text to this Text

Text append(const(T)[] chars);
Append text to this Text

Text append(T chr, size_t count = 1);
Append a count of characters to this Text

Text append(int v, const(T)[] fmt = null);
Append an integer to this Text

Deprecated:
use format() instead

Text append(long v, const(T)[] fmt = null);
Append a long to this Text

Deprecated:
use format() instead

Text append(double v, int decimals = 2, int e = 10);
Append a double to this Text

Deprecated:
use format() instead

Text append(InputStream source);
Append content from input stream at insertion point. Use tango.io.stream.Utf as a wrapper to perform conversion as necessary

Text prepend(T chr, int count = 1);
Insert characters into this Text

Text prepend(const(T)[] other);
Insert text into this Text

Text prepend(const(TextViewT) other);
Insert another Text into this Text

Text encode(const(char)[] s);
Text encode(const(wchar)[] s);
Text encode(const(dchar)[] s);
Text encode(Object o);
Append encoded text at the current selection point. The text is converted as necessary to the appropriate utf encoding.

Text replace(T chr);
Replace a section of this Text with the specified character

Text replace(const(T)[] chars);
Replace a section of this Text with the specified array

Text replace(const(TextViewT) other);
Replace a section of this Text with another

Text remove();
Remove the selection from this Text and reset the selection to zero length (at the current position)

Text remove(size_t start, size_t count);
Remove the range given by start and count from this Text

Text truncate(size_t index = size_t.max);
Truncate this string at an optional index. Default behaviour is to truncate at the current append point. Current selection is moved to the truncation point, with length 0

Text clear();
Clear the string content

Text trim();
Remove leading and trailing whitespace from this Text, and reset the selection to the trimmed content

Text strip(T matches);
Remove leading and trailing matches from this Text, and reset the selection to the stripped content

Text reserve(size_t extra);
Reserve some extra room

Text write(OutputStream sink);
Write content to output stream

TypeInfo encoding();
Get the encoding type

Comparator comparator(Comparator other);
Set the comparator delegate. Where other is null, we behave as a getter only

hash_t toHash();
Hash this Text

size_t length();
Return the length of the valid content

bool equals(const(TextViewT) other);
Is this Text equal to another?

bool equals(const(T)[] other);
Is this Text equal to the provided text?

bool ends(const(TextViewT) other);
Does this Text end with another?

bool ends(const(T)[] chars);
Does this Text end with the specified string?

bool starts(const(TextViewT) other);
Does this Text start with another?

bool starts(const(T)[] chars);
Does this Text start with the specified string?

int compare(const(TextViewT) other);
Compare this Text with another. Returns 0 if the content matches, less than zero if this Text is "less" than the other, or greater than zero where this Text is "bigger".

int compare(const(T)[] chars);
Compare this Text with an array. Returns 0 if the content matches, less than zero if this Text is "less" than the other, or greater than zero where this Text is "bigger".

T[] copy(T[] dst);
Return content from this Text

A slice of dst is returned, representing a copy of the content. The slice is clipped to the minimum of either the length of the provided array, or the length of the content minus the stipulated start point

const(T)[] slice();
Return an alias to the content of this TextView. Note that you are bound by honour to leave this content wholly unmolested. D surely needs some way to enforce immutability upon array references

string toString();
wchar[] toString16(wchar[] dst = null);
dchar[] toString32(dchar[] dst = null);
Convert to the UniText types. The optional argument dst will be resized as required to house the conversion. To minimize heap allocation during subsequent conversions, apply the following pattern:
Text  string;

wchar[] buffer;
wchar[] result = string.toString16 (buffer);

if (result.length > buffer.length)
    buffer = result;
You can also provide a buffer from the stack, but the output will be moved to the heap if said buffer is not large enough

int opCmp(Object o);
Compare this Text to another. We compare against other Strings only. Literals and other objects are not supported

bool opEquals(Object o);
bool opEquals(const(T)[] s);
Is this Text equal to the text of something else?

void pinIndex(ref size_t x);
Pin the given index to a valid position.

void pinIndices(ref size_t start, ref size_t length);
Pin the given index and length to a valid position.

int simpleComparator(const(T)[] a, const(T)[] b);
Compare two arrays. Returns 0 if the content matches, less than zero if A is "less" than B, or greater than zero where A is "bigger". Where the substrings match, the shorter is considered "less".

void expand(size_t index, size_t count);
Make room available to insert or append something

Text set(T chr, size_t start, size_t count);
Replace a section of this Text with the specified character

void realloc(size_t count = 0);
Allocate memory due to a change in the content. We handle the distinction between mutable and immutable here.

Text append(const(T)* chars, size_t count);
Internal method to support Text appending

class TextView(T): UniText;
Immutable string

size_t length();
Return the length of the valid content

bool equals(const(TextView) other);
Is this Text equal to another?

bool equals(const(T)[] other);
Is this Text equal to the provided text?

bool ends(const(TextView) other);
Does this Text end with another?

bool ends(const(T)[] chars);
Does this Text end with the specified string?

bool starts(const(TextView) other);
Does this Text start with another?

bool starts(const(T)[] chars);
Does this Text start with the specified string?

int compare(const(TextView) other);
Compare this Text with another. Returns 0 if the content matches, less than zero if this Text is "less" than the other, or greater than zero where this Text is "bigger".

int compare(const(T)[] chars);
Compare this Text with an array. Returns 0 if the content matches, less than zero if this Text is "less" than the other, or greater than zero where this Text is "bigger".

T[] copy(T[] dst);
Return content from this Text. A slice of dst is returned, representing a copy of the content. The slice is clipped to the minimum of either the length of the provided array, or the length of the content minus the stipulated start point

int opCmp(Object o);
Compare this Text to another

bool opEquals(Object other);
Is this Text equal to another?

bool opEquals(const(T)[] other);
Is this Text equal to another?

TypeInfo encoding();
Get the encoding type

Comparator comparator(Comparator other);
Set the comparator delegate

hash_t toHash();
Hash this Text

const(T)[] slice();
Return an alias to the content of this TextView. Note that you are bound by honour to leave this content wholly unmolested. D surely needs some way to enforce immutability upon array references

abstract class UniText;
A string abstraction that converts to anything

Examples:
auto s = new Text!(char);
s = "hello";

auto array = new Array(1024);
s.write (array);
assert (array.slice() == "hello");
s.select (1, 0);
assert (s.append(array) == "hhelloello");

s = "hello";
s.search("hello").next;
assert (s.selection() == "hello");
s.replace ("1");
assert (s.selection() == "1");
assert (s == "1");

assert (s.clear() == "");

assert (s.format("{}", 12345) == "12345");
assert (s.selection() == "12345");

s ~= "fubar";
assert (s.selection() == "12345fubar");
assert (s.search("5").next);
assert (s.selection() == "5");
assert (s.remove() == "1234fubar");
assert (s.search("fubar").next);
assert (s.selection() == "fubar");
assert (s.search("wumpus").next is false);
assert (s.selection() == "");

assert (s.clear().format("{:f4}", 1.2345) == "1.2345");

assert (s.clear().format("{:b}", 0xf0) == "11110000");

assert (s.clear().encode("one"d).toString() == "one");

assert (Util.splitLines(s.clear().append("a\nb").slice()).length is 2);

assert (s.select().replace("almost ") == "almost ");
foreach (element; Util.patterns ("all cows eat grass", "eat", "chew"))
         s.append (element);
assert (s.selection() == "almost all cows chew grass");
assert (s.clear().format("{}:{}", 1, 2) == "1:2");



Page generated by Ddoc. Copyright (c) 2005 Kris Bell. All rights reserved