/**************************************************************************** ** ** This file is part of GAP, a system for computational discrete algebra. ** ** Copyright of GAP belongs to its developers, whose names are too numerous ** to list here. Please refer to the COPYRIGHT file for details. ** ** SPDX-License-Identifier: GPL-2.0-or-later ** ** This file contains the functions which mainly deal with strings. ** ** A *string* is a list that has no holes, and whose elements are all ** characters. For the full definition of strings see chapter "Strings" in ** the {\GAP} manual. Read also "More about Strings" about the string flag ** and the compact representation of strings. ** ** A list that is known to be a string is represented by a bag of type ** 'T_STRING', which has the following format: ** ** +--------+----+----+- - - -+----+----+ ** |length |1st |2nd | |last|null| ** |as UInt |char|char| |char|char| ** +--------+----+----+- - - -+----+----+ ** ** Each entry is a single character (of C type 'unsigned char'). The last ** entry in the bag is the null character ('\0'), which terminates C ** strings. We add this null character although the length is stored in the ** object. This allows to use C routines with strings directly with null ** character free strings (e.g., filenames). ** ** Note that a list represented by a bag of type 'T_PLIST' might still be a ** string. It is just that the kernel does not know this. ** ** This package consists of three parts. ** ** The first part consists of the functions 'NEW_STRING', 'CHARS_STRING' (or ** 'CSTR_STRING'), 'GET_LEN_STRING', 'SET_LEN_STRING', and more. These and ** the functions below use the detailed knowledge about the representation ** of strings. ** ** The second part consists of the functions 'LenString', 'ElmString', ** 'ElmsStrings', 'AssString', 'AsssString', PlainString', ** and 'IsPossString'. They are the functions required by the generic lists ** package. Using these functions the other parts of the {\GAP} kernel can ** access and modify strings without actually being aware that they are ** dealing with a string. ** ** The third part consists of the functions 'PrintString', which is called ** by 'FuncPrint', and 'IsString', which test whether an arbitrary list is a ** string, and if so converts it into the above format.
*/
/**************************************************************************** ** *V ObjsChar[<chr>] . . . . . . . . . . . . . . . . table of character values ** ** 'ObjsChar' contains all the character values. That way we do not need to ** allocate new bags for new characters.
*/
Obj ObjsChar [256];
/**************************************************************************** ** *F TypeChar( <chr> ) . . . . . . . . . . . . . . . type of a character value ** ** 'TypeChar' returns the type of the character <chr>. ** ** 'TypeChar' is the function in 'TypeObjFuncs' for character values.
*/ static Obj TYPE_CHAR;
/**************************************************************************** ** *F FuncSTRING_SINTLIST( <self>, <string> ) string by signed integer list
*/ static Obj FuncSTRING_SINTLIST(Obj self, Obj val)
{
UInt l,i; Int low, inc;
Obj n;
UInt1 *p;
/* there should be a test here, but how do I check cheaply for list of
* integers ? */
// general code if (!IS_RANGE(val) && !IS_PLIST(val)) {
again:
RequireArgument(SELF_NAME, val, "must be a plain list of small integers or a range");
} if (! IS_RANGE(val) ) {
l=LEN_PLIST(val);
n=NEW_STRING(l);
p=CHARS_STRING(n); for (i=1;i<=l;i++) {
Obj x = ELM_PLIST(val,i); if (!IS_INTOBJ(x)) goto again;
*p++=CHAR_SINT(INT_INTOBJ(x));
}
} else {
l=GET_LEN_RANGE(val);
low=GET_LOW_RANGE(val);
inc=GET_INC_RANGE(val);
n=NEW_STRING(l);
p=CHARS_STRING(n); for (i=1;i<=l;i++) {
*p++=CHAR_SINT(low);
low=low+inc;
}
}
return n;
}
/**************************************************************************** ** *F FuncREVNEG_STRING( <self>, <string> ) string by signed integer list
*/ static Obj FuncREVNEG_STRING(Obj self, Obj val)
{
UInt l,i,j;
Obj n; const UInt1 *p;
UInt1 *q;
// test whether val is a string, convert to compact rep if necessary
RequireStringRep(SELF_NAME, val);
/**************************************************************************** ** *F NEW_STRING( <len> ) . . . returns new string with length <len>, first ** character and "first behind last" set to zero **
*/
Obj NEW_STRING(Int len)
{
GAP_ASSERT(len >= 0); if (len > INT_INTOBJ_MAX) {
ErrorQuit("NEW_STRING: length must be a small integer", 0, 0);
}
Obj res = NewBag(T_STRING, SIZEBAG_STRINGLEN(len));
SET_LEN_STRING(res, len); return res;
}
/**************************************************************************** ** *F GrowString(<list>,<len>) . . . . . . make sure a string is large enough ** ** returns the new length, but doesn't set SET_LEN_STRING.
*/ Int GrowString (
Obj list,
UInt need )
{
UInt len; // new physical length
UInt good; // good new physical length
if (need > INT_INTOBJ_MAX)
ErrorMayQuit("GrowString: string length too large", 0, 0);
// find out how large the data area should become
good = 5 * (GET_LEN_STRING(list)+3) / 4 + 1; if (good > INT_INTOBJ_MAX)
good = INT_INTOBJ_MAX;
// but maybe we need more if ( need < good ) { len = good; } else { len = need; }
// resize the bag
ResizeBag( list, SIZEBAG_STRINGLEN(len) );
// return the new maximal length return (Int) len;
}
/**************************************************************************** ** *F TypeString(<list>) . . . . . . . . . . . . . . . . . . type of a string ** ** 'TypeString' returns the type of the string <list>. ** ** 'TypeString' is the function in 'TypeObjFuncs' for strings.
*/ static Obj TYPE_STRING_MUTABLE; static Obj TYPE_STRING_IMMUTABLE; static Obj TYPE_STRING_NSORT_MUTABLE; static Obj TYPE_STRING_NSORT_IMMUTABLE; static Obj TYPE_STRING_SSORT_MUTABLE; static Obj TYPE_STRING_SSORT_IMMUTABLE;
/**************************************************************************** ** *F CopyString( <list>, <mut> ) . . . . . . . . . . . . . . . . copy a string ** ** 'CopyString' returns a structural (deep) copy of the string <list>, i.e., ** a recursive copy that preserves the structure. ** ** If <list> has not yet been copied, it makes a copy, leaves a forward ** pointer to the copy in the first entry of the string, where the size of ** the string usually resides, and copies all the entries. If the string ** has already been copied, it returns the value of the forwarding pointer. ** ** 'CopyString' is the function in 'CopyObjFuncs' for strings.
*/ static Obj CopyString(Obj list, Int mut)
{
Obj copy; // handle of the copy, result
// immutable input is handled by COPY_OBJ
GAP_ASSERT(IS_MUTABLE_OBJ(list));
// make object for copy
copy = NewBag(TNUM_OBJ(list), SIZE_OBJ(list)); if (!mut)
MakeImmutableNoRecurse(copy);
ADDR_OBJ(copy)[0] = CONST_ADDR_OBJ(list)[0];
// leave a forwarding pointer
PrepareCopy(list, copy);
// copy the subvalues
memcpy(ADDR_OBJ(copy)+1, CONST_ADDR_OBJ(list)+1,
SIZE_OBJ(list)-sizeof(Obj) );
/**************************************************************************** ** *F PrintString(<list>) . . . . . . . . . . . . . . . . . . . print a string *F FuncVIEW_STRING_FOR_STRING(<list>) . . . . . . view a string as a string ** ** 'PrintString' prints the string with the handle <list>. ** 'VIEW_STRING_FOR_STRING' returns a string containing what PrintString ** outputs. ** ** No linebreaks are allowed, if one must be inserted anyhow, it must ** be escaped by a backslash '\', which is done in 'Pr'. ** ** The buffer 'PrStrBuf' is used to protect 'Pr' against garbage collections ** caused by printing to string streams, which might move the body of list. ** ** The output uses octal number notation for non-ascii or non-printable ** characters. The function can be used to print *any* string in a way ** which can be read in by GAP afterwards.
*/
// Type of function given to OutputStringGeneric typedefvoid StringOutputterType(void * data, char * strbuf, UInt len);
/**************************************************************************** ** *F PrintString1(<list>) . . . . . . . . . . . . print a string for 'Print' ** ** 'PrintString1' prints the string constant in the format used by the ** 'Print' and 'PrintTo' function.
*/ void PrintString1 (
Obj list )
{
Pr("%g", (Int)list, 0);
}
/**************************************************************************** ** *F EqString(<listL>,<listR>) . . . . . . . . test whether strings are equal ** ** 'EqString' returns 'true' if the two strings <listL> and <listR> are ** equal and 'false' otherwise.
*/ staticInt EqString(Obj listL, Obj listR)
{
UInt lL, lR; const UInt1 *pL, *pR;
lL = GET_LEN_STRING(listL);
lR = GET_LEN_STRING(listR); if (lR != lL) return 0;
pL = CONST_CHARS_STRING(listL);
pR = CONST_CHARS_STRING(listR); return memcmp(pL, pR, lL) == 0;
}
/**************************************************************************** ** *F LtString(<listL>,<listR>) . test whether one string is less than another ** ** 'LtString' returns 'true' if the string <listL> is less than the string ** <listR> and 'false' otherwise.
*/ staticInt LtString(Obj listL, Obj listR)
{
UInt lL, lR; const UInt1 *pL, *pR;
lL = GET_LEN_STRING(listL);
lR = GET_LEN_STRING(listR);
pL = CONST_CHARS_STRING(listL);
pR = CONST_CHARS_STRING(listR);
Int res; if (lL <= lR) {
res = memcmp(pL, pR, lL); if (res == 0) return lL < lR;
} else {
res = memcmp(pL, pR, lR); if (res == 0) return 0;
} return res < 0;
}
/**************************************************************************** ** *F LenString(<list>) . . . . . . . . . . . . . . . . . . length of a string ** ** 'LenString' returns the length of the string <list> as a C integer. ** ** 'LenString' is the function in 'LenListFuncs' for strings.
*/ staticInt LenString(Obj list)
{ return GET_LEN_STRING( list );
}
/**************************************************************************** ** *F IsbString(<list>,<pos>) . . . . . . . . . test for an element of a string ** ** 'IsbString' returns 1 if the string <list> contains ** a character at the position <pos> and 0 otherwise. ** It can rely on <pos> being a positive integer. ** ** 'IsbString' is the function in 'IsbListFuncs' for strings.
*/ staticBOOL IsbString(Obj list, Int pos)
{ // since strings are dense, this must only test for the length return (pos <= GET_LEN_STRING(list));
}
/**************************************************************************** ** *F GET_ELM_STRING( <list>, <pos> ) . . . . . . select an element of a string ** ** 'GET_ELM_STRING' returns the <pos>-th element of the string <list>. ** <pos> must be a positive integer less than or equal to the length of ** <list>.
*/ staticinline Obj GET_ELM_STRING(Obj list, Int pos)
{
GAP_ASSERT(IS_STRING_REP(list));
GAP_ASSERT(pos > 0);
GAP_ASSERT((UInt) pos <= GET_LEN_STRING(list));
UChar c = CONST_CHARS_STRING(list)[pos - 1]; return ObjsChar[c];
}
/**************************************************************************** ** *F SET_ELM_STRING( <list>, <pos>, <val> ) . . . . set a character of a string ** ** 'SET_ELM_STRING' sets the <pos>-th character of the string <list>. ** <val> must be a character and <list> stay a string after the assignment.
*/ staticinlinevoid SET_ELM_STRING(Obj list, Int pos, Obj val)
{
GAP_ASSERT(IS_STRING_REP(list));
GAP_ASSERT(pos > 0);
GAP_ASSERT((UInt) pos <= GET_LEN_STRING(list));
GAP_ASSERT(TNUM_OBJ(val) == T_CHAR);
UChar * ptr = CHARS_STRING(list) + (pos - 1);
*ptr = CHAR_VALUE(val);
}
/**************************************************************************** ** *F Elm0String(<list>,<pos>) . . . . . . . . . select an element of a string *F Elm0vString(<list>,<pos>) . . . . . . . . . select an element of a string ** ** 'Elm0String' returns the element at the position <pos> of the string ** <list>, or returns 0 if <list> has no assigned object at <pos>. ** It can rely on <pos> being a positive integer. ** ** 'Elm0vString' does the same thing as 'Elm0String', but it can ** also rely on <pos> not being larger than the length of <list>. ** ** 'Elm0String' is the function on 'Elm0ListFuncs' for strings. ** 'Elm0vString' is the function in 'Elm0vListFuncs' for strings.
*/ static Obj Elm0String(Obj list, Int pos)
{ if ( pos <= GET_LEN_STRING( list ) ) { return GET_ELM_STRING( list, pos );
} else { return 0;
}
}
/**************************************************************************** ** *F ElmString(<list>,<pos>) . . . . . . . . . . select an element of a string *F ElmvString(<list>,<pos>) . . . . . . . . . select an element of a string ** ** 'ElmString' returns the element at the position <pos> of the string ** <list>, or signals an error if <list> has no assigned object at <pos>. ** It can rely on <pos> being a positive integer. ** ** 'ElmvString' does the same thing as 'ElmString', but it can ** also rely on <pos> not being larger than the length of <list>. ** ** 'ElmwString' does the same thing as 'ElmString', but it can ** also rely on <list> having an assigned object at <pos>. ** ** 'ElmString' is the function in 'ElmListFuncs' for strings. ** 'ElmfString' is the function in 'ElmfListFuncs' for strings. ** 'ElmwString' is the function in 'ElmwListFuncs' for strings.
*/ static Obj ElmString(Obj list, Int pos)
{ // check the position if ( GET_LEN_STRING( list ) < pos ) {
ErrorMayQuit("List Element: [%d] must have an assigned value",
(Int)pos, 0);
}
// return the selected element return GET_ELM_STRING( list, pos );
}
#define ElmvString Elm0vString
#define ElmwString Elm0vString
/**************************************************************************** ** *F ElmsString(<list>,<poss>) . . . . . . . . select a sublist from a string ** ** 'ElmsString' returns a new list containing the elements at the positions ** given in the list <poss> from the string <list>. It is the ** responsibility of the called to ensure that <poss> is dense and contains ** only positive integers. An error is signalled if an element of <poss> is ** larger than the length of <list>. ** ** 'ElmsString' is the function in 'ElmsListFuncs' for strings.
*/ static Obj ElmsString(Obj list, Obj poss)
{
Obj elms; // selected sublist, result Int lenList; // length of <list> Char elm; // one element from <list> Int lenPoss; // length of <positions> Int pos; // <position> as integer Int inc; // increment in a range Int i; // loop variable
// general code if ( ! IS_RANGE(poss) ) {
// get the length of <list>
lenList = GET_LEN_STRING( list );
// get the length of <positions>
lenPoss = LEN_LIST( poss );
// make the result list
elms = NEW_STRING( lenPoss );
// loop over the entries of <positions> and select for ( i = 1; i <= lenPoss; i++ ) {
// get <position>
Obj p = ELMW_LIST(poss, i); if (!IS_INTOBJ(p)) {
ErrorMayQuit("List Elements: position is too large for " "this type of list",
0, 0);
}
pos = INT_INTOBJ(p);
// select the element if ( lenList < pos ) {
ErrorMayQuit( "List Elements: [%d] must have an assigned value",
(Int)pos, 0);
}
// select the element
elm = CONST_CHARS_STRING(list)[pos-1];
// assign the element into <elms>
CHARS_STRING(elms)[i-1] = elm;
}
}
// special code for ranges else {
// get the length of <list>
lenList = GET_LEN_STRING( list );
// get the length of <positions>, the first elements, and the inc.
lenPoss = GET_LEN_RANGE( poss );
pos = GET_LOW_RANGE( poss );
inc = GET_INC_RANGE( poss );
// check that no <position> is larger than 'LEN_LIST(<list>)' if ( lenList < pos ) {
ErrorMayQuit( "List Elements: [%d] must have an assigned value",
(Int)pos, 0);
} if ( lenList < pos + (lenPoss-1) * inc ) {
ErrorMayQuit( "List Elements: [%d] must have an assigned value",
(Int)(pos + (lenPoss - 1) * inc), 0);
}
// make the result list
elms = NEW_STRING( lenPoss );
// loop over the entries of <positions> and select const UInt1 * p = CONST_CHARS_STRING(list);
UInt1 * pn = CHARS_STRING(elms); for ( i = 1; i <= lenPoss; i++, pos += inc ) {
pn[i - 1] = p[pos - 1];
}
}
return elms;
}
/**************************************************************************** ** *F AssString(<list>,<pos>,<val>) . . . . . . . . . . . . assign to a string ** ** 'AssString' assigns the value <val> to the string <list> at the position ** <pos>. It is the responsibility of the caller to ensure that <pos> is ** positive, and that <val> is not 0. ** ** 'AssString' is the function in 'AssListFuncs' for strings. ** ** 'AssString' keeps <list> in string representation if possible. **
*/ staticvoid AssString(Obj list, Int pos, Obj val)
{
UInt len = GET_LEN_STRING(list);
if (TNUM_OBJ(val) != T_CHAR || pos > len+1) { // convert the range into a plain list
PLAIN_LIST(list);
CLEAR_FILTS_LIST(list);
// resize the list if necessary if ( len < pos ) {
GROW_PLIST( list, pos );
SET_LEN_PLIST( list, pos );
}
// now perform the assignment and return the assigned value
SET_ELM_PLIST( list, pos, val );
CHANGED_BAG( list );
} else {
CLEAR_FILTS_LIST(list);
// resize the list if necessary if ( len < pos ) {
GROW_STRING( list, pos );
SET_LEN_STRING( list, pos );
CHARS_STRING(list)[pos] = (UInt1)0;
}
// now perform the assignment and return the assigned value
SET_ELM_STRING( list, pos, val );
}
}
/**************************************************************************** ** *F AsssString(<list>,<poss>,<vals>) . . assign several elements to a string ** ** 'AsssString' assigns the values from the list <vals> at the positions ** given in the list <poss> to the string <list>. It is the responsibility ** of the caller to ensure that <poss> is dense and contains only positive ** integers, that <poss> and <vals> have the same length, and that <vals> is ** dense. ** ** 'AsssString' is the function in 'AsssListFuncs' for strings. ** ** 'AsssString' simply delegates to AssString. Note that the ordering of ** <poss> can be important if <list> should stay in string representation. **
*/ staticvoid AsssString(Obj list, Obj poss, Obj vals)
{ Int i, len = LEN_LIST(poss); for (i = 1; i <= len; i++) {
ASS_LIST(list, INT_INTOBJ(ELM_LIST(poss, i)), ELM_LIST(vals, i));
}
}
/**************************************************************************** ** *F IsSSortString(<list>) . . . . . . . strictly sorted list test for strings ** ** 'IsSSortString' returns 1 if the string <list> is strictly sorted and 0 ** otherwise. ** ** 'IsSSortString' is the function in 'IsSSortListFuncs' for strings.
*/ staticBOOL IsSSortString(Obj list)
{ Int len; Int i; const UInt1 * ptr;
// test whether the string is strictly sorted
len = GET_LEN_STRING( list );
ptr = CONST_CHARS_STRING(list); for ( i = 1; i < len; i++ ) { if ( ! (ptr[i-1] < ptr[i]) ) break;
}
// retype according to the outcome
SET_FILT_LIST( list, (len <= i) ? FN_IS_SSORT : FN_IS_NSORT ); return (len <= i);
}
/**************************************************************************** ** *F IsPossString(<list>) . . . . . positions list test function for strings ** ** 'IsPossString' is the function in 'IsPossListFuncs' for strings.
*/ staticBOOL IsPossString(Obj list)
{ return GET_LEN_STRING( list ) == 0;
}
/**************************************************************************** ** *F PosString(<list>,<val>,<pos>) . . . . position of an element in a string ** ** 'PosString' returns the position of the value <val> in the string <list> ** after the first position <start> as a C integer. 0 is returned if <val> ** is not in the list. ** ** 'PosString' is the function in 'PosListFuncs' for strings.
*/ static Obj PosString(Obj list, Obj val, Obj start)
{ Int lenList; // length of <list> Int i; // loop variable
UInt1 valc; // C characters const UInt1 *p; // pointer to chars of <list>
UInt istart;
/* if the starting position is too big to be a small int
then there can't be anything to find */ if (!IS_INTOBJ(start)) return Fail;
istart = INT_INTOBJ(start);
// get the length of <list>
lenList = GET_LEN_STRING( list );
// a string contains only characters if (TNUM_OBJ(val) != T_CHAR) return Fail;
// val as C character
valc = CHAR_VALUE(val);
// search entries in <list>
p = CONST_CHARS_STRING(list); for ( i = istart; i < lenList && p[i] != valc; i++ );
// return the position (0 if <val> was not found) return (lenList <= i ? Fail : INTOBJ_INT(i+1));
}
/**************************************************************************** ** *F PlainString(<list>) . . . . . . . . . . convert a string to a plain list ** ** 'PlainString' converts the string <list> to a plain list. Not much work. ** ** 'PlainString' is the function in 'PlainListFuncs' for strings.
*/ staticvoid PlainString(Obj list)
{ Int lenList; // logical length of the string
Obj tmp; // handle of the list Int i; // loop variable
// find the length and allocate a temporary copy
lenList = GET_LEN_STRING( list );
tmp = NEW_PLIST_WITH_MUTABILITY(IS_MUTABLE_OBJ(list), T_PLIST, lenList);
SET_LEN_PLIST( tmp, lenList );
// copy the characters for ( i = 1; i <= lenList; i++ ) {
SET_ELM_PLIST( tmp, i, GET_ELM_STRING( list, i ) );
}
// change size and type of the string and copy back
ResizeBag( list, SIZE_OBJ(tmp) );
RetypeBag( list, TNUM_OBJ(tmp) );
/**************************************************************************** ** *F IS_STRING( <obj> ) . . . . . . . . . . . . test if an object is a string ** ** 'IS_STRING' returns 1 if the object <obj> is a string and 0 otherwise. ** It does not change the representation of <obj>.
*/ BOOL (*IsStringFuncs[LAST_REAL_TNUM + 1])(Obj obj);
static Obj IsStringFilt;
staticBOOL IsStringList(Obj list)
{ Int lenList;
Obj elm; Int i;
lenList = LEN_LIST( list ); for ( i = 1; i <= lenList; i++ ) {
elm = ELMV0_LIST( list, i ); if ( elm == 0 ) break; #ifdef HPCGAP if ( !CheckReadAccess(elm) ) break; #endif if ( TNUM_OBJ( elm ) != T_CHAR ) break;
}
/**************************************************************************** ** *F CopyToStringRep( <string> ) . . . copy a string to string representation ** ** 'CopyToStringRep' copies the string <string> to a new string in string ** representation.
*/
Obj CopyToStringRep(
Obj string )
{ Int lenString; // length of the string
Obj elm; // one element of the string
Obj copy; // temporary string Int i; // loop variable
if ( IS_STRING_REP(string) ) {
memcpy(CHARS_STRING(copy), CONST_CHARS_STRING(string),
GET_LEN_STRING(string)); // XXX no error checks?
} else { // copy the string to the string representation for ( i = 1; i <= lenString; i++ ) {
elm = ELMW_LIST( string, i );
CHARS_STRING(copy)[i-1] = CHAR_VALUE(elm);
}
CHARS_STRING(copy)[lenString] = '\0';
} return copy;
}
/**************************************************************************** ** *F ImmutableString( <string> ) . . . copy to immutable string in string rep. ** ** 'ImmutableString' returns an immutable string in string representation ** equal to <string>. This may return <string> if it already satisfies these ** criteria.
*/
Obj ImmutableString(Obj string)
{ if (!IS_STRING_REP(string) || IS_MUTABLE_OBJ(string)) {
string = CopyToStringRep(string);
MakeImmutableNoRecurse(string);
} return string;
}
/**************************************************************************** ** *F ConvString( <string> ) . . . . convert a string to string representation ** ** 'ConvString' converts the string <string> to string representation.
*/ void ConvString (
Obj string )
{ Int lenString; // length of the string
Obj elm; // one element of the string
Obj tmp; // temporary string Int i; // loop variable
// do nothing if the string is already in the string representation if ( IS_STRING_REP(string) )
{ return;
}
// copy the string to the string representation for ( i = 1; i <= lenString; i++ ) {
elm = ELMW_LIST( string, i );
CHARS_STRING(tmp)[i-1] = CHAR_VALUE(elm);
}
CHARS_STRING(tmp)[lenString] = '\0';
// copy back to string
RetypeBagSM( string, T_STRING );
ResizeBag( string, SIZEBAG_STRINGLEN(lenString) ); // copy data area from tmp
memcpy(ADDR_OBJ(string), CONST_ADDR_OBJ(tmp), SIZE_OBJ(tmp));
}
/**************************************************************************** ** *F IsStringConv( <obj> ) . . . . . test if an object is a string and convert ** ** 'IsStringConv' returns 1 if the object <obj> is a string, and 0 ** otherwise. If <obj> is a string it changes its representation to the ** string representation.
*/ BOOL IsStringConv(Obj obj)
{ Int res;
// test whether the object is a string
res = IS_STRING( obj );
// if so, convert it to the string representation if ( res ) {
ConvString( obj );
}
return res;
}
/**************************************************************************** ** *F AppendCStr( <str>, <buf>, <len> ) . . append data in a buffer to a string ** ** 'AppendCStr' appends <len> bytes of data taken from <buf> to <str>, where ** <str> must be a mutable GAP string object.
*/ void AppendCStr(Obj str, constchar * buf, UInt len)
{
GAP_ASSERT(IS_MUTABLE_OBJ(str));
GAP_ASSERT(IS_STRING_REP(str));
/**************************************************************************** ** *F AppendString( <str1>, <str2> ) . . . . . . . append one string to another ** ** 'AppendString' appends <str2> to the end of <str1>. Both <str1> and <str> ** must be a GAP string objects, and <str1> must be mutable.
*/ void AppendString(Obj str1, Obj str2)
{
GAP_ASSERT(IS_MUTABLE_OBJ(str1));
GAP_ASSERT(IS_STRING_REP(str1));
GAP_ASSERT(IS_STRING_REP(str2));
/**************************************************************************** ** *F FiltIS_STRING( <self>, <obj> ) . . . . . . . . . test value is a string
*/ static Obj FiltIS_STRING(Obj self, Obj obj)
{ return (IS_STRING( obj ) ? True : False);
}
/**************************************************************************** ** *F FuncIS_STRING_CONV( <self>, <obj> ) . . . . . . . . . . check and convert
*/ static Obj FuncIS_STRING_CONV(Obj self, Obj obj)
{ // return 'true' if <obj> is a string and 'false' otherwise return (IsStringConv(obj) ? True : False);
}
/**************************************************************************** ** *F FuncCONV_STRING( <self>, <string> ) . . . . . . . . convert to string rep
*/ static Obj FuncCONV_STRING(Obj self, Obj string)
{ if (!IS_STRING(string)) {
RequireArgument(SELF_NAME, string, "must be a string");
}
// convert to the string representation
ConvString( string );
return 0;
}
/**************************************************************************** ** *F FiltIS_STRING_REP( <self>, <obj> ) . . . . test if value is a string rep
*/ static Obj IsStringRepFilt;
/**************************************************************************** ** *F FuncCOPY_TO_STRING_REP( <self>, <obj> ) . copy a string into string rep
*/ static Obj FuncCOPY_TO_STRING_REP(Obj self, Obj string)
{ if (!IS_STRING(string)) {
RequireArgument(SELF_NAME, string, "must be a string");
} return CopyToStringRep(string);
}
/**************************************************************************** ** *F FuncPOSITION_SUBSTRING( <self>, <string>, <substr>, <off> ) . position of ** substring ** ** <str> and <substr> must be strings and <off> an integer. The position ** of first character of substring in string, search starting from ** <off>+1, is returned if such a substring exists. Otherwise `fail' is ** returned.
*/ static Obj FuncPOSITION_SUBSTRING(Obj self, Obj string, Obj substr, Obj off)
{ Int ipos, i, j, lens, lenss, max; const UInt1 *s, *ss;
// special case for the empty string
lenss = GET_LEN_STRING(substr); if ( lenss == 0 ) { return INTOBJ_INT(ipos + 1);
}
lens = GET_LEN_STRING(string);
max = lens - lenss + 1;
s = CONST_CHARS_STRING(string);
ss = CONST_CHARS_STRING(substr);
const UInt1 c = ss[0]; for (i = ipos; i < max; i++) { if (c == s[i]) { for (j = 1; j < lenss; j++) { if (! (s[i+j] == ss[j])) break;
} if (j == lenss) return INTOBJ_INT(i+1);
}
} return Fail;
}
/**************************************************************************** ** *F FuncNormalizeWhitespace( <self>, <string> ) . . . . . normalize white ** space in place ** ** Whitespace characters are " \r\t\n". Leading and trailing whitespace in ** string is removed. Intermediate sequences of whitespace characters are ** substituted by a single space. **
*/ static Obj FuncNormalizeWhitespace(Obj self, Obj string)
{
UInt1 *s, c; Int i, j, len, white;
RequireStringRep(SELF_NAME, string);
len = GET_LEN_STRING(string);
s = CHARS_STRING(string);
i = -1;
white = 1; for (j = 0; j < len; j++) {
c = s[j]; if (c == ' ' || c == '\n' || c == '\t' || c == '\r') { if (! white) {
i++;
s[i] = ' ';
white = 1;
}
} else {
i++;
s[i] = c;
white = 0;
}
} if (white && i > -1)
i--;
s[i+1] = '\0';
SET_LEN_STRING(string, i+1);
// to make it useful as C-string
CHARS_STRING(string)[i+1] = (UInt1)0;
return (Obj)0;
}
/**************************************************************************** ** *F FuncREMOVE_CHARACTERS( <self>, <string>, <rem> ) . . . . . delete characters ** from <rem> in <string> in place **
*/
static Obj FuncREMOVE_CHARACTERS(Obj self, Obj string, Obj rem)
{
UInt1 *s; Int i, j, len;
UInt1 REMCHARLIST[256] = {0};
// set REMCHARLIST by setting positions of characters in rem to 1
len = GET_LEN_STRING(rem);
s = CHARS_STRING(rem); for(i=0; i<len; i++) REMCHARLIST[s[i]] = 1;
// now change string in place
len = GET_LEN_STRING(string);
s = CHARS_STRING(string);
i = -1; for (j = 0; j < len; j++) { if (REMCHARLIST[s[j]] == 0) {
i++;
s[i] = s[j];
}
}
i++;
s[i] = '\0';
SET_LEN_STRING(string, i);
SHRINK_STRING(string);
// set SPLITSTRINGSEPS by setting positions of characters in rem to 1
len = GET_LEN_STRING(seps);
s = CONST_CHARS_STRING(seps); for(i=0; i<len; i++) SPLITSTRINGSEPS[s[i]] = 1;
// set SPLITSTRINGWSPACE by setting positions of characters in rem to 1
len = GET_LEN_STRING(wspace);
s = CONST_CHARS_STRING(wspace); for(i=0; i<len; i++) SPLITSTRINGWSPACE[s[i]] = 1;
// create the result (list of strings)
res = NEW_PLIST(T_PLIST, 2);
pos = 0;
// now do the splitting
len = GET_LEN_STRING(string);
s = CONST_CHARS_STRING(string); for (a=0, z=0; z<len; z++) { // Whenever we encounter a separator or a white space, the substring // starting after the last separator/white space is cut out. The // only difference between white spaces and separators is that white // spaces don't separate empty strings. if (SPLITSTRINGWSPACE[s[z]] == 1) { if (a<z) {
l = z-a;
part = NEW_STRING(l); // update s in case there was a garbage collection
s = CONST_CHARS_STRING(string);
COPY_CHARS(part, s + a, l);
CHARS_STRING(part)[l] = 0;
pos++;
AssPlist(res, pos, part);
s = CONST_CHARS_STRING(string);
a = z+1;
} else {
a = z+1;
}
} else { if (SPLITSTRINGSEPS[s[z]] == 1) {
l = z-a;
part = NEW_STRING(l); // update s in case there was a garbage collection
s = CONST_CHARS_STRING(string);
COPY_CHARS(part, s + a, l);
CHARS_STRING(part)[l] = 0;
pos++;
AssPlist(res, pos, part);
s = CONST_CHARS_STRING(string);
a = z+1;
}
}
}
// Pick up a substring at the end of the string. Note that a trailing // separator does not produce an empty string. if (a<z) { // copy until last position which is z-1
l = z-a;
part = NEW_STRING(l);
s = CONST_CHARS_STRING(string);
COPY_CHARS(part, s + a, l);
CHARS_STRING(part)[l] = 0;
pos++;
AssPlist(res, pos, part);
}
return res;
}
#ifdef HPCGAP
/**************************************************************************** ** *F FuncFIND_ALL_IN_STRING( <self>, <string>, <chars> ) ** ** Kernel function to return a list of all occurrences of a set of characters ** within a string.
*/
static Obj FuncFIND_ALL_IN_STRING(Obj self, Obj string, Obj chars)
{
Obj result;
UInt i, len, matches; unsignedchar table[1<<(8*sizeof(char))]; const UInt1 *s; if (!IsStringConv(string) || !IsStringConv(chars))
ErrorQuit("FIND_ALL_IN_STRING: Requires two string arguments", 0, 0);
memset(table, 0, sizeof(table));
len = GET_LEN_STRING(chars);
s = CONST_CHARS_STRING(chars); for (i=0; i<len; i++)
table[s[i]] = 1;
len = GET_LEN_STRING(string);
s = CONST_CHARS_STRING(string);
matches = 0; for (i = 0; i < len; i++) if (table[s[i]])
matches++;
result = NEW_PLIST(T_PLIST_DENSE, matches);
SET_LEN_PLIST(result, matches);
matches = 1; for (i = 0; i < len; i++) if (table[s[i]]) {
SET_ELM_PLIST(result, matches, INTOBJ_INT(i+1));
matches++;
} return result;
}
/**************************************************************************** ** *F FuncNORMALIZE_NEWLINES( <self>, <string> ) ** ** Kernel function to replace all occurrences of CR or CRLF within a ** string with LF characters. This function modifies its argument and ** returns it also as its result.
*/
static Obj FuncNORMALIZE_NEWLINES(Obj self, Obj string)
{
UInt i, j, len; Char *s; if (!IsStringConv(string) || !REGION(string))
ErrorQuit("NORMALIZE_NEWLINES: Requires a mutable string argument", 0, 0);
len = GET_LEN_STRING(string);
s = CSTR_STRING(string); for (i = j = 0; i < len; i++) { if (s[i] == '\r') {
s[j++] = '\n'; if (i + 1 < len && s[i+1] == '\n')
i++;
} else {
s[j++] = s[i];
}
}
SET_LEN_STRING(string, j); return string;
}
#endif
/**************************************************************************** ** *F FuncSMALLINT_STR( <self>, <string> ) ** ** Kernel function to extract parse small integers from strings. Needed before ** we can conveniently have Int working for things like parsing command line ** options
*/
static Obj FuncSMALLINT_STR(Obj self, Obj str)
{ constChar *string = CONST_CSTR_STRING(str); Int x = 0; Int sign = 1; while (isspace((unsignedint)*string))
string++; if (*string == '-') {
sign = -1;
string++;
} elseif (*string == '+') {
string++;
} constChar * start = string; while (IsDigit(*string)) {
x *= 10;
x += (*string - '0');
string++;
} if (start == string || *string) return Fail; return INTOBJ_INT(sign*x);
}
/**************************************************************************** ** *F UnbString( <string>, <pos> ) . . . . . . unbind an element from a string ** ** This is to avoid unpacking of the string to a plain list when <pos> is ** larger or equal to the length of <string>.
*/ staticvoid UnbString(Obj string, Int pos)
{
GAP_ASSERT(IS_MUTABLE_OBJ(string)); constInt len = GET_LEN_STRING(string); if (len == pos) { // maybe the string becomes sorted
CLEAR_FILTS_LIST(string);
CHARS_STRING(string)[pos - 1] = (UInt1)0;
SET_LEN_STRING(string, len - 1);
} elseif (pos < len) {
PLAIN_LIST(string);
UNB_LIST(string, pos);
}
}
// make all the character constants once and for all for ( i = 0; i < 256; i++ ) { for (j = 0; j < 17; j++ ) {
CharCookie[i][j] = cookie_base[j];
}
CharCookie[i][j++] = '0' + i/100;
CharCookie[i][j++] = '0' + (i % 100)/10;
CharCookie[i][j++] = '0' + i % 10;
CharCookie[i][j++] = '\0';
InitGlobalBag( &ObjsChar[i], &(CharCookie[i][0]) );
}
// install the type method
ImportGVarFromLibrary( "TYPE_CHAR", &TYPE_CHAR );
TypeObjFuncs[ T_CHAR ] = TypeChar;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.