Skip to content
This repository was archived by the owner on Aug 31, 2021. It is now read-only.

Commit 25fd9b6

Browse files
author
runrevali
committed
[[ LCB StdLib ]] Change char to mean grapheme rather than codeunit
1 parent aa4df7e commit 25fd9b6

8 files changed

Lines changed: 137 additions & 64 deletions

File tree

engine/src/exec-strings-chunk.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1132,6 +1132,13 @@ bool MCTextChunkIterator_Tokenized::Next()
11321132

11331133
// Creates the text chunk iterator for p_chunk_type over p_text.
//
// Token chunks are served by a directly constructed MCTextChunkIterator_Tokenized.
// Every other chunk type is delegated to MCChunkCreateTextChunkIterator, which is
// given the context's line delimiter for CT_LINE and its item delimiter for all
// remaining types, together with the context's string comparison type.
//
// Returns a raw iterator pointer; for tokens it is allocated here with new.
MCTextChunkIterator *MCStringsTextChunkIteratorCreate(MCExecContext& ctxt, MCStringRef p_text, Chunk_term p_chunk_type)
{
    // Tokens have a dedicated iterator that takes no delimiter or comparison options.
    if (p_chunk_type == CT_TOKEN)
        return new MCTextChunkIterator_Tokenized(p_text, MCChunkTypeFromChunkTerm(p_chunk_type));

    // Lines are split on the line delimiter...
    if (p_chunk_type == CT_LINE)
        return MCChunkCreateTextChunkIterator(p_text, MCChunkTypeFromChunkTerm(p_chunk_type), ctxt . GetLineDelimiter(), ctxt . GetStringComparisonType());

    // ...and everything else is handed the item delimiter.
    return MCChunkCreateTextChunkIterator(p_text, MCChunkTypeFromChunkTerm(p_chunk_type), ctxt . GetItemDelimiter(), ctxt . GetStringComparisonType());
}
11371144

engine/src/exec-strings.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1736,7 +1736,7 @@ uindex_t MCStringsChunkOffset(MCExecContext& ctxt, MCStringRef p_chunk, MCString
17361736
MCTextChunkIterator *tci;
17371737
tci = MCStringsTextChunkIteratorCreate(ctxt, p_string, p_chunk_type);
17381738

1739-
uindex_t t_offset = tci -> ChunkOffset(p_chunk, p_start_offset, ctxt . GetWholeMatches());
1739+
uindex_t t_offset = tci -> ChunkOffset(p_chunk, p_start_offset, nil, ctxt . GetWholeMatches());
17401740

17411741
delete tci;
17421742
return t_offset;

libfoundation/include/foundation-chunk.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ uinteger_t MCChunkCountCodepointChunkCallback(void *context);
4343

4444
uindex_t MCChunkCountChunkChunks(MCStringRef p_string, MCStringRef p_delimiter, MCStringOptions p_options);
4545

46+
bool MCChunkEnsureExtentsByRange(bool p_strict, integer_t p_first, integer_t p_last, MCChunkCountCallback p_callback, void *p_context, uindex_t& r_first, uindex_t& r_chunk_count);
47+
bool MCChunkEnsureExtentsByExpression(bool p_strict, integer_t p_first, MCChunkCountCallback p_callback, void *p_context, uindex_t& r_first, uindex_t& r_chunk_count);
48+
4649
void MCChunkGetExtentsByRange(integer_t p_first, integer_t p_last, MCChunkCountCallback p_callback, void *p_context, uindex_t& r_first, uindex_t& r_chunk_count);
4750

4851
void MCChunkGetExtentsByExpression(integer_t p_first, MCChunkCountCallback p_callback, void *p_context, uindex_t& r_first, uindex_t& r_chunk_count);
@@ -53,6 +56,9 @@ void MCChunkGetExtentsOfByteChunkByExpression(MCDataRef p_data, integer_t p_firs
5356
void MCChunkGetExtentsOfCodeunitChunkByRange(MCStringRef p_data, integer_t p_first, integer_t p_last, uindex_t& r_first, uindex_t& r_chunk_count);
5457
void MCChunkGetExtentsOfCodeunitChunkByExpression(MCStringRef p_data, integer_t p_first, uindex_t& r_first, uindex_t& r_chunk_count);
5558

59+
bool MCChunkGetExtentsOfGraphemeChunkByRange(MCStringRef p_string, integer_t p_first, integer_t p_last, bool p_strict, uindex_t& r_first, uindex_t& r_chunk_count);
60+
bool MCChunkGetExtentsOfGraphemeChunkByExpression(MCStringRef p_string, integer_t p_first, bool p_strict, uindex_t& r_first, uindex_t& r_chunk_count);
61+
5662
void MCChunkGetExtentsOfElementChunkByRange(MCProperListRef p_string, integer_t p_first, integer_t p_last, uindex_t& r_first, uindex_t& r_chunk_count);
5763
void MCChunkGetExtentsOfElementChunkByExpression(MCProperListRef p_string, integer_t p_first, uindex_t& r_first, uindex_t& r_chunk_count);
5864

@@ -111,7 +117,7 @@ class MCTextChunkIterator
111117

112118
virtual bool Next() = 0;
113119

114-
virtual uindex_t ChunkOffset(MCStringRef p_needle, uindex_t p_start_offset, bool p_whole_matches)
120+
virtual uindex_t ChunkOffset(MCStringRef p_needle, uindex_t p_start_offset, uindex_t *p_end_offset, bool p_whole_matches)
115121
{
116122
// Ensure that when no item is skipped, the offset starts from the first item - without skipping it
117123
uindex_t t_chunk_offset;
@@ -145,7 +151,7 @@ class MCTextChunkIterator
145151
}
146152
t_chunk_offset++;
147153
}
148-
while (Next());
154+
while (Next() && (p_end_offset == nil || *p_end_offset < t_chunk_offset));
149155

150156
return 0;
151157
}

libfoundation/src/foundation-chunk.cpp

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,17 @@ uindex_t MCChunkCountChunkChunks(MCStringRef p_string, MCStringRef p_delimiter,
8080
return t_count;
8181
}
8282

83-
void MCChunkGetExtentsByRange(integer_t p_first, integer_t p_last, MCChunkCountCallback p_callback, void *p_context, uindex_t& r_first, uindex_t& r_chunk_count)
83+
bool MCChunkEnsureExtentsByRange(bool p_strict, integer_t p_first, integer_t p_last, MCChunkCountCallback p_callback, void *p_context, uindex_t& r_first, uindex_t& r_chunk_count)
8484
{
8585
int32_t t_chunk_count;
86+
uinteger_t t_count;
87+
bool t_counted;
88+
t_counted = false;
8689

8790
if (p_first < 0 || p_last < 0)
8891
{
89-
uinteger_t t_count;
9092
t_count = p_callback(p_context);
93+
t_counted = true;
9194

9295
if (p_first < 0)
9396
p_first += t_count;
@@ -111,32 +114,73 @@ void MCChunkGetExtentsByRange(integer_t p_first, integer_t p_last, MCChunkCountC
111114
if (t_chunk_count < 0)
112115
t_chunk_count = 0;
113116

117+
if (p_strict)
118+
{
119+
if (t_chunk_count == 0)
120+
return false;
121+
122+
if (!t_counted)
123+
t_count = p_callback(p_context);
124+
125+
if (p_first + t_chunk_count > t_count)
126+
return false;
127+
}
128+
114129
r_chunk_count = t_chunk_count;
115130
r_first = p_first;
131+
return true;
116132
}
117133

118-
void MCChunkGetExtentsByExpression(integer_t p_first, MCChunkCountCallback p_callback, void *p_context, uindex_t& r_first, uindex_t& r_chunk_count)
134+
// Non-strict range lookup: forwards all arguments to MCChunkEnsureExtentsByRange
// with p_strict set to false and discards its boolean result, so callers only
// observe r_first and r_chunk_count.
void MCChunkGetExtentsByRange(integer_t p_first, integer_t p_last, MCChunkCountCallback p_callback, void *p_context, uindex_t& r_first, uindex_t& r_chunk_count)
{
    const bool t_strict = false;
    (void)MCChunkEnsureExtentsByRange(t_strict, p_first, p_last, p_callback, p_context, r_first, r_chunk_count);
}
138+
139+
bool MCChunkEnsureExtentsByExpression(bool p_strict, integer_t p_first, MCChunkCountCallback p_callback, void *p_context, uindex_t& r_first, uindex_t& r_chunk_count)
119140
{
120-
r_chunk_count = 1;
141+
int32_t t_chunk_count;
142+
t_chunk_count = 1;
143+
144+
uinteger_t t_count;
145+
bool t_counted;
146+
t_counted = false;
121147

122148
if (p_first < 0)
123149
{
124-
uinteger_t t_count;
125150
t_count = p_callback(p_context);
151+
t_counted = true;
126152
p_first += t_count;
127153
}
128154
else
129155
p_first--;
130156

131157
if (p_first < 0)
132158
{
133-
r_chunk_count = 0;
159+
t_chunk_count = 0;
134160
p_first = 0;
135161
}
136162

163+
if (p_strict)
164+
{
165+
if (t_chunk_count == 0)
166+
return false;
167+
168+
if (!t_counted)
169+
t_count = p_callback(p_context);
170+
171+
if (p_first + t_chunk_count > t_count)
172+
return false;
173+
}
174+
137175
r_first = p_first;
176+
r_chunk_count = t_chunk_count;
177+
return true;
138178
}
139179

180+
// Non-strict single-index lookup: forwards all arguments to
// MCChunkEnsureExtentsByExpression with p_strict set to false and discards its
// boolean result, so callers only observe r_first and r_chunk_count.
void MCChunkGetExtentsByExpression(integer_t p_first, MCChunkCountCallback p_callback, void *p_context, uindex_t& r_first, uindex_t& r_chunk_count)
{
    const bool t_strict = false;
    (void)MCChunkEnsureExtentsByExpression(t_strict, p_first, p_callback, p_context, r_first, r_chunk_count);
}
140184
////////////////////////////////////////////////////////////////////////////////
141185

142186
void MCChunkGetExtentsOfByteChunkByRange(MCDataRef p_data, integer_t p_first, integer_t p_last, uindex_t& r_first, uindex_t& r_chunk_count)
@@ -159,14 +203,14 @@ void MCChunkGetExtentsOfCodeunitChunkByExpression(MCStringRef p_string, integer_
159203
MCChunkGetExtentsByExpression(p_first, MCChunkCountCodeunitChunkCallback, &p_string, r_first, r_chunk_count);
160204
}
161205

162-
void MCChunkGetExtentsOfGraphemeChunkByRange(MCStringRef p_string, integer_t p_first, integer_t p_last, uindex_t& r_first, uindex_t& r_chunk_count)
206+
// Resolves the grapheme range p_first..p_last of p_string via
// MCChunkEnsureExtentsByRange, counting chunks with
// MCChunkCountGraphemeChunkCallback. p_strict and the outputs are passed
// straight through; the result is whatever the range helper returns.
bool MCChunkGetExtentsOfGraphemeChunkByRange(MCStringRef p_string, integer_t p_first, integer_t p_last, bool p_strict, uindex_t& r_first, uindex_t& r_chunk_count)
{
    // The counting callback receives the address of the string reference as its context.
    void *t_context = &p_string;
    return MCChunkEnsureExtentsByRange(p_strict, p_first, p_last, MCChunkCountGraphemeChunkCallback, t_context, r_first, r_chunk_count);
}
166210

167-
void MCChunkGetExtentsOfGraphemeChunkByExpression(MCStringRef p_string, integer_t p_first, uindex_t& r_first, uindex_t& r_chunk_count)
211+
// Resolves the single grapheme index p_first of p_string via
// MCChunkEnsureExtentsByExpression, counting chunks with
// MCChunkCountGraphemeChunkCallback. p_strict and the outputs are passed
// straight through; the result is whatever the expression helper returns.
bool MCChunkGetExtentsOfGraphemeChunkByExpression(MCStringRef p_string, integer_t p_first, bool p_strict, uindex_t& r_first, uindex_t& r_chunk_count)
{
    // The counting callback receives the address of the string reference as its context.
    void *t_context = &p_string;
    return MCChunkEnsureExtentsByExpression(p_strict, p_first, MCChunkCountGraphemeChunkCallback, t_context, r_first, r_chunk_count);
}
171215

172216
void MCChunkGetExtentsOfElementChunkByRange(MCProperListRef p_string, integer_t p_first, integer_t p_last, uindex_t& r_first, uindex_t& r_chunk_count)
@@ -265,6 +309,7 @@ bool MCChunkApply(MCStringRef p_string, MCStringRef p_delimiter, MCStringOptions
265309
bool MCChunkIterate(MCRange& x_range, MCStringRef p_string, MCStringRef p_delimiter, MCStringOptions p_options, bool p_first)
266310
{
267311
// Currently assumes delimiter is 1 char long.
312+
// Reimplement with MCTextChunkIterator_Delimited to accommodate arbitrary delimiters.
268313
uindex_t t_delimiter_offset;
269314

270315
if (!p_first)
@@ -560,7 +605,7 @@ MCTextChunkIterator_ICU::MCTextChunkIterator_ICU(MCStringRef p_text, MCChunkType
560605
{
561606
MCRange t_range;
562607
uindex_t t_end;
563-
/* UNCHECKED */ MCLocaleBreakIteratorCreate(kMCBasicLocale, p_chunk_type == kMCChunkTypeSentence ? kMCBreakIteratorTypeSentence : kMCBreakIteratorTypeCharacter, break_iterator);
608+
/* UNCHECKED */ MCLocaleBreakIteratorCreate(kMCLocaleBasic, p_chunk_type == kMCChunkTypeSentence ? kMCBreakIteratorTypeSentence : kMCBreakIteratorTypeCharacter, break_iterator);
564609
/* UNCHECKED */ MCLocaleBreakIteratorSetText(break_iterator, m_text);
565610
t_range . length = 0;
566611
t_range . offset = 0;
@@ -576,7 +621,7 @@ MCTextChunkIterator_ICU::MCTextChunkIterator_ICU(MCStringRef p_text, MCChunkType
576621
case kMCChunkTypeTrueWord:
577622
{
578623
MCAutoArray<uindex_t> t_breaks;
579-
/* UNCHECKED */ MCLocaleBreakIteratorCreate(kMCBasicLocale, kMCBreakIteratorTypeWord, break_iterator);
624+
/* UNCHECKED */ MCLocaleBreakIteratorCreate(kMCLocaleBasic, kMCBreakIteratorTypeWord, break_iterator);
580625
/* UNCHECKED */ MCLocaleBreakIteratorSetText(break_iterator, m_text);
581626
MCRange t_range = MCRangeMake(0, 0);
582627

@@ -587,6 +632,8 @@ MCTextChunkIterator_ICU::MCTextChunkIterator_ICU(MCStringRef p_text, MCChunkType
587632
}
588633
}
589634
break;
635+
default:
636+
break;
590637
}
591638

592639
if (break_iterator != nil)
@@ -677,9 +724,6 @@ MCTextChunkIterator *MCChunkCreateTextChunkIterator(MCStringRef p_text, MCChunkT
677724
case kMCChunkTypeParagraph:
678725
t_iterator = new MCTextChunkIterator_Delimited(p_text, p_chunk_type, MCSTR("\n"));
679726
break;
680-
case kMCChunkTypeToken:
681-
t_iterator = new MCTextChunkIterator_Tokenized(p_text, p_chunk_type);
682-
break;
683727
case kMCChunkTypeWord:
684728
t_iterator = new MCTextChunkIterator_Word(p_text, p_chunk_type, p_delimiter);
685729
break;

libscript/libscript.xcodeproj/project.pbxproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,6 @@
208208
4DDA20441A136FF4001B0CA2 /* script-builder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = "script-builder.cpp"; path = "src/script-builder.cpp"; sourceTree = "<group>"; };
209209
4DDA20661A139BA1001B0CA2 /* libscript-test */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "libscript-test"; sourceTree = BUILT_PRODUCTS_DIR; };
210210
7210DCA31A37102F00C23D23 /* module-helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = "module-helper.cpp"; path = "src/module-helper.cpp"; sourceTree = "<group>"; };
211-
7628E9D81A66B25F00FFE7A9 /* module-chunk.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = "module-chunk.cpp"; path = "src/module-chunk.cpp"; sourceTree = "<group>"; };
212211
766113881A31FB640042DE7F /* module-array.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = "module-array.cpp"; path = "src/module-array.cpp"; sourceTree = "<group>"; };
213212
7661138C1A31FDFD0042DE7F /* array.mlc */ = {isa = PBXFileReference; lastKnownFileType = text; name = array.mlc; path = src/array.mlc; sourceTree = "<group>"; };
214213
76FEC22F1A5D3F0900188FD4 /* file.mlc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = file.mlc; path = src/file.mlc; sourceTree = "<group>"; };
@@ -323,7 +322,6 @@
323322
4D79B6581A273B8100DD750C /* Library Source */ = {
324323
isa = PBXGroup;
325324
children = (
326-
7628E9D81A66B25F00FFE7A9 /* module-chunk.cpp */,
327325
4D79B6591A273BA000DD750C /* module-arithmetic.cpp */,
328326
766113881A31FB640042DE7F /* module-array.cpp */,
329327
4D79B65A1A273BA000DD750C /* module-binary.cpp */,
@@ -547,6 +545,8 @@
547545
4D79B6831A273BA000DD750C /* module-byte.cpp in Sources */,
548546
4D79B6851A273BA000DD750C /* module-encoding.cpp in Sources */,
549547
4D21B60119E3DFED00B64BEF /* script-package.cpp in Sources */,
548+
C7A74E161A6D07CC0031099F /* module-system.cpp in Sources */,
549+
C7A74E171A6D07DF0031099F /* module-date.cpp in Sources */,
550550
4D79B68F1A273BA000DD750C /* module-type.cpp in Sources */,
551551
766113891A31FB640042DE7F /* module-array.cpp in Sources */,
552552
4D79B6811A273BA000DD750C /* module-binary.cpp in Sources */,

libscript/src/module-char.cpp

Lines changed: 55 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,35 @@
1717
#include <foundation.h>
1818
#include <foundation-auto.h>
1919
#include <foundation-chunk.h>
20+
#include <foundation-locale.h>
21+
22+
// Copies the chars of p_target designated by p_grapheme_range into r_output.
//
// The grapheme range is first mapped (using kMCLocaleBasic) to the index range
// that MCStringCopySubstring operates on.
//
// Returns false if either the mapping or the substring copy fails; r_output is
// only written by a successful copy.
bool MCCharEvaluateChunk(MCStringRef p_target, MCRange p_grapheme_range, MCStringRef& r_output)
{
    MCRange t_mapped_range;

    // Do not go on to copy with an unset range if the mapping could not be done.
    if (!MCStringMapGraphemeIndices(p_target, kMCLocaleBasic, p_grapheme_range, t_mapped_range))
        return false;

    return MCStringCopySubstring(p_target, t_mapped_range, r_output);
}
29+
30+
// Replaces the chars of x_target designated by p_grapheme_range with p_value.
//
//   x_target         - string to update; reassigned to the edited copy on success.
//   p_value          - replacement text.
//   p_grapheme_range - range to replace, expressed in graphemes; it is mapped
//                      (using kMCLocaleBasic) before the replacement is made.
//   r_output         - not written by this function; retained so the signature
//                      stays compatible with existing callers.
//
// Returns false, leaving x_target unchanged, if the mapping or any string
// operation fails.
bool MCCharStoreChunk(MCStringRef &x_target, MCStringRef p_value, MCRange p_grapheme_range, MCStringRef& r_output)
{
    // Map the range first so a mapping failure can never lead to a replace
    // with an unset range.
    MCRange t_mapped_range;
    if (!MCStringMapGraphemeIndices(x_target, kMCLocaleBasic, p_grapheme_range, t_mapped_range))
        return false;

    // Edit a mutable copy so that x_target is untouched on failure.
    MCAutoStringRef t_mutable;
    if (!MCStringMutableCopy(x_target, &t_mutable))
        return false;

    if (!MCStringReplace(*t_mutable, t_mapped_range, p_value))
        return false;

    MCAutoStringRef t_result;
    if (!MCStringCopy(*t_mutable, &t_result))
        return false;

    MCValueAssign(x_target, *t_result);
    return true;
}
2049

2150
extern "C" MC_DLLEXPORT void MCCharEvalNumberOfCharsIn(MCStringRef p_target, index_t& r_output)
2251
{
@@ -25,57 +54,41 @@ extern "C" MC_DLLEXPORT void MCCharEvalNumberOfCharsIn(MCStringRef p_target, ind
2554

2655
// Evaluates whether the single char p_needle is among the chars of p_target.
//
// Throws a generic error ("needle must be a single char") and leaves r_output
// untouched unless p_needle consists of exactly one char. Otherwise r_output
// receives the result of IsAmong on a character iterator over p_target.
extern "C" MC_DLLEXPORT void MCCharEvalIsAmongTheCharsOf(MCStringRef p_needle, MCStringRef p_target, bool& r_output)
{
    // Count the chars of the *needle* (not the target) to validate it. A
    // separate iterator is used for the count so that the search below always
    // starts from a fresh iterator.
    MCTextChunkIterator *t_needle_chars;
    t_needle_chars = MCChunkCreateTextChunkIterator(p_needle, kMCChunkTypeCharacter, nil, kMCStringOptionCompareExact);
    const bool t_single_char = (t_needle_chars -> CountChunks() == 1);
    delete t_needle_chars;

    // Error if there is more than one char in needle.
    if (!t_single_char)
    {
        MCErrorCreateAndThrow(kMCGenericErrorTypeInfo, "reason", MCSTR("needle must be a single char"), nil);
        return;
    }

    MCTextChunkIterator *t_target_chars;
    t_target_chars = MCChunkCreateTextChunkIterator(p_target, kMCChunkTypeCharacter, nil, kMCStringOptionCompareExact);
    r_output = t_target_chars -> IsAmong(p_needle);

    // The iterator is owned here; release it on the success path too.
    delete t_target_chars;
}
3868

3969
// Fetches chars p_start..p_finish of p_target into r_output.
//
// The range is resolved strictly in graphemes; if it cannot be resolved a
// generic error ("chunk index out of range") is thrown and r_output is left
// untouched. The result of the copy itself is not inspected.
extern "C" MC_DLLEXPORT void MCCharFetchCharRangeOf(index_t p_start, index_t p_finish, MCStringRef p_target, MCStringRef& r_output)
{
    uindex_t t_first = 0;
    uindex_t t_length = 0;

    const bool t_in_range = MCChunkGetExtentsOfGraphemeChunkByRange(p_target, p_start, p_finish, true, t_first, t_length);
    if (t_in_range)
    {
        MCCharEvaluateChunk(p_target, MCRangeMake(t_first, t_length), r_output);
        return;
    }

    MCErrorCreateAndThrow(kMCGenericErrorTypeInfo, "reason", MCSTR("chunk index out of range"), nil);
}
5580

5681
// Stores p_value into chars p_start..p_finish of x_target.
//
// The range is resolved strictly in graphemes; if it cannot be resolved a
// generic error ("chunk index out of range") is thrown and x_target is left
// untouched. Otherwise the replacement is delegated to MCCharStoreChunk.
extern "C" MC_DLLEXPORT void MCCharStoreCharRangeOf(MCStringRef p_value, index_t p_start, index_t p_finish, MCStringRef& x_target)
{
    uindex_t t_start, t_count;

    if (!MCChunkGetExtentsOfGraphemeChunkByRange(x_target, p_start, p_finish, true, t_start, t_count))
    {
        MCErrorCreateAndThrow(kMCGenericErrorTypeInfo, "reason", MCSTR("chunk index out of range"), nil);
        return;
    }

    // MCCharStoreChunk takes an output reference as its last argument. Give it
    // a dedicated, auto-released sink rather than aliasing the input p_value,
    // so the value being stored can never be overwritten through that
    // reference.
    MCAutoStringRef t_ignored_output;
    MCCharStoreChunk(x_target, p_value, MCRangeMake(t_start, t_count), &t_ignored_output);
}
8093

8194
extern "C" MC_DLLEXPORT void MCCharFetchCharOf(index_t p_index, MCStringRef p_target, MCStringRef& r_output)
@@ -204,16 +217,27 @@ extern "C" MC_DLLEXPORT void MCCharExecDeleteLastCharOf(MCStringRef& x_target)
204217
// Will result in tChar containing the value it had at the point of end repeat.
205218
// Advances a 'repeat for each char' loop over p_string by one step.
//
//   x_iterator - opaque loop state. A nil value starts a new loop: a character
//                chunk iterator over p_string is created. Any other value is
//                taken to be the iterator stored by a previous call.
//   r_iterand  - receives the current char via the iterator's CopyString.
//
// Returns true while a char was produced. When the iterator is exhausted it is
// deleted, x_iterator is reset to nil so no dangling pointer is left with the
// caller, and false is returned.
//
// NOTE(review): the iterator is only freed on the exhausted path; if a caller
// abandons the loop early nothing visible here releases it - confirm how the
// caller handles that case.
extern "C" MC_DLLEXPORT bool MCCharRepeatForEachChar(void*& x_iterator, MCStringRef& r_iterand, MCStringRef p_string)
{
    MCTextChunkIterator *t_iterator;
    if (x_iterator == nil)
        t_iterator = MCChunkCreateTextChunkIterator(p_string, kMCChunkTypeCharacter, nil, kMCStringOptionCompareExact);
    else
        t_iterator = static_cast<MCTextChunkIterator *>(x_iterator);

    if (!t_iterator -> Next())
    {
        delete t_iterator;
        // Do not leave the caller holding a pointer to the freed iterator.
        x_iterator = nil;
        return false;
    }

    t_iterator -> CopyString(r_iterand);

    // Hand the iterator back so the next call continues from here.
    x_iterator = t_iterator;

    return true;
}

0 commit comments

Comments
 (0)