From cac8bbdae9f47bc862007f5b077b78d2855f0c96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=96=87=E5=AE=87=E7=A5=A5?= Date: Wed, 25 Sep 2024 00:41:00 +0800 Subject: [PATCH 1/4] move String::substring to builtin --- builtin/builtin.mbti | 1 + builtin/string.mbt | 44 +++++++++++++++++++++++++++++++++++++++++ builtin/string_test.mbt | 23 +++++++++++++++++++++ string/string.mbt | 31 ----------------------------- string/string.mbti | 1 - string/string_test.mbt | 23 --------------------- 6 files changed, 68 insertions(+), 55 deletions(-) create mode 100644 builtin/string.mbt diff --git a/builtin/builtin.mbti b/builtin/builtin.mbti index 85adea792..85aae2bd6 100644 --- a/builtin/builtin.mbti +++ b/builtin/builtin.mbti @@ -536,6 +536,7 @@ impl String { op_add(String, String) -> String op_equal(String, String) -> Bool op_get(String, Int) -> Char + substring(String, ~start : Int = .., ~end : Int = ..) -> String to_js_string(String) -> Js_string to_json(String) -> Json to_string(String) -> String diff --git a/builtin/string.mbt b/builtin/string.mbt new file mode 100644 index 000000000..80329eb4d --- /dev/null +++ b/builtin/string.mbt @@ -0,0 +1,44 @@ +// Copyright 2024 International Digital Economy Academy +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// @intrinsic %string.substring +fn unsafe_substring(str : String, start : Int, end : Int) -> String { + let len = end - start + let buf = Buffer::new(size_hint=len) + buf.write_sub_string(str, start, len) + buf.to_string() +} + +/// Get substring of the string from [start] to [end] exclusive. +pub fn substring( + self : String, + ~start : Int = 0, + ~end : Int = self.length() +) -> String { + if start < 0 { + abort("String::substring: start index is negative") + } + if end < 0 { + abort("String::substring: end index is negative") + } + if start > end { + abort("String::substring: start index is greater than end index") + } + if end > self.length() { + abort( + "String::substring: end index is greater than the length of the string", + ) + } + unsafe_substring(self, start, end) +} diff --git a/builtin/string_test.mbt b/builtin/string_test.mbt index f0894f8e5..d9a242579 100644 --- a/builtin/string_test.mbt +++ b/builtin/string_test.mbt @@ -38,3 +38,26 @@ test "String::escape" { , ) } + +test "substring" { + assert_eq!("abc".substring(), "abc") + assert_eq!("abc".substring(start=1), "bc") + assert_eq!("abc".substring(end=2), "ab") + assert_eq!("abc".substring(start=1, end=2), "b") +} + +test "panic substring_start_index_error" { + "test".substring(start=-1, end=0) |> ignore +} + +test "panic substring_end_index_error" { + "test".substring(start=0, end=-1) |> ignore +} + +test "panic substring_start_end_index_error" { + "test".substring(start=1, end=0) |> ignore +} + +test "panic substring_length_index_error" { + "test".substring(start=0, end=5) |> ignore +} diff --git a/string/string.mbt b/string/string.mbt index 824d31339..16837316c 100644 --- a/string/string.mbt +++ b/string/string.mbt @@ -104,37 +104,6 @@ pub fn to_array(self : String) -> Array[Char] { ) } -/// @intrinsic %string.substring -fn unsafe_substring(str : String, start : Int, end : Int) -> String { - let len = end - start - let buf = Buffer::new(size_hint=len) - buf.write_sub_string(str, start, len) - buf.to_string() -} - -/// Get substring of the string from [start] to [end] exclusive. -pub fn substring( - self : String, - ~start : Int = 0, - ~end : Int = self.length() -) -> String { - if start < 0 { - abort("String::substring: start index is negative") - } - if end < 0 { - abort("String::substring: end index is negative") - } - if start > end { - abort("String::substring: start index is greater than end index") - } - if end > self.length() { - abort( - "String::substring: end index is greater than the length of the string", - ) - } - unsafe_substring(self, start, end) -} - pub fn iter(self : String) -> Iter[Char] { Iter::new( fn(yield) { diff --git a/string/string.mbti b/string/string.mbti index 269fce175..218795886 100644 --- a/string/string.mbti +++ b/string/string.mbti @@ -28,7 +28,6 @@ impl String { rev_iter(String) -> Iter[Char] split(String, String) -> Iter[String] starts_with(String, String) -> Bool - substring(String, ~start : Int = .., ~end : Int = ..) -> String to_array(String) -> Array[Char] to_bytes(String) -> Bytes to_lower(String) -> String diff --git a/string/string_test.mbt b/string/string_test.mbt index ad530b9a4..89e8f963c 100644 --- a/string/string_test.mbt +++ b/string/string_test.mbt @@ -76,13 +76,6 @@ test "to_array" { inspect!(a[6], content="'😀'") } -test "substring" { - assert_eq!("abc".substring(), "abc") - assert_eq!("abc".substring(start=1), "bc") - assert_eq!("abc".substring(end=2), "ab") - assert_eq!("abc".substring(start=1, end=2), "b") -} - test "chars" { let mut str = "" "A😊𠮷BA😊𠮷B" @@ -169,22 +162,6 @@ test "Buffer::to_bytes" { assert_eq!(buffer.to_bytes().to_string(), "中文") } -test "panic substring_start_index_error" { - "test".substring(start=-1, end=0) |> ignore -} - -test "panic substring_end_index_error" { - "test".substring(start=0, end=-1) |> ignore -} - -test "panic substring_start_end_index_error" { - "test".substring(start=1, end=0) |> ignore -} - -test "panic substring_length_index_error" { - "test".substring(start=0, end=5) |> ignore -} - test "trim_left" { inspect!("aaabcd".trim_start("a"), content="bcd") inspect!("aaabcd".trim_start(" "), content="aaabcd") From 01dcd47de369ffa61b6740ff75cc205bd0886f38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=96=87=E5=AE=87=E7=A5=A5?= Date: Wed, 25 Sep 2024 00:56:26 +0800 Subject: [PATCH 2/4] add platform specific StringBuilder --- builtin/buffer.mbt | 2 +- builtin/builtin.mbti | 9 +++++++ builtin/moon.pkg.json | 4 +++- builtin/stringbuilder_buffer.mbt | 40 +++++++++++++++++++++++++++++++ builtin/stringbuilder_concat.mbt | 41 ++++++++++++++++++++++++++++++++ builtin/stringbuilder_test.mbt | 21 ++++++++++++++++ 6 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 builtin/stringbuilder_buffer.mbt create mode 100644 builtin/stringbuilder_concat.mbt create mode 100644 builtin/stringbuilder_test.mbt diff --git a/builtin/buffer.mbt b/builtin/buffer.mbt index 5e319f763..b9f677d26 100644 --- a/builtin/buffer.mbt +++ b/builtin/buffer.mbt @@ -92,7 +92,7 @@ pub fn write_string(self : Buffer, value : String) -> Unit { } pub fn write_object(self : Buffer, value : Show) -> Unit { - write_string(self, value.to_string()) + self.write_string(value.to_string()) } pub fn write_bytes(self : Buffer, value : Bytes) -> Unit { diff --git a/builtin/builtin.mbti b/builtin/builtin.mbti index 85aae2bd6..6f49dc571 100644 --- a/builtin/builtin.mbti +++ b/builtin/builtin.mbti @@ -315,6 +315,15 @@ impl SourceLoc { to_string(Self) -> String } +type StringBuilder +impl StringBuilder { + new(~size_hint : Int = ..) -> Self + to_string(Self) -> String + write_char(Self, Char) -> Unit + write_string(Self, String) -> Unit + write_sub_string(Self, String, Int, Int) -> Unit +} + pub type UnsafeMaybeUninit impl Unit { op_equal(Unit, Unit) -> Bool diff --git a/builtin/moon.pkg.json b/builtin/moon.pkg.json index 6458bc0bf..96b9f80a9 100644 --- a/builtin/moon.pkg.json +++ b/builtin/moon.pkg.json @@ -18,6 +18,8 @@ "int64_nonjs.mbt": ["not", "js"], "arraycore_js.mbt": ["js"], "arraycore_nonjs.mbt": ["not", "js"], - "array_nonjs_test.mbt": ["not", "js"] + "array_nonjs_test.mbt": ["not", "js"], + "stringbuilder_buffer.mbt": ["not", ["js"]], + "stringbuilder_concat.mbt": ["js"] } } diff --git a/builtin/stringbuilder_buffer.mbt b/builtin/stringbuilder_buffer.mbt new file mode 100644 index 000000000..015df072b --- /dev/null +++ b/builtin/stringbuilder_buffer.mbt @@ -0,0 +1,40 @@ +// Copyright 2024 International Digital Economy Academy +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +type StringBuilder Buffer + +pub fn StringBuilder::new(~size_hint : Int = 0) -> StringBuilder { + Buffer::new(~size_hint) +} + +pub fn StringBuilder::write_string(self : StringBuilder, str : String) -> Unit { + self._.write_string(str) +} + +pub fn StringBuilder::write_char(self : StringBuilder, ch : Char) -> Unit { + self._.write_char(ch) +} + +pub fn StringBuilder::write_sub_string( + self : StringBuilder, + str : String, + start : Int, + len : Int +) -> Unit { + self._.write_sub_string(str, start, len) +} + +pub fn StringBuilder::to_string(self : StringBuilder) -> String { + self._.to_string() +} diff --git a/builtin/stringbuilder_concat.mbt b/builtin/stringbuilder_concat.mbt new file mode 100644 index 000000000..18819e822 --- /dev/null +++ b/builtin/stringbuilder_concat.mbt @@ -0,0 +1,41 @@ +// Copyright 2024 International Digital Economy Academy +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +type StringBuilder Ref[String] + +pub fn StringBuilder::new(~size_hint : Int = 0) -> StringBuilder { + ignore(size_hint) + { val: "" } +} + +pub fn StringBuilder::write_string(self : StringBuilder, str : String) -> Unit { + self.val += str +} + +pub fn StringBuilder::write_char(self : StringBuilder, ch : Char) -> Unit { + self.val += Char::to_string(ch) +} + +pub fn StringBuilder::write_sub_string( + self : StringBuilder, + str : String, + start : Int, + len : Int +) -> Unit { + self.val += str.substring(~start, end=start + len) +} + +pub fn StringBuilder::to_string(self : StringBuilder) -> String { + self.val +} diff --git a/builtin/stringbuilder_test.mbt b/builtin/stringbuilder_test.mbt new file mode 100644 index 000000000..5a04c9ee9 --- /dev/null +++ b/builtin/stringbuilder_test.mbt @@ -0,0 +1,21 @@ +// Copyright 2024 International Digital Economy Academy +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +test "stringbuilder" { + let buf = StringBuilder::new() + buf.write_string("hello") + buf.write_char(' ') + buf.write_sub_string("world", 0, 3) + inspect!(buf.to_string(), content="hello wor") +} From f77b658fa4127434419aa43a8b9f58962b6a5575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=96=87=E5=AE=87=E7=A5=A5?= Date: Wed, 25 Sep 2024 00:59:46 +0800 Subject: [PATCH 3/4] use StringBuilder instead of Buffer in builtin package --- builtin/assert.mbt | 2 +- builtin/autoloc.mbt | 2 +- builtin/console.mbt | 8 ++++---- builtin/json.mbt | 2 +- builtin/linked_hash_map.mbt | 2 +- builtin/show.mbt | 4 ++-- builtin/traits.mbt | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/builtin/assert.mbt b/builtin/assert.mbt index 5f656f4e3..c5b22f0a4 100644 --- a/builtin/assert.mbt +++ b/builtin/assert.mbt @@ -13,7 +13,7 @@ // limitations under the License. fn debug_string[T : Show](t : T) -> String { - let buf = Buffer::new(size_hint=50) + let buf = StringBuilder::new(size_hint=50) t.output(buf) buf.to_string() } diff --git a/builtin/autoloc.mbt b/builtin/autoloc.mbt index a0b7fba57..cad96534b 100644 --- a/builtin/autoloc.mbt +++ b/builtin/autoloc.mbt @@ -27,7 +27,7 @@ pub fn ArgsLoc::to_string(self : ArgsLoc) -> String { } pub fn ArgsLoc::to_json(self : ArgsLoc) -> String { - let buf = Buffer::new(size_hint=10) + let buf = StringBuilder::new(size_hint=10) buf.write_char('[') for i = 0; i < self._.length(); i = i + 1 { if i != 0 { diff --git a/builtin/console.mbt b/builtin/console.mbt index 30ccae0cf..aa089a367 100644 --- a/builtin/console.mbt +++ b/builtin/console.mbt @@ -43,7 +43,7 @@ pub fn to_string(self : Int64) -> String { // The min and max value of i64 are -9223372036854775808 and 9223372036854775807, // so max=20 is enough. - let buf = Buffer::new(size_hint=20) + let buf = StringBuilder::new(size_hint=20) if self < 0L { buf.write_char('-') } @@ -71,7 +71,7 @@ pub fn to_string(self : Int) -> String { // The min and max value of i32 are -2147483648 and 2147483647, // so max=11 is enough. - let buf = Buffer::new() + let buf = StringBuilder::new() if self < 0 { buf.write_char('-') } @@ -88,7 +88,7 @@ pub fn to_string(self : Int) -> String { } pub fn UInt::to_string(self : UInt) -> String { - let buf = Buffer::new() + let buf = StringBuilder::new() fn write_digits(num) { let num2 = num / 10U if num2 != 0U { @@ -108,7 +108,7 @@ test "UInt::to_string" { } pub fn UInt64::to_string(self : UInt64) -> String { - let buf = Buffer::new() + let buf = StringBuilder::new() fn write_digits(num : UInt64) { let num2 = num / 10UL if num2 != 0UL { diff --git a/builtin/json.mbt b/builtin/json.mbt index c9d4a4b7d..12a07ef22 100644 --- a/builtin/json.mbt +++ b/builtin/json.mbt @@ -61,7 +61,7 @@ fn escape_json_string(str : String) -> String { } let len = str.length() - let buf = Buffer::new(size_hint=len) + let buf = StringBuilder::new(size_hint=len) for i in 0.. Int { // Utils fn debug_entries[K : Show, V : Show](self : Map[K, V]) -> String { - let buf = Buffer::new() + let buf = StringBuilder::new() for i = 0; i < self.entries.length(); i = i + 1 { if i > 0 { buf.write_char(',') diff --git a/builtin/show.mbt b/builtin/show.mbt index bd89c2e10..3693ed990 100644 --- a/builtin/show.mbt +++ b/builtin/show.mbt @@ -60,7 +60,7 @@ pub impl Show for Bytes with output(self, logger) { } pub impl Show for Bytes with to_string(self) { - let buf = Buffer::new() + let buf = StringBuilder::new() Show::output(self, buf) buf.to_string() } @@ -128,7 +128,7 @@ pub impl Show for String with to_string(self) { self } /// Returns a valid MoonBit string literal representation of a string, /// add quotes and escape special characters. pub fn escape(self : String) -> String { - let buf = Buffer::new() + let buf = StringBuilder::new() Show::output(self, buf) buf.to_string() } diff --git a/builtin/traits.mbt b/builtin/traits.mbt index 15098df28..a7903494a 100644 --- a/builtin/traits.mbt +++ b/builtin/traits.mbt @@ -74,7 +74,7 @@ pub trait Show { // Default implementation for `Show::to_string`, uses a `Buffer` impl Show with to_string(self) { - let logger = Buffer::new() + let logger = StringBuilder::new() self.output(logger) logger.to_string() } From 06c281d1741cde1ce53d4f1eec7e7b131d02fe26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=96=87=E5=AE=87=E7=A5=A5?= Date: Wed, 25 Sep 2024 15:46:15 +0800 Subject: [PATCH 4/4] use @builtin.StringBuilder in json package --- json/internal_types.mbt | 37 ------------------------------------- json/json.mbt | 8 ++++---- json/lex_string.mbt | 22 +++++++++++----------- 3 files changed, 15 insertions(+), 52 deletions(-) diff --git a/json/internal_types.mbt b/json/internal_types.mbt index 8d545af05..ca15ab92c 100644 --- a/json/internal_types.mbt +++ b/json/internal_types.mbt @@ -61,40 +61,3 @@ priv enum Token { Comma Colon } derive(Eq, Show) - -priv struct StringBuilder { - mut buffer : String -} - -fn StringBuilder::make() -> StringBuilder { - { buffer: "" } -} - -fn add_string(self : StringBuilder, s : String) -> Unit { - self.buffer = self.buffer + s -} - -fn add_substring( - self : StringBuilder, - s : String, - start : Int, - end : Int -) -> Unit { - self.buffer = self.buffer + s.substring(~start, ~end) -} - -fn add_char(self : StringBuilder, c : Char) -> Unit { - self.buffer = self.buffer + c.to_string() -} - -fn to_string(self : StringBuilder) -> String { - self.buffer -} - -test "add_string method coverage" { - let sb = StringBuilder::make() - add_string(sb, "Hello") - assert_eq!(sb.buffer, "Hello") - add_string(sb, " World") - assert_eq!(sb.buffer, "Hello World") -} diff --git a/json/json.mbt b/json/json.mbt index 217d8a573..99fc57671 100644 --- a/json/json.mbt +++ b/json/json.mbt @@ -96,7 +96,7 @@ pub fn stringify( if members.is_empty() { return "{}" } - let buf = Buffer::new(size_hint=0) + let buf = StringBuilder::new(size_hint=0) buf.write_char('{') buf.write_string(indent_str(level + 1, indent)) let mut first = true @@ -125,7 +125,7 @@ pub fn stringify( if arr.is_empty() { return "[]" } - let buf = Buffer::new(size_hint=0) + let buf = StringBuilder::new(size_hint=0) buf.write_char('[') buf.write_string(indent_str(level + 1, indent)) for i, v in arr { @@ -139,7 +139,7 @@ pub fn stringify( buf..write_char(']').to_string() } String(s) => { - let buf = Buffer::new(size_hint=0) + let buf = StringBuilder::new(size_hint=0) buf ..write_char('\"') ..write_string(escape(s, ~escape_slash)) @@ -165,7 +165,7 @@ fn escape(str : String, ~escape_slash : Bool) -> String { } } - let buf = Buffer::new(size_hint=str.length()) + let buf = StringBuilder::new(size_hint=str.length()) for c in str { match c { '"' | '\\' => buf.write_string("\\\{c}") diff --git a/json/lex_string.mbt b/json/lex_string.mbt index 048ee819f..6b382d7cb 100644 --- a/json/lex_string.mbt +++ b/json/lex_string.mbt @@ -13,11 +13,11 @@ // limitations under the License. fn lex_string(ctx : ParseContext) -> String!ParseError { - let buf = StringBuilder::make() + let buf = StringBuilder::new() let mut start = ctx.offset fn flush(end : Int) { if start > 0 && end > start { - buf.add_substring(ctx.input, start, end) + buf.write_sub_string(ctx.input, start, end - start) } } @@ -31,17 +31,17 @@ fn lex_string(ctx : ParseContext) -> String!ParseError { Some('\\') => { flush(ctx.offset - 1) match read_char(ctx) { - Some('b') => buf.add_char('\b') - Some('f') => buf.add_char('\x0C') - Some('n') => buf.add_char('\n') - Some('r') => buf.add_char('\r') - Some('t') => buf.add_char('\t') - Some('"') => buf.add_char('"') - Some('\\') => buf.add_char('\\') - Some('/') => buf.add_char('/') + Some('b') => buf.write_char('\b') + Some('f') => buf.write_char('\x0C') + Some('n') => buf.write_char('\n') + Some('r') => buf.write_char('\r') + Some('t') => buf.write_char('\t') + Some('"') => buf.write_char('"') + Some('\\') => buf.write_char('\\') + Some('/') => buf.write_char('/') Some('u') => { let c = lex_hex_digits!(ctx, 4) - buf.add_char(Char::from_int(c)) + buf.write_char(Char::from_int(c)) } Some(_) => invalid_char!(ctx, shift=-1) None => raise InvalidEof