171 lines
5.8 KiB
Plaintext
171 lines
5.8 KiB
Plaintext
/// The `string32` module defines the `String32` type which represents UTF8
|
|
/// encoded strings that are guaranteed to be 32 bytes long, with 0 padding on
|
|
/// the right.
|
|
module token_bridge::string32 {
|
|
|
|
use std::string::{Self, String};
|
|
use std::option;
|
|
use std::vector;
|
|
|
|
use wormhole::cursor::Cursor;
|
|
use wormhole::deserialize;
|
|
use wormhole::serialize;
|
|
|
|
/// Abort code raised by `right_pad` when the input string is longer than 32 bytes.
const E_STRING_TOO_LONG: u64 = 0;
|
|
|
|
/// A `String32` wraps a utf8 `String` whose length is always exactly 32 bytes.
/// Shorter contents are stored with 0 bytes appended on the right.
struct String32 has copy, drop, store {
    string: String,
}

spec String32 {
    // Data invariant: the wrapped string is exactly 32 bytes long.
    invariant string::length(string) == 32;
}
|
|
|
|
/// Builds a `String32` from `s` by appending 0 bytes until it is 32 bytes long.
///
/// Aborts with `E_STRING_TOO_LONG` if `s` is already longer than 32 bytes.
public fun right_pad(s: &String): String32 {
    let input_len = string::length(s);
    // Check the length first, so that the subtraction below cannot underflow.
    assert!(input_len <= 32, E_STRING_TOO_LONG);

    let padded = *string::bytes(s);
    let remaining = 32 - input_len;
    while ({
        spec {
            // Bytes already present plus bytes still to add always total 32.
            invariant remaining + vector::length(padded) == 32;
        };
        remaining > 0
    }) {
        vector::push_back(&mut padded, 0);
        remaining = remaining - 1;
    };

    String32 { string: string::utf8(padded) }
}
|
|
|
|
/// Internal helper: returns the longest prefix of `bytes` that is at most `n`
/// bytes long and is valid utf8, as a `String`.
///
/// The result can be shorter than `n` bytes even when the input is valid utf8:
/// if the cut at `n` bytes lands inside a multi-byte character, the partial
/// character is not valid utf8 and is dropped.
fun take(bytes: vector<u8>, n: u64): String {
    // Step 1: drop bytes from the right until at most `n` remain.
    loop {
        if (vector::length(&bytes) <= n) break;
        vector::pop_back(&mut bytes);
    };

    // Step 2: keep dropping the last byte until what is left is valid utf8.
    // The empty byte sequence is valid utf8, so this never pops from an
    // empty vector.
    loop {
        let candidate = string::try_utf8(bytes);
        if (option::is_some(&candidate)) {
            return option::extract(&mut candidate)
        };
        vector::pop_back(&mut bytes);
    }
}
|
|
|
|
/// Returns the first `n` bytes of `str` as a `String`.
///
/// The result may be shorter than `n` bytes even if `str` is longer than `n`:
/// when the cut lands inside a multi-byte utf8 character, the partial
/// character would be an invalid code point, so it is removed.
public fun take_utf8(str: String, n: u64): String {
    let raw = *string::bytes(&str);
    take(raw, n)
}
|
|
|
|
/// Converts a `String` to a `String32`, truncating it to at most 32 bytes
/// and then right-padding it with 0 bytes.
/// Does not abort.
public fun from_string(s: &String): String32 {
    let truncated = take(*string::bytes(s), 32);
    right_pad(&truncated)
}
|
|
|
|
/// Converts a byte vector to a `String32`, keeping its longest valid utf8
/// prefix of at most 32 bytes and right-padding that with 0 bytes.
/// Does not abort.
public fun from_bytes(b: vector<u8>): String32 {
    let truncated = take(b, 32);
    right_pad(&truncated)
}
|
|
|
|
/// Converts a `String32` back to a `String` with all trailing 0 bytes removed.
public fun to_string(s: &String32): String {
    let trimmed = *string::bytes(&s.string);
    let len = vector::length(&trimmed);
    // Shrink from the right for as long as the last remaining byte is 0.
    while (len > 0 && *vector::borrow(&trimmed, len - 1) == 0) {
        vector::pop_back(&mut trimmed);
        len = len - 1;
    };
    string::utf8(trimmed)
}
|
|
|
|
/// Returns a copy of the bytes of a `String32`, including the 0 padding
/// (32 bytes in total).
public fun to_bytes(s: &String32): vector<u8> {
    let String32 { string } = s;
    *string::bytes(string)
}
|
|
|
|
/// Reads a `String32` from `cur`.
///
/// Obtains a byte vector via `deserialize::deserialize_vector(cur, 32)`
/// (presumably the next 32 bytes of the cursor; confirm against the
/// `wormhole::deserialize` module) and converts it with `from_bytes`.
public fun deserialize(cur: &mut Cursor<u8>): String32 {
    from_bytes(deserialize::deserialize_vector(cur, 32))
}
|
|
|
|
/// Writes the 32 bytes of `e` (padding included) to `buf` using
/// `serialize::serialize_vector`.
public fun serialize(buf: &mut vector<u8>, e: String32) {
    let raw = to_bytes(&e);
    serialize::serialize_vector(buf, raw)
}
|
|
|
|
}
|
|
|
|
#[test_only]
|
|
module token_bridge::string32_test {
|
|
use std::string;
|
|
use std::vector;
|
|
use token_bridge::string32;
|
|
|
|
#[test]
public fun test_right_pad() {
    // Padding a short string and stripping the padding again must round-trip.
    let input = string::utf8(b"hello");
    let padded = string32::right_pad(&input);
    assert!(string32::to_string(&padded) == input, 0)
}
|
|
|
|
#[test]
#[expected_failure(abort_code = string32::E_STRING_TOO_LONG)]
public fun test_right_pad_fail() {
    // The input is longer than 32 bytes, so `right_pad` must abort.
    string32::right_pad(
        &string::utf8(b"this string is very very very very very very very very very very very very very very very long")
    );
}
|
|
|
|
#[test]
public fun test_from_string_short() {
    // A string shorter than 32 bytes is only padded, so it round-trips.
    let input = string::utf8(b"hello");
    let padded = string32::from_string(&input);
    assert!(string32::to_string(&padded) == input, 0)
}
|
|
|
|
#[test]
public fun test_from_string_long() {
    // A string longer than 32 bytes is cut down to its first 32 bytes.
    let input = string::utf8(b"this string is very very very very very very very very very very very very very very very long");
    let expected = string::utf8(b"this string is very very very ve");
    let truncated = string32::from_string(&input);
    assert!(string32::to_string(&truncated) == expected, 0)
}
|
|
|
|
#[test]
public fun test_from_string_weird_utf8() {
    let ascii_prefix = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    let raw = copy ascii_prefix;
    assert!(vector::length(&raw) == 31, 0);

    // Append the Samaritan letter Alaf, a 3-byte utf8 character. It is
    // written as hex because the Move parser only allows ascii characters
    // in byte-string literals (unfortunately, as the character looks nice).
    vector::append(&mut raw, x"e0a080");

    // The combined byte sequence is valid utf8.
    let input = string::utf8(raw);
    // String length is counted in bytes, not characters.
    assert!(string::length(&input) == 34, 0);

    let padded = string32::from_string(&input);
    // Cutting at 32 bytes leaves a lone e0 byte at the end, which is not
    // valid utf8 on its own, so it is dropped as well.
    assert!(string32::to_string(&padded) == string::utf8(ascii_prefix), 0)
}
|
|
|
|
#[test]
public fun test_from_bytes_invalid_utf8() {
    // e0 starts a 3-byte utf8 sequence but only two bytes are present, so
    // the input is invalid utf8 and no non-empty prefix of it is valid.
    let padded = string32::from_bytes(x"e0a0");
    let expected = string::utf8(b"");
    assert!(string32::to_string(&padded) == expected, 0)
}
|
|
} |