Loading...
Loading...
Compare original and translation side by side
TextTextBlobsize()Nat32Nat64NatTextText.fromChar#Blobsize()Nat32Nat64NatTextText.fromChar#Blob[Nat8]TextBlob[Nat8]TextTextBlob[Nat8]TextBlob[Nat8]Nat8Nat16Nat32Nat64+%Nat8Nat16Nat32Nat64+%let bytes = Blob.toArray(data)Blobdata[i]data.size()Nat64Nat8 -> Natb.toNat16().toNat32()<<>>&|Nat32Nat64+%Nat64CharText[Nat8]Text.fromChar#BlobText.decodeUtf8#Prim.trap("…")next_i <= szlet bytes = Blob.toArray(data)Blobdata[i]data.size()Nat64Nat8Natb.toNat16().toNat32()Nat32Nat64<<>>&|+%Nat64CharText[Nat8]Text.fromChar#BlobText.decodeUtf8#Prim.trap("…")next_i <= sz// Before
let bytes = Blob.toArray(data);
var i = 0;
let b1 = bytes[i];
// After
let sz = Nat64.fromIntWrap(data.size());
var i : Nat64 = 0;
let b1 = data[i.toNat()];// Before (goes through arbitrary-precision Nat)
let n = (Nat32.fromNat(Nat8.toNat(b1)) << 16)
| (Nat32.fromNat(Nat8.toNat(b2)) << 8)
| Nat32.fromNat(Nat8.toNat(b3));
// After (fixed-width path)
let n = (b1.toNat16().toNat32() << 16)
| (b2.toNat16().toNat32() << 8)
| b3.toNat16().toNat32();CharText[Nat8]// Before
private let alphabet : [Char] = ['A', 'B', /*…*/, '/'];
let c1 = Text.fromChar(alphabet[idx1]);
let c2 = Text.fromChar(alphabet[idx2]);
result #= c1 # c2 # c3 # c4; // many small concats
// After
private let alphabet : [Nat8] = [65, 66, /*…*/, 47]; // ASCII bytes
let bytes = Blob.fromArray([
alphabet[idx1], alphabet[idx2], alphabet[idx3], alphabet[idx4]
]);
switch (Text.decodeUtf8(bytes)) {
case (?t) { result := result # t } // one append per block
case (_) { Prim.trap("Cannot happen: Utf8 decode error …") }
};// Example: main loop over full blocks, then a small tail path
var i : Nat64 = 0;
var next_i : Nat64 = block; // e.g., 3, 6, etc.
while (next_i <= sz) {
// read <block> bytes, produce <k> output chars
i := next_i; next_i +%= block;
};
while (i < sz) {
// read remaining bytes (tail), produce padded output as needed
i +%= tailStep;
};// Before
let bytes = Blob.toArray(data);
var i = 0;
let b1 = bytes[i];
// After
let sz = Nat64.fromIntWrap(data.size());
var i : Nat64 = 0;
let b1 = data[i.toNat()];// Before (goes through arbitrary-precision Nat)
let n = (Nat32.fromNat(Nat8.toNat(b1)) << 16)
| (Nat32.fromNat(Nat8.toNat(b2)) << 8)
| Nat32.fromNat(Nat8.toNat(b3));
// After (fixed-width path)
let n = (b1.toNat16().toNat32() << 16)
| (b2.toNat16().toNat32() << 8)
| b3.toNat16().toNat32();[Nat8]CharText// Before
private let alphabet : [Char] = ['A', 'B', /*…*/, '/'];
let c1 = Text.fromChar(alphabet[idx1]);
let c2 = Text.fromChar(alphabet[idx2]);
result #= c1 # c2 # c3 # c4; // many small concats
// After
private let alphabet : [Nat8] = [65, 66, /*…*/, 47]; // ASCII bytes
let bytes = Blob.fromArray([
alphabet[idx1], alphabet[idx2], alphabet[idx3], alphabet[idx4]
]);
switch (Text.decodeUtf8(bytes)) {
case (?t) { result := result # t } // one append per block
case (_) { Prim.trap("Cannot happen: Utf8 decode error …") }
};// Example: main loop over full blocks, then a small tail path
var i : Nat64 = 0;
var next_i : Nat64 = block; // e.g., 3, 6, etc.
while (next_i <= sz) {
// read <block> bytes, produce <k> output chars
i := next_i; next_i +%= block;
};
while (i < sz) {
// read remaining bytes (tail), produce padded output as needed
i +%= tailStep;
};Blob.toArrayBlobNat64+%CharText[Nat8]Blob.fromArrayText.decodeUtf8Nat16/Nat32/Nat64NatPrim.trapBlob.toArrayBlobNat64+%CharText[Nat8]Blob.fromArrayText.decodeUtf8Nat16/Nat32/Nat64NatPrim.trapNatNat32Nat64Nat8 -> Nat16 -> Nat32TextText.fromChar#BlobText.decodeUtf8Blob[Nat8]Blobsize()Nat64+%next_i <= szNatNat32Nat64Nat8 -> Nat16 -> Nat32TextText.fromChar#BlobText.decodeUtf8Blob[Nat8]Blobsize()Nat64+%next_i <= sz+%Nat64Text.decodeUtf8=Prim.trapBlob.toArray+%Nat64Text.decodeUtf8=Prim.trapBlob.toArray[Nat8]data.size()Nat64Nat8Nat32Nat16+%#c1..c4decodeUtf8[Nat8]data.size()Nat64Nat8Nat16Nat32+%#c1..c4decodeUtf8TextTextTextTextNat32Nat64writeupdatesumfinalizesrc/Ripemd160.moNat32Nat64writeupdatesumfinalizesrc/Ripemd160.moresearch-ag/sha2src/Ripemd160.moresearch-ag/sha2src/Ripemd160.mo[var Nat32][var Nat64][var Nat32][var Nat64]// Persistent chaining state, single allocation reused across all blocks.
private let s : [var Nat32] = VarArray.repeat<Nat32>(0, 5);varNat16// 持久化链状态,单次分配可在所有块中复用。
private let s : [var Nat32] = VarArray.repeat<Nat32>(0, 5);varNat16[var Nat32][var Nat8][var Nat32][var Nat8]// 16 little-endian (or big-endian) words for the current block.
// Bytes are folded in at write time; transform() reads words directly.
private let msg : [var Nat32] = VarArray.repeat<Nat32>(0, 16);// AVOID for hot-path block hashing
private let buf : [var Nat8] = ...; // 64-byte buffer
transform(buf.toArray(), 0); // allocates a 64-byte [Nat8] PER BLOCK
// transform then re-decodes 4 bytes → Nat32 word internally[var Nat32]toArray()transform()// 当前块的16个小端(或大端)字。
// 字节在写入时折叠;transform()直接读取字。
private let msg : [var Nat32] = VarArray.repeat<Nat32>(0, 16);// 热点路径块哈希应避免使用此模式
private let buf : [var Nat8] = ...; // 64字节缓冲区
transform(buf.toArray(), 0); // 每个块分配一个64字节的[Nat8]
// transform随后在内部重新解码4字节→Nat32字[var Nat32]toArray()transform()Nat16Nat16// 0..63 byte position within the current block.
// Nat16 is unboxed in mutable storage; Nat is heap-allocated per increment.
private var i_msg : Nat16 = 0;Nat16Nat8var x : Nat = 0Nat64// 当前块内的0..63字节位置。
// Nat16在可变存储中是未装箱的;Nat每次递增都会在堆上分配。
private var i_msg : Nat16 = 0;Nat16Nat8var x : Nat = 0Nat64writeBytewriteByteprivate func writeByte(b : Nat8) {
  // Folds one input byte into the word buffer `msg` at the current block
  // offset. When the 64th byte of a block lands, transform() is run,
  // n_blocks is bumped, and the offset restarts at 0.
  let offset = i_msg;
  let byteInWord = offset & 0x3;            // position of the byte inside its 32-bit word
  let wordIndex = Nat16.toNat(offset >> 2); // index of that word in msg
  let shifted : Nat32 = Nat32.fromNat16(b.toNat16()) << Nat32.fromNat16(byteInWord << 3);
  // The first byte of a word replaces the stale contents; later bytes are OR-ed in.
  msg[wordIndex] := if (byteInWord == 0) { shifted } else { msg[wordIndex] | shifted };
  let advanced = offset +% 1;
  if (advanced != 64) {
    i_msg := advanced;
  } else {
    transform();
    n_blocks +%= 1;
    i_msg := 0;
  };
};(3 - lane) << 3private func writeByte(b : Nat8) {
let pos = i_msg;
let wi = Nat16.toNat(pos >> 2);
let lane = pos & 0x3;
let v : Nat32 = Nat32.fromNat16(b.toNat16()) << Nat32.fromNat16(lane << 3);
if (lane == 0) { msg[wi] := v } // 第一个字节:覆盖旧字
else { msg[wi] := msg[wi] | v }; // 后续字节:按位或
let next = pos +% 1;
if (next == 64) { transform(); n_blocks +%= 1; i_msg := 0 }
else { i_msg := next };
};(3 - lane) << 3writewritepublic func write(data : [Nat8]) {
// Absorbs `data` into the hash state in three phases: byte-wise until the
// block position i_msg is 0, then whole 64-byte chunks decoded directly into
// msg, then byte-wise again for whatever is left.
let n = data.size();
if (n == 0) return; // empty input: nothing to absorb
var i = 0; // index of the next unread byte of `data`
// (1) Finish any partial block one byte at a time.
while (i_msg != 0 and i < n) { writeByte(data[i]); i += 1 };
// (2) Fast path: decode 16 LE words inline directly from input → msg.
// Entered only while at least 64 unread bytes remain.
while (i + 64 <= n) {
// Little-endian packing: data[i] is the lowest byte, data[i+3] the highest.
msg[0] := data[i].toNat16().toNat32()
| (data[i+1].toNat16().toNat32() << 8)
| (data[i+2].toNat16().toNat32() << 16)
| (data[i+3].toNat16().toNat32() << 24);
// ... msg[1] .. msg[15] (15 more identical lines)
// NOTE(review): the elided lines presumably repeat the pattern above at
// offsets i+4 .. i+63 — confirm against the full source.
transform();
n_blocks +%= 1; // one more full block processed
i += 64;
};
// (3) Tail: remaining < 64 bytes go into the partial block.
while (i < n) { writeByte(data[i]); i += 1 };
};public func write(data : [Nat8]) {
let n = data.size();
if (n == 0) return;
var i = 0;
// (1) 逐个字节完成部分块。
while (i_msg != 0 and i < n) { writeByte(data[i]); i += 1 };
// (2) 快速路径:直接从输入将16个小端字解码到msg中。
while (i + 64 <= n) {
msg[0] := data[i].toNat16().toNat32()
| (data[i+1].toNat16().toNat32() << 8)
| (data[i+2].toNat16().toNat32() << 16)
| (data[i+3].toNat16().toNat32() << 24);
// ... msg[1] .. msg[15](另外15行相同代码)
transform();
n_blocks +%= 1;
i += 64;
};
// (3) 剩余数据:少于64字节的部分进入部分块。
while (i < n) { writeByte(data[i]); i += 1 };
};transform()transform()// AVOID: each call allocates a (Nat32, Nat32) tuple — 320 tuples per block
let (a, c) = r11(a, b, c, d, e, msg, 0, 11);
let (e, b) = r11(e, a, b, c, d, msg, 1, 14);
// ... 158 morevar a1 : Nat32 = s[0]; var b1 : Nat32 = s[1]; /* ... */
// Left line round 1: f1(b,c,d) = b ^ c ^ d, K = 0
a1 := rol(a1 +% (b1 ^ c1 ^ d1) +% w0, 11) +% e1; c1 := rol(c1, 10);
e1 := rol(e1 +% (a1 ^ b1 ^ c1) +% w1, 14) +% d1; b1 := rol(b1, 10);
// ... 158 more, with K and rotation amounts varying per round
// Combine back to s without allocating temporaries
let t = s[0];
s[0] := s[1] +% c1 +% d2;
// ...fK// 避免使用:每次调用都会分配一个(Nat32, Nat32)元组——每个块320个元组
let (a, c) = r11(a, b, c, d, e, msg, 0, 11);
let (e, b) = r11(e, a, b, c, d, msg, 1, 14);
// ... 另外158行var a1 : Nat32 = s[0]; var b1 : Nat32 = s[1]; /* ... */
// 左线路轮次1:f1(b,c,d) = b ^ c ^ d, K = 0
a1 := rol(a1 +% (b1 ^ c1 ^ d1) +% w0, 11) +% e1; c1 := rol(c1, 10);
e1 := rol(e1 +% (a1 ^ b1 ^ c1) +% w1, 14) +% d1; b1 := rol(b1, 10);
// ... 另外158行,每轮的K和旋转量不同
// 将结果合并回s,不分配临时变量
let t = s[0];
s[0] := s[1] +% c1 +% d2;
// ...fKNat64Nat8Nat64Nat8Nat8.fromNat(Nat64.toNat(x & 0xff))Nat// Stage all narrowing on fixed-width types — no Nat allocation.
private func lowByte64(v : Nat64) : Nat8 {
  // Keep only the low 8 bits, then narrow one fixed-width step at a time
  // (Nat64 -> Nat32 -> Nat16 -> Nat8) so the value never passes through Nat.
  let low : Nat64 = v & 0xff;
  let as32 : Nat32 = Nat32.fromNat64(low);
  let as16 : Nat16 = Nat16.fromNat32(as32);
  Nat8.fromNat16(as16);
};Nat8.fromNat(Nat64.toNat(x & 0xff))Nat// 在固定宽度类型上分阶段缩窄——不分配Nat。
private func lowByte64(v : Nat64) : Nat8 {
Nat8.fromNat16(Nat16.fromNat32(Nat32.fromNat64(v & 0xff)));
};writeByte[var Nat8]writeByte[var Nat8]public func sum() : [Nat8] {
// Pads the buffered message and appends its bit length, all through
// writeByte(). The output serialization is elided in this excerpt.
// Message length in bits: (n_blocks * 64 + i_msg) bytes, times 8. It is read
// here, before padding, because the writeByte() calls below change i_msg and
// may change n_blocks.
let bitlen : Nat64 = ((n_blocks << 6) +% Nat64.fromNat(Nat16.toNat(i_msg))) << 3;
writeByte(0x80); // first padding byte
// Zero-fill until the block position is 56, leaving 8 bytes for the length.
while (i_msg != 56) { writeByte(0) };
// Length bytes are written least-significant byte first.
writeByte(lowByte64(bitlen));
writeByte(lowByte64(bitlen >> 8));
// ... 6 more length bytes; the 8th triggers transform()
// serialize state to 20/32 output bytes
};pad : [var Nat8]sizedesc : [var Nat8].toArray()public func sum() : [Nat8] {
let bitlen : Nat64 = ((n_blocks << 6) +% Nat64.fromNat(Nat16.toNat(i_msg))) << 3;
writeByte(0x80);
while (i_msg != 56) { writeByte(0) };
writeByte(lowByte64(bitlen));
writeByte(lowByte64(bitlen >> 8));
// ... 另外6个长度字节;第8个会触发transform()
// 将状态序列化为20/32输出字节
};pad : [var Nat8]sizedesc : [var Nat8].toArray()var counter : Nat = 0Nat16Nat64(Nat32, Nat32)buf.toArray()transformpadsizedescsumfinalizeCommon.readLE32(arr, i)[var Nat32]Natvar counter : Nat = 0Nat16Nat64(Nat32, Nat32)buf.toArray()transformsumfinalizepadsizedesc[var Nat32]Common.readLE32(arr, i)Nat^&|+%<<>>a1 := rol(a1 +% ((b1 & c1) | (^b1 & d1)) +% w0 +% 0x5A827999, 11) +% e1;^b1 & d1(^b1) & d1^&|+%<<>>a1 := rol(a1 +% ((b1 & c1) | (^b1 & d1)) +% w0 +% 0x5A827999, 11) +% e1;^b1 & d1(^b1) & d1msg : [var Nat32]< 16for r in roundstransform()msg : [var Nat32]for r in roundstransform()

| Metric | Before | After | Speedup |
|---|---|---|---|
| Instructions | 1,470,125 | 734,849 | 2.0× |
| GC traffic | 160.28 KiB | 43.88 KiB | 3.65× |
| Heap (steady) | 272 B | 272 B | 1.0× |

| 指标 | 优化前 | 优化后 | 加速比 |
|---|---|---|---|
| 指令数 | 1,470,125 | 734,849 | 2.0× |
| GC流量 | 160.28 KiB | 43.88 KiB | 3.65× |
| 堆内存(稳定) | 272 B | 272 B | 1.0× |