mirror of
https://github.com/fluencelabs/asmble
synced 2025-06-18 17:21:23 +00:00
Added Rust regex benchmark. Fixes issue #9.
This commit is contained in:
@ -32,7 +32,7 @@ The result will be:
|
|||||||
|
|
||||||
70 : i32
|
70 : i32
|
||||||
|
|
||||||
Which is how the test harness prints an integer.
|
Which is how the test harness prints an integer. See the [examples](examples) directory for more examples.
|
||||||
|
|
||||||
## CLI Usage
|
## CLI Usage
|
||||||
|
|
||||||
|
13
build.gradle
13
build.gradle
@ -13,7 +13,7 @@ buildscript {
|
|||||||
}
|
}
|
||||||
dependencies {
|
dependencies {
|
||||||
classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version"
|
classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version"
|
||||||
classpath 'me.champeau.gradle:jmh-gradle-plugin:0.4.4'
|
classpath 'me.champeau.gradle:jmh-gradle-plugin:0.4.5'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -109,14 +109,25 @@ project(':examples') {
|
|||||||
|
|
||||||
project(':examples:rust-regex') {
|
project(':examples:rust-regex') {
|
||||||
apply plugin: 'application'
|
apply plugin: 'application'
|
||||||
|
apply plugin: 'me.champeau.gradle.jmh'
|
||||||
ext.wasmCompiledClassName = 'asmble.generated.RustRegex'
|
ext.wasmCompiledClassName = 'asmble.generated.RustRegex'
|
||||||
dependencies {
|
dependencies {
|
||||||
compile files('build/wasm-classes')
|
compile files('build/wasm-classes')
|
||||||
|
testCompile 'junit:junit:4.12'
|
||||||
}
|
}
|
||||||
compileJava {
|
compileJava {
|
||||||
dependsOn compileRustWasm
|
dependsOn compileRustWasm
|
||||||
}
|
}
|
||||||
mainClassName = 'asmble.examples.rustregex.Main'
|
mainClassName = 'asmble.examples.rustregex.Main'
|
||||||
|
test {
|
||||||
|
testLogging.showStandardStreams = true
|
||||||
|
testLogging.events 'PASSED', 'SKIPPED'
|
||||||
|
}
|
||||||
|
jmh {
|
||||||
|
iterations = 5
|
||||||
|
warmupIterations = 5
|
||||||
|
fork = 3
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
project(':examples:rust-simple') {
|
project(':examples:rust-simple') {
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
|
Below are some examples.
|
||||||
|
|
||||||
### Rust
|
### Rust
|
||||||
|
|
||||||
In order of complexity:
|
Compile Rust to WASM and then to the JVM. In order of complexity:
|
||||||
|
|
||||||
* [rust-simple](rust-simple)
|
* [rust-simple](rust-simple)
|
||||||
* [rust-string](rust-string)
|
* [rust-string](rust-string)
|
||||||
* rust-regex
|
* [rust-regex](rust-regex)
|
@ -1,8 +1,8 @@
|
|||||||
### Example: Rust Regex
|
### Example: Rust Regex
|
||||||
|
|
||||||
This shows an example of using the Rust regex library on the JVM. This builds on [rust-simple](../rust-simple) and
|
This shows an example of using the Rust regex library on the JVM compiled via WASM. This builds on
|
||||||
[rust-string](../rust-string). There is also a simple benchmark checking the performance compared to the built-in Java
|
the [rust-simple](../rust-simple) and [rust-string](../rust-string) examples. There is also a simple benchmark checking
|
||||||
regex engine.
|
the performance compared to the built-in Java regex engine.
|
||||||
|
|
||||||
#### Main
|
#### Main
|
||||||
|
|
||||||
@ -20,6 +20,121 @@ In release mode, the generated class is 903KB w/ ~575 methods. The output:
|
|||||||
'Twain' count in Java: 811
|
'Twain' count in Java: 811
|
||||||
'Twain' count in Rust: 811
|
'Twain' count in Rust: 811
|
||||||
|
|
||||||
|
#### Tests
|
||||||
|
|
||||||
|
I wanted to compare the Java regex engine with the Rust regex engine. Before running benchmarks, I wrote a
|
||||||
|
[unit test](src/test/java/asmble/examples/rustregex/RegexTest.java) to test parity. I used the examples from the
|
||||||
|
aforementioned [blog post](https://rust-leipzig.github.io/regex/2017/03/28/comparison-of-regex-engines/) to test with.
|
||||||
|
The test simply confirms the Java regex library and the Rust regex library produce the same match counts across the
|
||||||
|
Mark Twain corpus. To run the test, execute:
|
||||||
|
|
||||||
|
gradlew --no-daemon :examples:rust-regex:test
|
||||||
|
|
||||||
|
Here is my output of the test part:
|
||||||
|
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: Twain] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: (?i)Twain] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: [a-z]shing] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: Huck[a-zA-Z]+|Saw[a-zA-Z]+] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: \b\w+nn\b] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: [a-q][^u-z]{13}x] SKIPPED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: Tom|Sawyer|Huckleberry|Finn] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: (?i)Tom|Sawyer|Huckleberry|Finn] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: .{0,2}(Tom|Sawyer|Huckleberry|Finn)] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: .{2,4}(Tom|Sawyer|Huckleberry|Finn)] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: Tom.{10,25}river|river.{10,25}Tom] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: [a-zA-Z]+ing] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: \s[a-zA-Z]{0,12}ing\s] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: ([A-Za-z]awyer|[A-Za-z]inn)\s] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: ["'][^"']{0,30}[?!\.]["']] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: ?|?] PASSED
|
||||||
|
asmble.examples.rustregex.RegexTest > checkJavaVersusRust[pattern: \p{Sm}] PASSED
|
||||||
|
|
||||||
|
As mentioned in the blog post, `[a-q][^u-z]{13}x` is a very slow pattern for Rust, so I skipped it (but it does produce
|
||||||
|
the same count if you're willing to wait a couple of minutes). Also, `?|?` is actually `∞|✓`; it's just not printable
|
||||||
|
unicode in the text output I used.
|
||||||
|
|
||||||
#### Benchmarks
|
#### Benchmarks
|
||||||
|
|
||||||
TODO: JMH benchmarks
|
With the accuracy confirmed, it was time to benchmark the two engines. I wrote a
|
||||||
|
[JMH benchmark](src/jmh/java/asmble/examples/rustregex/RegexBenchmark.java) to test the same patterns as the unit test
|
||||||
|
checks. It precompiles the patterns and preloads the target string on the Rust side before checking simple match count.
|
||||||
|
As with any benchmarks, this is just my empirical data and everyone else's will be different. To run the benchmark,
|
||||||
|
execute (it takes a while to run):
|
||||||
|
|
||||||
|
gradlew --no-daemon :examples:rust-regex:jmh
|
||||||
|
|
||||||
|
Here are my results (reordered and with added linebreaks for readability, higher score is better):
|
||||||
|
|
||||||
|
Benchmark (patternString) Mode Cnt Score Error Units
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck Twain thrpt 15 29.756 ± 1.169 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck Twain thrpt 15 55.012 ± 0.677 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck (?i)Twain thrpt 15 6.181 ± 0.560 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck (?i)Twain thrpt 15 1.333 ± 0.029 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck [a-z]shing thrpt 15 6.138 ± 0.937 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck [a-z]shing thrpt 15 12.352 ± 0.103 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck Huck[a-zA-Z]+|Saw[a-zA-Z]+ thrpt 15 4.774 ± 0.330 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck Huck[a-zA-Z]+|Saw[a-zA-Z]+ thrpt 15 56.079 ± 0.487 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck \b\w+nn\b thrpt 15 2.703 ± 0.086 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck \b\w+nn\b thrpt 15 0.131 ± 0.001 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck Tom|Sawyer|Huckleberry|Finn thrpt 15 2.633 ± 0.033 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck Tom|Sawyer|Huckleberry|Finn thrpt 15 14.388 ± 0.138 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck (?i)Tom|Sawyer|Huckleberry|Finn thrpt 15 3.178 ± 0.045 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck (?i)Tom|Sawyer|Huckleberry|Finn thrpt 15 8.882 ± 0.110 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck .{0,2}(Tom|Sawyer|Huckleberry|Finn) thrpt 15 1.191 ± 0.010 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck .{0,2}(Tom|Sawyer|Huckleberry|Finn) thrpt 15 0.572 ± 0.012 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck .{2,4}(Tom|Sawyer|Huckleberry|Finn) thrpt 15 1.017 ± 0.024 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck .{2,4}(Tom|Sawyer|Huckleberry|Finn) thrpt 15 0.584 ± 0.008 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck Tom.{10,25}river|river.{10,25}Tom thrpt 15 5.326 ± 0.050 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck Tom.{10,25}river|river.{10,25}Tom thrpt 15 15.705 ± 0.247 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck [a-zA-Z]+ing thrpt 15 1.768 ± 0.057 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck [a-zA-Z]+ing thrpt 15 1.001 ± 0.012 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck \s[a-zA-Z]{0,12}ing\s thrpt 15 4.020 ± 0.111 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck \s[a-zA-Z]{0,12}ing\s thrpt 15 0.416 ± 0.004 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck ([A-Za-z]awyer|[A-Za-z]inn)\s thrpt 15 2.441 ± 0.024 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck ([A-Za-z]awyer|[A-Za-z]inn)\s thrpt 15 0.591 ± 0.004 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck ["'][^"']{0,30}[?!\.]["'] thrpt 15 20.466 ± 0.309 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck ["'][^"']{0,30}[?!\.]["'] thrpt 15 2.459 ± 0.024 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck ?|? thrpt 15 15.856 ± 0.158 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck ?|? thrpt 15 14.657 ± 0.177 ops/s
|
||||||
|
|
||||||
|
RegexBenchmark.javaRegexCheck \p{Sm} thrpt 15 22.156 ± 0.406 ops/s
|
||||||
|
RegexBenchmark.rustRegexCheck \p{Sm} thrpt 15 0.592 ± 0.005 ops/s
|
||||||
|
|
||||||
|
To keep from making this a big long post like most benchmark posts tend to be, here is a bulleted list of notes:
|
||||||
|
|
||||||
|
* I ran this on a Win 10 box, 1.8GHz i7-8550U HP laptop. I used latest Zulu JDK 8. For JMH, I set it at 3 forks, 5
|
||||||
|
warmup iterations, and 5 measurement iterations (that's why `Cnt` above is 15 = 5 measurements * 3 forks). It took a
|
||||||
|
bit over 25 minutes to complete.
|
||||||
|
* All of the tests had the Java and Rust patterns precompiled. In Rust's case, I also placed the UTF-8 string on the
|
||||||
|
accessible heap before the benchmark started to be fair.
|
||||||
|
* Like the unit test, I excluded `[a-q][^u-z]{13}x` because Rust is really slow at it (Java wins by a mile here). Also
|
||||||
|
like the unit test, `?|?` is actually `∞|✓`.
|
||||||
|
* Of the ones tested, Rust is faster in 6 and Java is faster in the other 10. And where it is faster, it is much faster.
|
||||||
|
This is quite decent since the Rust+WASM version uses `ByteBuffer`s everywhere, has some overflow checks, and in
|
||||||
|
general there are some impedance mismatches with the WASM bytecode and the JVM bytecode.
|
||||||
|
* Notice the low error numbers on the Rust versions. The error number is the deviation between invocations. This shows
|
||||||
|
the WASM-to-JVM ends up quite deterministic (or maybe, that there is just too much cruft to JIT, heh).
|
||||||
|
* If I were more serious about it, I'd check with other OS's, add more iterations, tweak some compiler options, include
|
||||||
|
regex pattern compilation speed benchmarks, and so on. But I just needed simple proof that speed is reasonable.
|
||||||
|
|
||||||
|
Overall, this shows running Rust on the JVM to be entirely reasonable for certain types of workloads. There are still
|
||||||
|
memory concerns, but they are not terrible. If given the choice, use a JVM language of course; the safety benefits of Rust don't
|
||||||
|
outweigh the problems of Rust-to-WASM-to-JVM such as build complexity, security concerns (`ByteBuffer` is where all
|
||||||
|
memory lives), debugging concerns, etc. But if you have a library in Rust, exposing it to the JVM sans-JNI is a doable
|
||||||
|
feat if you must.
|
@ -0,0 +1,61 @@
|
|||||||
|
package asmble.examples.rustregex;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.annotations.*;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
@State(Scope.Thread)
|
||||||
|
public class RegexBenchmark {
|
||||||
|
@Param({
|
||||||
|
"Twain",
|
||||||
|
"(?i)Twain",
|
||||||
|
"[a-z]shing",
|
||||||
|
"Huck[a-zA-Z]+|Saw[a-zA-Z]+",
|
||||||
|
"\\b\\w+nn\\b",
|
||||||
|
// Too slow
|
||||||
|
// "[a-q][^u-z]{13}x",
|
||||||
|
"Tom|Sawyer|Huckleberry|Finn",
|
||||||
|
"(?i)Tom|Sawyer|Huckleberry|Finn",
|
||||||
|
".{0,2}(Tom|Sawyer|Huckleberry|Finn)",
|
||||||
|
".{2,4}(Tom|Sawyer|Huckleberry|Finn)",
|
||||||
|
"Tom.{10,25}river|river.{10,25}Tom",
|
||||||
|
"[a-zA-Z]+ing",
|
||||||
|
"\\s[a-zA-Z]{0,12}ing\\s",
|
||||||
|
"([A-Za-z]awyer|[A-Za-z]inn)\\s",
|
||||||
|
"[\"'][^\"']{0,30}[?!\\.][\"']",
|
||||||
|
"\u221E|\u2713",
|
||||||
|
"\\p{Sm}"
|
||||||
|
})
|
||||||
|
private String patternString;
|
||||||
|
|
||||||
|
private String twainString;
|
||||||
|
private JavaLib javaLib;
|
||||||
|
private JavaLib.JavaPattern precompiledJavaPattern;
|
||||||
|
private RustLib rustLib;
|
||||||
|
private RustLib.Ptr preparedRustTarget;
|
||||||
|
private RustLib.RustPattern precompiledRustPattern;
|
||||||
|
|
||||||
|
@Setup
|
||||||
|
public void init() throws IOException {
|
||||||
|
// JMH is not handling this right, so we replace inline
|
||||||
|
if ("?|?".equals(patternString)) {
|
||||||
|
patternString = "\u221E|\u2713";
|
||||||
|
}
|
||||||
|
twainString = Main.loadTwainText();
|
||||||
|
javaLib = new JavaLib();
|
||||||
|
precompiledJavaPattern = javaLib.compile(patternString);
|
||||||
|
rustLib = new RustLib();
|
||||||
|
preparedRustTarget = rustLib.prepareTarget(twainString);
|
||||||
|
precompiledRustPattern = rustLib.compile(patternString);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void javaRegexCheck() {
|
||||||
|
precompiledJavaPattern.matchCount(twainString);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void rustRegexCheck() {
|
||||||
|
precompiledRustPattern.matchCount(preparedRustTarget);
|
||||||
|
}
|
||||||
|
}
|
@ -7,9 +7,9 @@ import java.nio.charset.StandardCharsets;
|
|||||||
|
|
||||||
public class RustLib implements RegexLib<RustLib.Ptr> {
|
public class RustLib implements RegexLib<RustLib.Ptr> {
|
||||||
|
|
||||||
// 300 pages is good for now
|
// 600 pages is enough for our use
|
||||||
private static final int PAGE_SIZE = 65536;
|
private static final int PAGE_SIZE = 65536;
|
||||||
private static final int MAX_MEMORY = 300 * PAGE_SIZE;
|
private static final int MAX_MEMORY = 600 * PAGE_SIZE;
|
||||||
|
|
||||||
private final RustRegex rustRegex;
|
private final RustRegex rustRegex;
|
||||||
|
|
||||||
|
@ -0,0 +1,67 @@
|
|||||||
|
package asmble.examples.rustregex;
|
||||||
|
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Assume;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.runner.RunWith;
|
||||||
|
import org.junit.runners.Parameterized;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
@RunWith(Parameterized.class)
|
||||||
|
public class RegexTest {
|
||||||
|
// Too slow to run regularly
|
||||||
|
private static final String TOO_SLOW = "[a-q][^u-z]{13}x";
|
||||||
|
|
||||||
|
@Parameterized.Parameters(name = "pattern: {0}")
|
||||||
|
public static String[] data() {
|
||||||
|
return new String[] {
|
||||||
|
"Twain",
|
||||||
|
"(?i)Twain",
|
||||||
|
"[a-z]shing",
|
||||||
|
"Huck[a-zA-Z]+|Saw[a-zA-Z]+",
|
||||||
|
"\\b\\w+nn\\b",
|
||||||
|
"[a-q][^u-z]{13}x",
|
||||||
|
"Tom|Sawyer|Huckleberry|Finn",
|
||||||
|
"(?i)Tom|Sawyer|Huckleberry|Finn",
|
||||||
|
".{0,2}(Tom|Sawyer|Huckleberry|Finn)",
|
||||||
|
".{2,4}(Tom|Sawyer|Huckleberry|Finn)",
|
||||||
|
"Tom.{10,25}river|river.{10,25}Tom",
|
||||||
|
"[a-zA-Z]+ing",
|
||||||
|
"\\s[a-zA-Z]{0,12}ing\\s",
|
||||||
|
"([A-Za-z]awyer|[A-Za-z]inn)\\s",
|
||||||
|
"[\"'][^\"']{0,30}[?!\\.][\"']",
|
||||||
|
"\u221E|\u2713",
|
||||||
|
"\\p{Sm}"
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static RustLib rustLib;
|
||||||
|
private static String twainText;
|
||||||
|
private static RustLib.Ptr preparedRustTarget;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUpClass() throws IOException {
|
||||||
|
twainText = Main.loadTwainText();
|
||||||
|
rustLib = new RustLib();
|
||||||
|
preparedRustTarget = rustLib.prepareTarget(twainText);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String pattern;
|
||||||
|
|
||||||
|
public RegexTest(String pattern) {
|
||||||
|
this.pattern = pattern;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void checkJavaVersusRust() {
|
||||||
|
Assume.assumeFalse("Skipped for being too slow", pattern.equals(TOO_SLOW));
|
||||||
|
int expected = new JavaLib().compile(pattern).matchCount(twainText);
|
||||||
|
// System.out.println("Found " + expected + " matches for pattern: " + pattern);
|
||||||
|
Assert.assertEquals(
|
||||||
|
expected,
|
||||||
|
rustLib.compile(pattern).matchCount(preparedRustTarget)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
Reference in New Issue
Block a user