mirror of
https://github.com/fluencelabs/asmble
synced 2025-06-17 08:41:26 +00:00
Added Rust regex benchmark. Fixes issue #9.
This commit is contained in:
@ -0,0 +1,61 @@
|
||||
package asmble.examples.rustregex;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@State(Scope.Thread)
|
||||
public class RegexBenchmark {
|
||||
@Param({
|
||||
"Twain",
|
||||
"(?i)Twain",
|
||||
"[a-z]shing",
|
||||
"Huck[a-zA-Z]+|Saw[a-zA-Z]+",
|
||||
"\\b\\w+nn\\b",
|
||||
// Too slow
|
||||
// "[a-q][^u-z]{13}x",
|
||||
"Tom|Sawyer|Huckleberry|Finn",
|
||||
"(?i)Tom|Sawyer|Huckleberry|Finn",
|
||||
".{0,2}(Tom|Sawyer|Huckleberry|Finn)",
|
||||
".{2,4}(Tom|Sawyer|Huckleberry|Finn)",
|
||||
"Tom.{10,25}river|river.{10,25}Tom",
|
||||
"[a-zA-Z]+ing",
|
||||
"\\s[a-zA-Z]{0,12}ing\\s",
|
||||
"([A-Za-z]awyer|[A-Za-z]inn)\\s",
|
||||
"[\"'][^\"']{0,30}[?!\\.][\"']",
|
||||
"\u221E|\u2713",
|
||||
"\\p{Sm}"
|
||||
})
|
||||
private String patternString;
|
||||
|
||||
private String twainString;
|
||||
private JavaLib javaLib;
|
||||
private JavaLib.JavaPattern precompiledJavaPattern;
|
||||
private RustLib rustLib;
|
||||
private RustLib.Ptr preparedRustTarget;
|
||||
private RustLib.RustPattern precompiledRustPattern;
|
||||
|
||||
@Setup
|
||||
public void init() throws IOException {
|
||||
// JMH is not handling this right, so we replace inline
|
||||
if ("?|?".equals(patternString)) {
|
||||
patternString = "\u221E|\u2713";
|
||||
}
|
||||
twainString = Main.loadTwainText();
|
||||
javaLib = new JavaLib();
|
||||
precompiledJavaPattern = javaLib.compile(patternString);
|
||||
rustLib = new RustLib();
|
||||
preparedRustTarget = rustLib.prepareTarget(twainString);
|
||||
precompiledRustPattern = rustLib.compile(patternString);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void javaRegexCheck() {
|
||||
precompiledJavaPattern.matchCount(twainString);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void rustRegexCheck() {
|
||||
precompiledRustPattern.matchCount(preparedRustTarget);
|
||||
}
|
||||
}
|
@ -7,9 +7,9 @@ import java.nio.charset.StandardCharsets;
|
||||
|
||||
public class RustLib implements RegexLib<RustLib.Ptr> {
|
||||
|
||||
// 300 pages is good for now
|
||||
// 600 pages is enough for our use
|
||||
private static final int PAGE_SIZE = 65536;
|
||||
private static final int MAX_MEMORY = 300 * PAGE_SIZE;
|
||||
private static final int MAX_MEMORY = 600 * PAGE_SIZE;
|
||||
|
||||
private final RustRegex rustRegex;
|
||||
|
||||
|
@ -0,0 +1,67 @@
|
||||
package asmble.examples.rustregex;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Assume;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@RunWith(Parameterized.class)
|
||||
public class RegexTest {
|
||||
// Too slow to run regularly
|
||||
private static final String TOO_SLOW = "[a-q][^u-z]{13}x";
|
||||
|
||||
@Parameterized.Parameters(name = "pattern: {0}")
|
||||
public static String[] data() {
|
||||
return new String[] {
|
||||
"Twain",
|
||||
"(?i)Twain",
|
||||
"[a-z]shing",
|
||||
"Huck[a-zA-Z]+|Saw[a-zA-Z]+",
|
||||
"\\b\\w+nn\\b",
|
||||
"[a-q][^u-z]{13}x",
|
||||
"Tom|Sawyer|Huckleberry|Finn",
|
||||
"(?i)Tom|Sawyer|Huckleberry|Finn",
|
||||
".{0,2}(Tom|Sawyer|Huckleberry|Finn)",
|
||||
".{2,4}(Tom|Sawyer|Huckleberry|Finn)",
|
||||
"Tom.{10,25}river|river.{10,25}Tom",
|
||||
"[a-zA-Z]+ing",
|
||||
"\\s[a-zA-Z]{0,12}ing\\s",
|
||||
"([A-Za-z]awyer|[A-Za-z]inn)\\s",
|
||||
"[\"'][^\"']{0,30}[?!\\.][\"']",
|
||||
"\u221E|\u2713",
|
||||
"\\p{Sm}"
|
||||
};
|
||||
}
|
||||
|
||||
private static RustLib rustLib;
|
||||
private static String twainText;
|
||||
private static RustLib.Ptr preparedRustTarget;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpClass() throws IOException {
|
||||
twainText = Main.loadTwainText();
|
||||
rustLib = new RustLib();
|
||||
preparedRustTarget = rustLib.prepareTarget(twainText);
|
||||
}
|
||||
|
||||
private String pattern;
|
||||
|
||||
public RegexTest(String pattern) {
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void checkJavaVersusRust() {
|
||||
Assume.assumeFalse("Skipped for being too slow", pattern.equals(TOO_SLOW));
|
||||
int expected = new JavaLib().compile(pattern).matchCount(twainText);
|
||||
// System.out.println("Found " + expected + " matches for pattern: " + pattern);
|
||||
Assert.assertEquals(
|
||||
expected,
|
||||
rustLib.compile(pattern).matchCount(preparedRustTarget)
|
||||
);
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user