Added Rust regex benchmark. Fixes issue #9.

This commit is contained in:
Chad Retz
2017-12-06 03:13:52 -06:00
parent 1418ba86cb
commit eaf4137c67
7 changed files with 266 additions and 10 deletions

View File

@ -0,0 +1,61 @@
package asmble.examples.rustregex;
import org.openjdk.jmh.annotations.*;
import java.io.IOException;
/**
 * JMH benchmark that counts the matches of each parameterized regex pattern
 * in the Twain text, once through {@link JavaLib} and once through
 * {@link RustLib}.
 *
 * <p>Patterns and the Rust-side target are prepared once in {@link #init()},
 * so the benchmark methods only measure {@code matchCount}.
 */
@State(Scope.Thread)
public class RegexBenchmark {
    /** Pattern under test; JMH injects one value per run. */
    @Param({
        "Twain",
        "(?i)Twain",
        "[a-z]shing",
        "Huck[a-zA-Z]+|Saw[a-zA-Z]+",
        "\\b\\w+nn\\b",
        // Too slow
        // "[a-q][^u-z]{13}x",
        "Tom|Sawyer|Huckleberry|Finn",
        "(?i)Tom|Sawyer|Huckleberry|Finn",
        ".{0,2}(Tom|Sawyer|Huckleberry|Finn)",
        ".{2,4}(Tom|Sawyer|Huckleberry|Finn)",
        "Tom.{10,25}river|river.{10,25}Tom",
        "[a-zA-Z]+ing",
        "\\s[a-zA-Z]{0,12}ing\\s",
        "([A-Za-z]awyer|[A-Za-z]inn)\\s",
        "[\"'][^\"']{0,30}[?!\\.][\"']",
        "\u221E|\u2713",
        "\\p{Sm}"
    })
    private String patternString;

    private String twainString;

    private JavaLib javaLib;
    private JavaLib.JavaPattern precompiledJavaPattern;

    private RustLib rustLib;
    private RustLib.Ptr preparedRustTarget;
    private RustLib.RustPattern precompiledRustPattern;

    /**
     * Loads the Twain text and precompiles the current pattern for both
     * libraries; also prepares the Rust-side target from the loaded text.
     *
     * @throws IOException declared because loading the Twain text may fail
     */
    @Setup
    public void init() throws IOException {
        // JMH is not handling this right, so we replace inline.
        // NOTE(review): presumably the non-ASCII parameter value reaches us with
        // each symbol degraded to '?'; confirm against the JMH parameter handling.
        if ("?|?".equals(patternString)) {
            patternString = "\u221E|\u2713";
        }
        twainString = Main.loadTwainText();

        javaLib = new JavaLib();
        precompiledJavaPattern = javaLib.compile(patternString);

        rustLib = new RustLib();
        preparedRustTarget = rustLib.prepareTarget(twainString);
        precompiledRustPattern = rustLib.compile(patternString);
    }

    /**
     * Counts matches with the precompiled Java pattern.
     *
     * @return the match count; returned (rather than discarded) so JMH consumes
     *     it and the JIT cannot eliminate the call as dead code
     */
    @Benchmark
    public int javaRegexCheck() {
        return precompiledJavaPattern.matchCount(twainString);
    }

    /**
     * Counts matches with the precompiled Rust pattern against the prepared target.
     *
     * @return the match count; returned (rather than discarded) so JMH consumes
     *     it and the JIT cannot eliminate the call as dead code
     */
    @Benchmark
    public int rustRegexCheck() {
        return precompiledRustPattern.matchCount(preparedRustTarget);
    }
}

View File

@ -7,9 +7,9 @@ import java.nio.charset.StandardCharsets;
public class RustLib implements RegexLib<RustLib.Ptr> {
// 300 pages is good for now
// 600 pages is enough for our use
private static final int PAGE_SIZE = 65536;
private static final int MAX_MEMORY = 300 * PAGE_SIZE;
private static final int MAX_MEMORY = 600 * PAGE_SIZE;
private final RustRegex rustRegex;

View File

@ -0,0 +1,67 @@
package asmble.examples.rustregex;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.IOException;
/**
 * Parameterized check that {@link RustLib} reports the same number of matches
 * as {@link JavaLib} for every pattern, using the Twain text as the target.
 */
@RunWith(Parameterized.class)
public class RegexTest {
    // Too slow to run regularly
    private static final String TOO_SLOW = "[a-q][^u-z]{13}x";

    // Fixtures shared by every parameterized instance; filled in setUpClass().
    private static String twainText;
    private static RustLib rustLib;
    private static RustLib.Ptr preparedRustTarget;

    /** The pattern exercised by this test instance. */
    private final String pattern;

    public RegexTest(String pattern) {
        this.pattern = pattern;
    }

    /** Supplies the patterns, one test instance per entry. */
    @Parameterized.Parameters(name = "pattern: {0}")
    public static String[] data() {
        return new String[] {
            "Twain",
            "(?i)Twain",
            "[a-z]shing",
            "Huck[a-zA-Z]+|Saw[a-zA-Z]+",
            "\\b\\w+nn\\b",
            TOO_SLOW,
            "Tom|Sawyer|Huckleberry|Finn",
            "(?i)Tom|Sawyer|Huckleberry|Finn",
            ".{0,2}(Tom|Sawyer|Huckleberry|Finn)",
            ".{2,4}(Tom|Sawyer|Huckleberry|Finn)",
            "Tom.{10,25}river|river.{10,25}Tom",
            "[a-zA-Z]+ing",
            "\\s[a-zA-Z]{0,12}ing\\s",
            "([A-Za-z]awyer|[A-Za-z]inn)\\s",
            "[\"'][^\"']{0,30}[?!\\.][\"']",
            "\u221E|\u2713",
            "\\p{Sm}"
        };
    }

    /** Loads the text once and prepares the Rust-side target from it. */
    @BeforeClass
    public static void setUpClass() throws IOException {
        twainText = Main.loadTwainText();
        rustLib = new RustLib();
        preparedRustTarget = rustLib.prepareTarget(twainText);
    }

    /** Asserts that the Rust match count equals the Java match count. */
    @Test
    public void checkJavaVersusRust() {
        boolean tooSlow = pattern.equals(TOO_SLOW);
        Assume.assumeFalse("Skipped for being too slow", tooSlow);

        JavaLib javaLib = new JavaLib();
        int expected = javaLib.compile(pattern).matchCount(twainText);
        // System.out.println("Found " + expected + " matches for pattern: " + pattern);

        Assert.assertEquals(expected, rustLib.compile(pattern).matchCount(preparedRustTarget));
    }
}