mirror of
https://github.com/fluencelabs/asmble
synced 2025-04-24 22:32:19 +00:00
parent
4febf34e69
commit
1418ba86cb
@ -1,3 +1,25 @@
|
||||
### Example: Rust Regex
|
||||
|
||||
TODO: The regex crate does not yet build with wasm32-unknown-unknown
|
||||
This shows an example of using the Rust regex library on the JVM. This builds on [rust-simple](../rust-simple) and
|
||||
[rust-string](../rust-string). There is also a simple benchmark checking the performance compared to the built-in Java
|
||||
regex engine.
|
||||
|
||||
#### Main
|
||||
|
||||
In this version, we include the `regex` crate. The main class loads a ~15k text file containing a Project Gutenberg collection of Mark
|
||||
Twain works (taken from [this blog post](https://rust-leipzig.github.io/regex/2017/03/28/comparison-of-regex-engines/)
|
||||
that does Rust regex performance benchmarks). Both the Java and Rust regex engines are abstracted into a common
|
||||
interface. When run, it checks how many times the word "Twain" appears via both regex engines.
|
||||
|
||||
To run it yourself, run the following from the root `asmble` dir:
|
||||
|
||||
    gradlew --no-daemon :examples:rust-regex:run
|
||||
|
||||
In release mode, the generated class is 903KB w/ ~575 methods. The output:
|
||||
|
||||
'Twain' count in Java: 811
|
||||
'Twain' count in Rust: 811
|
||||
|
||||
#### Benchmarks
|
||||
|
||||
TODO: JMH benchmarks
|
@ -8,15 +8,31 @@ use std::mem;
|
||||
use std::str;
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn compile_pattern(str_ptr: *mut u8, len: usize) -> *const Regex {
|
||||
pub extern "C" fn compile_pattern(str_ptr: *mut u8, len: usize) -> *mut Regex {
|
||||
unsafe {
|
||||
let bytes = Vec::<u8>::from_raw_parts(str_ptr, len, len);
|
||||
let s = str::from_utf8(&bytes).unwrap();
|
||||
let r = Regex::new(s).unwrap();
|
||||
let raw_r = &r as *const Regex;
|
||||
mem::forget(s);
|
||||
let s = str::from_utf8_unchecked(&bytes);
|
||||
let r = Box::new(Regex::new(s).unwrap());
|
||||
Box::into_raw(r)
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn dispose_pattern(r: *mut Regex) {
|
||||
unsafe {
|
||||
let _r = Box::from_raw(r);
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn match_count(r: *mut Regex, str_ptr: *mut u8, len: usize) -> usize {
|
||||
unsafe {
|
||||
let bytes = Vec::<u8>::from_raw_parts(str_ptr, len, len);
|
||||
let s = str::from_utf8_unchecked(&bytes);
|
||||
let r = Box::from_raw(r);
|
||||
let count = r.find_iter(s).count();
|
||||
mem::forget(r);
|
||||
raw_r
|
||||
count
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3,21 +3,26 @@ package asmble.examples.rustregex;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class JavaLib implements RegexLib {
|
||||
public class JavaLib implements RegexLib<String> {
|
||||
@Override
|
||||
public JavaPattern compile(String str) {
|
||||
return new JavaPattern(str);
|
||||
}
|
||||
|
||||
public static class JavaPattern implements RegexPattern {
|
||||
@Override
|
||||
public String prepareTarget(String target) {
|
||||
return target;
|
||||
}
|
||||
|
||||
final Pattern pattern;
|
||||
public class JavaPattern implements RegexPattern<String> {
|
||||
|
||||
JavaPattern(String pattern) {
|
||||
private final Pattern pattern;
|
||||
|
||||
private JavaPattern(String pattern) {
|
||||
this(Pattern.compile(pattern));
|
||||
}
|
||||
|
||||
JavaPattern(Pattern pattern) {
|
||||
private JavaPattern(Pattern pattern) {
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
|
@ -7,13 +7,16 @@ import java.nio.charset.StandardCharsets;
|
||||
|
||||
public class Main {
|
||||
|
||||
// 20 pages is good for now
|
||||
private static final int PAGE_SIZE = 65536;
|
||||
private static final int MAX_MEMORY = 20 * PAGE_SIZE;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String twainText = loadTwainText();
|
||||
System.out.println("Appearances of 'Twain': " + new JavaLib().compile("Twain").matchCount(twainText));
|
||||
String twainString = loadTwainText();
|
||||
System.out.println("'Twain' count in Java: " + matchCount(twainString, "Twain", new JavaLib()));
|
||||
System.out.println("'Twain' count in Rust: " + matchCount(twainString, "Twain", new RustLib()));
|
||||
}
|
||||
|
||||
public static <T> int matchCount(String target, String pattern, RegexLib<T> lib) {
|
||||
RegexLib.RegexPattern<T> compiledPattern = lib.compile(pattern);
|
||||
T preparedTarget = lib.prepareTarget(target);
|
||||
return compiledPattern.matchCount(preparedTarget);
|
||||
}
|
||||
|
||||
public static String loadTwainText() throws IOException {
|
||||
|
@ -1,10 +1,12 @@
|
||||
package asmble.examples.rustregex;
|
||||
|
||||
/**
 * Common abstraction over a regex engine.
 *
 * @param <T> the engine-specific representation of a search target, as produced by
 *            {@link #prepareTarget(String)} and consumed by
 *            {@link RegexPattern#matchCount(Object)}
 */
public interface RegexLib<T> {

    /**
     * Compiles a regular expression.
     *
     * @param str regular expression source
     * @return the compiled pattern
     */
    RegexPattern<T> compile(String str);

    /**
     * Converts a Java string into the form this engine searches.
     *
     * @param target text to search
     * @return the engine-specific target
     */
    T prepareTarget(String target);

    /**
     * A compiled regular expression.
     *
     * @param <T> the engine-specific target type
     */
    interface RegexPattern<T> {

        /**
         * @param target a target produced by the owning engine's {@code prepareTarget}
         * @return how many times this pattern matches in {@code target}
         */
        int matchCount(T target);
    }
}
|
||||
|
@ -1,19 +1,83 @@
|
||||
package asmble.examples.rustregex;
|
||||
|
||||
public class RustLib implements RegexLib {
|
||||
import asmble.generated.RustRegex;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
public class RustLib implements RegexLib<RustLib.Ptr> {
|
||||
|
||||
// 300 pages is good for now
|
||||
private static final int PAGE_SIZE = 65536;
|
||||
private static final int MAX_MEMORY = 300 * PAGE_SIZE;
|
||||
|
||||
private final RustRegex rustRegex;
|
||||
|
||||
public RustLib() {
|
||||
rustRegex = new RustRegex(MAX_MEMORY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public RustPattern compile(String str) {
|
||||
// TODO
|
||||
return null;
|
||||
return new RustPattern(str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Ptr prepareTarget(String target) {
|
||||
return ptrFromString(target);
|
||||
}
|
||||
|
||||
public class RustPattern implements RegexPattern {
|
||||
private Ptr ptrFromString(String str) {
|
||||
byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
|
||||
Ptr ptr = new Ptr(bytes.length);
|
||||
ptr.put(bytes);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
public class RustPattern implements RegexPattern<Ptr> {
|
||||
|
||||
private final int pointer;
|
||||
|
||||
private RustPattern(String pattern) {
|
||||
Ptr ptr = ptrFromString(pattern);
|
||||
pointer = rustRegex.compile_pattern(ptr.offset, ptr.size);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matchCount(String target) {
|
||||
// TODO
|
||||
return 0;
|
||||
protected void finalize() throws Throwable {
|
||||
rustRegex.dispose_pattern(pointer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int matchCount(Ptr target) {
|
||||
return rustRegex.match_count(pointer, target.offset, target.size);
|
||||
}
|
||||
}
|
||||
|
||||
public class Ptr {
|
||||
|
||||
final int offset;
|
||||
final int size;
|
||||
|
||||
Ptr(int offset, int size) {
|
||||
this.offset = offset;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
Ptr(int size) {
|
||||
this(rustRegex.alloc(size), size);
|
||||
}
|
||||
|
||||
void put(byte[] bytes) {
|
||||
// Yeah, yeah, not thread safe
|
||||
ByteBuffer memory = rustRegex.getMemory();
|
||||
memory.position(offset);
|
||||
memory.put(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void finalize() throws Throwable {
|
||||
rustRegex.dealloc(offset, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user