Don't use JSON for custom section format

This commit migrates away from using Serde for the custom section in
wasm executables. This is a refactoring of a purely-internal data
structure to `wasm-bindgen` and should have no visible functional change
on users.

The motivation for this commit is twofold:

* First, the compile times using `serde_json` and `serde_derive` for the
  syntax extension aren't the most fun.
* Second, eventually we're going to want to stabilize the layout of the
  custom section, and it's highly unlikely to be JSON!

Primarily, though, the intention of this commit is to improve the
cold-cache compile time of `wasm-bindgen` by ensuring that for new users
this project builds as quickly as possible. By removing some heavyweight
dependencies from the procedural macro, `serde`, `serde_derive`, and
`serde_json`, we're able to get a pretty nice build time improvement for
the `wasm-bindgen` crate itself:

|             | single-core build | parallel build |
|-------------|-------------------|----------------|
| master      |             36.5s |          17.3s |
| this commit |             20.5s |          11.8s |

These aren't really end-all-be-all wins, but they're much better,
especially on the spectrum of weaker CPUs (in theory modeled by the
single-core case showing we have 42% less CPU work in theory).
This commit is contained in:
Alex Crichton
2018-08-26 15:43:33 -07:00
parent aac8696d05
commit f749c7cf95
12 changed files with 752 additions and 385 deletions

View File

@ -0,0 +1,147 @@
use std::str;
/// A value that can be read back out of a byte buffer.
///
/// The `'src` lifetime is that of the buffer being decoded, which lets
/// implementors hand out borrows into it instead of copying.
pub trait Decode<'src>: Sized {
    /// Decodes one value from the front of `data`.
    ///
    /// Implementations shrink `data` so that it only covers the bytes which
    /// have not been consumed yet.
    fn decode(data: &mut &'src [u8]) -> Self;

    /// Decodes one value which is expected to span the whole of `data`.
    ///
    /// # Panics
    ///
    /// Panics if any bytes are left over once the value has been decoded.
    fn decode_all(mut data: &'src [u8]) -> Self {
        let decoded = Self::decode(&mut data);
        assert!(data.len() == 0);
        decoded
    }
}
/// Pops the first byte off `b`, leaving `b` pointing at the remaining bytes.
///
/// # Panics
///
/// Panics if `b` is empty.
fn get<'a>(b: &mut &'a [u8]) -> u8 {
    // Copy the inner slice reference out so the tail keeps the full `'a`
    // lifetime rather than the shorter borrow of `b` itself.
    let bytes: &'a [u8] = *b;
    let first = bytes[0];
    *b = &bytes[1..];
    first
}
/// A `bool` occupies a single byte: zero is `false`, anything else is `true`.
impl<'src> Decode<'src> for bool {
    fn decode(data: &mut &'src [u8]) -> Self {
        match get(data) {
            0 => false,
            _ => true,
        }
    }
}
/// A `u32` is stored as a little-endian base-128 varint: each byte carries
/// seven payload bits, and the high bit (`0x80`) signals that another byte
/// follows.
impl<'src> Decode<'src> for u32 {
    /// Decodes a varint-encoded `u32` from the front of `data`.
    ///
    /// # Panics
    ///
    /// Panics if the input ends before a terminating byte is found, or if
    /// the encoding runs past five bytes (more than a `u32` can hold).
    fn decode(data: &mut &'src [u8]) -> Self {
        let mut value = 0u32;
        let mut shift = 0u32;
        loop {
            // Without this check an over-long sequence would shift by 35 or
            // more bits, which panics in debug builds but silently wraps the
            // shift amount (producing a garbage value) in release builds.
            assert!(shift < 32, "LEB128-encoded u32 is longer than 5 bytes");
            let byte = get(data);
            value |= u32::from(byte & 0x7f) << shift;
            if byte & 0x80 == 0 {
                break value;
            }
            shift += 7;
        }
    }
}
/// A string is a `u32` byte length followed by that many bytes of UTF-8. The
/// decoded `&str` borrows directly from the input buffer.
impl<'src> Decode<'src> for &'src str {
    /// # Panics
    ///
    /// Panics if fewer bytes remain than the length prefix claims, or if the
    /// bytes are not valid UTF-8.
    fn decode(data: &mut &'src [u8]) -> &'src str {
        let len = u32::decode(data) as usize;
        let (bytes, remainder) = data.split_at(len);
        *data = remainder;
        str::from_utf8(bytes).unwrap()
    }
}
impl<'src, T: Decode<'src>> Decode<'src> for Vec<T> {
fn decode(data: &mut &'src [u8]) -> Self {
let n = u32::decode(data);
let mut v = Vec::with_capacity(n as usize);
for _ in 0..n {
v.push(Decode::decode(data));
}
v
}
}
/// An option is a one-byte tag: `0` means `None`, `1` means `Some` and is
/// followed by the encoded payload.
impl<'src, T: Decode<'src>> Decode<'src> for Option<T> {
    /// # Panics
    ///
    /// Panics if the tag byte is anything other than `0` or `1`.
    fn decode(data: &mut &'src [u8]) -> Self {
        let tag = get(data);
        if tag == 0 {
            return None;
        }
        if tag == 1 {
            return Some(T::decode(data));
        }
        unreachable!()
    }
}
// Defines a `pub struct` with the listed public fields, plus a `Decode` impl
// for it that decodes every field in declaration order.
//
// Shape of an invocation: `decode_struct!(Name (<generics>) field: Type, ...)`.
// The parenthesized tokens are pasted between `<` and `>` after the struct
// name; `decode_api!` passes `'a` for borrowing structs and nothing otherwise.
macro_rules! decode_struct {
($name:ident ($($lt:tt)*) $($field:ident: $ty:ty,)*) => {
pub struct $name <$($lt)*> {
$(pub $field: $ty,)*
}
impl <'a> Decode<'a> for $name <$($lt)*> {
// NOTE(review): the leading underscore on `_data` presumably avoids an
// unused-variable warning for structs declared without fields - confirm.
fn decode(_data: &mut &'a [u8]) -> Self {
$name {
// One `Decode::decode` call per field; the target type is inferred from
// the field's declared type.
$($field: Decode::decode(_data),)*
}
}
}
}
}
// Defines a `pub enum` with the given variants, plus a `Decode` impl that
// reads one tag byte and selects the variant whose zero-based declaration
// index equals that byte. Variants may be unit-like (`Name,`) or carry a
// single payload (`Name(Type),`), which is decoded right after the tag.
//
// Shape of an invocation: `decode_enum!(Name (<generics>) Variant, ...)`.
macro_rules! decode_enum {
// Entry point: emit the enum itself and start accumulating match arms with
// the running index at `0` and an empty arm list.
($name:ident ($($lt:tt)*) $($fields:tt)*) => (
pub enum $name <$($lt)*> { $($fields)* }
impl <'a> Decode<'a> for $name <$($lt)*> {
fn decode(data: &mut &'a [u8]) -> Self {
// Glob-import the variants so the generated arms can name them unqualified.
use self::$name::*;
// `dst` is threaded through every `@arms` step but never appears in the
// generated code.
decode_enum!(@arms data dst (0) () $($fields)*)
}
}
);
// Base case: no variants left. Wrap the accumulated arms in a `match` on the
// tag byte; any tag without an arm hits `unreachable!()`.
(@arms $data:ident $dst:ident ($cnt:expr) ($($arms:tt)*)) => (
decode_enum!(@expr match get($data) { $($arms)* _ => unreachable!() })
);
// Unit variant: append an arm for the current index, then recurse with the
// index bumped by one.
(@arms $data:ident $dst:ident ($cnt:expr) ($($arms:tt)*) $name:ident, $($rest:tt)*) => (
decode_enum!(
@arms
$data
$dst
($cnt+1)
($($arms)* n if n == $cnt => $name, )
$($rest)*
)
);
// Single-payload variant: same as above, but the arm also decodes the payload.
(@arms $data:ident $dst:ident ($cnt:expr) ($($arms:tt)*) $name:ident($t:ty), $($rest:tt)*) => (
decode_enum!(
@arms
$data
$dst
($cnt+1)
($($arms)* n if n == $cnt => $name(Decode::decode($data)), )
$($rest)*
)
);
// Passes the assembled `match` through an `expr` matcher and emits it as-is.
(@expr $e:expr) => ($e);
}
// Consumes a sequence of `struct Name { .. }` / `enum Name { .. }` declarations
// (each optionally carrying a `<'a>` parameter) one item at a time, forwarding
// each to `decode_struct!` or `decode_enum!` and then recursing on the rest.
macro_rules! decode_api {
// Nothing left to process.
() => ();
// Struct borrowing from the input: forward `'a` as its generics.
(struct $name:ident<'a> { $($fields:tt)* } $($rest:tt)*) => (
decode_struct!($name ('a) $($fields)*);
decode_api!($($rest)*);
);
// Struct without a lifetime parameter: forward empty generics.
(struct $name:ident { $($fields:tt)* } $($rest:tt)*) => (
decode_struct!($name () $($fields)*);
decode_api!($($rest)*);
);
// Enum borrowing from the input.
(enum $name:ident<'a> { $($variants:tt)* } $($rest:tt)*) => (
decode_enum!($name ('a) $($variants)*);
decode_api!($($rest)*);
);
// Enum without a lifetime parameter.
(enum $name:ident { $($variants:tt)* } $($rest:tt)*) => (
decode_enum!($name () $($variants)*);
decode_api!($($rest)*);
);
}
// Hands `decode_api` to `shared_api!`, which is not defined in this file.
// NOTE(review): presumably `shared_api!` comes from the `#[macro_use]`-imported
// shared crate and expands to `decode_api! { <schema declarations> }`, which
// would generate the decodable types used by this crate - confirm.
shared_api!(decode_api);

View File

@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet};
use std::fmt::Write;
use std::mem;
use decode;
use failure::{Error, ResultExt};
use parity_wasm::elements::*;
use shared;
@ -25,8 +26,8 @@ pub struct Context<'a> {
pub typescript: String,
pub exposed_globals: HashSet<&'static str>,
pub required_internal_exports: HashSet<&'static str>,
pub imported_functions: HashSet<String>,
pub imported_statics: HashSet<String>,
pub imported_functions: HashSet<&'a str>,
pub imported_statics: HashSet<&'a str>,
pub config: &'a Bindgen,
pub module: &'a mut Module,
@ -37,7 +38,7 @@ pub struct Context<'a> {
/// from, `None` being the global module. The second key is a map of
/// identifiers we've already imported from the module to what they're
/// called locally.
pub imported_names: HashMap<Option<String>, HashMap<String, String>>,
pub imported_names: HashMap<Option<&'a str>, HashMap<String, String>>,
/// A set of all imported identifiers to the number of times they've been
/// imported, used to generate new identifiers.
@ -59,9 +60,9 @@ pub struct ExportedClass {
}
pub struct SubContext<'a, 'b: 'a> {
pub program: &'a shared::Program,
pub program: &'b decode::Program<'b>,
pub cx: &'a mut Context<'b>,
pub vendor_prefixes: HashMap<String, Vec<String>>,
pub vendor_prefixes: HashMap<&'b str, Vec<&'b str>>,
}
const INITIAL_SLAB_VALUES: &[&str] = &["undefined", "null", "true", "false"];
@ -1690,7 +1691,7 @@ impl<'a, 'b> SubContext<'a, 'b> {
})?;
}
for f in self.program.imports.iter() {
if let shared::ImportKind::Type(ty) = &f.kind {
if let decode::ImportKind::Type(ty) = &f.kind {
self.register_vendor_prefix(ty);
}
}
@ -1707,7 +1708,7 @@ impl<'a, 'b> SubContext<'a, 'b> {
Ok(())
}
fn generate_export(&mut self, export: &shared::Export) -> Result<(), Error> {
fn generate_export(&mut self, export: &decode::Export<'b>) -> Result<(), Error> {
if let Some(ref class) = export.class {
return self.generate_export_for_class(class, export);
}
@ -1734,8 +1735,8 @@ impl<'a, 'b> SubContext<'a, 'b> {
fn generate_export_for_class(
&mut self,
class_name: &str,
export: &shared::Export,
class_name: &'b str,
export: &decode::Export,
) -> Result<(), Error> {
let wasm_name = shared::struct_function_export_name(class_name, &export.function.name);
@ -1785,9 +1786,9 @@ impl<'a, 'b> SubContext<'a, 'b> {
Ok(())
}
fn generate_import(&mut self, import: &shared::Import) -> Result<(), Error> {
fn generate_import(&mut self, import: &decode::Import<'b>) -> Result<(), Error> {
match import.kind {
shared::ImportKind::Function(ref f) => {
decode::ImportKind::Function(ref f) => {
self.generate_import_function(import, f).with_context(|_| {
format!(
"failed to generate bindings for JS import `{}`",
@ -1795,29 +1796,29 @@ impl<'a, 'b> SubContext<'a, 'b> {
)
})?;
}
shared::ImportKind::Static(ref s) => {
decode::ImportKind::Static(ref s) => {
self.generate_import_static(import, s).with_context(|_| {
format!("failed to generate bindings for JS import `{}`", s.name)
})?;
}
shared::ImportKind::Type(ref ty) => {
decode::ImportKind::Type(ref ty) => {
self.generate_import_type(import, ty).with_context(|_| {
format!("failed to generate bindings for JS import `{}`", ty.name,)
})?;
}
shared::ImportKind::Enum(_) => {}
decode::ImportKind::Enum(_) => {}
}
Ok(())
}
fn generate_import_static(
&mut self,
info: &shared::Import,
import: &shared::ImportStatic,
info: &decode::Import<'b>,
import: &decode::ImportStatic<'b>,
) -> Result<(), Error> {
// The same static can be imported in multiple locations, so only
// generate bindings once for it.
if !self.cx.imported_statics.insert(import.shim.clone()) {
if !self.cx.imported_statics.insert(import.shim) {
return Ok(());
}
@ -1841,8 +1842,8 @@ impl<'a, 'b> SubContext<'a, 'b> {
fn generate_import_function(
&mut self,
info: &shared::Import,
import: &shared::ImportFunction,
info: &decode::Import<'b>,
import: &decode::ImportFunction<'b>,
) -> Result<(), Error> {
if !self.cx.wasm_import_needed(&import.shim) {
return Ok(());
@ -1850,7 +1851,7 @@ impl<'a, 'b> SubContext<'a, 'b> {
// It's possible for the same function to be imported in two locations,
// but we only want to generate one.
if !self.cx.imported_functions.insert(import.shim.clone()) {
if !self.cx.imported_functions.insert(import.shim) {
return Ok(());
}
@ -1872,8 +1873,8 @@ impl<'a, 'b> SubContext<'a, 'b> {
fn generated_import_target(
&mut self,
info: &shared::Import,
import: &shared::ImportFunction,
info: &decode::Import<'b>,
import: &decode::ImportFunction,
descriptor: &Descriptor,
) -> Result<String, Error> {
let method_data = match &import.method {
@ -1896,14 +1897,14 @@ impl<'a, 'b> SubContext<'a, 'b> {
let class = self.import_name(info, &method_data.class)?;
let op = match &method_data.kind {
shared::MethodKind::Constructor => return Ok(format!("new {}", class)),
shared::MethodKind::Operation(op) => op,
decode::MethodKind::Constructor => return Ok(format!("new {}", class)),
decode::MethodKind::Operation(op) => op,
};
let target = if import.structural {
let location = if op.is_static { &class } else { "this" };
match &op.kind {
shared::OperationKind::Regular => {
decode::OperationKind::Regular => {
let nargs = descriptor.unwrap_function().arguments.len();
let mut s = format!("function(");
for i in 0..nargs - 1 {
@ -1924,31 +1925,31 @@ impl<'a, 'b> SubContext<'a, 'b> {
s.push_str(");\n}");
s
}
shared::OperationKind::Getter(g) => format!(
decode::OperationKind::Getter(g) => format!(
"function() {{
return {}.{};
}}",
location, g
),
shared::OperationKind::Setter(s) => format!(
decode::OperationKind::Setter(s) => format!(
"function(y) {{
{}.{} = y;
}}",
location, s
),
shared::OperationKind::IndexingGetter => format!(
decode::OperationKind::IndexingGetter => format!(
"function(y) {{
return {}[y];
}}",
location
),
shared::OperationKind::IndexingSetter => format!(
decode::OperationKind::IndexingSetter => format!(
"function(y, z) {{
{}[y] = z;
}}",
location
),
shared::OperationKind::IndexingDeleter => format!(
decode::OperationKind::IndexingDeleter => format!(
"function(y) {{
delete {}[y];
}}",
@ -1960,30 +1961,30 @@ impl<'a, 'b> SubContext<'a, 'b> {
class,
if op.is_static { "" } else { ".prototype" });
let (mut target, name) = match &op.kind {
shared::OperationKind::Regular => {
decode::OperationKind::Regular => {
(format!("{}.{}", target, import.function.name), &import.function.name)
}
shared::OperationKind::Getter(g) => {
decode::OperationKind::Getter(g) => {
self.cx.expose_get_inherited_descriptor();
(format!(
"GetOwnOrInheritedPropertyDescriptor({}, '{}').get",
target, g,
), g)
}
shared::OperationKind::Setter(s) => {
decode::OperationKind::Setter(s) => {
self.cx.expose_get_inherited_descriptor();
(format!(
"GetOwnOrInheritedPropertyDescriptor({}, '{}').set",
target, s,
), s)
}
shared::OperationKind::IndexingGetter => {
decode::OperationKind::IndexingGetter => {
panic!("indexing getter should be structural")
}
shared::OperationKind::IndexingSetter => {
decode::OperationKind::IndexingSetter => {
panic!("indexing setter should be structural")
}
shared::OperationKind::IndexingDeleter => {
decode::OperationKind::IndexingDeleter => {
panic!("indexing deleter should be structural")
}
};
@ -2009,8 +2010,8 @@ impl<'a, 'b> SubContext<'a, 'b> {
fn generate_import_type(
&mut self,
info: &shared::Import,
import: &shared::ImportType,
info: &decode::Import<'b>,
import: &decode::ImportType,
) -> Result<(), Error> {
if !self.cx.wasm_import_needed(&import.instanceof_shim) {
return Ok(());
@ -2029,7 +2030,7 @@ impl<'a, 'b> SubContext<'a, 'b> {
Ok(())
}
fn generate_enum(&mut self, enum_: &shared::Enum) {
fn generate_enum(&mut self, enum_: &decode::Enum) {
let mut variants = String::new();
for variant in enum_.variants.iter() {
@ -2052,7 +2053,7 @@ impl<'a, 'b> SubContext<'a, 'b> {
self.cx.typescript.push_str("}\n");
}
fn generate_struct(&mut self, struct_: &shared::Struct) -> Result<(), Error> {
fn generate_struct(&mut self, struct_: &decode::Struct) -> Result<(), Error> {
let mut dst = String::new();
let mut ts_dst = String::new();
for field in struct_.fields.iter() {
@ -2098,7 +2099,7 @@ impl<'a, 'b> SubContext<'a, 'b> {
let class = self
.cx
.exported_classes
.entry(struct_.name.clone())
.entry(struct_.name.to_string())
.or_insert_with(Default::default);
class.comments = format_doc_comments(&struct_.comments, None);
class.contents.push_str(&dst);
@ -2110,18 +2111,18 @@ impl<'a, 'b> SubContext<'a, 'b> {
fn register_vendor_prefix(
&mut self,
info: &shared::ImportType,
info: &decode::ImportType<'b>,
) {
if info.vendor_prefixes.len() == 0 {
return
}
self.vendor_prefixes
.entry(info.name.to_string())
.entry(info.name)
.or_insert(Vec::new())
.extend(info.vendor_prefixes.iter().cloned());
}
fn import_name(&mut self, import: &shared::Import, item: &str) -> Result<String, Error> {
fn import_name(&mut self, import: &decode::Import<'b>, item: &str) -> Result<String, Error> {
// First up, imports don't work at all in `--no-modules` mode as we're
// not sure how to import them.
if self.cx.config.no_modules {
@ -2180,7 +2181,7 @@ impl<'a, 'b> SubContext<'a, 'b> {
let identifier = self
.cx
.imported_names
.entry(import.module.clone())
.entry(import.module)
.or_insert_with(Default::default)
.entry(name_to_import.to_string())
.or_insert_with(|| {
@ -2207,7 +2208,7 @@ impl<'a, 'b> SubContext<'a, 'b> {
switch(imports_post, &name, "", vendor_prefixes);
imports_post.push_str(";\n");
fn switch(dst: &mut String, name: &str, prefix: &str, left: &[String]) {
fn switch(dst: &mut String, name: &str, prefix: &str, left: &[&str]) {
if left.len() == 0 {
dst.push_str(prefix);
return dst.push_str(name);
@ -2254,7 +2255,7 @@ fn generate_identifier(name: &str, used_names: &mut HashMap<String, usize>) -> S
}
}
fn format_doc_comments(comments: &Vec<String>, js_doc_comments: Option<String>) -> String {
fn format_doc_comments(comments: &[&str], js_doc_comments: Option<String>) -> String {
let body: String = comments
.iter()
.map(|c| format!("*{}\n", c.trim_matches('"')))

View File

@ -1,7 +1,7 @@
#![doc(html_root_url = "https://docs.rs/wasm-bindgen-cli-support/0.2")]
extern crate parity_wasm;
extern crate serde_json;
#[macro_use]
extern crate wasm_bindgen_shared as shared;
extern crate wasm_bindgen_gc;
#[macro_use]
@ -13,10 +13,12 @@ use std::env;
use std::fs;
use std::mem;
use std::path::{Path, PathBuf};
use std::str;
use failure::{Error, ResultExt};
use parity_wasm::elements::*;
mod decode;
mod descriptor;
mod js;
pub mod wasm2es6js;
@ -137,7 +139,8 @@ impl Bindgen {
(module, stem)
}
};
let programs = extract_programs(&mut module)
let mut program_storage = Vec::new();
let programs = extract_programs(&mut module, &mut program_storage)
.with_context(|_| "failed to extract wasm-bindgen custom sections")?;
// Here we're actually instantiating the module we've parsed above for
@ -289,35 +292,50 @@ impl Bindgen {
}
}
fn extract_programs(module: &mut Module) -> Result<Vec<shared::Program>, Error> {
let version = shared::version();
let mut ret = Vec::new();
fn extract_programs<'a>(
module: &mut Module,
program_storage: &'a mut Vec<Vec<u8>>,
) -> Result<Vec<decode::Program<'a>>, Error> {
let my_version = shared::version();
let mut to_remove = Vec::new();
assert!(program_storage.is_empty());
for (i, s) in module.sections().iter().enumerate() {
let custom = match *s {
Section::Custom(ref s) => s,
for (i, s) in module.sections_mut().iter_mut().enumerate() {
let custom = match s {
Section::Custom(s) => s,
_ => continue,
};
if custom.name() != "__wasm_bindgen_unstable" {
continue;
}
to_remove.push(i);
program_storage.push(mem::replace(custom.payload_mut(), Vec::new()));
}
let mut payload = custom.payload();
while payload.len() > 0 {
let len = ((payload[0] as usize) << 0)
| ((payload[1] as usize) << 8)
| ((payload[2] as usize) << 16)
| ((payload[3] as usize) << 24);
let (a, b) = payload[4..].split_at(len as usize);
payload = b;
for i in to_remove.into_iter().rev() {
module.sections_mut().remove(i);
}
let p: shared::ProgramOnlySchema = match serde_json::from_slice(&a) {
Ok(f) => f,
Err(e) => bail!("failed to decode what looked like wasm-bindgen data: {}", e),
};
if p.schema_version != shared::SCHEMA_VERSION {
let mut ret = Vec::new();
for program in program_storage.iter() {
let mut payload = &program[..];
while let Some(data) = get_remaining(&mut payload) {
// Historical versions of wasm-bindgen have used JSON as the custom
// data section format. Newer versions, however, are using a custom
// serialization protocol that looks much more like the wasm spec.
//
// We, however, want a sanity check to ensure that if we're running
// against the wrong wasm-bindgen we get a nicer error than an
// internal decode error. To that end we continue to verify a tiny
// bit of json at the beginning of each blob before moving to the
// next blob. This should keep us compatible with older wasm-bindgen
// instances as well as forward-compatible for now.
//
// Note, though, that as `wasm-pack` picks up steam it's hoped we
// can just delete this entirely. The `wasm-pack` project already
// manages versions for us, so we in theory should need this check
// less and less over time.
if let Some(their_version) = verify_schema_matches(data)? {
bail!(
"
@ -341,24 +359,67 @@ or you can update the binary with
if this warning fails to go away though and you're not sure what to do feel free
to open an issue at https://github.com/rustwasm/wasm-bindgen/issues!
",
p.version,
version
their_version,
my_version,
);
}
let p: shared::Program = match serde_json::from_slice(&a) {
Ok(f) => f,
Err(e) => bail!("failed to decode what looked like wasm-bindgen data: {}", e),
};
ret.push(p);
let next = get_remaining(&mut payload).unwrap();
ret.push(<decode::Program as decode::Decode>::decode_all(next));
}
}
for i in to_remove.into_iter().rev() {
module.sections_mut().remove(i);
}
Ok(ret)
}
/// Splits the next length-prefixed chunk off the front of `data`.
///
/// A chunk is a four-byte little-endian length followed by that many bytes.
/// On success the chunk's bytes are returned and `data` is advanced past
/// them; `None` is returned once `data` is empty.
///
/// # Panics
///
/// Panics if `data` is non-empty but shorter than the four-byte prefix, or
/// shorter than the length the prefix announces.
fn get_remaining<'a>(data: &mut &'a [u8]) -> Option<&'a [u8]> {
    let bytes: &'a [u8] = *data;
    if bytes.len() == 0 {
        return None;
    }
    let len = (bytes[0] as usize)
        | ((bytes[1] as usize) << 8)
        | ((bytes[2] as usize) << 16)
        | ((bytes[3] as usize) << 24);
    let (chunk, rest) = bytes[4..].split_at(len);
    *data = rest;
    Some(chunk)
}
/// Checks the small JSON blob that precedes each encoded program.
///
/// Returns `Ok(None)` when the blob's `schema_version` equals
/// `shared::SCHEMA_VERSION`. Otherwise returns `Ok(Some(version))` holding the
/// blob's `version` string so the caller can report the mismatch.
///
/// # Errors
///
/// Fails if `data` is not UTF-8, is not wrapped in `{`...`}`, or lacks a
/// `schema_version` key (or, on mismatch, a `version` key) with a string value.
fn verify_schema_matches<'a>(data: &'a [u8])
    -> Result<Option<&'a str>, Error>
{
    macro_rules! bad {
        () => (bail!("failed to decode what looked like wasm-bindgen data"))
    }

    /// Returns the text between the first occurrence of `needle` and the
    /// next `"` after it, or `None` if either is missing.
    fn string_after<'s>(haystack: &'s str, needle: &str) -> Option<&'s str> {
        let start = haystack.find(needle)? + needle.len();
        let tail = &haystack[start..];
        tail.find("\"").map(|end| &tail[..end])
    }

    let text = match str::from_utf8(data) {
        Ok(s) => s,
        Err(_) => bad!(),
    };
    if !(text.starts_with("{") && text.ends_with("}")) {
        bad!()
    }

    let their_schema_version = match string_after(text, "\"schema_version\":\"") {
        Some(v) => v,
        None => bad!(),
    };
    if their_schema_version == shared::SCHEMA_VERSION {
        return Ok(None)
    }

    // Schemas differ: dig out the version string for the error message.
    let their_version = match string_after(text, "\"version\":\"") {
        Some(v) => v,
        None => bad!(),
    };
    Ok(Some(their_version))
}
fn reset_indentation(s: &str) -> String {
let mut indent: u32 = 0;
let mut dst = String::new();