From b9b240ef61924e818e6cdb6a6b05cd76d82981e5 Mon Sep 17 00:00:00 2001 From: Matthew Pope Date: Wed, 21 Feb 2024 15:48:37 -0800 Subject: [PATCH] Hard coded system symbol table --- .../ion/impl/Ion_1_0_SystemSymbolTable.java | 200 ++++++++++++++++++ .../amazon/ion/impl/SharedSymbolTable.java | 32 +-- .../ion/impl/bin/AbstractSymbolTable.java | 2 +- .../java/com/amazon/ion/impl/bin/Symbols.java | 158 +------------- 4 files changed, 210 insertions(+), 182 deletions(-) create mode 100644 src/main/java/com/amazon/ion/impl/Ion_1_0_SystemSymbolTable.java diff --git a/src/main/java/com/amazon/ion/impl/Ion_1_0_SystemSymbolTable.java b/src/main/java/com/amazon/ion/impl/Ion_1_0_SystemSymbolTable.java new file mode 100644 index 0000000000..bab63dd688 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/Ion_1_0_SystemSymbolTable.java @@ -0,0 +1,200 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.amazon.ion.impl; + +import com.amazon.ion.IonException; +import com.amazon.ion.SymbolTable; +import com.amazon.ion.SymbolToken; +import com.amazon.ion.impl.bin.AbstractSymbolTable; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +import static com.amazon.ion.SystemSymbols.*; +import static com.amazon.ion.impl._Private_Utils.newSymbolToken; + +public final class Ion_1_0_SystemSymbolTable extends AbstractSymbolTable { + + private Ion_1_0_SystemSymbolTable() { + super(ION, 1); + } + + public static final Ion_1_0_SystemSymbolTable INSTANCE = new Ion_1_0_SystemSymbolTable(); + + // All Ion 1.0 System Symbol Tokens + static final SymbolTokenImpl ION_TOKEN = newSymbolToken(ION, ION_SID); + static final SymbolTokenImpl ION_1_0_TOKEN = newSymbolToken(ION_1_0, ION_1_0_SID); + static final SymbolTokenImpl ION_SYMBOL_TABLE_TOKEN = newSymbolToken(ION_SYMBOL_TABLE, ION_SYMBOL_TABLE_SID); + static final SymbolTokenImpl NAME_TOKEN = newSymbolToken(NAME, NAME_SID); + static final SymbolTokenImpl 
VERSION_TOKEN = newSymbolToken(VERSION, VERSION_SID); + static final SymbolTokenImpl IMPORTS_TOKEN = newSymbolToken(IMPORTS, IMPORTS_SID); + static final SymbolTokenImpl SYMBOLS_TOKEN = newSymbolToken(SYMBOLS, SYMBOLS_SID); + static final SymbolTokenImpl MAX_ID_TOKEN = newSymbolToken(MAX_ID, MAX_ID_SID); + static final SymbolTokenImpl ION_SHARED_SYMBOL_TABLE_TOKEN = newSymbolToken(ION_SHARED_SYMBOL_TABLE, ION_SHARED_SYMBOL_TABLE_SID); + + // Hashes of all Ion 1.0 System Symbol Text + private static final int ION_HASHCODE = ION.hashCode(); + private static final int ION_1_0_HASHCODE = ION_1_0.hashCode(); + private static final int ION_SYMBOL_TABLE_HASHCODE = ION_SYMBOL_TABLE.hashCode(); + private static final int NAME_HASHCODE = NAME.hashCode(); + private static final int VERSION_HASHCODE = VERSION.hashCode(); + private static final int IMPORTS_HASHCODE = IMPORTS.hashCode(); + private static final int SYMBOLS_HASHCODE = SYMBOLS.hashCode(); + private static final int MAX_ID_HASHCODE = MAX_ID.hashCode(); + private static final int ION_SHARED_SYMBOL_TABLE_HASHCODE = ION_SHARED_SYMBOL_TABLE.hashCode(); + + // Trivial method implementations + public SymbolTable[] getImportedTables() { + return null; + } + + public int getImportedMaxId() { + return 0; + } + + public boolean isSystemTable() { + return true; + } + + public boolean isSubstitute() { + return false; + } + + public boolean isSharedTable() { + return true; + } + + public boolean isReadOnly() { + return true; + } + + public boolean isLocalTable() { + return false; + } + + public SymbolTable getSystemSymbolTable() { + return this; + } + + public int getMaxId() { + return ION_1_0_MAX_ID; + } + + // Interesting method implementations + + public SymbolToken intern(final String text) { + SymbolToken symbol = find(text); + if (symbol == null) { + throw new IonException("Cannot intern new symbol into system symbol table"); + } + return symbol; + } + + public String findKnownSymbol(final int id) { + return 
staticFindKnownSymbol(id); + } + + public static String staticFindKnownSymbol(final int id) { + // This compiles into a jump table, which seems to be marginally faster than an array lookup based on + // some informal performance testing most likely due to the fact that the array needs to be loaded from the heap. + switch (id) { + // TODO: It is unclear whether an exception should be thrown here. Existing implementation is inconsistent. + // case 0: throw new IllegalArgumentException("SID cannot be less than 1: " + id); + case ION_SID: return ION; + case ION_1_0_SID: return ION_1_0; + case ION_SYMBOL_TABLE_SID: return ION_SYMBOL_TABLE; + case NAME_SID: return NAME; + case VERSION_SID: return VERSION; + case IMPORTS_SID: return IMPORTS; + case SYMBOLS_SID: return SYMBOLS; + case MAX_ID_SID: return MAX_ID; + case ION_SHARED_SYMBOL_TABLE_SID: return ION_SHARED_SYMBOL_TABLE; + default: return null; + } + } + + public static SymbolToken staticFindKnownSymbolToken(final int id) { + // This compiles into a jump table, which seems to be marginally faster than an array lookup based on + // some informal performance testing most likely due to the fact that the array needs to be loaded from the heap. + switch (id) { + case ION_SID: return ION_TOKEN; + case ION_1_0_SID: return ION_1_0_TOKEN; + case ION_SYMBOL_TABLE_SID: return ION_SYMBOL_TABLE_TOKEN; + case NAME_SID: return NAME_TOKEN; + case VERSION_SID: return VERSION_TOKEN; + case IMPORTS_SID: return IMPORTS_TOKEN; + case SYMBOLS_SID: return SYMBOLS_TOKEN; + case MAX_ID_SID: return MAX_ID_TOKEN; + case ION_SHARED_SYMBOL_TABLE_SID: return ION_SHARED_SYMBOL_TABLE_TOKEN; + default: return null; + } + } + + public SymbolToken find(String text) { + // Check all symbol hashes without branching! 
+ int hash = text.hashCode(); + long result = (long) (hash - ION_HASHCODE) * + (hash - ION_1_0_HASHCODE) * + (hash - ION_SYMBOL_TABLE_HASHCODE) * + (hash - NAME_HASHCODE) * + (hash - VERSION_HASHCODE) * + (hash - IMPORTS_HASHCODE) * + (hash - SYMBOLS_HASHCODE) * + (hash - MAX_ID_HASHCODE) * + (hash - ION_SHARED_SYMBOL_TABLE_HASHCODE); + // If the hash matches none of the system symbol hashes, then it's not in this symbol table + if (result != 0) return null; + + // If any hash matched (or the product overflowed to zero), we'll fall back to checking equality. In JDK 8, this compiles + // to a lookup table based on the string's hashcode, and then checks for equality, so (unlike repeated + // if/else) it doesn't result in a full string comparison for every one of the declared symbols. + switch (text) { + case ION: + return ION_TOKEN; + case ION_1_0: + return ION_1_0_TOKEN; + case ION_SYMBOL_TABLE: + return ION_SYMBOL_TABLE_TOKEN; + case NAME: + return NAME_TOKEN; + case VERSION: + return VERSION_TOKEN; + case IMPORTS: + return IMPORTS_TOKEN; + case SYMBOLS: + return SYMBOLS_TOKEN; + case MAX_ID: + return MAX_ID_TOKEN; + case ION_SHARED_SYMBOL_TABLE: + return ION_SHARED_SYMBOL_TABLE_TOKEN; + default: + return null; + } + } + + public Iterator iterateDeclaredSymbolNames() { + return new Ion_1_0_SystemSymbolIterator(); + } + + /** + * Rather than fetch an iterator for a list, we can define an iterator that is hard coded to iterate only the + * declared symbols for Ion 1.0. This is less indirection than iterating a List of SymbolTokens. 
+ */ + private static class Ion_1_0_SystemSymbolIterator implements Iterator { + private int i = 0; + + public boolean hasNext() { + return i < ION_1_0_MAX_ID; + } + + public String next() { + if (i == ION_1_0_MAX_ID) throw new NoSuchElementException(); + return staticFindKnownSymbol(++i); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/SharedSymbolTable.java b/src/main/java/com/amazon/ion/impl/SharedSymbolTable.java index 02051143a5..1b5dd0f89e 100644 --- a/src/main/java/com/amazon/ion/impl/SharedSymbolTable.java +++ b/src/main/java/com/amazon/ion/impl/SharedSymbolTable.java @@ -50,40 +50,10 @@ final class SharedSymbolTable implements SymbolTable { - /** - * The array of system symbols as defined by Ion 1.0. - */ - private static final String[] SYSTEM_SYMBOLS = - { - SystemSymbols.ION, - SystemSymbols.ION_1_0, - SystemSymbols.ION_SYMBOL_TABLE, - SystemSymbols.NAME, - SystemSymbols.VERSION, - SystemSymbols.IMPORTS, - SystemSymbols.SYMBOLS, - SystemSymbols.MAX_ID, - SystemSymbols.ION_SHARED_SYMBOL_TABLE - }; - /** * The singleton instance of Ion 1.0 system symbol table. - *

- * TODO amazon-ion/ion-java/issues/34 Optimize system symtabs by using our custom backing impl. */ - private static final SymbolTable ION_1_0_SYSTEM_SYMTAB; - static - { - Map systemSymbolsMap = new HashMap(); - - for (int i = 0; i < SYSTEM_SYMBOLS.length; i++) - { - systemSymbolsMap.put(SYSTEM_SYMBOLS[i], i+1); - } - - ION_1_0_SYSTEM_SYMTAB = - new SharedSymbolTable(ION, 1, SYSTEM_SYMBOLS, systemSymbolsMap); - } + private static final SymbolTable ION_1_0_SYSTEM_SYMTAB = Ion_1_0_SystemSymbolTable.INSTANCE; /** * The name of this shared symbol table. If this is a system symbol diff --git a/src/main/java/com/amazon/ion/impl/bin/AbstractSymbolTable.java b/src/main/java/com/amazon/ion/impl/bin/AbstractSymbolTable.java index a014440c57..add0e6fd6e 100644 --- a/src/main/java/com/amazon/ion/impl/bin/AbstractSymbolTable.java +++ b/src/main/java/com/amazon/ion/impl/bin/AbstractSymbolTable.java @@ -35,7 +35,7 @@ /** * Provides the basic implementation bits for {@link SymbolTable}. */ -/*package*/ abstract class AbstractSymbolTable implements SymbolTable +public abstract class AbstractSymbolTable implements SymbolTable { private final String name; private final int version; diff --git a/src/main/java/com/amazon/ion/impl/bin/Symbols.java b/src/main/java/com/amazon/ion/impl/bin/Symbols.java index 8821a0d387..f9ea695ce0 100644 --- a/src/main/java/com/amazon/ion/impl/bin/Symbols.java +++ b/src/main/java/com/amazon/ion/impl/bin/Symbols.java @@ -15,40 +15,20 @@ package com.amazon.ion.impl.bin; -import static com.amazon.ion.SystemSymbols.IMPORTS; -import static com.amazon.ion.SystemSymbols.IMPORTS_SID; -import static com.amazon.ion.SystemSymbols.ION; -import static com.amazon.ion.SystemSymbols.ION_1_0; import static com.amazon.ion.SystemSymbols.ION_1_0_MAX_ID; -import static com.amazon.ion.SystemSymbols.ION_1_0_SID; -import static com.amazon.ion.SystemSymbols.ION_SHARED_SYMBOL_TABLE; -import static com.amazon.ion.SystemSymbols.ION_SHARED_SYMBOL_TABLE_SID; -import static 
com.amazon.ion.SystemSymbols.ION_SID; -import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE; -import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID; -import static com.amazon.ion.SystemSymbols.MAX_ID; -import static com.amazon.ion.SystemSymbols.MAX_ID_SID; -import static com.amazon.ion.SystemSymbols.NAME; -import static com.amazon.ion.SystemSymbols.NAME_SID; -import static com.amazon.ion.SystemSymbols.SYMBOLS; -import static com.amazon.ion.SystemSymbols.SYMBOLS_SID; -import static com.amazon.ion.SystemSymbols.VERSION; -import static com.amazon.ion.SystemSymbols.VERSION_SID; -import static java.util.Arrays.asList; import static java.util.Collections.unmodifiableList; -import static java.util.Collections.unmodifiableMap; -import com.amazon.ion.IonException; import com.amazon.ion.SymbolTable; import com.amazon.ion.SymbolToken; +import com.amazon.ion.impl.Ion_1_0_SystemSymbolTable; import com.amazon.ion.impl._Private_Utils; import java.util.Collection; -import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.Map; import java.util.NoSuchElementException; +import java.util.stream.Collectors; +import java.util.stream.IntStream; /** * Utilities for dealing with {@link SymbolToken} and {@link SymbolTable}. @@ -65,40 +45,10 @@ public static SymbolToken symbol(final String name, final int val) return _Private_Utils.newSymbolToken(name, val); } - /** Lazy iterator over the symbol names of an iterator of symbol tokens. 
*/ - public static Iterator symbolNameIterator(final Iterator tokenIter) - { - return new Iterator() - { - public boolean hasNext() - { - return tokenIter.hasNext(); - } - - public String next() - { - return tokenIter.next().getText(); - } - - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - private static final List SYSTEM_TOKENS = unmodifiableList( - asList( - symbol(ION, ION_SID) - , symbol(ION_1_0, ION_1_0_SID) - , symbol(ION_SYMBOL_TABLE, ION_SYMBOL_TABLE_SID) - , symbol(NAME, NAME_SID) - , symbol(VERSION, VERSION_SID) - , symbol(IMPORTS, IMPORTS_SID) - , symbol(SYMBOLS, SYMBOLS_SID) - , symbol(MAX_ID, MAX_ID_SID) - , symbol(ION_SHARED_SYMBOL_TABLE, ION_SHARED_SYMBOL_TABLE_SID) - ) + IntStream.rangeClosed(1, ION_1_0_MAX_ID) + .mapToObj(Ion_1_0_SystemSymbolTable::staticFindKnownSymbolToken) + .collect(Collectors.toList()) ); /** Returns a symbol token for a system SID. */ @@ -107,105 +57,13 @@ public static SymbolToken systemSymbol(final int sid) { { throw new IllegalArgumentException("No such system SID: " + sid); } - return SYSTEM_TOKENS.get(sid - 1); - } - - private static final Map SYSTEM_TOKEN_MAP; - static { - final Map symbols = new HashMap(); - for (final SymbolToken token : SYSTEM_TOKENS) - { - symbols.put(token.getText(), token); - } - SYSTEM_TOKEN_MAP = unmodifiableMap(symbols); + return Ion_1_0_SystemSymbolTable.staticFindKnownSymbolToken(sid); } - private static SymbolTable SYSTEM_SYMBOL_TABLE = new AbstractSymbolTable(ION, 1) - { - public SymbolTable[] getImportedTables() - { - return null; - } - - public int getImportedMaxId() - { - return 0; - } - - public boolean isSystemTable() - { - return true; - } - - public boolean isSubstitute() - { - return false; - } - - public boolean isSharedTable() - { - return true; - } - - public boolean isReadOnly() - { - return true; - } - - public boolean isLocalTable() - { - return false; - } - - public SymbolToken intern(final String text) - { - final SymbolToken token = 
SYSTEM_TOKEN_MAP.get(text); - if (token == null) - { - throw new IonException("Cannot intern new symbol into system symbol table"); - } - return token; - } - - public String findKnownSymbol(int id) - { - if (id < 1) - { - throw new IllegalArgumentException("SID cannot be less than 1: " + id); - } - if (id > ION_1_0_MAX_ID) - { - return null; - } - - return SYSTEM_TOKENS.get(id - 1).getText(); - } - - public SymbolToken find(String text) - { - return SYSTEM_TOKEN_MAP.get(text); - } - - public SymbolTable getSystemSymbolTable() - { - return this; - } - - public int getMaxId() - { - return ION_1_0_MAX_ID; - } - - public Iterator iterateDeclaredSymbolNames() - { - return symbolNameIterator(SYSTEM_TOKENS.iterator()); - } - }; - /** Returns a representation of the system symbol table. */ public static SymbolTable systemSymbolTable() { - return SYSTEM_SYMBOL_TABLE; + return Ion_1_0_SystemSymbolTable.INSTANCE; } /** Returns the system symbols as a collection. */