TMI-TIFF: Faster LZW encoder.

This commit is contained in:
Harald Kuhr 2015-04-30 10:21:35 +02:00
parent 051a1dcb5b
commit 1a8948ece9

View File

@ -33,21 +33,22 @@ import com.twelvemonkeys.io.enc.Encoder;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.Map; import java.util.Arrays;
import java.util.TreeMap;
import static com.twelvemonkeys.imageio.plugins.tiff.LZWDecoder.LZWString;
/** /**
* LZWEncoder * LZWEncoder
* <p/>
* Inspired by LZWTreeEncoder by <a href="mailto:yuwen_66@yahoo.com">Wen Yu</a> and the
* <a href="http://gingko.homeip.net/docs/file_formats/lzwgif.html#bob">algorithm described by Bob Montgomery</a>
* which
* "[...] uses a tree method to search if a new string is already in the table,
* which is much simpler, faster, and easier to understand than hashing."
* *
* @author <a href="mailto:harald.kuhr@gmail.com">Harald Kuhr</a> * @author <a href="mailto:harald.kuhr@gmail.com">Harald Kuhr</a>
* @author last modified by $Author: haraldk$ * @author last modified by $Author: haraldk$
* @version $Id: LZWEncoder.java,v 1.0 02.12.13 14:13 haraldk Exp$ * @version $Id: LZWEncoder.java,v 1.0 02.12.13 14:13 haraldk Exp$
*/ */
final class LZWEncoder implements Encoder { final class LZWEncoder implements Encoder {
// TODO: Consider extracting LZWStringTable from LZWDecoder
/** Clear: Re-initialize tables. */ /** Clear: Re-initialize tables. */
static final int CLEAR_CODE = 256; static final int CLEAR_CODE = 256;
/** End of Information. */ /** End of Information. */
@ -58,16 +59,18 @@ final class LZWEncoder implements Encoder {
private static final int TABLE_SIZE = 1 << MAX_BITS; private static final int TABLE_SIZE = 1 << MAX_BITS;
private final LZWString[] table = new LZWString[TABLE_SIZE]; // A child is made up of a parent (or prefix) code plus a suffix byte
private final Map<LZWString, Integer> reverseTable = new TreeMap<>(); // This is foobar // and siblings are strings with a common parent(or prefix) and different
// private final Map<LZWString, Integer> reverseTable = new HashMap<>(TABLE_SIZE); // This is foobar // suffix bytes
private int tableLength; private final short[] CHILDREN = new short[TABLE_SIZE];
LZWString omega = LZWString.EMPTY; private final short[] SIBLINGS = new short[TABLE_SIZE];
private final short[] SUFFIXES = new short[TABLE_SIZE];
int bitsPerCode; // Initial setup
private int oldCode = CLEAR_CODE; private int parent = -1;
private int maxCode; private int bitsPerCode = MIN_BITS;
int bitMask; private int nextValidCode = EOI_CODE + 1;
private int maxCode = maxValue(bitsPerCode);
// Buffer for partial codes // Buffer for partial codes
private int bits = 0; private int bits = 0;
@ -76,120 +79,116 @@ final class LZWEncoder implements Encoder {
// Keep track of how many bytes we will write, to make sure we write EOI at correct position // Keep track of how many bytes we will write, to make sure we write EOI at correct position
private long remaining; private long remaining;
protected LZWEncoder(final int length) { LZWEncoder(final long length) {
this.remaining = length; remaining = length;
// First 258 entries of the table are always fixed
for (int i = 0; i < 256; i++) {
table[i] = new LZWString((byte) i);
}
init();
}
private void init() {
tableLength = 258;
bitsPerCode = MIN_BITS;
bitMask = bitmaskFor(bitsPerCode);
maxCode = maxCode();
reverseTable.clear();
} }
@Override
public void encode(final OutputStream stream, final ByteBuffer buffer) throws IOException { public void encode(final OutputStream stream, final ByteBuffer buffer) throws IOException {
// InitializeStringTable(); encodeBytes(stream, buffer);
// WriteCode(ClearCode);
// Ω = the empty string;
// for each character in the strip {
// K = GetNextCharacter();
// if Ω+K is in the string table {
// Ω = Ω+K;/* string concatenation */
// }
// else{
// WriteCode (CodeFromString( Ω));
// AddTableEntry(Ω+K);
// Ω=K;
// } }/*end of for loop*/
// WriteCode (CodeFromString(Ω));
// WriteCode (EndOfInformation);
if (remaining < 0) {
throw new IOException("Write past end of stream");
}
// TODO: Write 9 bit clear code ONLY first time!
if (oldCode == CLEAR_CODE) {
writeCode(stream, CLEAR_CODE);
}
int len = buffer.remaining();
while (buffer.hasRemaining()) {
byte k = buffer.get();
LZWString string = omega.concatenate(k);
int tableIndex = isInTable(string);
if (tableIndex >= 0) {
omega = string;
oldCode = tableIndex;
}
else {
writeCode(stream, oldCode);
addStringToTable(string);
oldCode = k & 0xff;
omega = table[k & 0xff];
// Handle table (almost) full
if (tableLength >= TABLE_SIZE - 2) {
writeCode(stream, CLEAR_CODE);
init();
}
}
}
remaining -= len;
// Write EOI when we are done (the API isn't very supportive of this)
if (remaining <= 0) { if (remaining <= 0) {
writeCode(stream, oldCode); // Write EOI when we are done (the API isn't very supportive of this at the moment)
writeCode(stream, parent);
writeCode(stream, EOI_CODE); writeCode(stream, EOI_CODE);
// Flush partial codes by writing 0 pad
if (bitPos > 0) { if (bitPos > 0) {
writeCode(stream, 0); writeCode(stream, 0);
} }
} }
} }
private int isInTable(final LZWString string) { void encodeBytes(final OutputStream stream, final ByteBuffer buffer) throws IOException {
if (string.length == 1) { int length = buffer.remaining();
return string.value & 0xff;
if (length == 0) {
return;
} }
Integer index = reverseTable.get(string); if (parent == -1) {
return index != null ? index : -1; // Init stream
writeCode(stream, CLEAR_CODE);
parent = buffer.get() & 0xff;
}
while (buffer.hasRemaining()) {
int value = buffer.get() & 0xff;
int child = CHILDREN[parent];
if (child > 0) {
if (SUFFIXES[child] == value) {
parent = child;
}
else {
int sibling = child;
while (true) {
if (SIBLINGS[sibling] > 0) {
sibling = SIBLINGS[sibling];
if (SUFFIXES[sibling] == value) {
parent = sibling;
break;
}
}
else {
SIBLINGS[sibling] = (short) nextValidCode;
SUFFIXES[nextValidCode] = (short) value;
writeCode(stream, parent);
parent = value;
nextValidCode++;
increaseCodeSizeOrResetIfNeeded(stream);
break;
}
}
}
}
else {
CHILDREN[parent] = (short) nextValidCode;
SUFFIXES[nextValidCode] = (short) value;
writeCode(stream, parent);
parent = value;
nextValidCode++;
increaseCodeSizeOrResetIfNeeded(stream);
}
}
remaining -= length;
} }
private int addStringToTable(final LZWString string) { private void increaseCodeSizeOrResetIfNeeded(final OutputStream stream) throws IOException {
final int index = tableLength++; if (nextValidCode > maxCode) {
table[index] = string; if (bitsPerCode == MAX_BITS) {
reverseTable.put(string, index); // Reset stream by writing Clear code
writeCode(stream, CLEAR_CODE);
if (tableLength > maxCode) { // Reset tables
bitsPerCode++; resetTables();
}
if (bitsPerCode > MAX_BITS) { else {
throw new IllegalStateException(String.format("TIFF LZW with more than %d bits per code encountered (table overflow)", MAX_BITS)); // Increase code size
bitsPerCode++;
maxCode = maxValue(bitsPerCode);
} }
bitMask = bitmaskFor(bitsPerCode);
maxCode = maxCode();
} }
}
return index; private void resetTables() {
Arrays.fill(CHILDREN, (short) 0);
Arrays.fill(SIBLINGS, (short) 0);
bitsPerCode = MIN_BITS;
maxCode = maxValue(bitsPerCode);
nextValidCode = EOI_CODE + 1;
} }
private void writeCode(final OutputStream stream, final int code) throws IOException { private void writeCode(final OutputStream stream, final int code) throws IOException {
// System.err.printf("LZWEncoder.writeCode: 0x%04x\n", code); // System.err.printf("LZWEncoder.writeCode: 0x%04x\n", code);
bits = (bits << bitsPerCode) | (code & bitMask); bits = (bits << bitsPerCode) | (code & maxCode);
bitPos += bitsPerCode; bitPos += bitsPerCode;
while (bitPos >= 8) { while (bitPos >= 8) {
@ -202,11 +201,11 @@ final class LZWEncoder implements Encoder {
bits &= bitmaskFor(bitPos); bits &= bitmaskFor(bitPos);
} }
private static int bitmaskFor(final int bits) { private static int maxValue(final int codeLen) {
return (1 << bits) - 1; return (1 << codeLen) - 1;
} }
protected int maxCode() { private static int bitmaskFor(final int bits) {
return bitMask; return maxValue(bits);
} }
} }