From 9cb21dbfc9c321a985e6839f237c68ea88c5bdeb Mon Sep 17 00:00:00 2001
From: Harald Kuhr
Date: Thu, 1 Mar 2012 15:22:07 +0100
Subject: [PATCH] Added UUID factory for creating various Version 1 and Version 5 UUIDs.

---
 .../com/twelvemonkeys/util/UUIDFactory.java   | 350 ++++++++++++++++++
 .../twelvemonkeys/util/UUIDFactoryTest.java   | 258 +++++++++++++
 2 files changed, 608 insertions(+)
 create mode 100644 sandbox/sandbox-common/src/main/java/com/twelvemonkeys/util/UUIDFactory.java
 create mode 100644 sandbox/sandbox-common/src/test/java/com/twelvemonkeys/util/UUIDFactoryTest.java

diff --git a/sandbox/sandbox-common/src/main/java/com/twelvemonkeys/util/UUIDFactory.java b/sandbox/sandbox-common/src/main/java/com/twelvemonkeys/util/UUIDFactory.java
new file mode 100644
index 00000000..ef8fb9ca
--- /dev/null
+++ b/sandbox/sandbox-common/src/main/java/com/twelvemonkeys/util/UUIDFactory.java
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2012, Harald Kuhr
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name "TwelveMonkeys" nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package com.twelvemonkeys.util;
+
+import com.twelvemonkeys.lang.StringUtil;
+
+import java.net.NetworkInterface;
+import java.net.SocketException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.security.SecureRandom;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * A factory for creating UUIDs not directly supported by {@link java.util.UUID}.
+ * This class can create
+ * version 1 (time based, using either MAC aka IEEE 802 address or Random "node" value)
+ * and version 5 (SHA1 hash based) UUIDs.
+ *
+ * @author Harald Kuhr
+ * @author last modified by $Author: haraldk$
+ * @version $Id: UUIDFactory.java,v 1.0 27.02.12 09:45 haraldk Exp$
+ *
+ * @see <a href="http://tools.ietf.org/html/rfc4122">RFC 4122</a>
+ * @see <a href="http://en.wikipedia.org/wiki/Universally_unique_identifier">Wikipedia</a>
+ * @see java.util.UUID
+ */
+public class UUIDFactory {
+    private static final String NODE_PROPERTY = "com.twelvemonkeys.util.UUID.node";
+
+    /**
+     * Nil UUID: {@code "00000000-0000-0000-0000-000000000000"}.
+     *
+     * The nil UUID is a special form of UUID that is specified to have all
+     * 128 bits set to zero.
+     *
+     * @see <a href="http://tools.ietf.org/html/rfc4122#section-4.1.7">RFC 4122</a>
+     */
+    public static final UUID NIL = new UUID(0L, 0L);
+
+    private static final SecureRandom SECURE_RANDOM = new SecureRandom();
+
+    // Assumes MAC address is constant, which it may not be on clients moving from ethernet to wifi etc...
+    // TODO: Update at some interval
+    static final long MAC_ADDRESS_NODE = getMacAddressNode();
+
+    static final long SECURE_RANDOM_NODE = getSecureRandomNode();
+
+    private static long getSecureRandomNode() {
+        /*
+        Obtain a 47-bit cryptographic quality random
+        number and use it as the low 47 bits of the node ID, with the least
+        significant bit of the first octet of the node ID set to one. This
+        bit is the unicast/multicast bit, which will never be set in IEEE 802
+        addresses obtained from network cards. Hence, there can never be a
+        conflict between UUIDs generated by machines with and without network
+        cards. (Recall that the IEEE 802 spec talks about transmission
+        order)
+        */
+
+        /*
+        In addition, items such as the computer's name and the name of the
+        operating system, while not strictly speaking random, will help
+        differentiate the results from those obtained by other systems.
+
+        The exact algorithm to generate a node ID using these data is system
+        specific, because both the data available and the functions to obtain
+        them are often very system specific. A generic approach, however, is
+        to accumulate as many sources as possible into a buffer, use a
+        message digest such as MD5 [4] or SHA-1 [8], take an arbitrary 6
+        bytes from the hash value, and set the multicast bit as described
+        above.
+        */
+
+        // TODO: Verify that nextLong is still cryptographically strong after the bit masking
+        // TODO: Consider using the hashing approach above
+
+        return (1L << 40) | (SECURE_RANDOM.nextLong() & 0xffffffffffffL);
+    }
+
+    private static long getMacAddressNode() {
+        long[] addressNodes;
+
+        String nodeProperty = System.getProperty(NODE_PROPERTY);
+
+        // Read mac address/node from system property, to allow user-specified node addresses.
+        if (!StringUtil.isEmpty(nodeProperty)) {
+            addressNodes = parseMacAddressNodes(nodeProperty);
+        }
+        else {
+            addressNodes = MacAddressFinder.getMacAddressNodes();
+        }
+
+        // TODO: The UUID spec allows us to use multiple nodes, when available, to create more UUIDs per time unit...
+        // For example in a round robin fashion?
+        return addressNodes != null && addressNodes.length > 0 ? addressNodes[0] : -1;
+    }
+
+    static long[] parseMacAddressNodes(final String nodeProperty) {
+        // Parse comma-separated list of MAC addresses in the format 00:11:22:33:44:55 / 00-11-22-33-44-55
+        String[] nodesStrings = nodeProperty.trim().split(",\\W*");
+        long[] addressNodes = new long[nodesStrings.length];
+
+        for (int i = 0, nodesStringsLength = nodesStrings.length; i < nodesStringsLength; i++) {
+            String nodesString = nodesStrings[i];
+
+            try {
+                String[] nodes = nodesString.split("(?<=(^|\\W)[0-9a-fA-F]{2})\\W(?=[0-9a-fA-F]{2}($|\\W))", 6);
+
+                long nodeAddress = 0;
+
+                // Network byte order
+                nodeAddress |= (long) (Integer.parseInt(nodes[0], 16) & 0xff) << 40;
+                nodeAddress |= (long) (Integer.parseInt(nodes[1], 16) & 0xff) << 32;
+                nodeAddress |= (long) (Integer.parseInt(nodes[2], 16) & 0xff) << 24;
+                nodeAddress |= (long) (Integer.parseInt(nodes[3], 16) & 0xff) << 16;
+                nodeAddress |= (long) (Integer.parseInt(nodes[4], 16) & 0xff) << 8;
+                nodeAddress |= (long) (Integer.parseInt(nodes[5], 16) & 0xff);
+
+                addressNodes[i] = nodeAddress;
+            }
+            catch (RuntimeException e) {
+                // May be NumberFormatException from parseInt or ArrayIndexOutOfBoundsException from nodes array
+                NumberFormatException formatException = new NumberFormatException(String.format("Bad IEEE 802 node address: '%s' (from system property %s)", nodesString, NODE_PROPERTY));
+                formatException.initCause(e);
+                throw formatException;
+            }
+        }
+
+        return addressNodes;
+    }
+
+    // See also http://tools.ietf.org/html/rfc4122#appendix-B
+    // See http://tools.ietf.org/html/rfc4122: 4.3. Algorithm for Creating a Name-Based UUID
+    // TODO: Naming (of the method)
+    // TODO: Read up on creating these UUIDs in RFC, mentions something about UUID for namespace as input..?
+    static UUID nameUUIDFromBytesSHA1(byte[] name) {
+        // Based on code from OpenJDK UUID#nameUUIDFromBytes + private byte[] constructor
+        MessageDigest md;
+
+        try {
+            md = MessageDigest.getInstance("SHA1");
+        }
+        catch (NoSuchAlgorithmException nsae) {
+            throw (InternalError) new InternalError("SHA1 not supported").initCause(nsae);
+        }
+
+        byte[] sha1Bytes = md.digest(name);
+        sha1Bytes[6] &= 0x0f;  /* clear version        */
+        sha1Bytes[6] |= 0x50;  /* set to version 5     */
+        sha1Bytes[8] &= 0x3f;  /* clear variant        */
+        sha1Bytes[8] |= 0x80;  /* set to IETF variant  */
+
+        long msb = 0;
+        long lsb = 0;
+
+        // NOTE: According to RFC 4122, only first 16 bytes are used, meaning
+        // bytes 17-20 in the 160 bit SHA1 hash are simply discarded in this case...
+        for (int i = 0; i < 8; i++) {
+            msb = (msb << 8) | (sha1Bytes[i] & 0xff);
+        }
+        for (int i = 8; i < 16; i++) {
+            lsb = (lsb << 8) | (sha1Bytes[i] & 0xff);
+        }
+
+        return new UUID(msb, lsb);
+    }
+
+    // Creates version 1 node based UUIDs as specified in RFC 4122
+    // See http://tools.ietf.org/html/rfc4122#appendix-B
+    // See http://en.wikipedia.org/wiki/MAC_address
+    // TODO: Naming (of the method)
+    static UUID timeNodeBasedV1() {
+        if (MAC_ADDRESS_NODE == -1) {
+            // TODO: OR fall back to Random??
+            throw new IllegalStateException("Could not determine IEEE 802 (mac) address for node");
+        }
+
+        return new UUID(createTimeAndVersion(), createClockSeqAndNode(MAC_ADDRESS_NODE));
+    }
+
+    // Creates version 1 "node" based UUIDs, using 47 bit secure random number + lsb of first octet
+    // (unicast/multicast bit) set to 1 as described in RFC 4122: 4.5. Node IDs that Do Not Identify the Host
+    // See http://tools.ietf.org/html/rfc4122#appendix-B
+    // TODO: Naming (of the method)
+    // TODO: Document that these can never clash with node based v1 UUIDs due to unicast/multicast bit
+    static UUID timeRandomBasedV1() {
+        return new UUID(createTimeAndVersion(), createClockSeqAndNode(SECURE_RANDOM_NODE));
+    }
+
+    // TODO: Version 2 UUID?
+    /*
+    Version 2 UUIDs are similar to Version 1 UUIDs, with the upper byte of the clock sequence replaced by the
+    identifier for a "local domain" (typically either the "POSIX UID domain" or the "POSIX GID domain")
+    and the first 4 bytes of the timestamp replaced by the user's POSIX UID or GID (with the "local domain"
+    identifier indicating which it is).[2][3]
+    */
+
+    private static long createClockSeqAndNode(final long node) {
+        // Variant (2) + Clock seq high and low + node
+        return 0x8000000000000000L | (Clock.getClockSequence() << 48) | (node & 0xffffffffffffL);
+    }
+
+    private static long createTimeAndVersion() {
+        long clockTime = Clock.currentTimeHundredNanos();
+
+        long time = clockTime << 32;                   // Time low
+        time |= (clockTime & 0xFFFF00000000L) >> 16;   // Time mid
+        time |= ((clockTime >> 48) & 0x0FFF);          // Time high
+        time |= 0x1000;                                // Version (1)
+
+        return time;
+    }
+
+    // TODO: Implement compare as spec'ed, see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7025832
+    // - Probably create a comparator
+
+    /**
+     * A high-resolution timer for use in creating version 1 UUIDs.
+     */
+    static final class Clock {
+        // Java: 0:00, Jan. 1st, 1970 vs UUID: 0:00, Oct 15th, 1582
+        private static final long JAVA_EPOCH_OFFSET = 122192928000000000L;
+
+        private static int clockSeq = SECURE_RANDOM.nextInt();
+
+        private static long initialNanos;
+        private static long initialTime;
+
+        private static long lastMeasuredTime;
+        private static long lastTime;
+
+        static {
+            initClock();
+        }
+
+        private static void initClock() {
+            long millis = System.currentTimeMillis();
+            long nanos = System.nanoTime();
+
+            initialTime = JAVA_EPOCH_OFFSET + millis * 10000 + (nanos / 100) % 10000;
+            initialNanos = nanos;
+        }
+
+        public static synchronized long currentTimeHundredNanos() {
+            // Measure delta since init and compute accurate time
+            long time;
+
+            while ((time = initialTime + (System.nanoTime() - initialNanos) / 100) < lastMeasuredTime) {
+                // Reset clock seq (should happen VERY rarely)
+                initClock();
+                clockSeq++;
+            }
+
+            lastMeasuredTime = time;
+
+            if (time <= lastTime) {
+                // This typically means the clock isn't accurate enough, use auto-incremented time.
+                // It is possible that more timestamps than available are requested for
+                // each time unit in the system clock, but that is extremely unlikely.
+                // TODO: RFC 4122 says we should wait in the case of too many requests...
+                time = ++lastTime;
+            }
+            else {
+                lastTime = time;
+            }
+
+            return time;
+        }
+
+        public static synchronized long getClockSequence() {
+            return clockSeq & 0x3fff;
+        }
+    }
+
+    /**
+     * Static inner class for 1.5 compatibility.
+     */
+    static final class MacAddressFinder {
+        public static long[] getMacAddressNodes() {
+            List<Long> nodeAddresses = new ArrayList<Long>();
+            try {
+                Enumeration<NetworkInterface> interfaces = NetworkInterface.getNetworkInterfaces();
+
+                if (interfaces != null) {
+                    while (interfaces.hasMoreElements()) {
+                        NetworkInterface nic = interfaces.nextElement();
+
+                        if (!nic.isVirtual()) {
+                            long nodeAddress = 0;
+
+                            byte[] hardware = nic.getHardwareAddress(); // 1.6 method
+
+                            if (hardware != null && hardware.length == 6 && hardware[1] != (byte) 0xff) {
+                                // Network byte order
+                                nodeAddress |= (long) (hardware[0] & 0xff) << 40;
+                                nodeAddress |= (long) (hardware[1] & 0xff) << 32;
+                                nodeAddress |= (long) (hardware[2] & 0xff) << 24;
+                                nodeAddress |= (long) (hardware[3] & 0xff) << 16;
+                                nodeAddress |= (long) (hardware[4] & 0xff) << 8;
+                                nodeAddress |= (long) (hardware[5] & 0xff);
+
+                                nodeAddresses.add(nodeAddress);
+                            }
+                        }
+                    }
+                }
+            }
+            catch (SocketException ex) {
+                return null;
+            }
+
+            long[] unwrapped = new long[nodeAddresses.size()];
+            for (int i = 0, nodeAddressesSize = nodeAddresses.size(); i < nodeAddressesSize; i++) {
+                unwrapped[i] = nodeAddresses.get(i);
+            }
+
+            return unwrapped;
+        }
+    }
+}
diff --git a/sandbox/sandbox-common/src/test/java/com/twelvemonkeys/util/UUIDFactoryTest.java b/sandbox/sandbox-common/src/test/java/com/twelvemonkeys/util/UUIDFactoryTest.java
new file mode 100644
index 00000000..cecce632
--- /dev/null
+++ b/sandbox/sandbox-common/src/test/java/com/twelvemonkeys/util/UUIDFactoryTest.java
@@ -0,0 +1,258 @@
+package com.twelvemonkeys.util;
+
+import org.junit.Test;
+
+import java.io.UnsupportedEncodingException;
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.UUID;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
+import static org.junit.Assert.*;
+
+public class UUIDFactoryTest {
+
+    // Nil UUID
+
+    @Test
+    public void testNilUUID() {
+        UUID nil = UUIDFactory.NIL;
+        UUID a = UUID.fromString("00000000-0000-0000-0000-000000000000");
+        UUID b = new UUID(0L, 0L);
+
+        assertEquals(nil, b);
+        assertEquals(nil, a);
+        assertEquals(a, b);
+
+        assertEquals(0, nil.variant());
+        assertEquals(0, nil.version());
+    }
+
+    // Version 3 UUIDs (for comparison with v5)
+
+    @Test
+    public void testVersion3NameBasedMD5() throws UnsupportedEncodingException {
+        String name = "http://www.example.com/uuid/";
+        UUID a = UUID.nameUUIDFromBytes(name.getBytes("UTF-8"));
+        assertEquals(3, a.version());
+        assertEquals(2, a.variant());
+
+        UUID b = UUID.nameUUIDFromBytes(name.getBytes("UTF-8"));
+        assertEquals(a, b);
+
+        assertFalse(a.equals(UUIDFactory.nameUUIDFromBytesSHA1(name.getBytes("UTF-8"))));
+
+        assertEquals(a, UUID.fromString(a.toString()));
+    }
+
+    // Version 5 UUIDs
+
+    @Test
+    public void testVersion5NameBasedSHA1() throws UnsupportedEncodingException {
+        String name = "http://www.example.com/uuid/";
+        UUID a = UUIDFactory.nameUUIDFromBytesSHA1(name.getBytes("UTF-8"));
+        assertEquals(5, a.version());
+        assertEquals(2, a.variant());
+
+        UUID b = UUIDFactory.nameUUIDFromBytesSHA1(name.getBytes("UTF-8"));
+        assertEquals(a, b);
+
+        assertFalse(a.equals(UUID.nameUUIDFromBytes(name.getBytes("UTF-8"))));
+        assertEquals(a, UUID.fromString(a.toString()));
+    }
+
+    // Version 1 UUIDs
+
+    @Test
+    public void testVersion1NodeBased() {
+        UUID uuid = UUIDFactory.timeNodeBasedV1();
+        System.err.println("uuid: " + uuid);
+
+        assertEquals(1, uuid.version());
+        assertEquals(2, uuid.variant());
+
+        assertEquals(UUIDFactory.MAC_ADDRESS_NODE, uuid.node());
+        // TODO: Test that this is actually a Mac address from the local computer, or specified through system property
+
+        assertEquals(uuid, UUID.fromString(uuid.toString()));
+
+        assertEquals(UUIDFactory.Clock.getClockSequence(), uuid.clockSequence());
+        // Test time fields (within reasonable limits +/- 100 ms or so?)
+        // TODO: Compare with system clock for sloppier resolution
+        assertEquals(UUIDFactory.Clock.currentTimeHundredNanos(), uuid.timestamp(), 1e6);
+
+        assertEquals(0, (uuid.node() >> 40) & 1);
+    }
+
+    @Test
+    public void testVersion1NodeBasedUnique() {
+        UUID a = UUIDFactory.timeNodeBasedV1();
+        UUID b = UUIDFactory.timeNodeBasedV1();
+
+        System.err.println("a: " + a);
+        System.err.println("b: " + b);
+
+        assertFalse(a.equals(b));
+    }
+
+    @Test
+    public void testVersion1SecureRandom() {
+        UUID uuid = UUIDFactory.timeRandomBasedV1();
+        System.err.println("uuid: " + uuid);
+
+        assertEquals(1, uuid.version());
+        assertEquals(2, uuid.variant());
+
+        assertEquals(UUIDFactory.SECURE_RANDOM_NODE, uuid.node());
+
+        assertEquals(uuid, UUID.fromString(uuid.toString()));
+
+        assertEquals(UUIDFactory.Clock.getClockSequence(), uuid.clockSequence());
+
+        // TODO: Test time fields (within reasonable limits +/- 100 ms or so?)
+        assertEquals(UUIDFactory.Clock.currentTimeHundredNanos(), uuid.timestamp(), 1e6);
+
+        assertEquals(1, (uuid.node() >> 40) & 1);
+    }
+
+    @Test
+    public void testVersion1SecureRandomUnique() {
+        UUID a = UUIDFactory.timeRandomBasedV1();
+        UUID b = UUIDFactory.timeRandomBasedV1();
+
+        System.err.println("a: " + a);
+        System.err.println("b: " + b);
+
+        assertFalse(a.equals(b));
+    }
+
+    // Clock tests
+
+    @Test(timeout = 10000L)
+    public void testClock() throws InterruptedException {
+        final long[] times = new long[100000];
+
+        ExecutorService service = Executors.newFixedThreadPool(20);
+        for (int i = 0; i < times.length; i++) {
+            final int index = i;
+
+            service.submit(new Runnable() {
+                public void run() {
+                    times[index] = UUIDFactory.Clock.currentTimeHundredNanos();
+                }
+            });
+        }
+
+        service.shutdown();
+        assertTrue("Execution timed out", service.awaitTermination(10, TimeUnit.SECONDS));
+
+        Arrays.sort(times);
+
+        for (int i = 0, timesLength = times.length; i < timesLength; i++) {
+            if (i == 0) {
+                continue;
+            }
+
+            assertFalse(String.format("times[%d] == times[%d]: 0x%016x", i - 1, i, times[i]), times[i - 1] == times[i]);
+        }
+    }
+
+    @Test(timeout = 10000L)
+    public void testClockSkew() throws InterruptedException {
+        long clockSequence = UUIDFactory.Clock.getClockSequence();
+
+        ExecutorService service = Executors.newFixedThreadPool(10);
+        for (int i = 0; i < 100000; i++) {
+            service.submit(new Runnable() {
+                public void run() {
+                    UUIDFactory.Clock.currentTimeHundredNanos();
+                }
+            });
+        }
+
+        service.shutdown();
+        assertTrue("Execution timed out", service.awaitTermination(10, TimeUnit.SECONDS));
+
+        assertEquals(clockSequence, UUIDFactory.Clock.getClockSequence(), 1); // Verify that clock skew doesn't happen "often"
+    }
+
+    // Tests for node address system property
+
+    @Test
+    public void testParseNodeAddressesSingle() {
+        long[] nodes = UUIDFactory.parseMacAddressNodes("00:11:22:33:44:55");
+
+        assertEquals(1, nodes.length);
+        assertEquals(0x001122334455L, nodes[0]);
+    }
+
+    @Test
+    public void testParseNodeAddressesSingleWhitespace() {
+        long[] nodes = UUIDFactory.parseMacAddressNodes(" 00:11:22:33:44:55\r\n");
+
+        assertEquals(1, nodes.length);
+        assertEquals(0x001122334455L, nodes[0]);
+    }
+
+    @Test
+    public void testParseNodeAddressesMulti() {
+        long[] nodes = UUIDFactory.parseMacAddressNodes("00:11:22:33:44:55, aa:bb:cc:dd:ee:ff, \n\t 0a-1b-2c-3d-4e-5f,");
+
+        assertEquals(3, nodes.length);
+        assertEquals(0x001122334455L, nodes[0]);
+        assertEquals(0xaabbccddeeffL, nodes[1]);
+        assertEquals(0x0a1b2c3d4e5fL, nodes[2]);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testParseNodeAddressesNull() {
+        UUIDFactory.parseMacAddressNodes(null);
+    }
+
+    @Test(expected = NumberFormatException.class)
+    public void testParseNodeAddressesEmpty() {
+        UUIDFactory.parseMacAddressNodes("");
+    }
+
+    @Test(expected = NumberFormatException.class)
+    public void testParseNodeAddressesNonAddress() {
+        UUIDFactory.parseMacAddressNodes("127.0.0.1");
+    }
+
+    @Test(expected = NumberFormatException.class)
+    public void testParseNodeAddressesBadAddress() {
+        UUIDFactory.parseMacAddressNodes("00a:11:22:33:44:55");
+    }
+
+    @Test(expected = NumberFormatException.class)
+    public void testParseNodeAddressesBadAddress4() {
+        long[] longs = UUIDFactory.parseMacAddressNodes("00:11:22:33:44:550");
+        System.err.println("Long: " + Long.toHexString(longs[0]));
+    }
+
+    @Test(expected = NumberFormatException.class)
+    public void testParseNodeAddressesBadAddress2() {
+        UUIDFactory.parseMacAddressNodes("0x:11:22:33:44:55");
+    }
+
+    @Test(expected = NumberFormatException.class)
+    public void testParseNodeAddressesBadAddress3() {
+        UUIDFactory.parseMacAddressNodes("00:11:22:33:44:55:99");
+    }
+
+    // Various testing
+
+    @Test
+    public void testOracleSYS_GUID() {
+        String str = "AEB87F28E222D08AE043803BD559D08A";
+        BigInteger bigInteger = new BigInteger(str, 16); // ALT: Create byte array of every 2 chars.
+        long msb = bigInteger.shiftRight(64).longValue();
+        long lsb = bigInteger.longValue();
+        UUID uuid = new UUID(msb, lsb);
+        System.err.println("uuid: " + uuid);
+        System.err.println("uuid.variant(): " + uuid.variant());
+        System.err.println("uuid.version(): " + uuid.version());
+    }
+}
\ No newline at end of file