diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..b1810e4 Binary files /dev/null and b/.DS_Store differ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cca12fc --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +# standard gitignore +.DS_Store \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/ArduinoECCX08.iml b/.idea/ArduinoECCX08.iml new file mode 100644 index 0000000..d6ebd48 --- /dev/null +++ b/.idea/ArduinoECCX08.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..e208459 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..a8c3eb9 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/examples/.DS_Store b/examples/.DS_Store new file mode 100644 index 0000000..d792f30 Binary files /dev/null and b/examples/.DS_Store differ diff --git a/examples/AES-GCM/AES-GCM.ino b/examples/AES-GCM/AES-GCM.ino new file mode 100644 index 0000000..1ce301c --- /dev/null +++ b/examples/AES-GCM/AES-GCM.ino @@ -0,0 +1,174 @@ +#include +#include +#include +#include +#include + + +int slot = 0; +size_t ivLength = 12; +size_t textLength = 32; +const uint8_t originalPlainText[32] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0E, 0x0D, 0x0F,0x10, 0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F}; +uint8_t encrypted[32] = {0xff, 0xff,0xff, 0xff,0xff, 
0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff,0xff, 0xff}; +uint8_t decryptedPlainText[32]; +int senderKeySlot = 0; +int recieverKeySlot = 1; + +void printCipher(uint8_t *cipher, size_t cipherLength) +{ + for (int index = 0; index < cipherLength; index++){ + Serial.printf("%x ", cipher[index]); + } + Serial.println(""); +} + + + +void confirmSymmetry(const uint8_t *originalPlaintext, uint8_t *decryptedPlainText, size_t sizeOfText){ + bool Symmetry = true; + for(int index = 0; index < sizeOfText; index++){ + if(originalPlainText[index] != decryptedPlainText[index]){ + bool Symmetry = false; + break; + } + } + if (Symmetry == false){ + Serial.print("The original Plaintext is not the same as the decrypted plaintext"); + }else{ + Serial.print("The original plaintext matches the decrypted plaintext"); + } +} + +void wakeloop(){ + Serial.println("Waking up the ECCXO8"); + //tell ECCX08 to wake up, and if it doesn't KEEP WAKING IT UP. 
+ int ECCAwake = 0; + ECCAwake = ECCX08.begin(); + while(!ECCAwake){ + Serial.println("ECCX08 chip did not wake up"); + delay(500); + ECCAwake = ECCX08.begin(); + } +} + +void setup() { + Serial.begin(9600); + while(!Serial); + Serial.println("Getting things started"); + //encryption stuff + int senderSlot = 1; + int receiverSlot = 0; + int tempKeySlot = 0xFFFF; + uint8_t mode = 0b00001000; + uint8_t tag[5] = {0x00, 0x51,0x32, 0xaa, 0x00}; + + + + + byte receiverPublicKey[64]; + + wakeloop(); + + + + + Serial.printf("\nMaking a random IV: "); + const uint8_t iv[] = {ECCX08.random(0xFF), ECCX08.random(0xFF), ECCX08.random(0xFF), ECCX08.random(0xFF), ECCX08.random(0xFF), ECCX08.random(0xFF), ECCX08.random(0xFF), ECCX08.random(0xFF), ECCX08.random(0xFF), ECCX08.random(0xFF), ECCX08.random(0xFF), ECCX08.random(0xFF)}; + for(int index = 0; index < 12; index++){ + Serial.printf("%x ", iv[index]); + } + + int receiverKeyReport = ECCX08.generatePublicKey(recieverKeySlot,receiverPublicKey); + Serial.printf("receiver key reports: %d\n", receiverKeyReport); + Serial.printf("\nGenerating an ECDH secret from the sender's private key and the reciever's public key\n"); + byte dumbyKey[64]; + int dumbKeySuccess = ECCX08.generatePublicKey(0xFFFF, dumbyKey);//ecdhKeyGen only works if you've previously generated a key into the Tempkey slot. not sure why. 
+ int ecdhSuccess = ECCX08.ecdhKeyGen(0b00001000, senderKeySlot, receiverPublicKey); + if(ecdhSuccess ==0){ + Serial.printf("\nECDH keygen ran\n"); + } + else{ + Serial.printf("ECDH keygen reports: %d\n",ecdhSuccess); + } + delay(26); + + GCM *gcmaes256 = 0; + gcmaes256 = new GCM(); + GCM gcm; + Serial.printf("\n++++setting IV for encryption++++\n"); + gcm.setIV(iv, sizeof(iv)); + + Serial.println("++++Encrypting++++"); + Serial.printf("Original plaintext: ");//loop because print cipher does not like const + for (int index = 0; index < 32; index++){ + Serial.printf("%x ", originalPlainText[index]); + } + Serial.printf("\n"); + + + gcm.encrypt(encrypted, originalPlainText, textLength); + Serial.printf("\n++++++++Tagging++++++++\n"); + gcm.computeTag(tag, sizeof(tag)); + ECCX08.end(); + Serial.printf("\n++++++++Turned off chip++++++++"); + delay(1000); + Serial.printf("\nCiphertext: "); + printCipher(encrypted, 32); + + ////Decrypting!!!!!!! + ECCX08.begin(); + Serial.printf("\n++++++++Turned chip back on++++++++"); + Serial.printf("\n+++++++++Begin Decrypting++++++++++"); + byte senderPublicKey[64]; + int senderKeyReport = ECCX08.generatePublicKey(senderKeySlot,senderPublicKey); + Serial.printf("sender key reports: %d\n", senderKeyReport); + Serial.printf("\n sender's public key"); + for(int index = 16; index < 64; index++){ + Serial.printf("%x ", senderPublicKey[index]); + } + Serial.printf("\n reciever's old public Key\n"); + for(int index = 16; index < 64; index++){ + Serial.printf("%x ", receiverPublicKey[index]); + } + int secondReceiverKeyReport = ECCX08.generatePublicKey(recieverKeySlot,receiverPublicKey); + //Serial.printf("reciever key reports: %d\n", senderKeyReport); + Serial.printf("\n reciever's public Key after chip sleep, restart, and regeneration\n"); + for(int index = 16; index < 64; index++){ + Serial.printf("%x ", receiverPublicKey[index]); + } + + + + Serial.println(""); + Serial.println("Generating an ECDH secret from the receivers's private key 
and the sender's public key"); + dumbKeySuccess = ECCX08.generatePublicKey(tempKeySlot, dumbyKey); + ecdhSuccess = ECCX08.ecdhKeyGen(0b00001000, recieverKeySlot, senderPublicKey); + if(ecdhSuccess ==0){ + Serial.printf("\nECDH keygen ran\n"); + } else{ + Serial.printf("\nECDH keygen reports: %d\n",ecdhSuccess); + } + delay(26); + Serial.println("inputing IV for decrypt, in a live case, you would transmit the IV in the clear"); + Serial.println("with the cyphertext, this is secure according to GCM experts"); + GCM *gcmaes256second = 0; + gcmaes256second = new GCM(); + GCM gcm2; + gcm2.setIV(iv, sizeof(iv)); + + Serial.println("++++Decrypting++++"); + gcm2.decrypt(decryptedPlainText, encrypted, textLength); + if (!gcm.checkTag(tag, sizeof(tag))) { + Serial.print("data is invalid"); + } + Serial.print("\nDecrypted: "); + printCipher(decryptedPlainText, 32); + confirmSymmetry(originalPlainText, decryptedPlainText, 32); + ECCX08.end(); +} + + + +void loop() { + +} diff --git a/examples/AES/.DS_Store b/examples/AES/.DS_Store new file mode 100644 index 0000000..f7cd611 Binary files /dev/null and b/examples/AES/.DS_Store differ diff --git a/examples/AES/AES.ino b/examples/AES/AES.ino new file mode 100644 index 0000000..269be37 --- /dev/null +++ b/examples/AES/AES.ino @@ -0,0 +1,34 @@ +#include + +void setup() { + Serial.begin(9600); + while (!Serial); + + if (!ECCX08.begin()) { + Serial.println("Failed to communicate with ECC508/ECC608!"); + while (1); + } + + if (!ECCX08.locked()) { + Serial.println("The ECC508/ECC608 is not locked!"); + while (1); + } +} + +void loop() { + byte mode = 0; + int slot = 0; + byte data[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + byte result[16]; + + int success = ECCX08.aesEncryptECB(mode, slot, data, result); + + if (success == 1) { + Serial.println("AES encryption succeeded!"); + } else { + Serial.println("AES encryption failed!"); + } + + delay(1000); +} + diff --git a/examples/AESandECDH/AESandECDH.ino 
b/examples/AESandECDH/AESandECDH.ino new file mode 100644 index 0000000..7202d5e --- /dev/null +++ b/examples/AESandECDH/AESandECDH.ino @@ -0,0 +1,285 @@ +#include + +void printMessage(char* message){ + Serial.printf("\n"); + Serial.println("++++++++++++++++++++++++++++++++"); + Serial.println(message); + Serial.println("++++++++++++++++++++++++++++++++"); +} + +void interpretReportCodes(int report){ + if(report == 2){ + Serial.print(" call to wakeup chip failed"); + } + if(report == 3){ + Serial.print(" command failed"); + } +} + + int testGFM(){ + byte mode = 0; + uint16_t slot = 0xFFFF;//this demands a tempkey be generated!!! + byte data[16] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0E, 0x0D, 0x0F}; + byte mData[16]= {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0E, 0x0D, 0x0F}; + byte encrypted[16]; + byte result[16]; + byte h[16] ={ 0x10, 0x15, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F}; + + Serial.println("original mData: "); + for(int index = 0; index < 16 ; index++){ + Serial.printf("%x ",mData[index]); + } + int gfmSuccess = ECCX08.aesMultiply(slot, mData, h, result); + Serial.print("\ngfmSucces return: "); + Serial.println(gfmSuccess); + Serial.println("result data: "); + for(int index = 0; index < 16 ; index++){ + Serial.printf("%x ",result[index]); + } + Serial.println(" "); +} + +void testAES(int keyslot) { + printMessage("Starting AES test"); + uint16_t slot; + + if (keyslot == 33){ + Serial.printf("generating a new nonce for the Tempkey, using it as key \n"); + slot = 0xFFFF;//this demands a tempkey be generated! 
+ byte setTempkey = 0x00; + bool nonceOutputMode = 1; + byte nonceRandomInitilaizer[20] = {0x08,0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; + ECCX08.nonce(setTempkey, nonceOutputMode, nonceRandomInitilaizer, 20);//!!!!!!make a working tempkey + } + else if(keyslot == 32){//asks to use tempkey, does not generate new key in tempkey + Serial.printf("using the tempkey slot without generating a new tempkey \n"); + slot = 0xFFFF; + } + else if (keyslot <16){ + slot = keyslot; + Serial.printf("using slot %d as the key", keyslot); + } + else{ + Serial.printf("keyslot was not between 0-16, or 32 for using the tempkey or 33 for usging and generating a tempkey"); + + } + byte data[16] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0E, 0x0D, 0x0F}; + byte mData[32]; + byte encrypted[16]; + byte result[16]; + + Serial.println("Original Data"); + for(int index = 0; index < 16 ; index++){ + Serial.printf("%x ",data[index]); + } + Serial.printf("\n"); + + int aesSuccess1 = ECCX08.aes(0, slot, data, 16, encrypted); + Serial.print("aesTest1 return: "); + Serial.println(aesSuccess1); + interpretReportCodes(aesSuccess1); + Serial.println("Encrypted Data"); + for(int index = 0; index < 16 ; index++){ + Serial.printf("%x ",encrypted[index]); + } + Serial.printf("\n"); + delay(1000); + + int aesSuccessDMode = ECCX08.aes(0b00000001, slot, encrypted, 16, result);//decrypt + Serial.print("aesTestDMode return: "); + Serial.println(aesSuccessDMode); + interpretReportCodes(aesSuccessDMode); + Serial.println("unencrypted: "); + for(int index = 0; index < 16 ; index++){ + Serial.printf("%x ",result[index]); + } + Serial.printf("\n"); + delay(1000); + + int aesSuccessMMode = ECCX08.aes(0b00000011, slot, mData, 16, encrypted);//GFM mode + Serial.print("aesTestMMode return: "); + Serial.println(aesSuccessMMode); + delay(1000); + + int encryptSuccess = ECCX08.aesEncryptECB(slot, data, encrypted); + if 
(encryptSuccess == 1) { + Serial.println("AES encryption succeeded!"); + } + else { + Serial.print("AES encryption failed: "); + Serial.println(encryptSuccess); + return; + } + + int decryptSuccess = ECCX08.aesDecryptECB(slot, encrypted, result);//slot is 0xFFFF, sending to tempkey, + + if (decryptSuccess == 1) { + Serial.println("AES decryption succeeded!"); + } + else { + Serial.print("AES decryption failed: "); + Serial.println(decryptSuccess); + return; + } + + int success = memcmp(data, result, 16); + + if (success == 0) { + Serial.println("AES crypto succeeded!"); + } + else { + Serial.println("AES crypto failed!"); + return; + } + + delay(1000); +} + +// void testECDH() { +// printMessage("Starting ECDH"); +// if (!ECCX08.locked()) { +// Serial.println("Chip must be locked to generate an ECDH key."); +// return; +// } +// +// //see table 11-9 section 11.5 +// int senderSlot = 1; +// int receiverSlot = 0; +// int tempKeySlot = 0xFFFF; +// uint8_t ecdhMode = 0b00001000;//this is bits 3-2 as 10, which ought be sending the result to tempkey +// byte plaintext[16] ={0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0E, 0x0D, 0x0F}; +// byte receiverPublicKey[64]; +// byte senderPublicKey[64]; +// byte cyphertext[16]; +// byte decrypted[16]; +// +// //generate a key for the Sender +// int key3Status = ECCX08.generatePrivateKey(senderSlot, senderPublicKey); +// +// if (key3Status != 1) { +// Serial.printf("Sender Private Key generation failed with code %d \n", key3Status); +// } else {//print the first and second 32 bytes of the public key. +// Serial.print("Sender Public Key:\n"); +// for(int bytes = 0; bytes <32; bytes++){ +// Serial.printf("%x ", senderPublicKey[bytes]); +// } +// Serial.printf("\n"); +// for(int bytes = 32; bytes < 64; bytes++){ +// Serial.printf("%x ", senderPublicKey[bytes]); +// } +// } +// +// +// //generate a key for the Receiver. 
+// int key1Status = ECCX08.generatePrivateKey(receiverSlot, receiverPublicKey); +// +// if (key1Status != 1) { +// Serial.printf("\nReciever Private Key generation failed with code %d \n", key1Status); +// } else { +// Serial.printf("\nReciever Public Key: \n"); +// for(int bytes = 0; bytes <32; bytes++){ +// Serial.printf("%x ", receiverPublicKey[bytes]); +// } +// Serial.printf("\n"); +// for(int bytes = 32; bytes < 64; bytes++){ +// Serial.printf("%x ", receiverPublicKey[bytes]); +// } +// Serial.printf("\n"); +// } +// delay(26); +// Serial.println("placing a dumby key in Tempkey"); +// byte dumbyKey[64]; +// int dumbKeySuccess = ECCX08.generatePublicKey(0xFFFF, dumbyKey);//ecdhKeyGen only works if you've previously generated a key into the Tempkey slot. not sure why. +// int success = ECCX08.ecdhKeyGen(ecdhMode, senderSlot, receiverPublicKey); +// //mode = uint8_t ecdhMode = 0b00001000; +// if (success != 0) { +// Serial.println("ECDH key generation failed."); +// Serial.print("ECDH output: "); +// Serial.println(success); +// } else { +// Serial.println("ECDH secret made from sender's PrivateKey and receiver's Public Key"); +// } +// +// Serial.println("Encrypting using the first secret"); +// //so, set the slot to tempkey, +// int encryptSuccess = ECCX08.aesEncryptECB(tempKeySlot, plaintext, cyphertext); +// if (encryptSuccess == 1) { +// Serial.print("AES encryption succeeded!: "); +// for(int index = 0; index < 16; index++){ +// Serial.printf("%x ",cyphertext[index]); +// } +// } else { +// Serial.print("AES encryption failed: "); +// Serial.println(encryptSuccess); +// return; +// } +// +// int secondSecretSuccess = ECCX08.ecdhKeyGen(ecdhMode, receiverSlot, senderPublicKey); +// if (secondSecretSuccess != 0) { +// Serial.println("ECDH key generation failed."); +// Serial.print("ECDH output: "); +// Serial.print(secondSecretSuccess); +// } else { +// Serial.printf("\n Generate a new secret from receiver's private key and sender's public key, and decrypt 
using that /n"); +// } +// +// +// Serial.println("decrypt using that"); +// int decryptSuccess = ECCX08.aesDecryptECB(tempKeySlot, cyphertext, decrypted);//slot is 0xFFFF, sending to tempkey, +// +// if (decryptSuccess == 1) { +// Serial.println("AES decryption succeeded!: "); +// for(int index = 0; index < 16; index++){ +// Serial.printf("%x ",decrypted[index]); +// } +// } else { +// Serial.print("AES decryption failed: "); +// Serial.println(decryptSuccess); +// return; +// } +// } + + +void printConfig(byte *data){ + ECCX08.readConfiguration(data); + for (int index = 0; index<128; index++){ + if (index % 16 == 0 && index !=0){ //this block spaces out our bits into arrays. + Serial.println(""); + } + if (data[index] < 0x10){ + Serial.printf("0%x ", data[index]); + } + else{ + Serial.printf("%x ", data[index]); + } + } +} + + + +void setup(){ + byte mData[16] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0E, 0x0D, 0x0F}; + byte mData2[32] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0E, 0x0D, 0x0F,0x10, 0x15, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F}; + byte gfmOutput[16] ={0xFF, 0xFF, 0xFF, 0xFF,0xFF, 0xFF, 0xFF, 0xFF,0xFF, 0xFF, 0xFF, 0xFF,0xFF, 0xFF, 0xFF, 0xFF}; + byte h[16] ={ 0x10, 0x15, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F}; + int aesKeySlot = 2; + byte aesPublicKey[32]; + + Serial.begin(9600); + while(!Serial); //won't advance untill I open the serial + ECCX08.begin(); + + //testAES(33); //this asks it to make a nonce + //testGFM(); + testECDH(); + //testAES(2); +} + + +void loop() { + // put your main code here, to run repeatedly: + + + delay(17000); +} diff --git a/examples/ConfigAndKeys/Config.h b/examples/ConfigAndKeys/Config.h new file mode 100644 index 0000000..6559691 --- /dev/null +++ b/examples/ConfigAndKeys/Config.h @@ -0,0 +1,75 @@ +const byte ECCX08_Skyhook_CONFIG[128] = {//[0:3] SN 
0-3 +0x01, 0x23, 0xf8, 0x5c, +//[4:7] RevNum +0x00, 0x00, 0x60, 0x02, +//[8:12] SN +0x43, 0xa1, 0xd5, 0xa7, 0xee, +//[13]AES_Enable +0xc1, //Relevant bit: 1 +//01110001 +//[14]I2C Enable +0x71, +//01110001 +//[15]Reserved +0x0, +//[16]IC2Address +0xc0, +//01110001 +//[17] Reserved +0x00, +//[18]Count Match +0x00, +//01110001 +//[19] ChipMode +0x00, +//01110001 +//[20:51] SlotConfig +0x83, 0x20, 0x87, 0x20, 0x8f, 0x20, 0x8f, 0x20, +0x8f, 0x20, 0x8f, 0x20, 0x8f, 0x20, 0x8f, 0x20, +0b00011111, 0b10001111, 0b00011111, 0b10001111, 0b00011111, 0b10001111,0b00011111, 0b10001111, +0b00011111, 0b10001111, 0b00011111, 0b10001111, 0b00011111, 0b10001111,0b00011111, 0b10001111, +//10000011, 00100000, 10000111, 00100000, 10001111, 00100000, 11000100, 10001111, +//10001111, 10001111, 10001111, 10001111, 10011111, 10001111, 10101111, 10001111, +//00000000, 00000000, 00000000, 00000000, 00000000, 00000000, 00000000, 00000000, +//00000000, 00000000, 00000000, 00000000, 00000000, 00000000, 10101111, 10001111, +//[52:59] Counter[0] +0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, +//11111111, 11111111, 11111111, 11111111, 00000000, 00000000, 00000000, +//[60:67] Counter [1] +0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, +//[68] UseLock +0x00, +//[69] Volatile Key Permission +0x00, +//[70:71]SecureBoot +0x00, 0x00, +//[72]KdflvLoc +0x00, +//[73:74] KdflvStr +0x00, 0x00, +//[75:83] Reserved, must be 0 +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +//[84] UserExtra +0x00, +//[85] UserExtraAdd +0x00, +//[86] LockValue +0x00, +//[87] LockConfig +0x00, +//[88:89] SlotLocked +0xff, 0xff, +//[90:91] ChipOptions +0x00, 0x00, +//[92:95]X509format +0x00, 0x00, 0x00, 0x00, +//[96:127]KeyConfig +0x33, 0x00, 0x33, 0x00, 0x33, 0x00, 0x33, 0x00, +0x33, 0x00, 0x33, 0x00, 0x33, 0x00, 0x33, 0x00, +0x3c, 0x00, 0x3c, 0x00, 0x3c, 0x00, 0x3c, 0x00, +0x3c, 0x00, 0x3c, 0x00, 0x3c, 0x00, 0x1c, 0x00 +//00110011, 00000000, 00110011, 00000000, 00110011, 00000000, 00011100, 00000000, +//00011100, 
00000000, 00011100, 00000000, 00011100, 00000000, 00011100, 00000000, +//00111100, 00000000, 00111100, 00000000, 00111100, 00000000, 00111100, 00000000, +//00111100, 00000000, 00111100, 00000000, 00111100, 00000000, 00011100, 00000000 +}; diff --git a/examples/ConfigAndKeys/ConfigAndKeys.ino b/examples/ConfigAndKeys/ConfigAndKeys.ino new file mode 100644 index 0000000..891e7c5 --- /dev/null +++ b/examples/ConfigAndKeys/ConfigAndKeys.ino @@ -0,0 +1,209 @@ +//WIP that configures the chip to do ecdh, generates a private key, +//and saves the public key to arduino flash memory. + +#include "Config.h" +#include +#include "SdFat.h" +#include "Adafruit_SPIFlash.h" + +#if defined(CUSTOM_CS) && defined(CUSTOM_SPI) + Adafruit_FlashTransport_SPI flashTransport(CUSTOM_CS, CUSTOM_SPI); + +#elif defined(ARDUINO_ARCH_ESP32) + // ESP32 use same flash device that store code. + // Therefore there is no need to specify the SPI and SS + Adafruit_FlashTransport_ESP32 flashTransport; + +#else + // On-board external flash (QSPI or SPI) macros should already + // defined in your board variant if supported + #if defined(EXTERNAL_FLASH_USE_QSPI) + Adafruit_FlashTransport_QSPI flashTransport; + + #elif defined(EXTERNAL_FLASH_USE_SPI) + Adafruit_FlashTransport_SPI flashTransport(EXTERNAL_FLASH_USE_CS, EXTERNAL_FLASH_USE_SPI); + + #else + #error No QSPI/SPI flash are defined on your board variant.h ! 
+ #endif +#endif + + +#define PWRSW_INT 15 // this changes state to indicate to the feather that the power switch has been pressed by going from high to low +#define PWRSW_CLR 5 // setting this pin low will cause the MAX16150's OUT pin to go low thereby turning off all power supply regulators including the feather's + + +//flash object +Adafruit_SPIFlash flash(&flashTransport); +//file systme object from SdFat +FatFileSystem fatfs; +File keyFile; + +void savePublicKey(String pubKeyID, uint8_t publicKey[64]){ + Serial.print("Initializing Filesystem on external flash..."); + // Init external flash + flash.begin(); + // Open file system on the flash + if ( !fatfs.begin(&flash) ) { + Serial.println(" Error: filesystem is not existed. Please try SdFat_format example to make one."); + while(1) yield(); + } + Serial.println(" initialization done."); + + // open the file. note that only one file can be open at a time, + // so you have to close this one before opening another. + keyFile = fatfs.open("PublicKeys.txt", FILE_WRITE); + + //make dataline that can be printed from printline + Serial.print("\n Turning the Public Key into a Dataline"); + String dataline = "\n"; + String temp; + dataline+=String(pubKeyID) + ")"; + //load the key into the dataline + for(int bytes = 0; bytes < 64; bytes++){ + String temp = String(publicKey[bytes], HEX); + dataline += " " + temp; + } + dataline += " "; //put a space at the end so we can pick up the last character easily. 
+ + Serial.print("...Dataline Done"); + + // if the file opened okay, write to it: + if (keyFile) { + Serial.print(" Writing to PublicKeys.txt..."); + keyFile.println(dataline); + // close the file: + keyFile.close(); + Serial.println("done."); + } else { + // if the file didn't open, print an error: + Serial.println(" error opening ssid.txt"); + } +} + +void printConfig(){ + byte data[128]; + ECCX08.readConfiguration(data); + for (int index = 0; index<128; index++){ + if (index % 16 == 0 && index !=0){ //this block spaces out our bits into arrays. + Serial.println(""); + } + if (data[index] < 0x10){ + Serial.printf("0%x ", data[index]); + } + else{ + Serial.printf("%x ", data[index]); + } + } +} + +void pwrsw_interrupt_handler() { + //print that interrupt has been detected + //print a 5 sec shutdown count down then set PWRSW_CLR low to turn off all supply regulators + Serial.println("\n\nSHUTTING DOWN REQUEST MADE!"); + Serial.println("\n\nClear Display & refresh"); + //display.clearDisplay(); + //display.refresh(); + Serial.println("\n\nSHUTTING DOWN NOW!\n\n"); + digitalWrite(PWRSW_CLR, LOW); // turn off all power supply regulators + delay(20); // Full shutdown takes a few milliseconds + Serial.println("\nTHIS SHOULD NEVER PRINT!\n\n"); +} + +void setup() { + // Setup power switch interrupt pin + // Falling means the button has been pushed long enough to be debounced + // if it stays low then the int period is longer but out is also deasserted which powers off the device so no need to handle this? + pinMode(PWRSW_INT, INPUT); + attachInterrupt(digitalPinToInterrupt(PWRSW_INT), pwrsw_interrupt_handler, FALLING); + // Always set the pin state before setting the pin mode!!! Arduino defaults pin state to LOW, this can cause unexpected behavior. + digitalWrite(PWRSW_CLR, HIGH); // set to LOW to disable power to system, so feather can turn itself and everything else off. 
+ pinMode(PWRSW_CLR, OUTPUT); // This pin is used to turn off the whole system via the power switch management IC + + + byte data[128]; + + Serial.begin(115200); + while(!Serial); + + ECCX08.begin(); + Serial.print("\n Original Config: \n"); + printConfig();//four calls to 0x02 "read", for 32 bytes + Serial.print("\nECC Serial Number: "); + Serial.println(ECCX08.serialNumber());//three calls to 0x02 "read" + + if(ECCX08.locked()){//one call to 0x02 read" + Serial.print("\nthis chip is locked, cannot write the config\n"); + } else { + Serial.print("\nThe chip is Unlocked"); + delay(500); + Serial.print("\nafterDelay"); + int configSuccess = ECCX08.writeConfiguration(ECCX08_Skyhook_CONFIG); + Serial.printf("\nconfigSuccess %d", configSuccess); + if(configSuccess == 1){ + Serial.print("\nDevice Configuration written"); + } else{ + Serial.print("\nwriteConfiguaration did not complete"); + } + } + + + + ECCX08.lock();//lock first the config and then the data zone. + Serial.println("Device Locked"); + + Serial.printf("\n Config on Chip: \n"); + printConfig(); + + //declare variables + byte publicKeyOne[64]; + byte publicKeyTwo[64]; + int slotOne = 0b00000001; + int slotTwo = 0b00000010; + + //generate keys 1 + // calls to generatePrivateKey will create or recreate the Public Key. + // Do not call this function unless you want your public key recreated. + + int privateKeyReport = ECCX08.generatePrivateKey(slotOne,publicKeyOne);//report is 35599 if it works. + if (privateKeyReport == 35499) + { + Serial.print("\n\nPrivate Key Generated"); + } else{ + if (privateKeyReport == 2) + { + Serial.print("\n\n The Crypto Chip did not respond to the wake command"); + }else if (privateKeyReport == 3) + { + Serial.print("\n\n The Crypto Chip did not like the command formatting"); + }else + { + Serial.printf("\n\nPrivate key generation report: %d", privateKeyReport); + } + } + + int publicKeyReport = ECCX08.generatePublicKey(slotOne,publicKeyOne);//report is 1 if it works. 
+ Serial.printf("\n\Public keyreport: %d", publicKeyReport); + Serial.printf("\nPublic Key One : \n"); + for(int index = 0; index<64;index++){ + Serial.printf("0x%x, ",publicKeyOne[index]); + } + + + //save pubKeys to flash + //savePublicKey("1", publicKeyOne); + //savePublicKey("2", publicKeyTwo); + + ECCX08.lock();//lock first the config and then the data zone. + + pinMode(LED_BUILTIN, OUTPUT); +} + +void loop() { + // put your main code here, to run repeatedly: + digitalWrite(LED_BUILTIN, HIGH); // turn the LED on (HIGH is the voltage level) + delay(1000); // wait for a second + digitalWrite(LED_BUILTIN, LOW); // turn the LED off by making the voltage LOW + delay(1000); + +} \ No newline at end of file diff --git a/examples/ConfigParser/ConfigParser.ino b/examples/ConfigParser/ConfigParser.ino new file mode 100644 index 0000000..b4f6672 --- /dev/null +++ b/examples/ConfigParser/ConfigParser.ino @@ -0,0 +1,561 @@ +/*ConfigParser + * A sketch that grabs the configuration of an ATECC608B chip and prints out the menu options + * held inside in a human readable format. + * INCOMPLETE. Currenlty only includes functionality for bytes 20-51, the SlotConfig elements + */ + +#include +#include +#include +#include +#include + +void printMessage(String message){ + Serial.printf("\n"); + Serial.println("+++++++++++++++++++++++++++++++++++++++++++++++"); + Serial.println(message); + Serial.println("+++++++++++++++++++++++++++++++++++++++++++++++"); +} + + +/* printBits A helperfunction to print the bits of a block from the config. + * parameters + * number(in) a single byte from the config. 
+*/ +void printBits(byte number){ + int test1 = (int)number; + int testOut; + int temp; + + temp = test1 & 0b10000000; + testOut = temp >> 7; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b01000000; + testOut = temp >> 6; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b00100000; + testOut = temp >> 5; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b00010000; + testOut = temp >> 4; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b00001000; + testOut = temp >> 3; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b00000100; + testOut = temp >> 2; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b00000010; + testOut = temp >> 1; + Serial.printf("%d" ,testOut); + + testOut = test1 & 0b00000001; + Serial.printf("%d ", testOut); +} + +void printConfig(byte *data){ + ECCX08.readConfiguration(data); + + Serial.printf("\n//[0:3] SN 0-3\n"); + for (int index = 0; index<4; index++){ + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + Serial.printf("\n//[4:7] RevNum\n"); + for (int index = 4; index<8; index++){ + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + Serial.printf("\n//[8:12] SN\n"); + for (int index = 8; index<13; index++){ + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + Serial.printf("\n//[13]AES_Enable\n"); + if (data[13] < 0x10){ + Serial.printf("0x0%x, ", data[13]); + } else{ + Serial.printf("0x%x, ", data[13]); + } + Serial.printf("//Relevant bit: %d" ,data[13]&0b00000001); + + Serial.printf("\n//[14]I2C Enable\n"); + if (data[14] < 0x10){ + Serial.printf("0x0%x, ", data[14]); + } else{ + Serial.printf("0x%x, ", data[14]); + } + Serial.printf("//Relevant bit: %d" ,data[14]&0b00000001); + + Serial.printf("\n//[15]Reserved\n"); + if (data[14] < 0x10){ + Serial.printf("0x0%x, ", data[15]); + } else{ + 
Serial.printf("0x%x, ", data[15]); + } + + Serial.printf("\n//[16]IC2Address\n"); + if (data[16] < 0x10){ + Serial.printf("0x0%x, ", data[16]); + } else{ + Serial.printf("0x%x, ", data[16]); + } + + Serial.printf("\n//[17] Reserved\n"); + if (data[17] < 0x10){ + Serial.printf("0x0%x, ", data[17]); + } else{ + Serial.printf("0x%x, ", data[17]); + } + + Serial.printf("\n//[18]Count Match\n"); + if (data[18] < 0x10){ + Serial.printf("0x0%x, ", data[18]); + } else{ + Serial.printf("0x%x, ", data[18]); + } + + Serial.printf("\n//[19] ChipMode\n"); + if (data[19] < 0x10){ + Serial.printf("0x0%x, ", data[19]); + } else{ + Serial.printf("0x%x, ", data[19]); + } + + Serial.printf("\n[//20:51] SlotConfig\n"); + for (int index = 20; index<52; index++){ + if ((index-20) % 8 == 0 && (index-20) !=0){ //this block spaces out our bits into arrays. + Serial.println(""); + } + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + Serial.printf("\n//[52:59] Counter[0]\n"); + for (int index = 52; index<60; index++){ + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + Serial.printf("\n//[60:67] Counter [1]\n"); + for (int index = 60; index<68; index++){ + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + Serial.printf("\n//[68] UseLock\n"); + if (data[68] < 0x10){ + Serial.printf("0x0%x, ", data[68]); + } else{ + Serial.printf("0x%x, ", data[68]); + } + + Serial.printf("\n//[69] Volatile Key Permission\n"); + if (data[69] < 0x10){ + Serial.printf("0x0%x, ", data[69]); + } else{ + Serial.printf("0x%x, ", data[69]); + } + + Serial.printf("\n//[70:71]SecureBoot\n"); + for(int index = 70; index< 72; index++){ + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + 
Serial.printf("\n//[72]KdflvLoc\n"); + if (data[72] < 0x10){ + Serial.printf("0x0%x, ", data[72]); + } else{ + Serial.printf("0x%x, ", data[72]); + } + + Serial.printf("\n//[73:74] KdflvStr\n"); + for(int index = 73; index< 75; index++){ + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + Serial.printf("\n//[75:83] Reserved, must be 0 \n"); + for(int index = 75; index< 84; index++){ + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + Serial.printf("\n//[84] UserExtra \n"); + if (data[84] < 0x10){ + Serial.printf("0x0%x, ", data[84]); + } else{ + Serial.printf("0x%x, ", data[84]); + } + + Serial.printf("\n//[85] UserExtraAdd\n"); + if (data[85] < 0x10){ + Serial.printf("0x0%x, ", data[85]); + } else{ + Serial.printf("0x%x, ", data[85]); + } + + Serial.printf("\n//[86] LockValue \n"); + if (data[86] < 0x10){ + Serial.printf("0x0%x, ", data[86]); + } else{ + Serial.printf("0x%x, ", data[86]); + } + + Serial.printf("\n//[87] LockConfig\n"); + if (data[87] < 0x10){ + Serial.printf("0x0%x, ", data[87]); + } else{ + Serial.printf("0x%x, ", data[87]); + } + + Serial.printf("\n//[88:89] SlotLocked\n"); + for(int index = 88; index< 90; index++){ + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + Serial.printf("\n//[90:91] ChipOptions\n"); + for(int index = 90; index< 92; index++){ + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + Serial.printf("\n//[92:95]X509format\n"); + for(int index = 92; index< 96; index++){ + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + + Serial.printf("\n//[96:127]KeyConfig\n"); + for (int index = 96; index<127; index++){ + if ((index-96) % 8 == 0 && (index-96) 
!=0){ //this block spaces out our bits into arrays. + Serial.println(""); + } + if (data[index] < 0x10){ + Serial.printf("0x0%x, ", data[index]); + } else{ + Serial.printf("0x%x, ", data[index]); + } + } + //Final bit of the KeyConfig does not get a comma + if (data[128] < 0x10){ + Serial.printf("0x0%x ", data[128]); + } else{ + Serial.printf("0x%x", data[128]); + } +} + +void printConfigSlots(byte *slots){ + /* Byte options, table 2-6 from ATECC608 Dataset + * **** 0000 0000 0000 WriteConfig Bits + * 0000 **** 0000 0000 WriteKey Bits + * 0000 0000 *000 0000 Is Secret Bit + * 0000 0000 0*00 0000 EncryptRead Bit + * 0000 0000 00*0 0000 LimitedUse Bit + * 0000 0000 000* 0000 NoMac Bit + * 0000 0000 0000 **** ReadKey Bits + * + */ + int slotNumber = 0; + for(int index = 20; index < 51; index++){//core loop of the Keyslot reader, I'm pretty sure there's only 16 slots, so the 17th slot (number 16) is wrong. + Serial.printf(" \n"); + String message = "Slot Number " + String(slotNumber); + printMessage(message); + Serial.print("Config Slots one and two: "); + printBits(slots[index]); + printBits(slots[index+1]); + slotNumber +=1; + /*From table 2-6*/ + byte writeConfigBits = slots[index] & 0b11110000; + byte writeKeyConfigBits = slots[index] & 0b00001111; + byte isSecretBit = slots[index + 1] & 0b10000000; + byte encryptedReadBit = slots[index + 1] & 0b01000000; + byte limitedUseBit = slots[index + 1] & 0b00100000; + byte noMacBit = slots[index + 1] & 0b00010000; + byte readKeyBits = slots[index + 1] & 0b00001111; + /*From table 2-12 */ + byte keyConfigX509id = slots[index+76] & 0b11000000; + byte keyConfigRFU = slots[index+76] & 0b00100000; + byte keyConfigPersistentDisable = slots[index+76] & 0b00010000; + byte keyConfigAuthKey = slots[index+76] & 0b00001111; + byte keyConfigReqAuth = slots[index+77] & 0b10000000; + byte keyConfigReqRandom = slots[index+77] & 0b01000000; + byte keyConfigLockable = slots[index+77] & 0b00100000; + byte keyConfigKeyType = slots[index+77] & 
0b00011100; + byte keyConfigPubInfo = slots[index+77] & 0b00000010; + byte keyConfigPrivate = slots[index+77] & 0b00000001; + + Serial.printf("\nWriteConfig: %x \n",writeConfigBits); + Serial.println(""); + Serial.printf("WriteKey: %x \n",writeKeyConfigBits); + Serial.println(""); + + Serial.printf("0000 0000 *000 0000 IsSecretBit \n"); + if (isSecretBit == 0){ + Serial.printf("isSecretBit is not set, this slot should not hold any secret data \n"); + Serial.printf("Genkey and Sign commands will fail for ECC private keys \n"); + } + else { + Serial.printf("isSecretBit is set, the contents of this slot are secret \n"); + } + + Serial.println(""); + Serial.printf("0000 0000 0*00 0000 EncryptedReadBit \n"); + if (encryptedReadBit == 0){ + Serial.printf("Clear text Reads may be permitted \n"); + } + else { + Serial.printf("Reads from this slot will be encrypted according to the ReadKey bits.\n"); + Serial.printf("If this bit is set, so must be isSecretBit \n"); + } + Serial.println(""); + Serial.printf("0000 0000 000* 0000, NoMacBit \n"); + if (noMacBit == 0){ + Serial.printf("The key stored in this slot can be used by all commands. \n"); + } + else{ + Serial.println("the key stored in this slot cannot be used by the MAC command, and"); + Serial.println("Tempkeys generated using this slot cannot be used by the MAC command"); + } + Serial.println("");//spacing + //byte readKeyBits = slots[index + 1] & 0b00001111; + Serial.printf("0000 0000 0000 **** ReadKey control bits \n"); + if (readKeyBits == 0){ + Serial.println("This key can be used for the CheckMac copy operation. 
Do not set it"); + Serial.println("this way as a default, only when CheckMac copy operation is explictly desired"); + } + else Serial.println("This key cannot be used for the CheckMac copy operation"); + if ((readKeyBits & 0b00000001) == 0) {//bit 0 + Serial.println("External Signatures of arbitary messages are not enabled"); + } + else{ + Serial.println("External Signatures of arbitrary messages are enabled");//I'm not clear on what this means either + } + + if ((readKeyBits &0b00000010)== 0){ //bit 1 + Serial.println("Internal Signatures of Messages generated by GenDig or GenKey are not enabled"); + } + else{ + Serial.println("Internal Signatures of messages generated by GenDig or GenKey are enabled"); + } + if ((readKeyBits & 0b00000100) == 0){//bit 2 + Serial.println("ECDH operation is not permitted using this keyslot"); + } + else{ + Serial.println("ECDH operation is permitted using this keyslot"); + if ((readKeyBits & 0b00001000) == 0){ //bit 3 depends on bit 2 being 1, is ignored otherwise + Serial.println("ECDH master Secret may be output in the clear"); + } + else{ + Serial.println("the ECDH master Secret may be written into slot N|1"); + } + + } + index +=1; + printMessage("KeyConfig Section, table 2-12"); + Serial.printf("KeyConfig for slot %d: ", slotNumber); + printBits(slots[index+76]); + printBits(slots[index+77]); + Serial.println(""); + + if(keyConfigX509id ==0){ + Serial.println("This slot can contain something other than a public Key"); + } + else{ + Serial.print(keyConfigX509id); + Serial.println(" Points to the X509 forma32thet array in config slots 92-95"); + + } + Serial.println(""); + if(keyConfigRFU != 0){ + Serial.print(keyConfigRFU); + printBits(keyConfigRFU); + Serial.println("the RFU bit, bit 13 of the KeyConfig, must be 0"); + } + if(keyConfigPersistentDisable == 0){ + Serial.println("The use of this key does not depend on the state of the Persistent Latch"); + } + else{ + Serial.println("The use of this key is prohibited for all 
commands other than GenKey"); + } + + if(keyConfigReqAuth == 1){ + Serial.println("This key requires an authrization key to be set in KeyConfig AuthKey"); + } + + if((keyConfigAuthKey == 0) && (keyConfigReqAuth != 0)){ + Serial.println("ERROR: KeyConfig AuthKey must be set to zero if ReqAuth is zero"); + } + else if(keyConfigAuthKey == 0){ + Serial.println("This key does not require authorization"); + } + else{ + Serial.println("OPERATION REQUIRES TESTING"); + Serial.printf("the key %d is used to authenticate this key \n", keyConfigAuthKey); + } + if(keyConfigLockable == 0){ + Serial.println("The remaining bits, 0-4, control modification permission with bits in SlotConfig"); + } + else{ + Serial.println("this slot is lockable based on the coresponding slot in bytes 88 and 89 of the Config"); + printBits(slots[88]); + printBits(slots[89]); + } + int keyType = keyConfigKeyType >> 2; + if(keyType == 4){ + Serial.println("Key is a P256 NIST ECC Key, CheckMac, DeriveKey, MAC and AES commands will result in an error"); + } + + else if(keyType ==6){ + Serial.println("This key is an AES Key"); + } + else if (keyType ==7){ + Serial.println("This key is an SHA key or other data"); + } + else{ + Serial.println("This keytype is reserved for future use"); + } + if(keyConfigPrivate = 0){ + Serial.println("This keyslot does not contain an ECC private key and cannot be accessed by the"); + Serial.println("Sign, Genkey, or PrivWrite commands. 
It may contain an ECC public Key, a ShA key, an AES key, or data"); + if(keyConfigPubInfo = 0){ + Serial.println("the public key in this slot can be used by the Verify command without being validated"); + } + else{ + Serial.println("Table 2-12, bit 1, part 2, case 2, and written confusingly: "); + Serial.println("The public key in this slot can be used by the Verify Command only if the public key in the slot has been validated."); + Serial.println("When this slot is written for any reason, the most significant four bits of byte 0 of block 0 will be set to 0xA to"); + Serial.println("invalidate the slot. The Verify command can be used to write those bits to validate the slot"); + } + } + else{ + Serial.println("This keyslot contains an ECC private key and can be accessed only by the Sign, Genkey, and PrivWrite commands"); + if (keyConfigPubInfo == 0){ + Serial.println("The public version of this key can never be generated, this mode provides the highest security"); + } + else { + Serial.println("The public version of this key can be generated"); + } + + } + if ((keyType == 6)||(keyType == 7)){ + if (keyConfigPubInfo == 0){ + Serial.println("The KDF Command cannot write to this slot"); + } + else{ + Serial.println("The KDF command can write to this slot"); + } + } + } +} + +/* Function which reads and displays basic setup from the begining of the config + * parameters + * *data, the pointer to the variable holding the config data + */ +void generalInfo(byte *data){ + printMessage("Chip General Info"); + Serial.print("\nChip Serial Number: "); + for(int firstSerialSegment = 0; firstSerialSegment <4; firstSerialSegment ++){ + Serial.printf("%x ", data[firstSerialSegment]); + } + for(int secondSerialSegment = 8; secondSerialSegment <13; secondSerialSegment ++){ + Serial.printf("%x ", data[secondSerialSegment]); + } + Serial.printf("\nChip Revision Number: "); + for(int revNumIndex = 4; revNumIndex <8; revNumIndex++){ + Serial.printf("%x ", data[revNumIndex]); + } + int 
aesEnableBit = data[13] % 0b000000001; + if(aesEnableBit == 0){ + Serial.printf("\nAES byte: %x", data[13]); + Serial.printf("\nAES is not enabled on this chip. AES And KDF commands are going to fail"); + } + else{ + Serial.printf("\nAES is enabled on this chip"); + } + int i2cEnableBit = data[14] %0b00000001; + if(i2cEnableBit ==0){ + Serial.printf("\nSingle Wire Interface mode is active"); + } + else{ + Serial.printf("\nI2C Interface mode is active"); + Serial.printf("\n%x is the address of the chip", data[16]); + } + int CountMatchEnableBit = data[18] & 0b00000001; + if(CountMatchEnableBit == 0){ + Serial.printf("\n Counter match function is disabled"); + } + else{ + int CountMatchKey = data[18] &0b11110000; + Serial.printf("\nCounter match function is enabled using keyslot %d", CountMatchKey>4); + } +} + + +void setup() { + Serial.begin(9600); + while(!Serial); + + byte data[128]; + + + ECCX08.begin(); + printMessage("Displaying the Config File"); + printConfig(data); + + generalInfo(data); + printMessage("Config bits [20:51], the slot Config Section"); + printConfigSlots(data); + +} + +void loop() { + } diff --git a/examples/OpenSecrets/OpenSecrets.ino b/examples/OpenSecrets/OpenSecrets.ino new file mode 100644 index 0000000..5a8d4c9 --- /dev/null +++ b/examples/OpenSecrets/OpenSecrets.ino @@ -0,0 +1,98 @@ +//This file demonstrates the ECDH function of the ATECCX08 chips. 
+ +#include + +void setup() { + // put your setup code here, to run once: + Serial.begin(9600); + while(!Serial); + Serial.print("\n+++++++++++++++++++++\n+++++++++++++++++++++\n+++++++++++++++++++++\n"); + ECCX08.begin(); + uint8_t senderKeySlot = 0b00000001; + uint8_t receiverKeySlot = 0b00000010; + uint16_t tempKeySlot = 0xFFFF; + byte receiverPublicKey[64]; + byte senderPublicKey[64]; + uint8_t zeroblock[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + uint8_t encrypted[16] ={1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; + int encryptReport; + + //dumbkey inits + byte setTempkey = 0x00; + //bool nonceOutputMode = 1; + //byte nonceRandomInitilaizer[20] = {0x08,0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00,0x00, +// 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; + + + + + //Generate Private Keys + //Do not run calls to: generatePrivateKey unless you want to reset your devices public keys! + + //int receiverPrivateKeyReport = ECCX08.generatePrivateKey(receiverKeySlot,receiverPublicKey); + //Serial.printf("\nslot %d Private Key generation report: %d",receiverKeySlot, receiverPrivateKeyReport); + + + //int senderPrivateKeyReport = ECCX08.generatePrivateKey(senderKeySlot,senderPublicKey); + //Serial.printf("\nslot %d Private Key generation report: %d", senderKeySlot, senderPrivateKeyReport); + //ECCX08.lock(); + + //Derive Public Keys + // int receiverPublicKeyReport = ECCX08.generatePublicKey(receiverKeySlot,receiverPublicKey);//report is 1 if it works. + // Serial.printf("\nReciever Public keyreport: %d", receiverPublicKeyReport); + + + + Serial.printf("\nReceiver Public Key: \n"); + for(int index = 0; index<64;index++){ + Serial.printf("%x ",receiverPublicKey[index]); + } + + + + //Make the Secret + int ecdhSuccess = ECCX08.ecdhKeyGen(0b00001000, senderKeySlot, receiverPublicKey);//in production, we will have to have already loaded the receiver's pulic key onto the device. 
+ Serial.printf("\necdh success: %d",ecdhSuccess); + Serial.printf("\necdh slot: %d", senderKeySlot); + + + encryptReport = ECCX08.aesEncryptECB(tempKeySlot, zeroblock, encrypted); + Serial.printf("\nencryption report: %d",encryptReport); + Serial.printf("\nzeroblock encrypted with the first secret: "); + for(int index =0; index <16; index++){ + Serial.printf("%x ", encrypted[index]); + } + + Serial.printf("\nSleeping Chip\n"); + + + + Serial.printf("\n++++Second Secret++++"); + + int senderPublicKeyReport = ECCX08.generatePublicKey(senderKeySlot,senderPublicKey);//report is 1 if it works. + Serial.printf("\nSender Public Key report: %d", senderPublicKeyReport); + Serial.printf("\nSender Public Key: \n"); + for(int index = 0; index<64;index++){ + Serial.printf("%x ",senderPublicKey[index]); + } + + + int secondEcdhSuccess = ECCX08.ecdhKeyGen(0b00001000, receiverKeySlot, senderPublicKey); + Serial.printf("\nSecond ecdh success: %d", secondEcdhSuccess); + Serial.printf("\necdh slot: %d", receiverKeySlot); + + encryptReport = ECCX08.aesEncryptECB(tempKeySlot, zeroblock, encrypted); + Serial.printf("\nencryption report: %d",encryptReport); + Serial.printf("\nzeroblock encrypted with the second secret: "); + for(int index =0; index <16; index++){ + Serial.printf("%x ", encrypted[index]); + } + + + ECCX08.end(); +} + + +void loop() { + +} diff --git a/examples/TestAES/TestAES.ino b/examples/TestAES/TestAES.ino new file mode 100644 index 0000000..c63e331 --- /dev/null +++ b/examples/TestAES/TestAES.ino @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* +This example runs tests on the AES implementation to verify correct behaviour. +*/ + +#include +#include +#include + +struct TestVector +{ + const char *name; + byte key[32]; + byte plaintext[16]; + byte ciphertext[16]; +}; + +// Define the ECB test vectors from the FIPS specification. 
+static TestVector const testVectorAES128 = { + .name = "AES-128-ECB", + .key = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}, + .plaintext = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}, + .ciphertext = {0x69, 0xC4, 0xE0, 0xD8, 0x6A, 0x7B, 0x04, 0x30, + 0xD8, 0xCD, 0xB7, 0x80, 0x70, 0xB4, 0xC5, 0x5A} +}; +static TestVector const testVectorAES192 = { + .name = "AES-192-ECB", + .key = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17}, + .plaintext = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}, + .ciphertext = {0xDD, 0xA9, 0x7C, 0xA4, 0x86, 0x4C, 0xDF, 0xE0, + 0x6E, 0xAF, 0x70, 0xA0, 0xEC, 0x0D, 0x71, 0x91} +}; +static TestVector const testVectorAES256 = { + .name = "AES-256-ECB", + .key = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F}, + .plaintext = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}, + .ciphertext = {0x8E, 0xA2, 0xB7, 0xCA, 0x51, 0x67, 0x45, 0xBF, + 0xEA, 0xFC, 0x49, 0x90, 0x4B, 0x49, 0x60, 0x89} +}; + +AES128 aes128; +AES192 aes192; +AES256 aes256; + +byte buffer[16]; + +void testCipher(BlockCipher *cipher, const struct TestVector *test) +{ + crypto_feed_watchdog(); + Serial.print(test->name); + Serial.print(" Encryption ... "); + cipher->setKey(test->key, cipher->keySize()); + cipher->encryptBlock(buffer, test->plaintext); + if (memcmp(buffer, test->ciphertext, 16) == 0) + Serial.println("Passed"); + else + Serial.println("Failed"); + + Serial.print(test->name); + Serial.print(" Decryption ... 
"); + cipher->decryptBlock(buffer, test->ciphertext); + if (memcmp(buffer, test->plaintext, 16) == 0) + Serial.println("Passed"); + else + Serial.println("Failed"); +} + +void perfCipher(BlockCipher *cipher, const struct TestVector *test) +{ + unsigned long start; + unsigned long elapsed; + int count; + + crypto_feed_watchdog(); + + Serial.print(test->name); + Serial.print(" Set Key ... "); + start = micros(); + for (count = 0; count < 10000; ++count) { + cipher->setKey(test->key, cipher->keySize()); + } + elapsed = micros() - start; + Serial.print(elapsed / 10000.0); + Serial.print("us per operation, "); + Serial.print((10000.0 * 1000000.0) / elapsed); + Serial.println(" per second"); + + Serial.print(test->name); + Serial.print(" Encrypt ... "); + start = micros(); + for (count = 0; count < 5000; ++count) { + cipher->encryptBlock(buffer, buffer); + } + elapsed = micros() - start; + Serial.print(elapsed / (5000.0 * 16.0)); + Serial.print("us per byte, "); + Serial.print((16.0 * 5000.0 * 1000000.0) / elapsed); + Serial.println(" bytes per second"); + + Serial.print(test->name); + Serial.print(" Decrypt ... "); + start = micros(); + for (count = 0; count < 5000; ++count) { + cipher->decryptBlock(buffer, buffer); + } + elapsed = micros() - start; + Serial.print(elapsed / (5000.0 * 16.0)); + Serial.print("us per byte, "); + Serial.print((16.0 * 5000.0 * 1000000.0) / elapsed); + Serial.println(" bytes per second"); + + Serial.println(); +} + +void setup() +{ + Serial.begin(9600); + + Serial.println(); + + Serial.println("State Sizes:"); + Serial.print("AES128 ... "); + Serial.println(sizeof(AES128)); + Serial.print("AES192 ... "); + Serial.println(sizeof(AES192)); + Serial.print("AES256 ... 
"); + Serial.println(sizeof(AES256)); + Serial.println(); + + Serial.println("Test Vectors:"); + testCipher(&aes128, &testVectorAES128); + testCipher(&aes192, &testVectorAES192); + testCipher(&aes256, &testVectorAES256); + + Serial.println(); + + Serial.println("Performance Tests:"); + perfCipher(&aes128, &testVectorAES128); + perfCipher(&aes192, &testVectorAES192); + perfCipher(&aes256, &testVectorAES256); +} + +void loop() +{ +} diff --git a/examples/Tools/ConfigParser/ConfigParser.ino b/examples/Tools/ConfigParser/ConfigParser.ino new file mode 100644 index 0000000..e15ae7c --- /dev/null +++ b/examples/Tools/ConfigParser/ConfigParser.ino @@ -0,0 +1,299 @@ +/*ConfigParser + * A sketch that grabs the configuration of an ATECC608B chip and prints out the menu options + * held inside in a human readable format. + * INCOMPLETE. Currenlty only includes functionality for bytes 20-51, the SlotConfig elements + */ + +#include +#include +#include +#include +#include + +void printMessage(String message){ + Serial.printf("\n"); + Serial.println("+++++++++++++++++++++++++++++++++++++++++++++++"); + Serial.println(message); + Serial.println("+++++++++++++++++++++++++++++++++++++++++++++++"); +} + + +/* printBits A helperfunction to print the bits of a block from the config. + * parameters + * number(in) a single byte from the config. 
+*/ +void printBits(byte number){ + int test1 = (int)number; + int testOut; + int temp; + + temp = test1 & 0b10000000; + testOut = temp >> 7; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b01000000; + testOut = temp >> 6; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b00100000; + testOut = temp >> 5; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b00010000; + testOut = temp >> 4; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b00001000; + testOut = temp >> 3; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b00000100; + testOut = temp >> 2; + Serial.printf("%d" ,testOut); + + temp = test1 & 0b00000010; + testOut = temp >> 1; + Serial.printf("%d" ,testOut); + + testOut = test1 & 0b00000001; + Serial.printf("%d ", testOut); +} + +void printConfig(byte *data){ + ECCX08.readConfiguration(data); + for (int index = 0; index<128; index++){ + if (index % 16 == 0 && index !=0){ //this block spaces out our bits into arrays. + Serial.println(""); + } + if (data[index] < 0x10){ + Serial.printf("0%x ", data[index]); + } + else{ + Serial.printf("%x ", data[index]); + } + } +} + +void printConfigSlots(byte *slots){ + /* Byte options, table 2-6 from ATECC608 Dataset + * **** 0000 0000 0000 WriteConfig Bits + * 0000 **** 0000 0000 WriteKey Bits + * 0000 0000 *000 0000 Is Secret Bit + * 0000 0000 0*00 0000 EncryptRead Bit + * 0000 0000 00*0 0000 LimitedUse Bit + * 0000 0000 000* 0000 NoMac Bit + * 0000 0000 0000 **** ReadKey Bits + * + */ + int slotNumber = 0; + for(int index = 20; index < 51; index++){//core loop of the Keyslot reader, I'm pretty sure there's only 16 slots, so the 17th slot (number 16) is wrong. 
+ Serial.printf(" \n"); + String message = "Slot Number " + String(slotNumber); + printMessage(message); + Serial.print("Config Slots one and two: "); + printBits(slots[index]); + printBits(slots[index+1]); + slotNumber +=1; + /*From table 2-6*/ + byte writeConfigBits = slots[index] & 0b11110000; + byte writeKeyConfigBits = slots[index] & 0b00001111; + byte isSecretBit = slots[index + 1] & 0b10000000; + byte encryptedReadBit = slots[index + 1] & 0b01000000; + byte limitedUseBit = slots[index + 1] & 0b00100000; + byte noMacBit = slots[index + 1] & 0b00010000; + byte readKeyBits = slots[index + 1] & 0b00001111; + /*From table 2-12 */ + byte keyConfigX509id = slots[index+76] & 0b11000000; + byte keyConfigRFU = slots[index+76] & 0b00100000; + byte keyConfigPersistentDisable = slots[index+76] & 0b00010000; + byte keyConfigAuthKey = slots[index+76] & 0b00001111; + byte keyConfigReqAuth = slots[index+77] & 0b10000000; + byte keyConfigReqRandom = slots[index+77] & 0b01000000; + byte keyConfigLockable = slots[index+77] & 0b00100000; + byte keyConfigKeyType = slots[index+77] & 0b00011100; + byte keyConfigPubInfo = slots[index+77] & 0b00000010; + byte keyConfigPrivate = slots[index+77] & 0b00000001; + + Serial.printf("\nWriteConfig: %x \n",writeConfigBits); + Serial.println(""); + Serial.printf("WriteKey: %x \n",writeKeyConfigBits); + Serial.println(""); + + Serial.printf("0000 0000 *000 0000 IsSecretBit \n"); + if (isSecretBit == 0){ + Serial.printf("isSecretBit is not set, this slot should not hold any secret data \n"); + Serial.printf("Genkey and Sign commands will fail for ECC private keys \n"); + } + else { + Serial.printf("isSecretBit is set, the contents of this slot are secret \n"); + } + + Serial.println(""); + Serial.printf("0000 0000 0*00 0000 EncryptedReadBit \n"); + if (encryptedReadBit == 0){ + Serial.printf("Clear text Reads may be permitted \n"); + } + else { + Serial.printf("Reads from this slot will be encrypted according to the ReadKey bits.\n"); + 
Serial.printf("If this bit is set, so must be isSecretBit \n"); + } + Serial.println(""); + Serial.printf("0000 0000 000* 0000, NoMacBit \n"); + if (noMacBit == 0){ + Serial.printf("The key stored in this slot can be used by all commands. \n"); + } + else{ + Serial.println("the key stored in this slot cannot be used by the MAC command, and"); + Serial.println("Tempkeys generated using this slot cannot be used by the MAC command"); + } + Serial.println("");//spacing + //byte readKeyBits = slots[index + 1] & 0b00001111; + Serial.printf("0000 0000 0000 **** ReadKey control bits \n"); + if (readKeyBits == 0){ + Serial.println("This key can be used for the CheckMac copy operation. Do not set it"); + Serial.println("this way as a default, only when CheckMac copy operation is explictly desired"); + } + else Serial.println("This key cannot be used for the CheckMac copy operation"); + if ((readKeyBits & 0b00000001) == 0) {//bit 0 + Serial.println("External Signatures of arbitary messages are not enabled"); + } + else{ + Serial.println("External Signatures of arbitrary messages are enabled");//I'm not clear on what this means either + } + + if ((readKeyBits &0b00000010)== 0){ //bit 1 + Serial.println("Internal Signatures of Messages generated by GenDig or GenKey are not enabled"); + } + else{ + Serial.println("Internal Signatures of messages generated by GenDig or GenKey are enabled"); + } + if ((readKeyBits & 0b00000100) == 0){//bit 2 + Serial.println("ECDH operation is not permitted using this keyslot"); + } + else{ + Serial.println("ECDH operation is permitted using this keyslot"); + if ((readKeyBits & 0b00001000) == 0){ //bit 3 depends on bit 2 being 1, is ignored otherwise + Serial.println("ECDH master Secret may be output in the clear"); + } + else{ + Serial.println("the ECDH master Secret may be written into slot N|1"); + } + + } + index +=1; + printMessage("KeyConfig Section, table 2-12"); + Serial.printf("KeyConfig for slot %d: ", slotNumber); + 
printBits(slots[index+76]); + printBits(slots[index+77]); + Serial.println(""); + + if(keyConfigX509id ==0){ + Serial.println("This slot can contain something other than a public Key"); + } + else{ + Serial.print(keyConfigX509id); + Serial.println(" Points to the X509 forma32thet array in config slots 92-95"); + + } + Serial.println(""); + if(keyConfigRFU != 0){ + Serial.print(keyConfigRFU); + printBits(keyConfigRFU); + Serial.println("the RFU bit, bit 13 of the KeyConfig, must be 0"); + } + if(keyConfigPersistentDisable == 0){ + Serial.println("The use of this key does not depend on the state of the Persistent Latch"); + } + else{ + Serial.println("The use of this key is prohibited for all commands other than GenKey"); + } + + if(keyConfigReqAuth == 1){ + Serial.println("This key requires an authrization key to be set in KeyConfig AuthKey"); + } + + if((keyConfigAuthKey == 0) && (keyConfigReqAuth != 0)){ + Serial.println("ERROR: KeyConfig AuthKey must be set to zero if ReqAuth is zero"); + } + else if(keyConfigAuthKey == 0){ + Serial.println("This key does not require authorization"); + } + else{ + Serial.println("OPERATION REQUIRES TESTING"); + Serial.printf("the key %d is used to authenticate this key \n", keyConfigAuthKey); + } + if(keyConfigLockable == 0){ + Serial.println("The remaining bits, 0-4, control modification permission with bits in SlotConfig"); + } + else{ + Serial.println("this slot is lockable based on the coresponding slot in bytes 88 and 89 of the Config"); + printBits(slots[88]); + printBits(slots[89]); + } + int keyType = keyConfigKeyType >> 2; + if(keyType == 4){ + Serial.println("Key is a P256 NIST ECC Key, CheckMac, DeriveKey, MAC and AES commands will result in an error"); + } + + else if(keyType ==6){ + Serial.println("This key is an AES Key"); + } + else if (keyType ==7){ + Serial.println("This key is an SHA key or other data"); + } + else{ + Serial.println("This keytype is reserved for future use"); + } + if(keyConfigPrivate = 0){ + 
Serial.println("This keyslot does not contain an ECC private key and cannot be accessed by the"); + Serial.println("Sign, Genkey, or PrivWrite commands. It may contain an ECC public Key, a ShA key, an AES key, or data"); + if(keyConfigPubInfo = 0){ + Serial.println("the public key in this slot can be used by the Verify command without being validated"); + } + else{ + Serial.println("Table 2-12, bit 1, part 2, case 2, and written confusingly: "); + Serial.println("The public key in this slot can be used by the Verify Command only if the public key in the slot has been validated."); + Serial.println("When this slot is written for any reason, the most significant four bits of byte 0 of block 0 will be set to 0xA to"); + Serial.println("invalidate the slot. The Verify command can be used to write those bits to validate the slot"); + } + } + else{ + Serial.println("This keyslot contains an ECC private key and can be accessed only by the Sign, Genkey, and PrivWrite commands"); + if (keyConfigPubInfo == 0){ + Serial.println("The public version of this key can never be generated, this mode provides the highest security"); + } + else { + Serial.println("The public version of this key can be generated"); + } + + } + if ((keyType == 6)||(keyType == 7)){ + if (keyConfigPubInfo == 0){ + Serial.println("The KDF Command cannot write to this slot"); + } + else{ + Serial.println("The KDF command can write to this slot"); + } + } + } +} + + + +void setup() { + Serial.begin(9600); + while(!Serial); + + byte data[128]; + + + ECCX08.begin(); + printMessage("Displaying the Config File"); + printConfig(data); + printMessage("Config bits [20:51], the slot Config Section"); + printConfigSlots(data); + +} + +void loop() { + } diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 0000000..6771775 Binary files /dev/null and b/src/.DS_Store differ diff --git a/src/AES.h b/src/AES.h new file mode 100644 index 0000000..b5c7b75 --- /dev/null +++ b/src/AES.h @@ -0,0 +1,268 @@ +/* + 
* Copyright (C) 2015,2018 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_AES_h +#define CRYPTO_AES_h + +#include "BlockCipher.h" + +// Determine which AES implementation to export to applications. 
+#if defined(ESP32) +#define CRYPTO_AES_ESP32 1 +#else +#define CRYPTO_AES_DEFAULT 1 +#endif + +#if defined(CRYPTO_AES_DEFAULT) || defined(CRYPTO_DOC) + +class AESTiny128; +class AESTiny256; +class AESSmall128; +class AESSmall256; + +class AESCommon : public BlockCipher +{ +public: + virtual ~AESCommon(); + + size_t blockSize() const; + + void encryptBlock(uint8_t *output, const uint8_t *input); + void decryptBlock(uint8_t *output, const uint8_t *input); + + void encryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input); + void decryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input); + + void clear(); + +protected: + AESCommon(); + + /** @cond aes_internal */ + uint8_t rounds; + uint8_t *schedule; + + static void subBytesAndShiftRows(uint8_t *output, const uint8_t *input); + static void inverseShiftRowsAndSubBytes(uint8_t *output, const uint8_t *input); + static void mixColumn(uint8_t *output, uint8_t *input); + static void inverseMixColumn(uint8_t *output, const uint8_t *input); + static void keyScheduleCore(uint8_t *output, const uint8_t *input, uint8_t iteration); + static void applySbox(uint8_t *output, const uint8_t *input); + /** @endcond */ + + friend class AESTiny128; + friend class AESTiny256; + friend class AESSmall128; + friend class AESSmall256; +}; + +class AES128 : public AESCommon +{ +public: + AES128(); + virtual ~AES128(); + + size_t keySize() const; + + bool setKey(const uint8_t *key, size_t len); + +private: + uint8_t sched[176]; +}; + +class AES192 : public AESCommon +{ +public: + AES192(); + virtual ~AES192(); + + size_t keySize() const; + + bool setKey(const uint8_t *key, size_t len); + +private: + uint8_t sched[208]; +}; + +class AES256 : public AESCommon +{ +public: + AES256(); + virtual ~AES256(); + + size_t keySize() const; + + bool setKey(const uint8_t *key, size_t len); + +private: + uint8_t sched[240]; +}; + +class AESTiny256 : public BlockCipher +{ +public: + AESTiny256(); + virtual ~AESTiny256(); + + size_t 
blockSize() const; + size_t keySize() const; + + bool setKey(const uint8_t *key, size_t len); + + void encryptBlock(uint8_t *output, const uint8_t *input); + void decryptBlock(uint8_t *output, const uint8_t *input); + + void encryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input); + void decryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input); + + void clear(); + +private: + uint8_t schedule[32]; +}; + +class AESSmall256 : public AESTiny256 +{ +public: + AESSmall256(); + virtual ~AESSmall256(); + + bool setKey(const uint8_t *key, size_t len); + + void decryptBlock(uint8_t *output, const uint8_t *input); + + void clear(); + +private: + uint8_t reverse[32]; +}; + +class AESTiny128 : public BlockCipher +{ +public: + AESTiny128(); + virtual ~AESTiny128(); + + size_t blockSize() const; + size_t keySize() const; + + bool setKey(const uint8_t *key, size_t len); + + void encryptBlock(uint8_t *output, const uint8_t *input); + void decryptBlock(uint8_t *output, const uint8_t *input); + + void encryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input); + void decryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input); + + void clear(); + +private: + uint8_t schedule[16]; +}; + +class AESSmall128 : public AESTiny128 +{ +public: + AESSmall128(); + virtual ~AESSmall128(); + + bool setKey(const uint8_t *key, size_t len); + + void decryptBlock(uint8_t *output, const uint8_t *input); + + void clear(); + +private: + uint8_t reverse[16]; +}; + +#endif // CRYPTO_AES_DEFAULT + +#if defined(CRYPTO_AES_ESP32) + +// "hwcrypto/aes.h" includes "rom/aes.h" which defines global enums for +// AES128, AES192, and AES256. The enum definitions interfere with the +// definition of the same-named classes below. The #define's and #undef's +// here work around the problem by defining the enums to different names. 
+#define AES128 AES128_enum +#define AES192 AES192_enum +#define AES256 AES256_enum +#include "hwcrypto/aes.h" +#undef AES128 +#undef AES192 +#undef AES256 + +class AESCommon : public BlockCipher +{ +public: + virtual ~AESCommon(); + + size_t blockSize() const; + size_t keySize() const; + + bool setKey(const uint8_t *key, size_t len); + + void encryptBlock(uint8_t *output, const uint8_t *input); + void decryptBlock(uint8_t *output, const uint8_t *input); + + void encryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input); + void decryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input); + + void clear(); + +protected: + AESCommon(uint8_t keySize); + +private: + esp_aes_context ctx; +}; + +class AES128 : public AESCommon +{ +public: + AES128() : AESCommon(16) {} + virtual ~AES128(); +}; + +class AES192 : public AESCommon +{ +public: + AES192() : AESCommon(24) {} + virtual ~AES192(); +}; + +class AES256 : public AESCommon +{ +public: + AES256() : AESCommon(32) {} + virtual ~AES256(); +}; + +// The ESP32 AES context is so small that it already qualifies as "tiny". +typedef AES128 AESTiny128; +typedef AES256 AESTiny256; +typedef AES128 AESSmall128; +typedef AES256 AESSmall256; + +#endif // CRYPTO_AES_ESP32 + +#endif diff --git a/src/AES128.cpp b/src/AES128.cpp new file mode 100644 index 0000000..533e193 --- /dev/null +++ b/src/AES128.cpp @@ -0,0 +1,356 @@ +/* + * Copyright (C) 2015,2018 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "AES.h" +#include "Crypto.h" +#include + +#if defined(CRYPTO_AES_DEFAULT) || defined(CRYPTO_DOC) + +/** + * \class AES128 AES.h + * \brief AES block cipher with 128-bit keys. + * + * \sa AES192, AES256, AESTiny128, AESSmall128 + */ + +/** + * \brief Constructs an AES 128-bit block cipher with no initial key. + * + * This constructor must be followed by a call to setKey() before the + * block cipher can be used for encryption or decryption. + */ +AES128::AES128() +{ + rounds = 10; + schedule = sched; +} + +AES128::~AES128() +{ + clean(sched); +} + +/** + * \brief Size of a 128-bit AES key in bytes. + * \return Always returns 16. + */ +size_t AES128::keySize() const +{ + return 16; +} + +bool AES128::setKey(const uint8_t *key, size_t len) +{ + if (len != 16) + return false; + + // Copy the key itself into the first 16 bytes of the schedule. 
+ uint8_t *schedule = sched; + memcpy(schedule, key, 16); + + // Expand the key schedule until we have 176 bytes of expanded key. + uint8_t iteration = 1; + uint8_t n = 16; + uint8_t w = 4; + while (n < 176) { + if (w == 4) { + // Every 16 bytes (4 words) we need to apply the key schedule core. + keyScheduleCore(schedule + 16, schedule + 12, iteration); + schedule[16] ^= schedule[0]; + schedule[17] ^= schedule[1]; + schedule[18] ^= schedule[2]; + schedule[19] ^= schedule[3]; + ++iteration; + w = 0; + } else { + // Otherwise just XOR the word with the one 16 bytes previous. + schedule[16] = schedule[12] ^ schedule[0]; + schedule[17] = schedule[13] ^ schedule[1]; + schedule[18] = schedule[14] ^ schedule[2]; + schedule[19] = schedule[15] ^ schedule[3]; + } + + // Advance to the next word in the schedule. + schedule += 4; + n += 4; + ++w; + } + + return true; +} + +/** + * \class AESTiny128 AES.h + * \brief AES block cipher with 128-bit keys and tiny memory usage. + * + * This class differs from the AES128 class in the following ways: + * + * \li RAM requirements are vastly reduced. The key is stored directly + * and then expanded to the full key schedule round by round. The setKey() + * method is very fast because of this. + * \li Performance of encryptBlock() is slower than for AES128 due to + * expanding the key on the fly rather than ahead of time. + * \li The decryptBlock() function is not supported, which means that CBC + * mode cannot be used but the CTR, CFB, OFB, EAX, and GCM modes can be used. + * + * This class is useful when RAM is at a premium, CBC mode is not required, + * and reduced encryption performance is not a hindrance to the application. + * + * The companion AESSmall128 class supports decryptBlock() at the cost of + * some additional memory and slower setKey() times. + * + * \sa AESSmall128, AES128 + */ + +/** @cond */ + +// Helper macros. 
+#define KCORE(n) \ + do { \ + AESCommon::keyScheduleCore(temp, schedule + 12, (n)); \ + schedule[0] ^= temp[0]; \ + schedule[1] ^= temp[1]; \ + schedule[2] ^= temp[2]; \ + schedule[3] ^= temp[3]; \ + } while (0) +#define KXOR(a, b) \ + do { \ + schedule[(a) * 4] ^= schedule[(b) * 4]; \ + schedule[(a) * 4 + 1] ^= schedule[(b) * 4 + 1]; \ + schedule[(a) * 4 + 2] ^= schedule[(b) * 4 + 2]; \ + schedule[(a) * 4 + 3] ^= schedule[(b) * 4 + 3]; \ + } while (0) + +/** @endcond */ + +/** + * \brief Constructs an AES 128-bit block cipher with no initial key. + * + * This constructor must be followed by a call to setKey() before the + * block cipher can be used for encryption or decryption. + */ +AESTiny128::AESTiny128() +{ +} + +AESTiny128::~AESTiny128() +{ + clean(schedule); +} + +/** + * \brief Size of an AES block in bytes. + * \return Always returns 16. + */ +size_t AESTiny128::blockSize() const +{ + return 16; +} + +/** + * \brief Size of a 128-bit AES key in bytes. + * \return Always returns 16. + */ +size_t AESTiny128::keySize() const +{ + return 16; +} + +bool AESTiny128::setKey(const uint8_t *key, size_t len) +{ + if (len == 16) { + // Make a copy of the key - it will be expanded in encryptBlock(). + memcpy(schedule, key, 16); + return true; + } + return false; +} + +void AESTiny128::encryptBlock(uint8_t *output, const uint8_t *input) +{ + uint8_t schedule[16]; + uint8_t posn; + uint8_t round; + uint8_t state1[16]; + uint8_t state2[16]; + uint8_t temp[4]; + + // Start with the key in the schedule buffer. + memcpy(schedule, this->schedule, 16); + + // Copy the input into the state and XOR with the key schedule. + for (posn = 0; posn < 16; ++posn) + state1[posn] = input[posn] ^ schedule[posn]; + + // Perform the first 9 rounds of the cipher. + for (round = 1; round <= 9; ++round) { + // Expand the next 16 bytes of the key schedule. + KCORE(round); + KXOR(1, 0); + KXOR(2, 1); + KXOR(3, 2); + + // Encrypt using the key schedule. 
+ AESCommon::subBytesAndShiftRows(state2, state1); + AESCommon::mixColumn(state1, state2); + AESCommon::mixColumn(state1 + 4, state2 + 4); + AESCommon::mixColumn(state1 + 8, state2 + 8); + AESCommon::mixColumn(state1 + 12, state2 + 12); + for (posn = 0; posn < 16; ++posn) + state1[posn] ^= schedule[posn]; + } + + // Expand the final 16 bytes of the key schedule. + KCORE(10); + KXOR(1, 0); + KXOR(2, 1); + KXOR(3, 2); + + // Perform the final round. + AESCommon::subBytesAndShiftRows(state2, state1); + for (posn = 0; posn < 16; ++posn) + output[posn] = state2[posn] ^ schedule[posn]; +} + +void AESTiny128::decryptBlock(uint8_t *output, const uint8_t *input) +{ + // Decryption is not supported by AESTiny128. +} + +void AESTiny128::clear() +{ + clean(schedule); +} + +/** + * \class AESSmall128 AES.h + * \brief AES block cipher with 128-bit keys and reduced memory usage. + * + * This class differs from the AES128 class in that the RAM requirements are + * vastly reduced. The key schedule is expanded round by round instead of + * being generated and stored by setKey(). The performance of encryption + * and decryption is slightly less because of this. + * + * This class is useful when RAM is at a premium and reduced encryption + * performance is not a hindrance to the application. + * + * The companion AESTiny128 class uses even less RAM but only supports the + * encryptBlock() operation. Block cipher modes like CTR, EAX, and GCM + * do not need the decryptBlock() operation, so AESTiny128 may be a better + * option than AESSmall128 for many applications. + * + * \sa AESTiny128, AES128 + */ + +/** + * \brief Constructs an AES 128-bit block cipher with no initial key. + * + * This constructor must be followed by a call to setKey() before the + * block cipher can be used for encryption or decryption. 
+ */ +AESSmall128::AESSmall128() +{ +} + +AESSmall128::~AESSmall128() +{ + clean(reverse); +} + +bool AESSmall128::setKey(const uint8_t *key, size_t len) +{ + uint8_t *schedule; + uint8_t round; + uint8_t temp[4]; + + // Set the encryption key first. + if (!AESTiny128::setKey(key, len)) + return false; + + // Expand the key schedule up to the last round which gives + // us the round keys to use for the final two rounds. We can + // then work backwards from there in decryptBlock(). + schedule = reverse; + memcpy(schedule, key, 16); + for (round = 1; round <= 10; ++round) { + KCORE(round); + KXOR(1, 0); + KXOR(2, 1); + KXOR(3, 2); + } + + // Key is ready to go. + return true; +} + +void AESSmall128::decryptBlock(uint8_t *output, const uint8_t *input) +{ + uint8_t schedule[16]; + uint8_t round; + uint8_t posn; + uint8_t state1[16]; + uint8_t state2[16]; + uint8_t temp[4]; + + // Start with the end of the decryption schedule. + memcpy(schedule, reverse, 16); + + // Copy the input into the state and reverse the final round. + for (posn = 0; posn < 16; ++posn) + state1[posn] = input[posn] ^ schedule[posn]; + AESCommon::inverseShiftRowsAndSubBytes(state2, state1); + KXOR(3, 2); + KXOR(2, 1); + KXOR(1, 0); + KCORE(10); + + // Perform the next 9 rounds of the decryption process. + for (round = 9; round >= 1; --round) { + // Decrypt using the key schedule. + for (posn = 0; posn < 16; ++posn) + state2[posn] ^= schedule[posn]; + AESCommon::inverseMixColumn(state1, state2); + AESCommon::inverseMixColumn(state1 + 4, state2 + 4); + AESCommon::inverseMixColumn(state1 + 8, state2 + 8); + AESCommon::inverseMixColumn(state1 + 12, state2 + 12); + AESCommon::inverseShiftRowsAndSubBytes(state2, state1); + + // Expand the next 16 bytes of the key schedule in reverse. + KXOR(3, 2); + KXOR(2, 1); + KXOR(1, 0); + KCORE(round); + } + + // Reverse the initial round and create the output words. 
+ for (posn = 0; posn < 16; ++posn) + output[posn] = state2[posn] ^ schedule[posn]; +} + +void AESSmall128::clear() +{ + clean(reverse); + AESTiny128::clear(); +} + +#endif // CRYPTO_AES_DEFAULT diff --git a/src/AES192.cpp b/src/AES192.cpp new file mode 100644 index 0000000..57055d3 --- /dev/null +++ b/src/AES192.cpp @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "AES.h" +#include "Crypto.h" +#include + +#if defined(CRYPTO_AES_DEFAULT) || defined(CRYPTO_DOC) + +/** + * \class AES192 AES.h + * \brief AES block cipher with 192-bit keys. + * + * \sa AES128, AES256 + */ + +/** + * \brief Constructs an AES 192-bit block cipher with no initial key. + * + * This constructor must be followed by a call to setKey() before the + * block cipher can be used for encryption or decryption. 
+ */ +AES192::AES192() +{ + rounds = 12; + schedule = sched; +} + +AES192::~AES192() +{ + clean(sched); +} + +/** + * \brief Size of a 192-bit AES key in bytes. + * \return Always returns 24. + */ +size_t AES192::keySize() const +{ + return 24; +} + +bool AES192::setKey(const uint8_t *key, size_t len) +{ + if (len != 24) + return false; + + // Copy the key itself into the first 24 bytes of the schedule. + uint8_t *schedule = sched; + memcpy(schedule, key, 24); + + // Expand the key schedule until we have 208 bytes of expanded key. + uint8_t iteration = 1; + uint8_t n = 24; + uint8_t w = 6; + while (n < 208) { + if (w == 6) { + // Every 24 bytes (6 words) we need to apply the key schedule core. + keyScheduleCore(schedule + 24, schedule + 20, iteration); + schedule[24] ^= schedule[0]; + schedule[25] ^= schedule[1]; + schedule[26] ^= schedule[2]; + schedule[27] ^= schedule[3]; + ++iteration; + w = 0; + } else { + // Otherwise just XOR the word with the one 24 bytes previous. + schedule[24] = schedule[20] ^ schedule[0]; + schedule[25] = schedule[21] ^ schedule[1]; + schedule[26] = schedule[22] ^ schedule[2]; + schedule[27] = schedule[23] ^ schedule[3]; + } + + // Advance to the next word in the schedule. + schedule += 4; + n += 4; + ++w; + } + + return true; +} + +#endif // CRYPTO_AES_DEFAULT diff --git a/src/AES256.cpp b/src/AES256.cpp new file mode 100644 index 0000000..1540d8f --- /dev/null +++ b/src/AES256.cpp @@ -0,0 +1,401 @@ +/* + * Copyright (C) 2015,2018 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "AES.h" +#include "Crypto.h" +#include + +#if defined(CRYPTO_AES_DEFAULT) || defined(CRYPTO_DOC) + +/** + * \class AES256 AES.h + * \brief AES block cipher with 256-bit keys. + * + * \sa AES128, AES192, AESTiny256, AESSmall256 + */ + +/** + * \brief Constructs an AES 256-bit block cipher with no initial key. + * + * This constructor must be followed by a call to setKey() before the + * block cipher can be used for encryption or decryption. + */ +AES256::AES256() +{ + rounds = 14; + schedule = sched; +} + +AES256::~AES256() +{ + clean(sched); +} + +/** + * \brief Size of a 256-bit AES key in bytes. + * \return Always returns 32. + */ +size_t AES256::keySize() const +{ + return 32; +} + +bool AES256::setKey(const uint8_t *key, size_t len) +{ + if (len != 32) + return false; + + // Copy the key itself into the first 32 bytes of the schedule. 
+ uint8_t *schedule = sched; + memcpy(schedule, key, 32); + + // Expand the key schedule until we have 240 bytes of expanded key. + uint8_t iteration = 1; + uint8_t n = 32; + uint8_t w = 8; + while (n < 240) { + if (w == 8) { + // Every 32 bytes (8 words) we need to apply the key schedule core. + keyScheduleCore(schedule + 32, schedule + 28, iteration); + schedule[32] ^= schedule[0]; + schedule[33] ^= schedule[1]; + schedule[34] ^= schedule[2]; + schedule[35] ^= schedule[3]; + ++iteration; + w = 0; + } else if (w == 4) { + // At the 16 byte mark we need to apply the S-box. + applySbox(schedule + 32, schedule + 28); + schedule[32] ^= schedule[0]; + schedule[33] ^= schedule[1]; + schedule[34] ^= schedule[2]; + schedule[35] ^= schedule[3]; + } else { + // Otherwise just XOR the word with the one 32 bytes previous. + schedule[32] = schedule[28] ^ schedule[0]; + schedule[33] = schedule[29] ^ schedule[1]; + schedule[34] = schedule[30] ^ schedule[2]; + schedule[35] = schedule[31] ^ schedule[3]; + } + + // Advance to the next word in the schedule. + schedule += 4; + n += 4; + ++w; + } + + return true; +} + +/** + * \class AESTiny256 AES.h + * \brief AES block cipher with 256-bit keys and tiny memory usage. + * + * This class differs from the AES256 class in the following ways: + * + * \li RAM requirements are vastly reduced. The key is stored directly + * and then expanded to the full key schedule round by round. The setKey() + * method is very fast because of this. + * \li Performance of encryptBlock() is slower than for AES256 due to + * expanding the key on the fly rather than ahead of time. + * \li The decryptBlock() function is not supported, which means that CBC + * mode cannot be used but the CTR, CFB, OFB, EAX, and GCM modes can be used. + * + * This class is useful when RAM is at a premium, CBC mode is not required, + * and reduced encryption performance is not a hindrance to the application. 
+ * + * The companion AESSmall256 class supports decryptBlock() at the cost of + * some additional memory and slower setKey() times. + * + * \sa AESSmall256, AES256 + */ + +/** @cond */ + +// Helper macros. +#define LEFT 0 +#define RIGHT 16 +#define ENCRYPT(phase) \ + do { \ + AESCommon::subBytesAndShiftRows(state2, state1); \ + AESCommon::mixColumn(state1, state2); \ + AESCommon::mixColumn(state1 + 4, state2 + 4); \ + AESCommon::mixColumn(state1 + 8, state2 + 8); \ + AESCommon::mixColumn(state1 + 12, state2 + 12); \ + for (posn = 0; posn < 16; ++posn) \ + state1[posn] ^= schedule[posn + (phase)]; \ + } while (0) +#define DECRYPT(phase) \ + do { \ + for (posn = 0; posn < 16; ++posn) \ + state2[posn] ^= schedule[posn + (phase)]; \ + AESCommon::inverseMixColumn(state1, state2); \ + AESCommon::inverseMixColumn(state1 + 4, state2 + 4); \ + AESCommon::inverseMixColumn(state1 + 8, state2 + 8); \ + AESCommon::inverseMixColumn(state1 + 12, state2 + 12); \ + AESCommon::inverseShiftRowsAndSubBytes(state2, state1); \ + } while (0) +#define KCORE(n) \ + do { \ + AESCommon::keyScheduleCore(temp, schedule + 28, (n)); \ + schedule[0] ^= temp[0]; \ + schedule[1] ^= temp[1]; \ + schedule[2] ^= temp[2]; \ + schedule[3] ^= temp[3]; \ + } while (0) +#define KXOR(a, b) \ + do { \ + schedule[(a) * 4] ^= schedule[(b) * 4]; \ + schedule[(a) * 4 + 1] ^= schedule[(b) * 4 + 1]; \ + schedule[(a) * 4 + 2] ^= schedule[(b) * 4 + 2]; \ + schedule[(a) * 4 + 3] ^= schedule[(b) * 4 + 3]; \ + } while (0) +#define KSBOX() \ + do { \ + AESCommon::applySbox(temp, schedule + 12); \ + schedule[16] ^= temp[0]; \ + schedule[17] ^= temp[1]; \ + schedule[18] ^= temp[2]; \ + schedule[19] ^= temp[3]; \ + } while (0) + +/** @endcond */ + +/** + * \brief Constructs an AES 256-bit block cipher with no initial key. + * + * This constructor must be followed by a call to setKey() before the + * block cipher can be used for encryption or decryption. 
+ */ +AESTiny256::AESTiny256() +{ +} + +AESTiny256::~AESTiny256() +{ + clean(schedule); +} + +/** + * \brief Size of an AES block in bytes. + * \return Always returns 16. + */ +size_t AESTiny256::blockSize() const +{ + return 16; +} + +/** + * \brief Size of a 256-bit AES key in bytes. + * \return Always returns 32. + */ +size_t AESTiny256::keySize() const +{ + return 32; +} + +bool AESTiny256::setKey(const uint8_t *key, size_t len) +{ + if (len == 32) { + // Make a copy of the key - it will be expanded in encryptBlock(). + memcpy(schedule, key, 32); + return true; + } + return false; +} + +void AESTiny256::encryptBlock(uint8_t *output, const uint8_t *input) +{ + uint8_t schedule[32]; + uint8_t posn; + uint8_t round; + uint8_t state1[16]; + uint8_t state2[16]; + uint8_t temp[4]; + + // Start with the key in the schedule buffer. + memcpy(schedule, this->schedule, 32); + + // Copy the input into the state and perform the first round. + for (posn = 0; posn < 16; ++posn) + state1[posn] = input[posn] ^ schedule[posn]; + ENCRYPT(RIGHT); + + // Perform the next 12 rounds of the cipher two at a time. + for (round = 1; round <= 6; ++round) { + // Expand the next 32 bytes of the key schedule. + KCORE(round); + KXOR(1, 0); + KXOR(2, 1); + KXOR(3, 2); + KSBOX(); + KXOR(5, 4); + KXOR(6, 5); + KXOR(7, 6); + + // Encrypt using the left and right halves of the key schedule. + ENCRYPT(LEFT); + ENCRYPT(RIGHT); + } + + // Expand the final 16 bytes of the key schedule. + KCORE(7); + KXOR(1, 0); + KXOR(2, 1); + KXOR(3, 2); + + // Perform the final round. + AESCommon::subBytesAndShiftRows(state2, state1); + for (posn = 0; posn < 16; ++posn) + output[posn] = state2[posn] ^ schedule[posn]; +} + +void AESTiny256::decryptBlock(uint8_t *output, const uint8_t *input) +{ + // Decryption is not supported by AESTiny256. +} + +void AESTiny256::clear() +{ + clean(schedule); +} + +/** + * \class AESSmall256 AES.h + * \brief AES block cipher with 256-bit keys and reduced memory usage. 
+ * + * This class differs from the AES256 class in that the RAM requirements are + * vastly reduced. The key schedule is expanded round by round instead of + * being generated and stored by setKey(). The performance of encryption + * and decryption is slightly less because of this. + * + * This class is useful when RAM is at a premium and reduced encryption + * performance is not a hindrance to the application. + * + * The companion AESTiny256 class uses even less RAM but only supports the + * encryptBlock() operation. Block cipher modes like CTR, EAX, and GCM + * do not need the decryptBlock() operation, so AESTiny256 may be a better + * option than AESSmall256 for many applications. + * + * \sa AESTiny256, AES256 + */ + +/** + * \brief Constructs an AES 256-bit block cipher with no initial key. + * + * This constructor must be followed by a call to setKey() before the + * block cipher can be used for encryption or decryption. + */ +AESSmall256::AESSmall256() +{ +} + +AESSmall256::~AESSmall256() +{ + clean(reverse); +} + +bool AESSmall256::setKey(const uint8_t *key, size_t len) +{ + uint8_t *schedule; + uint8_t round; + uint8_t temp[4]; + + // Set the encryption key first. + if (!AESTiny256::setKey(key, len)) + return false; + + // Expand the key schedule up to the last round which gives + // us the round keys to use for the final two rounds. We can + // then work backwards from there in decryptBlock(). + schedule = reverse; + memcpy(schedule, key, 32); + for (round = 1; round <= 6; ++round) { + KCORE(round); + KXOR(1, 0); + KXOR(2, 1); + KXOR(3, 2); + KSBOX(); + KXOR(5, 4); + KXOR(6, 5); + KXOR(7, 6); + } + KCORE(7); + KXOR(1, 0); + KXOR(2, 1); + KXOR(3, 2); + + // Key is ready to go. + return true; +} + +void AESSmall256::decryptBlock(uint8_t *output, const uint8_t *input) +{ + uint8_t schedule[32]; + uint8_t round; + uint8_t posn; + uint8_t state1[16]; + uint8_t state2[16]; + uint8_t temp[4]; + + // Start with the end of the decryption schedule. 
+ memcpy(schedule, reverse, 32); + + // Copy the input into the state and reverse the final round. + for (posn = 0; posn < 16; ++posn) + state1[posn] = input[posn] ^ schedule[posn]; + AESCommon::inverseShiftRowsAndSubBytes(state2, state1); + KXOR(3, 2); + KXOR(2, 1); + KXOR(1, 0); + KCORE(7); + + // Perform the next 12 rounds of the decryption process two at a time. + for (round = 6; round >= 1; --round) { + // Decrypt using the right and left halves of the key schedule. + DECRYPT(RIGHT); + DECRYPT(LEFT); + + // Expand the next 32 bytes of the key schedule in reverse. + KXOR(7, 6); + KXOR(6, 5); + KXOR(5, 4); + KSBOX(); + KXOR(3, 2); + KXOR(2, 1); + KXOR(1, 0); + KCORE(round); + } + + // Reverse the initial round and create the output words. + DECRYPT(RIGHT); + for (posn = 0; posn < 16; ++posn) + output[posn] = state2[posn] ^ schedule[posn]; +} + +void AESSmall256::clear() +{ + clean(reverse); + AESTiny256::clear(); +} + +#endif // CRYPTO_AES_DEFAULT diff --git a/src/AESCommon.cpp b/src/AESCommon.cpp new file mode 100644 index 0000000..ee44b81 --- /dev/null +++ b/src/AESCommon.cpp @@ -0,0 +1,328 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "AES.h" +#include "Crypto.h" +#include "utility/ProgMemUtil.h" +#include "ArduinoECCX08.h" + +#if defined(CRYPTO_AES_DEFAULT) || defined(CRYPTO_DOC) + +/** + * \class AESCommon AES.h + * \brief Abstract base class for AES block ciphers. + * + * This class is abstract. The caller should instantiate AES128, + * AES192, or AES256 to create an AES block cipher with a specific + * key size. + * + * \note This AES implementation does not have constant cache behaviour due + * to the use of table lookups. It may not be safe to use this implementation + * in an environment where the attacker can observe the timing of encryption + * and decryption operations. Unless AES compatibility is required, + * it is recommended that the ChaCha stream cipher be used instead. 
+ * + * Reference: http://en.wikipedia.org/wiki/Advanced_Encryption_Standard + * + * \sa ChaCha, AES128, AES192, AES256 + */ + +/** @cond sbox */ + +// AES S-box (http://en.wikipedia.org/wiki/Rijndael_S-box) +static uint8_t const sbox[256] PROGMEM = { + 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, // 0x00 + 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, // 0x10 + 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, // 0x20 + 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, // 0x30 + 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, // 0x40 + 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, // 0x50 + 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, // 0x60 + 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, // 0x70 + 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, // 0x80 + 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, // 0x90 + 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, // 0xA0 + 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, // 0xB0 + 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, // 0xC0 + 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, // 0xD0 + 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, // 0xE0 + 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, // 0xF0 + 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 +}; + +// AES 
inverse S-box (http://en.wikipedia.org/wiki/Rijndael_S-box) +static uint8_t const sbox_inverse[256] PROGMEM = { + 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, // 0x00 + 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB, + 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, // 0x10 + 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, + 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, // 0x20 + 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, + 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, // 0x30 + 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25, + 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, // 0x40 + 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, + 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, // 0x50 + 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, + 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, // 0x60 + 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06, + 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, // 0x70 + 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, + 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, // 0x80 + 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, + 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, // 0x90 + 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E, + 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, // 0xA0 + 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, + 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, // 0xB0 + 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, + 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, // 0xC0 + 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, + 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, // 0xD0 + 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, + 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, // 0xE0 + 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, // 0xF0 + 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D +}; + +/** @endcond */ + +/** + * \brief Constructs an AES block cipher object. 
+ */ +AESCommon::AESCommon() + : rounds(0), schedule(0) +{ +} + +/** + * \brief Destroys this AES block cipher object after clearing + * sensitive information. + */ +AESCommon::~AESCommon() +{ +} + +/** + * \brief Size of an AES block in bytes. + * \return Always returns 16. + */ +size_t AESCommon::blockSize() const +{ + return 16; +} + +// Constants to correct Galois multiplication for the high bits +// that are shifted out when multiplying by powers of two. +static uint8_t const K[8] = { + 0x00, + 0x1B, + (0x1B << 1), + (0x1B << 1) ^ 0x1B, + (0x1B << 2), + (0x1B << 2) ^ 0x1B, + (0x1B << 2) ^ (0x1B << 1), + (0x1B << 2) ^ (0x1B << 1) ^ 0x1B +}; + +// Multiply x by 2 in the Galois field, to achieve the effect of the following: +// +// if (x & 0x80) +// return (x << 1) ^ 0x1B; +// else +// return (x << 1); +// +// However, we don't want to use runtime conditionals if we can help it +// to avoid leaking timing information from the implementation. +// In this case, multiplication is slightly faster than table lookup on AVR. +#define gmul2(x) (t = ((uint16_t)(x)) << 1, \ + ((uint8_t)t) ^ (uint8_t)(0x1B * ((uint8_t)(t >> 8)))) + +// Multiply x by 4 in the Galois field. +#define gmul4(x) (t = ((uint16_t)(x)) << 2, ((uint8_t)t) ^ K[t >> 8]) + +// Multiply x by 8 in the Galois field. 
+#define gmul8(x) (t = ((uint16_t)(x)) << 3, ((uint8_t)t) ^ K[t >> 8]) + +#define OUT(col, row) output[(col) * 4 + (row)] +#define IN(col, row) input[(col) * 4 + (row)] + +/** @cond aes_funcs */ + +void AESCommon::subBytesAndShiftRows(uint8_t *output, const uint8_t *input) +{ + OUT(0, 0) = pgm_read_byte(sbox + IN(0, 0)); + OUT(0, 1) = pgm_read_byte(sbox + IN(1, 1)); + OUT(0, 2) = pgm_read_byte(sbox + IN(2, 2)); + OUT(0, 3) = pgm_read_byte(sbox + IN(3, 3)); + OUT(1, 0) = pgm_read_byte(sbox + IN(1, 0)); + OUT(1, 1) = pgm_read_byte(sbox + IN(2, 1)); + OUT(1, 2) = pgm_read_byte(sbox + IN(3, 2)); + OUT(1, 3) = pgm_read_byte(sbox + IN(0, 3)); + OUT(2, 0) = pgm_read_byte(sbox + IN(2, 0)); + OUT(2, 1) = pgm_read_byte(sbox + IN(3, 1)); + OUT(2, 2) = pgm_read_byte(sbox + IN(0, 2)); + OUT(2, 3) = pgm_read_byte(sbox + IN(1, 3)); + OUT(3, 0) = pgm_read_byte(sbox + IN(3, 0)); + OUT(3, 1) = pgm_read_byte(sbox + IN(0, 1)); + OUT(3, 2) = pgm_read_byte(sbox + IN(1, 2)); + OUT(3, 3) = pgm_read_byte(sbox + IN(2, 3)); +} + +void AESCommon::inverseShiftRowsAndSubBytes(uint8_t *output, const uint8_t *input) +{ + OUT(0, 0) = pgm_read_byte(sbox_inverse + IN(0, 0)); + OUT(0, 1) = pgm_read_byte(sbox_inverse + IN(3, 1)); + OUT(0, 2) = pgm_read_byte(sbox_inverse + IN(2, 2)); + OUT(0, 3) = pgm_read_byte(sbox_inverse + IN(1, 3)); + OUT(1, 0) = pgm_read_byte(sbox_inverse + IN(1, 0)); + OUT(1, 1) = pgm_read_byte(sbox_inverse + IN(0, 1)); + OUT(1, 2) = pgm_read_byte(sbox_inverse + IN(3, 2)); + OUT(1, 3) = pgm_read_byte(sbox_inverse + IN(2, 3)); + OUT(2, 0) = pgm_read_byte(sbox_inverse + IN(2, 0)); + OUT(2, 1) = pgm_read_byte(sbox_inverse + IN(1, 1)); + OUT(2, 2) = pgm_read_byte(sbox_inverse + IN(0, 2)); + OUT(2, 3) = pgm_read_byte(sbox_inverse + IN(3, 3)); + OUT(3, 0) = pgm_read_byte(sbox_inverse + IN(3, 0)); + OUT(3, 1) = pgm_read_byte(sbox_inverse + IN(2, 1)); + OUT(3, 2) = pgm_read_byte(sbox_inverse + IN(1, 2)); + OUT(3, 3) = pgm_read_byte(sbox_inverse + IN(0, 3)); +} + +void 
AESCommon::mixColumn(uint8_t *output, uint8_t *input) +{ + uint16_t t; // Needed by the gmul2 macro. + uint8_t a = input[0]; + uint8_t b = input[1]; + uint8_t c = input[2]; + uint8_t d = input[3]; + uint8_t a2 = gmul2(a); + uint8_t b2 = gmul2(b); + uint8_t c2 = gmul2(c); + uint8_t d2 = gmul2(d); + output[0] = a2 ^ b2 ^ b ^ c ^ d; + output[1] = a ^ b2 ^ c2 ^ c ^ d; + output[2] = a ^ b ^ c2 ^ d2 ^ d; + output[3] = a2 ^ a ^ b ^ c ^ d2; +} + +void AESCommon::inverseMixColumn(uint8_t *output, const uint8_t *input) +{ + uint16_t t; // Needed by the gmul2, gmul4, and gmul8 macros. + uint8_t a = input[0]; + uint8_t b = input[1]; + uint8_t c = input[2]; + uint8_t d = input[3]; + uint8_t a2 = gmul2(a); + uint8_t b2 = gmul2(b); + uint8_t c2 = gmul2(c); + uint8_t d2 = gmul2(d); + uint8_t a4 = gmul4(a); + uint8_t b4 = gmul4(b); + uint8_t c4 = gmul4(c); + uint8_t d4 = gmul4(d); + uint8_t a8 = gmul8(a); + uint8_t b8 = gmul8(b); + uint8_t c8 = gmul8(c); + uint8_t d8 = gmul8(d); + output[0] = a8 ^ a4 ^ a2 ^ b8 ^ b2 ^ b ^ c8 ^ c4 ^ c ^ d8 ^ d; + output[1] = a8 ^ a ^ b8 ^ b4 ^ b2 ^ c8 ^ c2 ^ c ^ d8 ^ d4 ^ d; + output[2] = a8 ^ a4 ^ a ^ b8 ^ b ^ c8 ^ c4 ^ c2 ^ d8 ^ d2 ^ d; + output[3] = a8 ^ a2 ^ a ^ b8 ^ b4 ^ b ^ c8 ^ c ^ d8 ^ d4 ^ d2; +} + +/** @endcond */ + +void AESCommon::encryptBlock(uint8_t *output, const uint8_t *input) +{ + int AESCommonSucces = ECCX08.aesEncryptECB(0XFFFF, input, output); +} + +void AESCommon::decryptBlock(uint8_t *output, const uint8_t *input) +{ + int AesDecryptSucces = ECCX08.aesDecryptECB(0xFFFF, input, output); +} + +void AESCommon::encryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input) +{ + int aesSuccess = ECCX08.aesEncryptECB(slot, input, output); +} + +void AESCommon::decryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input) +{ + int aesSuccess = ECCX08.aesDecryptECB(slot, input, output); + //debugging print statements + // Serial.printf("\n aesDecryptECBOutput = "); + // for(int index = 0; index < 16; index++){ + // 
Serial.printf("%x ", output[index]); + // } + // Serial.println(""); +} + +void AESCommon::clear() +{ + clean(schedule, (rounds + 1) * 16); +} + +/** @cond aes_keycore */ + +void AESCommon::keyScheduleCore(uint8_t *output, const uint8_t *input, uint8_t iteration) +{ + // Rcon(i), 2^i in the Rijndael finite field, for i = 0..10. + // http://en.wikipedia.org/wiki/Rijndael_key_schedule + static uint8_t const rcon[11] PROGMEM = { + 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, // 0x00 + 0x80, 0x1B, 0x36 + }; + output[0] = pgm_read_byte(sbox + input[1]) ^ pgm_read_byte(rcon + iteration); + output[1] = pgm_read_byte(sbox + input[2]); + output[2] = pgm_read_byte(sbox + input[3]); + output[3] = pgm_read_byte(sbox + input[0]); +} + +void AESCommon::applySbox(uint8_t *output, const uint8_t *input) +{ + output[0] = pgm_read_byte(sbox + input[0]); + output[1] = pgm_read_byte(sbox + input[1]); + output[2] = pgm_read_byte(sbox + input[2]); + output[3] = pgm_read_byte(sbox + input[3]); +} + +/** @endcond */ + +#endif // CRYPTO_AES_DEFAULT diff --git a/src/AESEsp32.cpp b/src/AESEsp32.cpp new file mode 100644 index 0000000..72a8646 --- /dev/null +++ b/src/AESEsp32.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2018 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "AES.h" +#include "Crypto.h" +#include + +// AES implementation for ESP32 using the hardware crypto module. + +#if defined(CRYPTO_AES_ESP32) + +AESCommon::AESCommon(uint8_t keySize) +{ + ctx.key_bytes = keySize; +} + +AESCommon::~AESCommon() +{ + clean(ctx.key, sizeof(ctx.key)); +} + +size_t AESCommon::blockSize() const +{ + return 16; +} + +size_t AESCommon::keySize() const +{ + return ctx.key_bytes; +} + +bool AESCommon::setKey(const uint8_t *key, size_t len) +{ + if (len == ctx.key_bytes) { + // Do the effect of esp_aes_setkey() which is just a memcpy(). + memcpy(ctx.key, key, len); + return true; + } + return false; +} + +void AESCommon::encryptBlock(uint8_t *output, const uint8_t *input) +{ + esp_aes_encrypt(&ctx, input, output); +} + +void AESCommon::decryptBlock(uint8_t *output, const uint8_t *input) +{ + esp_aes_decrypt(&ctx, input, output); +} + +void AESCommon::clear() +{ + clean(ctx.key, sizeof(ctx.key)); +} + +AES128::~AES128() +{ +} + +AES192::~AES192() +{ +} + +AES256::~AES256() +{ +} + +#endif // CRYPTO_AES_ESP32 diff --git a/src/AuthenticatedCipher.cpp b/src/AuthenticatedCipher.cpp new file mode 100644 index 0000000..6184aa8 --- /dev/null +++ b/src/AuthenticatedCipher.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "AuthenticatedCipher.h" + +/** + * \class AuthenticatedCipher AuthenticatedCipher.h + * \brief Abstract base class for authenticated ciphers. + * + * This class abstracts the details of algorithms that provide Authenticated + * Encryption with Associated Data (AEAD). Such algorithms combine + * encryption with message authentication to provide a single primitive. + * + * Authenticated ciphers have four parameters: the secret key, an + * initialization vector (called a "nonce" in the literature), the + * plaintext, and some associated data which is to be authenticated + * with the plaintext but not encrypted. Associated data might be + * sequence numbers, IP addresses, protocol versions, or other information + * that is not secret but is important and unique to the session. + * + * Subclasses encrypt the plaintext content and output the ciphertext. 
+ * Once all plaintext has been processed, the caller should invoke + * computeTag() to obtain the authentication tag to transmit with + * the ciphertext. When the ciphertext is later decrypted, the checkTag() + * function can be used to check that the data is authentic. + * + * Reference: RFC 5116 + * + * \sa Cipher + */ + +/** + * \brief Constructs a new authenticated cipher. + */ +AuthenticatedCipher::AuthenticatedCipher() +{ +} + +/** + * \brief Destroys this authenticated cipher. + */ +AuthenticatedCipher::~AuthenticatedCipher() +{ +} + +/** + * \fn size_t AuthenticatedCipher::tagSize() const + * \brief Returns the size of the authentication tag. + * + * \return The size of the authentication tag in bytes. + * + * By default this function should return the largest tag size supported + * by the authenticated cipher. + * + * \sa computeTag() + */ + +/** + * \fn void AuthenticatedCipher::addAuthData(const void *data, size_t len) + * \brief Adds extra data that will be authenticated but not encrypted. + * + * \param data The extra data to be authenticated. + * \param len The number of bytes of extra data to be authenticated. + * + * This function must be called before the first call to encrypt() or + * decrypt(). That is, it is assumed that all extra data for authentication + * is available before the first payload data block and that it will be + * prepended to the payload for authentication. If the subclass needs to + * process the extra data after the payload, then it is responsible for saving + * \a data away until it is needed during computeTag() or checkTag(). + * + * This function can be called multiple times with separate extra data + * blocks for authentication. All such data will be concatenated into a + * single block for authentication purposes. + */ + +/** + * \fn void AuthenticatedCipher::computeTag(void *tag, size_t len) + * \brief Finalizes the encryption process and computes the authentication tag. 
+ * + * \param tag Points to the buffer to write the tag to. + * \param len The length of the tag, which may be less than tagSize() to + * truncate the tag to the first \a len bytes. + * + * \sa checkTag() + */ + +/** + * \fn bool AuthenticatedCipher::checkTag(const void *tag, size_t len) + * \brief Finalizes the decryption process and checks the authentication tag. + * + * \param tag The tag value from the incoming ciphertext to be checked. + * \param len The length of the tag value in bytes, which may be less + * than tagSize(). + * + * \return Returns true if the \a tag is identical to the first \a len + * bytes of the authentication tag that was calculated during the + * decryption process. Returns false otherwise. + * + * This function must be called after the final block of ciphertext is + * passed to decrypt() to determine if the data could be authenticated. + * + * \note Authenticated cipher modes usually require that if the tag could + * not be verified, then all of the data that was previously decrypted + * must be discarded. It is unwise to use the decrypted data for + * any purpose before it can be verified. Callers are responsible for + * ensuring that any data returned via previous calls to decrypt() is + * discarded if checkTag() returns false. + * + * \sa computeTag() + */ diff --git a/src/AuthenticatedCipher.h b/src/AuthenticatedCipher.h new file mode 100644 index 0000000..de419d0 --- /dev/null +++ b/src/AuthenticatedCipher.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_AUTHENTICATEDCIPHER_h +#define CRYPTO_AUTHENTICATEDCIPHER_h + +#include "Cipher.h" + +class AuthenticatedCipher : public Cipher +{ +public: + AuthenticatedCipher(); + virtual ~AuthenticatedCipher(); + + virtual size_t tagSize() const = 0; + + virtual void addAuthData(const void *data, size_t len) = 0; + + virtual void computeTag(void *tag, size_t len) = 0; + virtual bool checkTag(const void *tag, size_t len) = 0; +}; + +#endif diff --git a/src/BLAKE2b.cpp b/src/BLAKE2b.cpp new file mode 100644 index 0000000..7352ec0 --- /dev/null +++ b/src/BLAKE2b.cpp @@ -0,0 +1,330 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "BLAKE2b.h" +#include "Crypto.h" +#include "utility/EndianUtil.h" +#include "utility/RotateUtil.h" +#include "utility/ProgMemUtil.h" +#include + +/** + * \class BLAKE2b BLAKE2b.h + * \brief BLAKE2b hash algorithm. + * + * BLAKE2b is a variation on the ChaCha stream cipher, designed for hashing, + * with a 512-bit hash output. It is intended as a high performance + * replacement for SHA512 for when speed is critical but exact SHA512 + * compatibility is not. + * + * This class supports two types of keyed hash. The BLAKE2 keyed hash and + * traditional HMAC. The BLAKE2 keyed hash is recommended unless there is + * some higher-level application need to be compatible with the HMAC + * construction. 
The keyed hash is computed as follows: + * + * \code + * BLAKE2b blake; + * blake.reset(key, sizeof(key), outputLength); + * blake.update(data1, sizeof(data1)); + * blake.update(data2, sizeof(data2)); + * ... + * blake.update(dataN, sizeof(dataN)); + * blake.finalize(hash, outputLength); + * \endcode + * + * The HMAC is computed as follows (the output length is always 64): + * + * \code + * BLAKE2b blake; + * blake.resetHMAC(key, sizeof(key)); + * blake.update(data1, sizeof(data1)); + * blake.update(data2, sizeof(data2)); + * ... + * blake.update(dataN, sizeof(dataN)); + * blake.finalizeHMAC(key, sizeof(key), hash, 32); + * \endcode + * + * References: https://blake2.net/, + * RFC 7693 + * + * \sa BLAKE2s, SHA512, SHA3_512 + */ + +/** + * \brief Constructs a BLAKE2b hash object. + */ +BLAKE2b::BLAKE2b() +{ + reset(); +} + +/** + * \brief Destroys this BLAKE2b hash object after clearing + * sensitive information. + */ +BLAKE2b::~BLAKE2b() +{ + clean(state); +} + +size_t BLAKE2b::hashSize() const +{ + return 64; +} + +size_t BLAKE2b::blockSize() const +{ + return 128; +} + +// Initialization vectors for BLAKE2b. +#define BLAKE2b_IV0 0x6a09e667f3bcc908ULL +#define BLAKE2b_IV1 0xbb67ae8584caa73bULL +#define BLAKE2b_IV2 0x3c6ef372fe94f82bULL +#define BLAKE2b_IV3 0xa54ff53a5f1d36f1ULL +#define BLAKE2b_IV4 0x510e527fade682d1ULL +#define BLAKE2b_IV5 0x9b05688c2b3e6c1fULL +#define BLAKE2b_IV6 0x1f83d9abfb41bd6bULL +#define BLAKE2b_IV7 0x5be0cd19137e2179ULL + +void BLAKE2b::reset() +{ + state.h[0] = BLAKE2b_IV0 ^ 0x01010040; // Default output length of 64. + state.h[1] = BLAKE2b_IV1; + state.h[2] = BLAKE2b_IV2; + state.h[3] = BLAKE2b_IV3; + state.h[4] = BLAKE2b_IV4; + state.h[5] = BLAKE2b_IV5; + state.h[6] = BLAKE2b_IV6; + state.h[7] = BLAKE2b_IV7; + state.chunkSize = 0; + state.lengthLow = 0; + state.lengthHigh = 0; +} + +/** + * \brief Resets the hash ready for a new hashing process with a specified + * output length. 
+ * + * \param outputLength The output length to use for the final hash in bytes, + * between 1 and 64. + */ +void BLAKE2b::reset(uint8_t outputLength) +{ + if (outputLength < 1) + outputLength = 1; + else if (outputLength > 64) + outputLength = 64; + state.h[0] = BLAKE2b_IV0 ^ 0x01010000 ^ outputLength; + state.h[1] = BLAKE2b_IV1; + state.h[2] = BLAKE2b_IV2; + state.h[3] = BLAKE2b_IV3; + state.h[4] = BLAKE2b_IV4; + state.h[5] = BLAKE2b_IV5; + state.h[6] = BLAKE2b_IV6; + state.h[7] = BLAKE2b_IV7; + state.chunkSize = 0; + state.lengthLow = 0; + state.lengthHigh = 0; +} + +/** + * \brief Resets the hash ready for a new hashing process with a specified + * key and output length. + * + * \param key Points to the key. + * \param keyLen The length of the key in bytes, between 0 and 64. + * \param outputLength The output length to use for the final hash in bytes, + * between 1 and 64. + * + * If \a keyLen is greater than 64, then the \a key will be truncated to + * the first 64 bytes. + */ +void BLAKE2b::reset(const void *key, size_t keyLen, uint8_t outputLength) +{ + if (keyLen > 64) + keyLen = 64; + if (outputLength < 1) + outputLength = 1; + else if (outputLength > 64) + outputLength = 64; + state.h[0] = BLAKE2b_IV0 ^ 0x01010000 ^ (keyLen << 8) ^ outputLength; + state.h[1] = BLAKE2b_IV1; + state.h[2] = BLAKE2b_IV2; + state.h[3] = BLAKE2b_IV3; + state.h[4] = BLAKE2b_IV4; + state.h[5] = BLAKE2b_IV5; + state.h[6] = BLAKE2b_IV6; + state.h[7] = BLAKE2b_IV7; + if (keyLen > 0) { + // Set the first block to the key and pad with zeroes. + memcpy(state.m, key, keyLen); + memset(((uint8_t *)state.m) + keyLen, 0, 128 - keyLen); + state.chunkSize = 128; + state.lengthLow = 128; + } else { + // No key. The first data block is the first hashed block. + state.chunkSize = 0; + state.lengthLow = 0; + } + state.lengthHigh = 0; +} + +void BLAKE2b::update(const void *data, size_t len) +{ + // Break the input up into 1024-bit chunks and process each in turn. 
+ const uint8_t *d = (const uint8_t *)data; + while (len > 0) { + if (state.chunkSize == 128) { + // Previous chunk was full and we know that it wasn't the + // last chunk, so we can process it now with f0 set to zero. + processChunk(0); + state.chunkSize = 0; + } + uint8_t size = 128 - state.chunkSize; + if (size > len) + size = len; + memcpy(((uint8_t *)state.m) + state.chunkSize, d, size); + state.chunkSize += size; + uint64_t temp = state.lengthLow; + state.lengthLow += size; + if (state.lengthLow < temp) + ++state.lengthHigh; + len -= size; + d += size; + } +} + +void BLAKE2b::finalize(void *hash, size_t len) +{ + // Pad the last chunk and hash it with f0 set to all-ones. + memset(((uint8_t *)state.m) + state.chunkSize, 0, 128 - state.chunkSize); + processChunk(0xFFFFFFFFFFFFFFFFULL); + + // Convert the hash into little-endian in the message buffer. + for (uint8_t posn = 0; posn < 8; ++posn) + state.m[posn] = htole64(state.h[posn]); + + // Copy the hash to the caller's return buffer. + if (len > 64) + len = 64; + memcpy(hash, state.m, len); +} + +void BLAKE2b::clear() +{ + clean(state); + reset(); +} + +void BLAKE2b::resetHMAC(const void *key, size_t keyLen) +{ + formatHMACKey(state.m, key, keyLen, 0x36); + state.lengthLow += 128; + processChunk(0); +} + +void BLAKE2b::finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen) +{ + uint8_t temp[64]; + finalize(temp, sizeof(temp)); + formatHMACKey(state.m, key, keyLen, 0x5C); + state.lengthLow += 128; + processChunk(0); + update(temp, sizeof(temp)); + finalize(hash, hashLen); + clean(temp); +} + +// Permutation on the message input state for BLAKE2b. 
+static const uint8_t sigma[12][16] PROGMEM = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0}, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, +}; + +// Perform a BLAKE2b quarter round operation. +#define quarterRound(a, b, c, d, i) \ + do { \ + uint64_t _b = (b); \ + uint64_t _a = (a) + _b + state.m[pgm_read_byte(&(sigma[index][2 * (i)]))]; \ + uint64_t _d = rightRotate32_64((d) ^ _a); \ + uint64_t _c = (c) + _d; \ + _b = rightRotate24_64(_b ^ _c); \ + _a += _b + state.m[pgm_read_byte(&(sigma[index][2 * (i) + 1]))]; \ + (d) = _d = rightRotate16_64(_d ^ _a); \ + _c += _d; \ + (a) = _a; \ + (b) = rightRotate63_64(_b ^ _c); \ + (c) = _c; \ + } while (0) + +void BLAKE2b::processChunk(uint64_t f0) +{ + uint8_t index; + uint64_t v[16]; + + // Byte-swap the message buffer into little-endian if necessary. +#if !defined(CRYPTO_LITTLE_ENDIAN) + for (index = 0; index < 16; ++index) + state.m[index] = le64toh(state.m[index]); +#endif + + // Format the block to be hashed. + memcpy(v, state.h, sizeof(state.h)); + v[8] = BLAKE2b_IV0; + v[9] = BLAKE2b_IV1; + v[10] = BLAKE2b_IV2; + v[11] = BLAKE2b_IV3; + v[12] = BLAKE2b_IV4 ^ state.lengthLow; + v[13] = BLAKE2b_IV5 ^ state.lengthHigh; + v[14] = BLAKE2b_IV6 ^ f0; + v[15] = BLAKE2b_IV7; + + // Perform the 12 BLAKE2b rounds. + for (index = 0; index < 12; ++index) { + // Column round. 
+ quarterRound(v[0], v[4], v[8], v[12], 0); + quarterRound(v[1], v[5], v[9], v[13], 1); + quarterRound(v[2], v[6], v[10], v[14], 2); + quarterRound(v[3], v[7], v[11], v[15], 3); + + // Diagonal round. + quarterRound(v[0], v[5], v[10], v[15], 4); + quarterRound(v[1], v[6], v[11], v[12], 5); + quarterRound(v[2], v[7], v[8], v[13], 6); + quarterRound(v[3], v[4], v[9], v[14], 7); + } + + // Combine the new and old hash values. + for (index = 0; index < 8; ++index) + state.h[index] ^= (v[index] ^ v[index + 8]); +} diff --git a/src/BLAKE2b.h b/src/BLAKE2b.h new file mode 100644 index 0000000..e3f7868 --- /dev/null +++ b/src/BLAKE2b.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_BLAKE2B_H +#define CRYPTO_BLAKE2B_H + +#include "Hash.h" + +class BLAKE2b : public Hash +{ +public: + BLAKE2b(); + virtual ~BLAKE2b(); + + size_t hashSize() const; + size_t blockSize() const; + + void reset(); + void reset(uint8_t outputLength); + void reset(const void *key, size_t keyLen, uint8_t outputLength = 64); + + void update(const void *data, size_t len); + void finalize(void *hash, size_t len); + + void clear(); + + void resetHMAC(const void *key, size_t keyLen); + void finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen); + +private: + struct { + uint64_t h[8]; + uint64_t m[16]; + uint64_t lengthLow; + uint64_t lengthHigh; + uint8_t chunkSize; + } state; + + void processChunk(uint64_t f0); +}; + +#endif diff --git a/src/BLAKE2s.cpp b/src/BLAKE2s.cpp new file mode 100644 index 0000000..a707ff7 --- /dev/null +++ b/src/BLAKE2s.cpp @@ -0,0 +1,322 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "BLAKE2s.h" +#include "Crypto.h" +#include "utility/EndianUtil.h" +#include "utility/RotateUtil.h" +#include "utility/ProgMemUtil.h" +#include + +/** + * \class BLAKE2s BLAKE2s.h + * \brief BLAKE2s hash algorithm. + * + * BLAKE2s is a variation on the ChaCha stream cipher, designed for hashing, + * with a 256-bit hash output. It is intended as a high performance + * replacement for SHA256 for when speed is critical but exact SHA256 + * compatibility is not. + * + * This class supports two types of keyed hash. The BLAKE2 keyed hash and + * traditional HMAC. The BLAKE2 keyed hash is recommended unless there is + * some higher-level application need to be compatible with the HMAC + * construction. The keyed hash is computed as follows: + * + * \code + * BLAKE2s blake; + * blake.reset(key, sizeof(key), outputLength); + * blake.update(data1, sizeof(data1)); + * blake.update(data2, sizeof(data2)); + * ... + * blake.update(dataN, sizeof(dataN)); + * blake.finalize(hash, outputLength); + * \endcode + * + * The HMAC is computed as follows (the output length is always 32): + * + * \code + * BLAKE2s blake; + * blake.resetHMAC(key, sizeof(key)); + * blake.update(data1, sizeof(data1)); + * blake.update(data2, sizeof(data2)); + * ... + * blake.update(dataN, sizeof(dataN)); + * blake.finalizeHMAC(key, sizeof(key), hash, 32); + * \endcode + * + * References: https://blake2.net/, + * RFC 7693 + * + * \sa BLAKE2b, SHA256, SHA3_256 + */ + +/** + * \brief Constructs a BLAKE2s hash object. + */ +BLAKE2s::BLAKE2s() +{ + reset(); +} + +/** + * \brief Destroys this BLAKE2s hash object after clearing + * sensitive information. 
+ */
+BLAKE2s::~BLAKE2s()
+{
+    clean(state); // presumably zeroes the whole state struct; clean() is not defined in this file
+}
+
+size_t BLAKE2s::hashSize() const
+{
+    return 32; // digest size in bytes (256 bits)
+}
+
+size_t BLAKE2s::blockSize() const
+{
+    return 64; // message block size in bytes (512 bits)
+}
+
+// Initialization vectors for BLAKE2s.
+#define BLAKE2s_IV0 0x6A09E667
+#define BLAKE2s_IV1 0xBB67AE85
+#define BLAKE2s_IV2 0x3C6EF372
+#define BLAKE2s_IV3 0xA54FF53A
+#define BLAKE2s_IV4 0x510E527F
+#define BLAKE2s_IV5 0x9B05688C
+#define BLAKE2s_IV6 0x1F83D9AB
+#define BLAKE2s_IV7 0x5BE0CD19
+
+void BLAKE2s::reset() // unkeyed reset with the default output length
+{
+    state.h[0] = BLAKE2s_IV0 ^ 0x01010020; // Default output length of 32.
+    state.h[1] = BLAKE2s_IV1;
+    state.h[2] = BLAKE2s_IV2;
+    state.h[3] = BLAKE2s_IV3;
+    state.h[4] = BLAKE2s_IV4;
+    state.h[5] = BLAKE2s_IV5;
+    state.h[6] = BLAKE2s_IV6;
+    state.h[7] = BLAKE2s_IV7;
+    state.chunkSize = 0; // nothing buffered in state.m yet
+    state.length = 0; // no bytes hashed so far
+}
+
+/**
+ * \brief Resets the hash ready for a new hashing process with a specified
+ * output length.
+ *
+ * \param outputLength The output length to use for the final hash in bytes,
+ * between 1 and 32.
+ */
+void BLAKE2s::reset(uint8_t outputLength)
+{
+    if (outputLength < 1) // out-of-range lengths are clamped, not rejected
+        outputLength = 1;
+    else if (outputLength > 32)
+        outputLength = 32;
+    state.h[0] = BLAKE2s_IV0 ^ 0x01010000 ^ outputLength; // same word as reset(), with the requested length in the low byte
+    state.h[1] = BLAKE2s_IV1;
+    state.h[2] = BLAKE2s_IV2;
+    state.h[3] = BLAKE2s_IV3;
+    state.h[4] = BLAKE2s_IV4;
+    state.h[5] = BLAKE2s_IV5;
+    state.h[6] = BLAKE2s_IV6;
+    state.h[7] = BLAKE2s_IV7;
+    state.chunkSize = 0;
+    state.length = 0;
+}
+
+/**
+ * \brief Resets the hash ready for a new hashing process with a specified
+ * key and output length.
+ *
+ * \param key Points to the key.
+ * \param keyLen The length of the key in bytes, between 0 and 32.
+ * \param outputLength The output length to use for the final hash in bytes,
+ * between 1 and 32.
+ *
+ * If \a keyLen is greater than 32, then the \a key will be truncated to
+ * the first 32 bytes.
+ */
+void BLAKE2s::reset(const void *key, size_t keyLen, uint8_t outputLength)
+{
+    if (keyLen > 32)
+        keyLen = 32; // only the first 32 key bytes are ever copied below
+    if (outputLength < 1) // out-of-range lengths are clamped, not rejected
+        outputLength = 1;
+    else if (outputLength > 32)
+        outputLength = 32;
+    state.h[0] = BLAKE2s_IV0 ^ 0x01010000 ^ (keyLen << 8) ^ outputLength; // key length in bits 8..15, output length in bits 0..7
+    state.h[1] = BLAKE2s_IV1;
+    state.h[2] = BLAKE2s_IV2;
+    state.h[3] = BLAKE2s_IV3;
+    state.h[4] = BLAKE2s_IV4;
+    state.h[5] = BLAKE2s_IV5;
+    state.h[6] = BLAKE2s_IV6;
+    state.h[7] = BLAKE2s_IV7;
+    if (keyLen > 0) {
+        // Set the first block to the key and pad with zeroes.
+        memcpy(state.m, key, keyLen);
+        memset(((uint8_t *)state.m) + keyLen, 0, 64 - keyLen);
+        state.chunkSize = 64; // the padded key occupies one full chunk
+        state.length = 64; // ...and counts as 64 hashed bytes
+    } else {
+        // No key. The first data block is the first hashed block.
+        state.chunkSize = 0;
+        state.length = 0;
+    }
+}
+
+void BLAKE2s::update(const void *data, size_t len)
+{
+    // Break the input up into 512-bit chunks and process each in turn.
+    const uint8_t *d = (const uint8_t *)data;
+    while (len > 0) {
+        if (state.chunkSize == 64) {
+            // Previous chunk was full and we know that it wasn't the
+            // last chunk, so we can process it now with f0 set to zero.
+            processChunk(0);
+            state.chunkSize = 0;
+        }
+        uint8_t size = 64 - state.chunkSize; // free space left in the current chunk
+        if (size > len)
+            size = len;
+        memcpy(((uint8_t *)state.m) + state.chunkSize, d, size);
+        state.chunkSize += size;
+        state.length += size;
+        len -= size;
+        d += size;
+    }
+}
+
+void BLAKE2s::finalize(void *hash, size_t len)
+{
+    // Pad the last chunk and hash it with f0 set to all-ones.
+    memset(((uint8_t *)state.m) + state.chunkSize, 0, 64 - state.chunkSize);
+    processChunk(0xFFFFFFFF);
+
+    // Convert the hash into little-endian in the message buffer.
+    for (uint8_t posn = 0; posn < 8; ++posn)
+        state.m[posn] = htole32(state.h[posn]);
+
+    // Copy the hash to the caller's return buffer.
+    if (len > 32) // state.h only holds 32 bytes of hash
+        len = 32;
+    memcpy(hash, state.m, len);
+}
+
+void BLAKE2s::clear()
+{
+    clean(state); // clear the old state first (clean() is not defined in this file)
+    reset(); // then return to the default unkeyed initial state
+}
+
+void BLAKE2s::resetHMAC(const void *key, size_t keyLen)
+{
+    formatHMACKey(state.m, key, keyLen, 0x36); // 0x36 is the HMAC inner-pad byte (RFC 2104); helper is not defined in this file
+    state.length += 64; // account for the 64-byte key block hashed next
+    processChunk(0); // not the final block, so f0 is zero
+}
+
+void BLAKE2s::finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen)
+{
+    uint8_t temp[32];
+    finalize(temp, sizeof(temp)); // finish the inner hash
+    formatHMACKey(state.m, key, keyLen, 0x5C); // 0x5C is the HMAC outer-pad byte (RFC 2104)
+    state.length += 64;
+    processChunk(0);
+    update(temp, sizeof(temp)); // outer hash covers the key block followed by the inner hash
+    finalize(hash, hashLen);
+    clean(temp); // do not leave the inner hash behind in the local buffer
+}
+
+// Permutation on the message input state for BLAKE2s.
+static const uint8_t sigma[10][16] PROGMEM = {
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+    {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+    { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+    { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+    { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+    {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+    {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+    { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+    {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0}
+};
+
+// Perform a BLAKE2s quarter round operation.
+#define quarterRound(a, b, c, d, i) \
+    do { \
+        uint32_t _b = (b); \
+        uint32_t _a = (a) + _b + state.m[pgm_read_byte(&(sigma[index][2 * (i)]))]; \
+        uint32_t _d = rightRotate16((d) ^ _a); \
+        uint32_t _c = (c) + _d; \
+        _b = rightRotate12(_b ^ _c); \
+        _a += _b + state.m[pgm_read_byte(&(sigma[index][2 * (i) + 1]))]; \
+        (d) = _d = rightRotate8(_d ^ _a); \
+        _c += _d; \
+        (a) = _a; \
+        (b) = rightRotate7(_b ^ _c); \
+        (c) = _c; \
+    } while (0)
+
+void BLAKE2s::processChunk(uint32_t f0)
+{
+    uint8_t index; // loop counter; the quarterRound() macro also reads it to pick the sigma row
+    uint32_t v[16];
+
+    // Byte-swap the message buffer into little-endian if necessary.
+#if !defined(CRYPTO_LITTLE_ENDIAN)
+    for (index = 0; index < 16; ++index)
+        state.m[index] = le32toh(state.m[index]);
+#endif
+
+    // Format the block to be hashed.
+    memcpy(v, state.h, sizeof(state.h));
+    v[8] = BLAKE2s_IV0;
+    v[9] = BLAKE2s_IV1;
+    v[10] = BLAKE2s_IV2;
+    v[11] = BLAKE2s_IV3;
+    v[12] = BLAKE2s_IV4 ^ (uint32_t)(state.length); // low 32 bits of the byte counter
+    v[13] = BLAKE2s_IV5 ^ (uint32_t)(state.length >> 32); // high 32 bits of the byte counter
+    v[14] = BLAKE2s_IV6 ^ f0; // callers pass all-ones only for the final block
+    v[15] = BLAKE2s_IV7;
+
+    // Perform the 10 BLAKE2s rounds.
+    for (index = 0; index < 10; ++index) {
+        // Column round.
+        quarterRound(v[0], v[4], v[8], v[12], 0);
+        quarterRound(v[1], v[5], v[9], v[13], 1);
+        quarterRound(v[2], v[6], v[10], v[14], 2);
+        quarterRound(v[3], v[7], v[11], v[15], 3);
+
+        // Diagonal round.
+        quarterRound(v[0], v[5], v[10], v[15], 4);
+        quarterRound(v[1], v[6], v[11], v[12], 5);
+        quarterRound(v[2], v[7], v[8], v[13], 6);
+        quarterRound(v[3], v[4], v[9], v[14], 7);
+    }
+
+    // Combine the new and old hash values.
+    for (index = 0; index < 8; ++index)
+        state.h[index] ^= (v[index] ^ v[index + 8]);
+}
diff --git a/src/BLAKE2s.h b/src/BLAKE2s.h
new file mode 100644
index 0000000..3ebae2e
--- /dev/null
+++ b/src/BLAKE2s.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CRYPTO_BLAKE2S_H
+#define CRYPTO_BLAKE2S_H
+
+#include "Hash.h"
+
+class BLAKE2s : public Hash // BLAKE2s hash exposed through the generic Hash interface
+{
+public:
+    BLAKE2s();
+    virtual ~BLAKE2s();
+
+    size_t hashSize() const;
+    size_t blockSize() const;
+
+    void reset();
+    void reset(uint8_t outputLength);
+    void reset(const void *key, size_t keyLen, uint8_t outputLength = 32); // keyed reset; output length defaults to 32 bytes
+
+    void update(const void *data, size_t len);
+    void finalize(void *hash, size_t len);
+
+    void clear();
+
+    void resetHMAC(const void *key, size_t keyLen);
+    void finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen);
+
+private:
+    struct {
+        uint32_t h[8]; // chained hash value, seeded from the IV constants on reset
+        uint32_t m[16]; // current 64-byte message block
+        uint64_t length; // running byte count mixed into each compression
+        uint8_t chunkSize; // number of bytes currently buffered in m
+    } state;
+
+    void processChunk(uint32_t f0); // compresses m into h; f0 is all-ones only for the final block
+};
+
+#endif
diff --git a/src/BigNumberUtil.cpp b/src/BigNumberUtil.cpp
new file mode 100644
index 0000000..976603c
--- /dev/null
+++ b/src/BigNumberUtil.cpp
@@ -0,0 +1,769 @@
+/*
+ * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "BigNumberUtil.h"
+#include "utility/EndianUtil.h"
+#include "utility/LimbUtil.h"
+#include <string.h>
+
+/**
+ * \class BigNumberUtil BigNumberUtil.h
+ * \brief Utilities to assist with implementing big number arithmetic.
+ *
+ * Big numbers are represented as arrays of limb_t words, which may be
+ * 8, 16, 32, or 64 bits in size depending upon how the library
+ * was configured. For AVR, 16 bit limbs usually give the best performance.
+ *
+ * Limb arrays are ordered from the least significant word to the most
+ * significant.
+ */
+
+/**
+ * \brief Unpacks the little-endian byte representation of a big number
+ * into a limb array.
+ *
+ * \param limbs The limb array, starting with the least significant word.
+ * \param count The number of elements in the \a limbs array.
+ * \param bytes The bytes to unpack.
+ * \param len The number of bytes to unpack.
+ *
+ * If \a len is shorter than the length of \a limbs, then the high bytes
+ * will be filled with zeroes. If \a len is longer than the length of
+ * \a limbs, then the high bytes will be truncated and lost.
+ *
+ * \sa packLE(), unpackBE()
+ */
+void BigNumberUtil::unpackLE(limb_t *limbs, size_t count,
+                             const uint8_t *bytes, size_t len)
+{
+#if BIGNUMBER_LIMB_8BIT // limbs are single bytes: straight copy plus zero fill
+    if (len < count) {
+        memcpy(limbs, bytes, len);
+        memset(limbs + len, 0, count - len);
+    } else {
+        memcpy(limbs, bytes, count);
+    }
+#elif CRYPTO_LITTLE_ENDIAN // limb memory is copied byte-for-byte from the input
+    count *= sizeof(limb_t); // from here on, count is a byte count
+    if (len < count) {
+        memcpy(limbs, bytes, len);
+        memset(((uint8_t *)limbs) + len, 0, count - len);
+    } else {
+        memcpy(limbs, bytes, count);
+    }
+#elif BIGNUMBER_LIMB_16BIT
+    while (count > 0 && len >= 2) {
+        *limbs++ = ((limb_t)(bytes[0])) |
+                  (((limb_t)(bytes[1])) << 8);
+        bytes += 2;
+        --count;
+        len -= 2;
+    }
+    if (count > 0 && len == 1) { // odd trailing byte becomes a partial limb
+        *limbs++ = ((limb_t)(bytes[0]));
+        --count;
+    }
+    while (count > 0) { // zero-fill the remaining high limbs
+        *limbs++ = 0;
+        --count;
+    }
+#elif BIGNUMBER_LIMB_32BIT
+    while (count > 0 && len >= 4) {
+        *limbs++ = ((limb_t)(bytes[0])) |
+                  (((limb_t)(bytes[1])) << 8) |
+                  (((limb_t)(bytes[2])) << 16) |
+                  (((limb_t)(bytes[3])) << 24);
+        bytes += 4;
+        --count;
+        len -= 4;
+    }
+    if (count > 0 && len > 0) { // 1 to 3 leftover bytes form a partial limb
+        if (len == 3) {
+            *limbs++ = ((limb_t)(bytes[0])) |
+                      (((limb_t)(bytes[1])) << 8) |
+                      (((limb_t)(bytes[2])) << 16);
+        } else if (len == 2) {
+            *limbs++ = ((limb_t)(bytes[0])) |
+                      (((limb_t)(bytes[1])) << 8);
+        } else {
+            *limbs++ = ((limb_t)(bytes[0]));
+        }
+        --count;
+    }
+    while (count > 0) { // zero-fill the remaining high limbs
+        *limbs++ = 0;
+        --count;
+    }
+#elif BIGNUMBER_LIMB_64BIT
+    while (count > 0 && len >= 8) {
+        *limbs++ = ((limb_t)(bytes[0])) |
+                  (((limb_t)(bytes[1])) << 8) |
+                  (((limb_t)(bytes[2])) << 16) |
+                  (((limb_t)(bytes[3])) << 24) |
+                  (((limb_t)(bytes[4])) << 32) |
+                  (((limb_t)(bytes[5])) << 40) |
+                  (((limb_t)(bytes[6])) << 48) |
+                  (((limb_t)(bytes[7])) << 56);
+        bytes += 8;
+        --count;
+        len -= 8;
+    }
+    if (count > 0 && len > 0) { // 1 to 7 leftover bytes form a partial limb
+        limb_t word = 0;
+        uint8_t shift = 0;
+        while (len > 0 && shift < 64) {
+            word |= (((limb_t)(*bytes++)) << shift);
+            shift += 8;
+            --len;
+        }
+        *limbs++ = word;
+        --count;
+    }
+    while (count > 0) { // zero-fill the remaining high limbs
+        *limbs++ = 0;
+        --count;
+    }
+#endif
+}
+
+/**
+ * \brief Unpacks the big-endian byte representation of a big number
+ * into a limb array.
+ *
+ * \param limbs The limb array, starting with the least significant word.
+ * \param count The number of elements in the \a limbs array.
+ * \param bytes The bytes to unpack.
+ * \param len The number of bytes to unpack.
+ *
+ * If \a len is shorter than the length of \a limbs, then the high bytes
+ * will be filled with zeroes. If \a len is longer than the length of
+ * \a limbs, then the high bytes will be truncated and lost.
+ *
+ * \sa packBE(), unpackLE()
+ */
+void BigNumberUtil::unpackBE(limb_t *limbs, size_t count,
+                             const uint8_t *bytes, size_t len)
+{
+#if BIGNUMBER_LIMB_8BIT
+    while (count > 0 && len > 0) { // read from the last (least significant) byte backwards
+        --count;
+        --len;
+        *limbs++ = bytes[len];
+    }
+    memset(limbs, 0, count); // zero the unused high limbs
+#elif BIGNUMBER_LIMB_16BIT
+    bytes += len; // walk backwards from the end of the input
+    while (count > 0 && len >= 2) {
+        --count;
+        bytes -= 2;
+        len -= 2;
+        *limbs++ = ((limb_t)(bytes[1])) |
+                  (((limb_t)(bytes[0])) << 8);
+    }
+    if (count > 0 && len == 1) {
+        --count;
+        --bytes;
+        *limbs++ = (limb_t)(bytes[0]);
+    }
+    memset(limbs, 0, count * sizeof(limb_t)); // zero the unused high limbs
+#elif BIGNUMBER_LIMB_32BIT
+    bytes += len; // walk backwards from the end of the input
+    while (count > 0 && len >= 4) {
+        --count;
+        bytes -= 4;
+        len -= 4;
+        *limbs++ = ((limb_t)(bytes[3])) |
+                  (((limb_t)(bytes[2])) << 8) |
+                  (((limb_t)(bytes[1])) << 16) |
+                  (((limb_t)(bytes[0])) << 24);
+    }
+    if (count > 0) {
+        if (len == 3) {
+            --count;
+            bytes -= 3;
+            *limbs++ = ((limb_t)(bytes[2])) |
+                      (((limb_t)(bytes[1])) << 8) |
+                      (((limb_t)(bytes[0])) << 16);
+        } else if (len == 2) {
+            --count;
+            bytes -= 2;
+            *limbs++ = ((limb_t)(bytes[1])) |
+                      (((limb_t)(bytes[0])) << 8);
+        } else if (len == 1) {
+            --count;
+            --bytes;
+            *limbs++ = (limb_t)(bytes[0]);
+        }
+    }
+    memset(limbs, 0, count * sizeof(limb_t)); // zero the unused high limbs
+#elif BIGNUMBER_LIMB_64BIT
+    bytes += len; // walk backwards from the end of the input
+    while (count > 0 && len >= 8) {
+        --count;
+        bytes -= 8;
+        len -= 8;
+        *limbs++ = ((limb_t)(bytes[7])) |
+                  (((limb_t)(bytes[6])) << 8) |
+                  (((limb_t)(bytes[5])) << 16) |
+                  (((limb_t)(bytes[4])) << 24) |
+                  (((limb_t)(bytes[3])) << 32) |
+                  (((limb_t)(bytes[2])) << 40) |
+                  (((limb_t)(bytes[1])) << 48) |
+                  (((limb_t)(bytes[0])) << 56);
+    }
+    if (count > 0 && len > 0) { // 1 to 7 leftover high-order bytes form a partial limb
+        limb_t word = 0;
+        uint8_t shift = 0;
+        while (len > 0 && shift < 64) {
+            word |= (((limb_t)(*(--bytes))) << shift);
+            shift += 8;
+            --len;
+        }
+        *limbs++ = word;
+        --count;
+    }
+    memset(limbs, 0, count * sizeof(limb_t)); // zero the unused high limbs
+#endif
+}
+
+/**
+ * \brief Packs the little-endian byte representation of a big number
+ * into a byte array.
+ *
+ * \param bytes The byte array to pack into.
+ * \param len The number of bytes in the destination \a bytes array.
+ * \param limbs The limb array representing the big number, starting with
+ * the least significant word.
+ * \param count The number of elements in the \a limbs array.
+ *
+ * If \a len is shorter than the length of \a limbs, then the number will
+ * be truncated to the least significant \a len bytes. If \a len is longer
+ * than the length of \a limbs, then the high bytes will be filled with zeroes.
+ *
+ * \sa unpackLE(), packBE()
+ */
+void BigNumberUtil::packLE(uint8_t *bytes, size_t len,
+                           const limb_t *limbs, size_t count)
+{
+#if BIGNUMBER_LIMB_8BIT // limbs are single bytes: straight copy plus zero fill
+    if (len <= count) {
+        memcpy(bytes, limbs, len);
+    } else {
+        memcpy(bytes, limbs, count);
+        memset(bytes + count, 0, len - count);
+    }
+#elif CRYPTO_LITTLE_ENDIAN // limb memory is copied byte-for-byte to the output
+    count *= sizeof(limb_t); // from here on, count is a byte count
+    if (len <= count) {
+        memcpy(bytes, limbs, len);
+    } else {
+        memcpy(bytes, limbs, count);
+        memset(bytes + count, 0, len - count);
+    }
+#elif BIGNUMBER_LIMB_16BIT
+    limb_t word;
+    while (count > 0 && len >= 2) {
+        word = *limbs++;
+        bytes[0] = (uint8_t)word;
+        bytes[1] = (uint8_t)(word >> 8);
+        --count;
+        len -= 2;
+        bytes += 2;
+    }
+    if (count > 0 && len == 1) { // room for only the low byte of the next limb
+        bytes[0] = (uint8_t)(*limbs);
+        --len;
+        ++bytes;
+    }
+    memset(bytes, 0, len); // zero-fill whatever output space is left
+#elif BIGNUMBER_LIMB_32BIT
+    limb_t word;
+    while (count > 0 && len >= 4) {
+        word = *limbs++;
+        bytes[0] = (uint8_t)word;
+        bytes[1] = (uint8_t)(word >> 8);
+        bytes[2] = (uint8_t)(word >> 16);
+        bytes[3] = (uint8_t)(word >> 24);
+        --count;
+        len -= 4;
+        bytes += 4;
+    }
+    if (count > 0) { // fewer than 4 output bytes remain: emit a truncated limb
+        if (len == 3) {
+            word = *limbs;
+            bytes[0] = (uint8_t)word;
+            bytes[1] = (uint8_t)(word >> 8);
+            bytes[2] = (uint8_t)(word >> 16);
+            len -= 3;
+            bytes += 3;
+        } else if (len == 2) {
+            word = *limbs;
+            bytes[0] = (uint8_t)word;
+            bytes[1] = (uint8_t)(word >> 8);
+            len -= 2;
+            bytes += 2;
+        } else if (len == 1) {
+            bytes[0] = (uint8_t)(*limbs);
+            --len;
+            ++bytes;
+        }
+    }
+    memset(bytes, 0, len); // zero-fill whatever output space is left
+#elif BIGNUMBER_LIMB_64BIT
+    limb_t word;
+    while (count > 0 && len >= 8) {
+        word = *limbs++;
+        bytes[0] = (uint8_t)word;
+        bytes[1] = (uint8_t)(word >> 8);
+        bytes[2] = (uint8_t)(word >> 16);
+        bytes[3] = (uint8_t)(word >> 24);
+        bytes[4] = (uint8_t)(word >> 32);
+        bytes[5] = (uint8_t)(word >> 40);
+        bytes[6] = (uint8_t)(word >> 48);
+        bytes[7] = (uint8_t)(word >> 56);
+        --count;
+        len -= 8;
+        bytes += 8;
+    }
+    if (count > 0) { // fewer than 8 output bytes remain: emit a truncated limb
+        word = *limbs;
+        while (len > 0) {
+            *bytes++ = (uint8_t)word;
+            word >>= 8;
+            --len;
+        }
+    }
+    memset(bytes, 0, len); // zero-fill whatever output space is left
+#endif
+}
+
+/**
+ * \brief Packs the big-endian byte representation of a big number
+ * into a byte array.
+ *
+ * \param bytes The byte array to pack into.
+ * \param len The number of bytes in the destination \a bytes array.
+ * \param limbs The limb array representing the big number, starting with
+ * the least significant word.
+ * \param count The number of elements in the \a limbs array.
+ *
+ * If \a len is shorter than the length of \a limbs, then the number will
+ * be truncated to the least significant \a len bytes. If \a len is longer
+ * than the length of \a limbs, then the high bytes will be filled with zeroes.
+ *
+ * \sa unpackBE(), packLE()
+ */
+void BigNumberUtil::packBE(uint8_t *bytes, size_t len,
+                           const limb_t *limbs, size_t count)
+{
+#if BIGNUMBER_LIMB_8BIT
+    if (len > count) { // output is longer: leading (high) bytes are zero
+        size_t size = len - count;
+        memset(bytes, 0, size);
+        len -= size;
+        bytes += size;
+    } else if (len < count) { // output is shorter: keep only the low len limbs
+        count = len;
+    }
+    limbs += count; // emit from the most significant kept limb downwards
+    while (count > 0) {
+        --count;
+        *bytes++ = *(--limbs);
+    }
+#elif BIGNUMBER_LIMB_16BIT
+    size_t countBytes = count * sizeof(limb_t);
+    limb_t word;
+    if (len >= countBytes) { // output is at least as long: leading bytes are zero
+        size_t size = len - countBytes;
+        memset(bytes, 0, size);
+        len -= size;
+        bytes += size;
+        limbs += count;
+    } else { // output is shorter: keep the low len bytes only
+        count = len / sizeof(limb_t);
+        limbs += count;
+        if ((len & 1) != 0)
+            *bytes++ = (uint8_t)(*limbs);
+    }
+    while (count > 0) {
+        --count;
+        word = *(--limbs);
+        *bytes++ = (uint8_t)(word >> 8);
+        *bytes++ = (uint8_t)word;
+    }
+#elif BIGNUMBER_LIMB_32BIT
+    size_t countBytes = count * sizeof(limb_t);
+    limb_t word;
+    if (len >= countBytes) { // output is at least as long: leading bytes are zero
+        size_t size = len - countBytes;
+        memset(bytes, 0, size);
+        len -= size;
+        bytes += size;
+        limbs += count;
+    } else { // output is shorter: keep the low len bytes only
+        count = len / sizeof(limb_t);
+        limbs += count;
+        if ((len & 3) == 3) {
+            word = *limbs;
+            *bytes++ = (uint8_t)(word >> 16);
+            *bytes++ = (uint8_t)(word >> 8);
+            *bytes++ = (uint8_t)word;
+        } else if ((len & 3) == 2) {
+            word = *limbs;
+            *bytes++ = (uint8_t)(word >> 8);
+            *bytes++ = (uint8_t)word;
+        } else if ((len & 3) == 1) {
+            *bytes++ = (uint8_t)(*limbs);
+        }
+    }
+    while (count > 0) {
+        --count;
+        word = *(--limbs);
+        *bytes++ = (uint8_t)(word >> 24);
+        *bytes++ = (uint8_t)(word >> 16);
+        *bytes++ = (uint8_t)(word >> 8);
+        *bytes++ = (uint8_t)word;
+    }
+#elif BIGNUMBER_LIMB_64BIT
+    size_t countBytes = count * sizeof(limb_t);
+    limb_t word;
+    if (len >= countBytes) { // output is at least as long: leading bytes are zero
+        size_t size = len - countBytes;
+        memset(bytes, 0, size);
+        len -= size;
+        bytes += size;
+        limbs += count;
+    } else { // output is shorter: keep the low len bytes only
+        count = len / sizeof(limb_t);
+        limbs += count;
+        uint8_t size = len & 7; // bytes taken from the partially emitted limb
+        uint8_t shift = size * 8;
+        word = *limbs;
+        while (size > 0) {
+            shift -= 8;
+            *bytes++ = (uint8_t)(word >> shift);
+            --size;
+        }
+    }
+    while (count > 0) {
+        --count;
+        word = *(--limbs);
+        *bytes++ = (uint8_t)(word >> 56);
+        *bytes++ = (uint8_t)(word >> 48);
+        *bytes++ = (uint8_t)(word >> 40);
+        *bytes++ = (uint8_t)(word >> 32);
+        *bytes++ = (uint8_t)(word >> 24);
+        *bytes++ = (uint8_t)(word >> 16);
+        *bytes++ = (uint8_t)(word >> 8);
+        *bytes++ = (uint8_t)word;
+    }
+#endif
+}
+
+/**
+ * \brief Adds two big numbers.
+ *
+ * \param result The result of the addition. This can be the same
+ * as either \a x or \a y.
+ * \param x The first big number.
+ * \param y The second big number.
+ * \param size The size of the values in limbs.
+ *
+ * \return Returns 1 if there was a carry out or 0 if there was no carry out.
+ *
+ * \sa sub(), mul()
+ */
+limb_t BigNumberUtil::add(limb_t *result, const limb_t *x,
+                          const limb_t *y, size_t size)
+{
+    dlimb_t carry = 0; // double-width accumulator: low half is the limb, high half the carry
+    while (size > 0) {
+        carry += *x++;
+        carry += *y++;
+        *result++ = (limb_t)carry;
+        carry >>= LIMB_BITS; // keep only the carry for the next limb
+        --size;
+    }
+    return (limb_t)carry;
+}
+
+/**
+ * \brief Subtracts one big number from another.
+ *
+ * \param result The result of the subtraction. This can be the same
+ * as either \a x or \a y.
+ * \param x The first big number.
+ * \param y The second big number to subtract from \a x.
+ * \param size The size of the values in limbs.
+ *
+ * \return Returns 1 if there was a borrow, or 0 if there was no borrow.
+ *
+ * \sa add(), mul()
+ */
+limb_t BigNumberUtil::sub(limb_t *result, const limb_t *x,
+                          const limb_t *y, size_t size)
+{
+    dlimb_t borrow = 0;
+    while (size > 0) {
+        borrow = ((dlimb_t)(*x++)) - (*y++) - ((borrow >> LIMB_BITS) & 0x01); // bit LIMB_BITS of the previous difference is the incoming borrow
+        *result++ = (limb_t)borrow;
+        --size;
+    }
+    return ((limb_t)(borrow >> LIMB_BITS)) & 0x01;
+}
+
+/**
+ * \brief Multiplies two big numbers.
+ *
+ * \param result The result of the multiplication. The array must be
+ * \a xcount + \a ycount limbs in size.
+ * \param x Points to the first value to multiply.
+ * \param xcount The number of limbs in \a x.
+ * \param y Points to the second value to multiply.
+ * \param ycount The number of limbs in \a y.
+ *
+ * \sa mul_P()
+ */
+void BigNumberUtil::mul(limb_t *result, const limb_t *x, size_t xcount,
+                        const limb_t *y, size_t ycount)
+{
+    size_t i, j;
+    dlimb_t carry;
+    limb_t word;
+    const limb_t *xx;
+    limb_t *rr;
+
+    // Multiply the lowest limb of y by x.
+    carry = 0;
+    word = y[0];
+    xx = x;
+    rr = result;
+    for (i = 0; i < xcount; ++i) {
+        carry += ((dlimb_t)(*xx++)) * word;
+        *rr++ = (limb_t)carry;
+        carry >>= LIMB_BITS;
+    }
+    *rr = (limb_t)carry; // top limb of this partial product
+
+    // Multiply and add the remaining limbs of y by x.
+    for (i = 1; i < ycount; ++i) {
+        word = y[i];
+        carry = 0;
+        xx = x;
+        rr = result + i; // partial product for y[i] starts i limbs up
+        for (j = 0; j < xcount; ++j) {
+            carry += ((dlimb_t)(*xx++)) * word;
+            carry += *rr;
+            *rr++ = (limb_t)carry;
+            carry >>= LIMB_BITS;
+        }
+        *rr = (limb_t)carry;
+    }
+}
+
+/**
+ * \brief Reduces \a x modulo \a y using subtraction.
+ *
+ * \param result The result of the reduction. This can be the
+ * same as \a x.
+ * \param x The number to be reduced.
+ * \param y The base to use for the modulo reduction.
+ * \param size The size of the values in limbs.
+ *
+ * It is assumed that \a x is less than \a y * 2 so that a single
+ * conditional subtraction will bring it down below \a y. The reduction
+ * is performed in constant time.
+ *
+ * \sa reduceQuick_P()
+ */
+void BigNumberUtil::reduceQuick(limb_t *result, const limb_t *x,
+                                const limb_t *y, size_t size)
+{
+    // Subtract "y" from "x" and turn the borrow into an AND mask.
+    limb_t mask = sub(result, x, y, size);
+    mask = (~mask) + 1; // negate: borrow 0 -> mask 0, borrow 1 -> mask all ones
+
+    // Add "y" back to the result if the mask is non-zero.
+    dlimb_t carry = 0;
+    while (size > 0) {
+        carry += *result;
+        carry += (*y++ & mask); // masked add instead of a branch on the borrow
+        *result++ = (limb_t)carry;
+        carry >>= LIMB_BITS;
+        --size;
+    }
+}
+
+/**
+ * \brief Adds two big numbers where one of them is in program memory.
+ *
+ * \param result The result of the addition. This can be the same as \a x.
+ * \param x The first big number.
+ * \param y The second big number. This must point into program memory.
+ * \param size The size of the values in limbs.
+ *
+ * \return Returns 1 if there was a carry out or 0 if there was no carry out.
+ *
+ * \sa sub_P(), mul_P()
+ */
+limb_t BigNumberUtil::add_P(limb_t *result, const limb_t *x,
+                            const limb_t *y, size_t size)
+{
+    dlimb_t carry = 0; // double-width accumulator: low half is the limb, high half the carry
+    while (size > 0) {
+        carry += *x++;
+        carry += pgm_read_limb(y++); // y is read through the program-memory accessor
+        *result++ = (limb_t)carry;
+        carry >>= LIMB_BITS;
+        --size;
+    }
+    return (limb_t)carry;
+}
+
+/**
+ * \brief Subtracts one big number from another where one is in program memory.
+ *
+ * \param result The result of the subtraction. This can be the same as \a x.
+ * \param x The first big number.
+ * \param y The second big number to subtract from \a x. This must point
+ * into program memory.
+ * \param size The size of the values in limbs.
+ *
+ * \return Returns 1 if there was a borrow, or 0 if there was no borrow.
+ *
+ * \sa add_P(), mul_P()
+ */
+limb_t BigNumberUtil::sub_P(limb_t *result, const limb_t *x,
+                            const limb_t *y, size_t size)
+{
+    dlimb_t borrow = 0;
+    while (size > 0) {
+        borrow = ((dlimb_t)(*x++)) - pgm_read_limb(y++) - ((borrow >> LIMB_BITS) & 0x01); // bit LIMB_BITS of the previous difference is the incoming borrow
+        *result++ = (limb_t)borrow;
+        --size;
+    }
+    return ((limb_t)(borrow >> LIMB_BITS)) & 0x01;
+}
+
+/**
+ * \brief Multiplies two big numbers where one is in program memory.
+ *
+ * \param result The result of the multiplication. The array must be
+ * \a xcount + \a ycount limbs in size.
+ * \param x Points to the first value to multiply.
+ * \param xcount The number of limbs in \a x.
+ * \param y Points to the second value to multiply. This must point
+ * into program memory.
+ * \param ycount The number of limbs in \a y.
+ *
+ * \sa mul()
+ */
+void BigNumberUtil::mul_P(limb_t *result, const limb_t *x, size_t xcount,
+                          const limb_t *y, size_t ycount)
+{
+    size_t i, j;
+    dlimb_t carry;
+    limb_t word;
+    const limb_t *xx;
+    limb_t *rr;
+
+    // Multiply the lowest limb of y by x.
+    carry = 0;
+    word = pgm_read_limb(&(y[0]));
+    xx = x;
+    rr = result;
+    for (i = 0; i < xcount; ++i) {
+        carry += ((dlimb_t)(*xx++)) * word;
+        *rr++ = (limb_t)carry;
+        carry >>= LIMB_BITS;
+    }
+    *rr = (limb_t)carry; // top limb of this partial product
+
+    // Multiply and add the remaining limbs of y by x.
+    for (i = 1; i < ycount; ++i) {
+        word = pgm_read_limb(&(y[i]));
+        carry = 0;
+        xx = x;
+        rr = result + i; // partial product for y[i] starts i limbs up
+        for (j = 0; j < xcount; ++j) {
+            carry += ((dlimb_t)(*xx++)) * word;
+            carry += *rr;
+            *rr++ = (limb_t)carry;
+            carry >>= LIMB_BITS;
+        }
+        *rr = (limb_t)carry;
+    }
+}
+
+/**
+ * \brief Reduces \a x modulo \a y using subtraction where \a y is
+ * in program memory.
+ *
+ * \param result The result of the reduction. This can be the
+ * same as \a x.
+ * \param x The number to be reduced.
+ * \param y The base to use for the modulo reduction. This must point
+ * into program memory.
+ * \param size The size of the values in limbs.
+ *
+ * It is assumed that \a x is less than \a y * 2 so that a single
+ * conditional subtraction will bring it down below \a y. The reduction
+ * is performed in constant time.
+ *
+ * \sa reduceQuick()
+ */
+void BigNumberUtil::reduceQuick_P(limb_t *result, const limb_t *x,
+                                  const limb_t *y, size_t size)
+{
+    // Subtract "y" from "x" and turn the borrow into an AND mask.
+    limb_t mask = sub_P(result, x, y, size);
+    mask = (~mask) + 1; // negate: borrow 0 -> mask 0, borrow 1 -> mask all ones
+
+    // Add "y" back to the result if the mask is non-zero.
+    dlimb_t carry = 0;
+    while (size > 0) {
+        carry += *result;
+        carry += (pgm_read_limb(y++) & mask); // masked add instead of a branch on the borrow
+        *result++ = (limb_t)carry;
+        carry >>= LIMB_BITS;
+        --size;
+    }
+}
+
+/**
+ * \brief Determine if a big number is zero.
+ *
+ * \param x Points to the number to test.
+ * \param size The number of limbs in \a x.
+ * \return Returns 1 if \a x is zero or 0 otherwise.
+ *
+ * This function attempts to make the determination in constant time.
+ */
+limb_t BigNumberUtil::isZero(const limb_t *x, size_t size)
+{
+    limb_t word = 0;
+    while (size > 0) {
+        word |= *x++; // OR every limb together; no early exit
+        --size;
+    }
+    return (limb_t)(((((dlimb_t)1) << LIMB_BITS) - word) >> LIMB_BITS); // (2^LIMB_BITS - word) keeps bit LIMB_BITS set only when word == 0
+}
diff --git a/src/BigNumberUtil.h b/src/BigNumberUtil.h
new file mode 100644
index 0000000..2212cbb
--- /dev/null
+++ b/src/BigNumberUtil.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CRYPTO_BIGNUMBERUTIL_h
+#define CRYPTO_BIGNUMBERUTIL_h
+
+#include <inttypes.h>
+#include <stddef.h>
+
+// Define exactly one of these to 1 to set the size of the basic limb type.
+#if defined(__AVR__) || defined(ESP8266)
+// 16-bit limbs seem to give the best performance on 8-bit AVR micros.
+// They seem to give better performance on ESP8266 as well.
+#define BIGNUMBER_LIMB_8BIT 0
+#define BIGNUMBER_LIMB_16BIT 1
+#define BIGNUMBER_LIMB_32BIT 0
+#define BIGNUMBER_LIMB_64BIT 0
+#elif defined(__GNUC__) && __WORDSIZE == 64
+// 64-bit system with 128-bit double limbs.
+#define BIGNUMBER_LIMB_8BIT 0
+#define BIGNUMBER_LIMB_16BIT 0
+#define BIGNUMBER_LIMB_32BIT 0
+#define BIGNUMBER_LIMB_64BIT 1
+#else
+// On all other platforms, assume 32-bit is best.
+#define BIGNUMBER_LIMB_8BIT 0
+#define BIGNUMBER_LIMB_16BIT 0
+#define BIGNUMBER_LIMB_32BIT 1
+#define BIGNUMBER_LIMB_64BIT 0
+#endif
+
+// Define the limb types to use on this platform.
+#if BIGNUMBER_LIMB_8BIT // limb_t = one word, slimb_t = signed word, dlimb_t = double-width word
+typedef uint8_t limb_t;
+typedef int8_t slimb_t;
+typedef uint16_t dlimb_t;
+#elif BIGNUMBER_LIMB_16BIT
+typedef uint16_t limb_t;
+typedef int16_t slimb_t;
+typedef uint32_t dlimb_t;
+#elif BIGNUMBER_LIMB_32BIT
+typedef uint32_t limb_t;
+typedef int32_t slimb_t;
+typedef uint64_t dlimb_t;
+#elif BIGNUMBER_LIMB_64BIT
+typedef uint64_t limb_t;
+typedef int64_t slimb_t;
+typedef unsigned __int128 dlimb_t; // compiler extension, not a standard C++ type
+#else
+#error "limb_t must be 8, 16, 32, or 64 bits in size"
+#endif
+
+class BigNumberUtil // static-only helpers for arithmetic on little-endian-ordered limb arrays
+{
+public:
+    static void unpackLE(limb_t *limbs, size_t count,
+                         const uint8_t *bytes, size_t len);
+    static void unpackBE(limb_t *limbs, size_t count,
+                         const uint8_t *bytes, size_t len);
+    static void packLE(uint8_t *bytes, size_t len,
+                       const limb_t *limbs, size_t count);
+    static void packBE(uint8_t *bytes, size_t len,
+                       const limb_t *limbs, size_t count);
+
+    static limb_t add(limb_t *result, const limb_t *x,
+                      const limb_t *y, size_t size);
+    static limb_t sub(limb_t *result, const limb_t *x,
+                      const limb_t *y, size_t size);
+    static void mul(limb_t *result, const limb_t *x, size_t xcount,
+                    const limb_t *y, size_t ycount);
+    static void reduceQuick(limb_t *result, const limb_t *x,
+                            const limb_t *y, size_t size);
+
+    static limb_t add_P(limb_t *result, const limb_t *x, // _P variants read y from program memory
+                        const limb_t *y, size_t size);
+    static limb_t sub_P(limb_t *result, const limb_t *x,
+                        const limb_t *y, size_t size);
+    static void mul_P(limb_t *result, const limb_t *x, size_t xcount,
+                      const limb_t *y, size_t ycount);
+    static void reduceQuick_P(limb_t *result, const limb_t *x,
+                              const limb_t *y, size_t size);
+
+    static limb_t isZero(const limb_t *x, size_t size);
+
+private:
+    // Constructor and destructor are private - cannot instantiate this class.
+    BigNumberUtil() {}
+    ~BigNumberUtil() {}
+};
+
+#endif
diff --git a/src/BlockCipher.cpp b/src/BlockCipher.cpp
new file mode 100644
index 0000000..e98eeb6
--- /dev/null
+++ b/src/BlockCipher.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "BlockCipher.h"
+
+/**
+ * \class BlockCipher BlockCipher.h
+ * \brief Abstract base class for block ciphers.
+ *
+ * Block ciphers always operate in electronic codebook (ECB) mode.
+ * Higher-level classes such as CFB128 and CTR128 wrap the block cipher to
+ * create more useful classes for encryption and decryption of bulk data.
+ *
+ * References: http://en.wikipedia.org/wiki/Block_cipher,
+ * http://en.wikipedia.org/wiki/Block_cipher_modes_of_operation#Electronic_codebook_.28ECB.29
+ */
+
+/**
+ * \brief Constructs a block cipher.
+ */ +BlockCipher::BlockCipher() +{ +} + +/** + * \brief Destroys this block cipher object. + * + * Subclasses are responsible for clearing temporary key schedules + * and other buffers so as to avoid leaking sensitive information. + * + * \sa clear() + */ +BlockCipher::~BlockCipher() +{ +} + +/** + * \fn size_t BlockCipher::blockSize() const + * \brief Size of a single block processed by this cipher, in bytes. + * + * \return Returns the size of a block in bytes. + * + * \sa keySize(), encryptBlock() + */ + +/** + * \fn size_t BlockCipher::keySize() const + * \brief Default size of the key for this block cipher, in bytes. + * + * This value indicates the default, or recommended, size for the key. + * + * \sa setKey(), blockSize() + */ + +/** + * \fn bool BlockCipher::setKey(const uint8_t *key, size_t len) + * \brief Sets the key to use for future encryption and decryption operations. + * + * \param key The key to use. + * \param len The length of the key. + * \return Returns false if the key length is not supported, or the key + * is somehow "weak" and unusable by this cipher. + * + * Use clear() or the destructor to remove the key and any other sensitive + * data from the object once encryption or decryption is complete. + * + * \sa keySize(), clear() + */ + +/** + * \fn void BlockCipher::encryptBlock(uint8_t *output, const uint8_t *input) + * \brief Encrypts a single block using this cipher. + * + * \param output The output buffer to put the ciphertext into. + * Must be at least blockSize() bytes in length. + * \param input The input buffer to read the plaintext from which is + * allowed to overlap with \a output. Must be at least blockSize() + * bytes in length. + * + * \sa decryptBlock(), blockSize() + */ + +/** + * \fn void BlockCipher::decryptBlock(uint8_t *output, const uint8_t *input) + * \brief Decrypts a single block using this cipher. + * + * \param output The output buffer to put the plaintext into. + * Must be at least blockSize() bytes in length. 
+ * \param input The input buffer to read the ciphertext from which is + * allowed to overlap with \a output. Must be at least blockSize() + * bytes in length. + * + * \sa encryptBlock(), blockSize() + */ + +/** + * \fn void BlockCipher::clear() + * \brief Clears all security-sensitive state from this block cipher. + * + * Security-sensitive information includes key schedules and any + * temporary state that is used by encryptBlock() or decryptBlock() + * which is stored in the object itself. + * + * \sa setKey(), encryptBlock(), decryptBlock() + */ diff --git a/src/BlockCipher.h b/src/BlockCipher.h new file mode 100644 index 0000000..5b70eb7 --- /dev/null +++ b/src/BlockCipher.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_BLOCKCIPHER_h +#define CRYPTO_BLOCKCIPHER_h + +#include <inttypes.h> +#include <stddef.h> + +class BlockCipher +{ +public: + BlockCipher(); + virtual ~BlockCipher(); + + virtual size_t blockSize() const = 0; + virtual size_t keySize() const = 0; + + virtual bool setKey(const uint8_t *key, size_t len) = 0; + + virtual void encryptBlock(uint8_t *output, const uint8_t *input) = 0; + virtual void decryptBlock(uint8_t *output, const uint8_t *input) = 0; + + virtual void encryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input) = 0; + virtual void decryptBlockWithSlot(int slot, uint8_t *output, const uint8_t *input) = 0; + + + virtual void clear() = 0; +}; + +#endif diff --git a/src/CTR.cpp b/src/CTR.cpp new file mode 100644 index 0000000..72f4e9e --- /dev/null +++ b/src/CTR.cpp @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE.
+ */ + +#include "CTR.h" +#include "Crypto.h" +#include <string.h> + +/** + * \class CTRCommon CTR.h + * \brief Concrete base class to assist with implementing CTR mode for + * 128-bit block ciphers. + * + * Reference: http://en.wikipedia.org/wiki/Block_cipher_mode_of_operation + * + * \sa CTR + */ + +/** + * \brief Constructs a new cipher in CTR mode. + * + * This constructor should be followed by a call to setBlockCipher(). + */ +CTRCommon::CTRCommon() + : blockCipher(0) + , posn(16) + , counterStart(0) +{ +} + +CTRCommon::~CTRCommon() +{ + // It is assumed that the subclass will clear sensitive + // information in the block cipher. + clean(counter); + clean(state); +} + +size_t CTRCommon::keySize() const +{ + return blockCipher->keySize(); +} + +size_t CTRCommon::ivSize() const +{ + return 16; +} + +/** + * \brief Sets the counter size for the IV. + * + * \param size The number of bytes on the end of the counter block + * that are relevant when incrementing, between 1 and 16. + * \return Returns false if the \a size value is not between 1 and 16. + * + * When the counter is incremented during encrypt(), only the last + * \a size bytes are considered relevant. This can be useful + * to improve performance when the higher level protocol specifies that + * only the least significant N bytes "count". The high level protocol + * should explicitly generate a new initial counter value and key long + * before the \a size bytes overflow and wrap around. + * + * By default, the counter size is 16 which is the same as the block size + * of the underlying block cipher. + * + * \sa setIV() + */ +bool CTRCommon::setCounterSize(size_t size) +{ + if (size < 1 || size > 16) + return false; + counterStart = 16 - size; + return true; +} + +bool CTRCommon::setKey(const uint8_t *key, size_t len) +{ + // Verify the cipher's block size, just in case. + if (blockCipher->blockSize() != 16) + return false; + + // Set the key on the underlying block cipher.
+ return blockCipher->setKey(key, len); +} + +/** + * \brief Sets the initial counter value to use for future encryption and + * decryption operations. + * + * \param iv The initial counter value which must contain exactly 16 bytes. + * \param len The length of the counter value, which must be 16. + * \return Returns false if \a len is not exactly 16. + * + * The precise method to generate the initial counter is not defined by + * this class. Usually higher level protocols like SSL/TLS and SSH + * specify how to construct the initial counter value. This class merely + * increments the counter every time a new block of keystream data is needed. + * + * \sa encrypt(), setCounterSize() + */ +bool CTRCommon::setIV(const uint8_t *iv, size_t len) +{ + if (len != 16) + return false; + memcpy(counter, iv, len); + posn = 16; + return true; +} + +void CTRCommon::encrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + while (len > 0) { + if (posn >= 16) { + // Generate a new encrypted counter block. + blockCipher->encryptBlock(state, counter); + posn = 0; + + // Increment the counter, taking care not to reveal + // any timing information about the starting value. + // We iterate through the entire counter region even + // if we could stop earlier because a byte is non-zero. + uint16_t temp = 1; + uint8_t index = 16; + while (index > counterStart) { + --index; + temp += counter[index]; + counter[index] = (uint8_t)temp; + temp >>= 8; + } + } + uint8_t templen = 16 - posn; + if (templen > len) + templen = len; + len -= templen; + while (templen > 0) { + *output++ = *input++ ^ state[posn++]; + --templen; + } + } +} + +void CTRCommon::decrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + encrypt(output, input, len); +} + +void CTRCommon::clear() +{ + blockCipher->clear(); + clean(counter); + clean(state); + posn = 16; +} + +/** + * \fn void CTRCommon::setBlockCipher(BlockCipher *cipher) + * \brief Sets the block cipher to use for this CTR object.
+ * + * \param cipher The block cipher to use to implement CTR mode, + * which must have a block size of 16 bytes (128 bits). + * + * \note This class only works with block ciphers whose block size is + * 16 bytes (128 bits). If the \a cipher has a different block size, + * then setKey() will fail and return false. + */ + +/** + * \class CTR CTR.h + * \brief Implementation of the Counter (CTR) mode for 128-bit block ciphers. + * + * Counter mode converts a block cipher into a stream cipher. The specific + * block cipher is passed as the template parameter T and the key is + * specified via the setKey() function. + * + * Keystream blocks are generated by encrypting an increasing counter value + * and XOR'ing it with each byte of input. The encrypt() and decrypt() + * operations are identical. + * + * The template parameter T must be a concrete subclass of BlockCipher + * indicating the specific block cipher to use. For example, the following + * creates a CTR object using AES256 as the underlying cipher: + * + * \code + * CTR<AES256> ctr; + * ctr.setKey(key, 32); + * ctr.setIV(iv, 16); + * ctr.setCounterSize(4); + * ctr.encrypt(output, input, len); + * \endcode + * + * In this example, the last 4 bytes of the IV are incremented to count + * blocks. The remaining bytes are left unchanged from block to block. + * + * Reference: http://en.wikipedia.org/wiki/Block_cipher_mode_of_operation + * + * \sa CFB, OFB, CBC + */ + +/** + * \fn CTR::CTR() + * \brief Constructs a new CTR object for the 128-bit block cipher T. + */ diff --git a/src/CTR.h b/src/CTR.h new file mode 100644 index 0000000..477166e --- /dev/null +++ b/src/CTR.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_CTR_h +#define CRYPTO_CTR_h + +#include "Cipher.h" +#include "BlockCipher.h" + +class CTRCommon : public Cipher +{ +public: + virtual ~CTRCommon(); + + size_t keySize() const; + size_t ivSize() const; + + bool setCounterSize(size_t size); + + bool setKey(const uint8_t *key, size_t len); + bool setIV(const uint8_t *iv, size_t len); + + void encrypt(uint8_t *output, const uint8_t *input, size_t len); + void decrypt(uint8_t *output, const uint8_t *input, size_t len); + + void clear(); + +protected: + CTRCommon(); + void setBlockCipher(BlockCipher *cipher) { blockCipher = cipher; } + +private: + BlockCipher *blockCipher; + uint8_t counter[16]; + uint8_t state[16]; + uint8_t posn; + uint8_t counterStart; +}; + +template <typename T> +class CTR : public CTRCommon +{ +public: + CTR() { setBlockCipher(&cipher); } + +private: + T cipher; +}; + +#endif diff --git a/src/ChaCha.cpp b/src/ChaCha.cpp new file mode 100644 index 0000000..b12b1a8 --- /dev/null +++ b/src/ChaCha.cpp @@ -0,0 +1,281 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ChaCha.h" +#include "Crypto.h" +#include "utility/RotateUtil.h" +#include "utility/EndianUtil.h" +#include "utility/ProgMemUtil.h" +#include <string.h> + +/** + * \class ChaCha ChaCha.h + * \brief ChaCha stream cipher. + * + * ChaCha is a stream cipher that takes a key, an 8-byte nonce/IV, and a + * counter and hashes them to generate a keystream to XOR with the plaintext. + * Variations on the ChaCha cipher use 8, 12, or 20 rounds of hashing + * operations with either 128-bit or 256-bit keys. + * + * Reference: http://cr.yp.to/chacha.html + */ + +/** + * \brief Constructs a new ChaCha stream cipher. + * + * \param numRounds Number of encryption rounds to use; usually 8, 12, or 20. + */ +ChaCha::ChaCha(uint8_t numRounds) + : rounds(numRounds) + , posn(64) +{ +} + +ChaCha::~ChaCha() +{ + clean(block); + clean(stream); +} + +size_t ChaCha::keySize() const +{ + // Default key size is 256-bit, but any key size is allowed. + return 32; +} + +size_t ChaCha::ivSize() const +{ + // We return 8 but we also support 12-byte nonces in setIV(). + return 8; +} + +/** + * \fn uint8_t ChaCha::numRounds() const + * \brief Returns the number of encryption rounds; usually 8, 12, or 20. + * + * \sa setNumRounds() + */ + +/** + * \fn void ChaCha::setNumRounds(uint8_t numRounds) + * \brief Sets the number of encryption rounds. + * + * \param numRounds The number of encryption rounds; usually 8, 12, or 20.
+ * + * \sa numRounds() + */ + +bool ChaCha::setKey(const uint8_t *key, size_t len) +{ + static const char tag128[] PROGMEM = "expand 16-byte k"; + static const char tag256[] PROGMEM = "expand 32-byte k"; + if (len <= 16) { + memcpy_P(block, tag128, 16); + memcpy(block + 16, key, len); + memcpy(block + 32, key, len); + if (len < 16) { + memset(block + 16 + len, 0, 16 - len); + memset(block + 32 + len, 0, 16 - len); + } + } else { + if (len > 32) + len = 32; + memcpy_P(block, tag256, 16); + memcpy(block + 16, key, len); + if (len < 32) + memset(block + 16 + len, 0, 32 - len); + } + posn = 64; + return true; +} + +bool ChaCha::setIV(const uint8_t *iv, size_t len) +{ + // From draft-nir-cfrg-chacha20-poly1305-10.txt, we can use either + // 64-bit or 96-bit nonces. The 96-bit nonce consists of the high + // word of the counter prepended to a regular 64-bit nonce for ChaCha. + if (len == 8) { + memset(block + 48, 0, 8); + memcpy(block + 56, iv, len); + posn = 64; + return true; + } else if (len == 12) { + memset(block + 48, 0, 4); + memcpy(block + 52, iv, len); + posn = 64; + return true; + } else { + return false; + } +} + +/** + * \brief Sets the starting counter for encryption. + * + * \param counter A 4-byte or 8-byte value to use for the starting counter + * instead of the default value of zero. + * \param len The length of the counter, which must be 4 or 8. + * \return Returns false if \a len is not 4 or 8. + * + * This function must be called after setIV() and before the first call + * to encrypt(). It is used to specify a different starting value than + * zero for the counter portion of the hash input. + * + * \sa setIV() + */ +bool ChaCha::setCounter(const uint8_t *counter, size_t len) +{ + // Normally both the IV and the counter are 8 bytes in length. + // However, if the IV was 12 bytes, then a 4 byte counter can be used. 
+ if (len == 4 || len == 8) { + memcpy(block + 48, counter, len); + posn = 64; + return true; + } else { + return false; + } +} + +void ChaCha::encrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + while (len > 0) { + if (posn >= 64) { + // Generate a new encrypted counter block. + hashCore((uint32_t *)stream, (const uint32_t *)block, rounds); + posn = 0; + + // Increment the counter, taking care not to reveal + // any timing information about the starting value. + // We iterate through the entire counter region even + // if we could stop earlier because a byte is non-zero. + uint16_t temp = 1; + uint8_t index = 48; + while (index < 56) { + temp += block[index]; + block[index] = (uint8_t)temp; + temp >>= 8; + ++index; + } + } + uint8_t templen = 64 - posn; + if (templen > len) + templen = len; + len -= templen; + while (templen > 0) { + *output++ = *input++ ^ stream[posn++]; + --templen; + } + } +} + +void ChaCha::decrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + encrypt(output, input, len); +} + +/** + * \brief Generates a single block of output direct from the keystream. + * + * \param output The output buffer to fill with keystream bytes. + * + * Unlike encrypt(), this function does not XOR the keystream with + * plaintext data. Instead it generates the keystream directly into + * the caller-supplied buffer. This is useful if the caller knows + * that the plaintext is all-zeroes. + * + * \sa encrypt() + */ +void ChaCha::keystreamBlock(uint32_t *output) +{ + // Generate the hash output directly into the caller-supplied buffer. + hashCore(output, (const uint32_t *)block, rounds); + posn = 64; + + // Increment the lowest counter byte. We are assuming that the caller + // is ChaChaPoly::setKey() and that the previous counter value was zero. + block[48] = 1; +} + +void ChaCha::clear() +{ + clean(block); + clean(stream); + posn = 64; +} + +// Perform a ChaCha quarter round operation. 
+#define quarterRound(a, b, c, d) \ + do { \ + uint32_t _b = (b); \ + uint32_t _a = (a) + _b; \ + uint32_t _d = leftRotate((d) ^ _a, 16); \ + uint32_t _c = (c) + _d; \ + _b = leftRotate12(_b ^ _c); \ + _a += _b; \ + (d) = _d = leftRotate(_d ^ _a, 8); \ + _c += _d; \ + (a) = _a; \ + (b) = leftRotate7(_b ^ _c); \ + (c) = _c; \ + } while (0) + +/** + * \brief Executes the ChaCha hash core on an input memory block. + * + * \param output Output memory block, must be at least 16 words in length + * and must not overlap with \a input. + * \param input Input memory block, must be at least 16 words in length. + * \param rounds Number of ChaCha rounds to perform; usually 8, 12, or 20. + * + * This function is provided for the convenience of applications that need + * access to the ChaCha hash core without the higher-level processing that + * turns the core into a stream cipher. + */ +void ChaCha::hashCore(uint32_t *output, const uint32_t *input, uint8_t rounds) +{ + uint8_t posn; + + // Copy the input buffer to the output prior to the first round + // and convert from little-endian to host byte order. + for (posn = 0; posn < 16; ++posn) + output[posn] = le32toh(input[posn]); + + // Perform the ChaCha rounds in sets of two. + for (; rounds >= 2; rounds -= 2) { + // Column round. + quarterRound(output[0], output[4], output[8], output[12]); + quarterRound(output[1], output[5], output[9], output[13]); + quarterRound(output[2], output[6], output[10], output[14]); + quarterRound(output[3], output[7], output[11], output[15]); + + // Diagonal round. + quarterRound(output[0], output[5], output[10], output[15]); + quarterRound(output[1], output[6], output[11], output[12]); + quarterRound(output[2], output[7], output[8], output[13]); + quarterRound(output[3], output[4], output[9], output[14]); + } + + // Add the original input to the final output, convert back to + // little-endian, and return the result. 
+ for (posn = 0; posn < 16; ++posn) + output[posn] = htole32(output[posn] + le32toh(input[posn])); +} diff --git a/src/ChaCha.h b/src/ChaCha.h new file mode 100644 index 0000000..8c05cd4 --- /dev/null +++ b/src/ChaCha.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_CHACHA_h +#define CRYPTO_CHACHA_h + +#include "Cipher.h" + +class ChaChaPoly; + +class ChaCha : public Cipher +{ +public: + explicit ChaCha(uint8_t numRounds = 20); + virtual ~ChaCha(); + + size_t keySize() const; + size_t ivSize() const; + + uint8_t numRounds() const { return rounds; } + void setNumRounds(uint8_t numRounds) { rounds = numRounds; } + + bool setKey(const uint8_t *key, size_t len); + bool setIV(const uint8_t *iv, size_t len); + bool setCounter(const uint8_t *counter, size_t len); + + void encrypt(uint8_t *output, const uint8_t *input, size_t len); + void decrypt(uint8_t *output, const uint8_t *input, size_t len); + + void clear(); + + static void hashCore(uint32_t *output, const uint32_t *input, uint8_t rounds); + +private: + uint8_t block[64]; + uint8_t stream[64]; + uint8_t rounds; + uint8_t posn; + + void keystreamBlock(uint32_t *output); + + friend class ChaChaPoly; +}; + +#endif diff --git a/src/ChaChaPoly.cpp b/src/ChaChaPoly.cpp new file mode 100644 index 0000000..c6cb301 --- /dev/null +++ b/src/ChaChaPoly.cpp @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ChaChaPoly.h" +#include "Crypto.h" +#include "utility/EndianUtil.h" +#include <string.h> + +/** + * \class ChaChaPoly ChaChaPoly.h + * \brief Authenticated cipher based on ChaCha and Poly1305 + * + * ChaChaPoly is an authenticated cipher based on a combination of + * ChaCha with 20 rounds for encryption and Poly1305 for authentication. + * The resulting cipher has a 256-bit key, a 64-bit or 96-bit + * initialization vector, and a 128-bit authentication tag. + * + * Reference: https://tools.ietf.org/html/draft-irtf-cfrg-chacha20-poly1305-10 + * + * \sa ChaCha, Poly1305, AuthenticatedCipher + */ + +/** + * \brief Constructs a new ChaChaPoly authenticated cipher. + */ +ChaChaPoly::ChaChaPoly() +{ + state.authSize = 0; + state.dataSize = 0; + state.dataStarted = false; + state.ivSize = 8; +} + +/** + * \brief Destroys this ChaChaPoly authenticated cipher. + */ +ChaChaPoly::~ChaChaPoly() +{ + clean(state); +} + +size_t ChaChaPoly::keySize() const +{ + // Default key size is 256-bit, but any key size is allowed. + return 32; +} + +size_t ChaChaPoly::ivSize() const +{ + // Return 8 but we also support 12-byte nonces in setIV(). + return 8; +} + +size_t ChaChaPoly::tagSize() const +{ + // Any tag size between 1 and 16 is supported. + return 16; +} + +bool ChaChaPoly::setKey(const uint8_t *key, size_t len) +{ + return chacha.setKey(key, len); +} + +bool ChaChaPoly::setIV(const uint8_t *iv, size_t len) +{ + // ChaCha::setIV() supports both 64-bit and 96-bit nonces. + if (!chacha.setIV(iv, len)) + return false; + + // Generate the key and nonce to use for Poly1305.
+ uint32_t data[16]; + chacha.keystreamBlock(data); + poly1305.reset(data); + memcpy(state.nonce, data + 4, 16); + clean(data); + + // Reset the size counters for the auth data and payload. + state.authSize = 0; + state.dataSize = 0; + state.dataStarted = false; + state.ivSize = len; + return true; +} + +void ChaChaPoly::encrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + if (!state.dataStarted) { + poly1305.pad(); + state.dataStarted = true; + } + chacha.encrypt(output, input, len); + poly1305.update(output, len); + state.dataSize += len; +} + +void ChaChaPoly::decrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + if (!state.dataStarted) { + poly1305.pad(); + state.dataStarted = true; + } + poly1305.update(input, len); + chacha.encrypt(output, input, len); // encrypt() is the same as decrypt() + state.dataSize += len; +} + +void ChaChaPoly::addAuthData(const void *data, size_t len) +{ + if (!state.dataStarted) { + poly1305.update(data, len); + state.authSize += len; + } +} + +void ChaChaPoly::computeTag(void *tag, size_t len) +{ + uint64_t sizes[2]; + + // Pad the final Poly1305 block and then hash the sizes. + poly1305.pad(); + sizes[0] = htole64(state.authSize); + sizes[1] = htole64(state.dataSize); + poly1305.update(sizes, sizeof(sizes)); + + // Compute the tag and copy it to the return buffer. + poly1305.finalize(state.nonce, tag, len); + clean(sizes); +} + +bool ChaChaPoly::checkTag(const void *tag, size_t len) +{ + // Can never match if the expected tag length is too long. + if (len > 16) + return false; + + // Compute the tag and check it. 
+ uint8_t temp[16]; + computeTag(temp, len); + bool equal = secure_compare(temp, tag, len); + clean(temp); + return equal; +} + +void ChaChaPoly::clear() +{ + chacha.clear(); + poly1305.clear(); + clean(state); + state.ivSize = 8; +} diff --git a/src/ChaChaPoly.h b/src/ChaChaPoly.h new file mode 100644 index 0000000..2e7247e --- /dev/null +++ b/src/ChaChaPoly.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
 */

#ifndef CRYPTO_CHACHAPOLY_H
#define CRYPTO_CHACHAPOLY_H

#include "AuthenticatedCipher.h"
#include "ChaCha.h"
#include "Poly1305.h"

/**
 * Authenticated cipher built from a ChaCha instance (encryption) and a
 * Poly1305 instance (authentication), exposed through the
 * AuthenticatedCipher interface.
 */
class ChaChaPoly : public AuthenticatedCipher
{
public:
    ChaChaPoly();
    virtual ~ChaChaPoly();

    // Size queries for the key, IV and authentication tag, in bytes.
    size_t keySize() const;
    size_t ivSize() const;
    size_t tagSize() const;

    // Key and IV setup; both return false when the input is rejected.
    bool setKey(const uint8_t *key, size_t len);
    bool setIV(const uint8_t *iv, size_t len);

    // Payload processing.
    void encrypt(uint8_t *output, const uint8_t *input, size_t len);
    void decrypt(uint8_t *output, const uint8_t *input, size_t len);

    // Extra data that is authenticated but not encrypted.
    void addAuthData(const void *data, size_t len);

    // Tag generation and verification.
    void computeTag(void *tag, size_t len);
    bool checkTag(const void *tag, size_t len);

    // Wipes the cipher, the authenticator and the state below.
    void clear();

private:
    ChaCha chacha;          // Stream cipher used for the payload.
    Poly1305 poly1305;      // Authenticator used for the tag.
    struct {
        uint8_t nonce[16];  // 16 bytes taken from the keystream block in setIV(); passed to Poly1305::finalize().
        uint64_t authSize;  // Running byte count of addAuthData() input.
        uint64_t dataSize;  // Running byte count of encrypt()/decrypt() input.
        bool dataStarted;   // Set on the first encrypt()/decrypt() call after setIV().
        uint8_t ivSize;     // Length given to the last setIV(); clear() resets it to 8.
    } state;
};

#endif
diff --git a/src/Cipher.cpp b/src/Cipher.cpp new file mode 100644 index 0000000..f91a14a --- /dev/null +++ b/src/Cipher.cpp @@ -0,0 +1,154 @@
/*
 * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "Cipher.h"

/**
 * \class Cipher Cipher.h
 * \brief Abstract base class for stream ciphers.
 *
 * This class is intended for implementing ciphers that operate on arbitrary
 * amounts of data. In particular, stream ciphers where the number of
 * bytes that are input to encrypt() or decrypt() is exactly the same as
 * the number of bytes that are output.
 *
 * All of the stream ciphers such as ChaCha inherit directly from this class,
 * together with block cipher modes such as CTR and CFB.
 */

/**
 * \brief Constructs a new cipher object.
 */
Cipher::Cipher()
{
    // Intentionally empty: there is nothing to set up at this level.
}

/**
 * \brief Destroys this cipher object.
 *
 * Subclasses are responsible for clearing temporary key schedules
 * and other buffers so as to avoid leaking sensitive information.
 *
 * \sa clear()
 */
Cipher::~Cipher()
{
    // Intentionally empty: clearing sensitive state is left to subclasses.
}

/**
 * \fn size_t Cipher::keySize() const
 * \brief Default size of the key for this cipher, in bytes.
 *
 * If the cipher supports variable-sized keys, keySize() indicates the
 * default or recommended key size. The cipher may support other key sizes.
 *
 * \sa setKey(), ivSize()
 */

/**
 * \fn size_t Cipher::ivSize() const
 * \brief Size of the initialization vector for this cipher, in bytes.
 *
 * If the cipher does not need an initialization vector, this function
 * will return zero.
 */

/**
 * \fn bool Cipher::setKey(const uint8_t *key, size_t len)
 * \brief Sets the key to use for future encryption and decryption operations.
 *
 * \param key The key to use.
 * \param len The length of the key in bytes.
 * \return Returns false if the key length is not supported, or the key
 * is somehow "weak" and unusable by this cipher.
+ * + * Use clear() or the destructor to remove the key and any other sensitive + * data from the object once encryption or decryption is complete. + * + * Calling setKey() resets the cipher. Any temporary data that was being + * retained for encrypting partial blocks will be abandoned. + * + * \sa keySize(), clear() + */ + +/** + * \fn bool Cipher::setIV(const uint8_t *iv, size_t len) + * \brief Sets the initialization vector to use for future encryption and + * decryption operations. + * + * \param iv The initialization vector to use. + * \param len The length of the initialization vector in bytes. + * \return Returns false if the length is not supported. + * + * Initialization vectors should be set before the first call to + * encrypt() or decrypt() after a setKey() call. If the initialization + * vector is changed after encryption or decryption begins, + * then the behaviour is undefined. + * + * \note The IV is not encoded into the output stream by encrypt(). + * The caller is responsible for communicating the IV to the other party. + * + * \sa ivSize() + */ + +/** + * \fn void Cipher::encrypt(uint8_t *output, const uint8_t *input, size_t len) + * \brief Encrypts an input buffer and writes the ciphertext to an + * output buffer. + * + * \param output The output buffer to write to, which may be the same + * buffer as \a input. The \a output buffer must have at least as many + * bytes as the \a input buffer. + * \param input The input buffer to read from. + * \param len The number of bytes to encrypt. + * + * The encrypt() function can be called multiple times with different + * regions of the plaintext data. + * + * \sa decrypt() + */ + +/** + * \fn void Cipher::decrypt(uint8_t *output, const uint8_t *input, size_t len) + * \brief Decrypts an input buffer and writes the plaintext to an + * output buffer. + * + * \param output The output buffer to write to, which may be the same + * buffer as \a input. 
The \a output buffer must have at least as many + * bytes as the \a input buffer. + * \param input The input buffer to read from. + * \param len The number of bytes to decrypt. + * + * The decrypt() function can be called multiple times with different + * regions of the ciphertext data. + * + * \sa encrypt() + */ + +/** + * \fn void Cipher::clear() + * \brief Clears all security-sensitive state from this cipher. + * + * Security-sensitive information includes key schedules, initialization + * vectors, and any temporary state that is used by encrypt() or decrypt() + * which is stored in the cipher itself. + */ diff --git a/src/Cipher.h b/src/Cipher.h new file mode 100644 index 0000000..8d498d8 --- /dev/null +++ b/src/Cipher.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_CIPHER_h +#define CRYPTO_CIPHER_h + +#include +#include + +class Cipher +{ +public: + Cipher(); + virtual ~Cipher(); + + virtual size_t keySize() const = 0; + virtual size_t ivSize() const = 0; + + virtual bool setKey(const uint8_t *key, size_t len) = 0; + virtual bool setIV(const uint8_t *iv, size_t len) = 0; + + virtual void encrypt(uint8_t *output, const uint8_t *input, size_t len) = 0; + virtual void decrypt(uint8_t *output, const uint8_t *input, size_t len) = 0; + + virtual void clear() = 0; +}; + +#endif diff --git a/src/Crypto.cpp b/src/Crypto.cpp new file mode 100644 index 0000000..6c537ba --- /dev/null +++ b/src/Crypto.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "Crypto.h" + +/** + * \brief Cleans a block of bytes. + * + * \param dest The destination block to be cleaned. 
+ * \param size The size of the destination to be cleaned in bytes. + * + * Unlike memset(), this function attempts to prevent the compiler + * from optimizing away the clear on a memory buffer. + */ +void clean(void *dest, size_t size) +{ + // Force the use of volatile so that we actually clear the memory. + // Otherwise the compiler might optimise the entire contents of this + // function away, which will not be secure. + volatile uint8_t *d = (volatile uint8_t *)dest; + while (size > 0) { + *d++ = 0; + --size; + } +} + +/** + * \fn void clean(T &var) + * \brief Template function that cleans a variable. + * + * \param var A reference to the variable to clean. + * + * The variable will be cleared to all-zeroes in a secure manner. + * Unlike memset(), this function attempts to prevent the compiler + * from optimizing away the variable clear. + */ + +/** + * \brief Compares two memory blocks for equality. + * + * \param data1 Points to the first memory block. + * \param data2 Points to the second memory block. + * \param len The size of the memory blocks in bytes. + * + * Unlike memcmp(), this function attempts to compare the two memory blocks + * in a way that will not reveal the contents in the instruction timing. + * In particular, this function will not stop early if a byte is different. + * It will instead continue onto the end of the array. + */ +bool secure_compare(const void *data1, const void *data2, size_t len) +{ + uint8_t result = 0; + const uint8_t *d1 = (const uint8_t *)data1; + const uint8_t *d2 = (const uint8_t *)data2; + while (len > 0) { + result |= (*d1++ ^ *d2++); + --len; + } + return (bool)((((uint16_t)0x0100) - result) >> 8); +} + +/** + * \brief Calculates the CRC-8 value over an array in memory. + * + * \param tag Starting tag to distinguish this calculation. + * \param data The data to checksum. + * \param size The number of bytes to checksum. + * \return The CRC-8 value over the data. 
+ * + * This function does not provide any real security. It is a simple + * check that seed values have been initialized within EEPROM or Flash. + * If the CRC-8 check fails, then it is assumed that the EEPROM/Flash + * contents are invalid and should be re-initialized. + * + * Reference: http://www.sunshine2k.de/articles/coding/crc/understanding_crc.html#ch4 + */ +uint8_t crypto_crc8(uint8_t tag, const void *data, unsigned size) +{ + const uint8_t *d = (const uint8_t *)data; + uint8_t crc = 0xFF ^ tag; + uint8_t bit; + while (size > 0) { + crc ^= *d++; + for (bit = 0; bit < 8; ++bit) { + // if (crc & 0x80) + // crc = (crc << 1) ^ 0x1D; + // else + // crc = (crc << 1); + uint8_t generator = (uint8_t)((((int8_t)crc) >> 7) & 0x1D); + crc = (crc << 1) ^ generator; + } + --size; + } + return crc; +} diff --git a/src/Crypto.h b/src/Crypto.h new file mode 100644 index 0000000..b780f25 --- /dev/null +++ b/src/Crypto.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_h +#define CRYPTO_h + +#include +#include + +void clean(void *dest, size_t size); + +template +inline void clean(T &var) +{ + clean(&var, sizeof(T)); +} + +bool secure_compare(const void *data1, const void *data2, size_t len); + +#if defined(ESP8266) +extern "C" void system_soft_wdt_feed(void); +#define crypto_feed_watchdog() system_soft_wdt_feed() +#else +#define crypto_feed_watchdog() do { ; } while (0) +#endif + +#endif diff --git a/src/Curve25519.cpp b/src/Curve25519.cpp new file mode 100644 index 0000000..84744f0 --- /dev/null +++ b/src/Curve25519.cpp @@ -0,0 +1,1610 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "Curve25519.h" +#include "Crypto.h" +#include "RNG.h" +#include "utility/LimbUtil.h" +#include + +/** + * \class Curve25519 Curve25519.h + * \brief Diffie-Hellman key agreement based on the elliptic curve + * modulo 2^255 - 19. + * + * \note The public functions in this class need a substantial amount of + * stack space to store intermediate results while the curve function is + * being evaluated. About 1k of free stack space is recommended for safety. + * + * References: http://cr.yp.to/ecdh.html, + * RFC 7748 + * + * \sa Ed25519 + */ + +// Global switch to enable/disable AVR inline assembly optimizations. +#if defined(__AVR__) +// Disabled for now - there are issues with newer Arduino compilers. FIXME +//#define CURVE25519_ASM_AVR 1 +#endif + +// The overhead of clean() calls in mul(), reduceQuick(), etc can +// add up to a lot of processing time during eval(). Only do such +// cleanups if strict mode has been enabled. Other implementations +// like curve25519-donna don't do any cleaning at all so the value +// of cleaning up the stack is dubious at best anyway. +#if defined(CURVE25519_STRICT_CLEAN) +#define strict_clean(x) clean(x) +#else +#define strict_clean(x) do { ; } while (0) +#endif + +/** + * \brief Evaluates the raw Curve25519 function. + * + * \param result The result of evaluating the curve function. + * \param s The S parameter to the curve function. + * \param x The X(Q) parameter to the curve function. If this pointer is + * NULL then the value 9 is used for \a x. + * + * This function is provided to assist with implementating other + * algorithms with the curve. Normally applications should use dh1() + * and dh2() directly instead. 
 *
 * \return Returns true if the function was evaluated; false if \a x is
 * not a proper member of the field modulo (2^255 - 19).
 *
 * Reference: RFC 7748
 *
 * \sa dh1(), dh2()
 */
bool Curve25519::eval(uint8_t result[32], const uint8_t s[32], const uint8_t x[32])
{
    // Working values for the ladder step below, all in limb form.
    limb_t x_1[NUM_LIMBS_256BIT];
    limb_t x_2[NUM_LIMBS_256BIT];
    limb_t x_3[NUM_LIMBS_256BIT];
    limb_t z_2[NUM_LIMBS_256BIT];
    limb_t z_3[NUM_LIMBS_256BIT];
    limb_t A[NUM_LIMBS_256BIT];
    limb_t B[NUM_LIMBS_256BIT];
    limb_t C[NUM_LIMBS_256BIT];
    limb_t D[NUM_LIMBS_256BIT];
    limb_t E[NUM_LIMBS_256BIT];
    limb_t AA[NUM_LIMBS_256BIT];
    limb_t BB[NUM_LIMBS_256BIT];
    limb_t DA[NUM_LIMBS_256BIT];
    limb_t CB[NUM_LIMBS_256BIT];
    uint8_t mask;       // Bit within s[sposn] that is currently being processed.
    uint8_t sposn;      // Index of the byte of "s" that is currently being processed.
    uint8_t select;     // s[sposn] & mask for the current bit.
    uint8_t swap;       // Value handed to cswap() for the conditional swaps.
    bool retval;

    // Unpack the "x" argument into the limb representation
    // which also masks off the high bit. NULL means 9.
    if (x) {
        // x1 = x
        BigNumberUtil::unpackLE(x_1, NUM_LIMBS_256BIT, x, 32);
        x_1[NUM_LIMBS_256BIT - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
    } else {
        memset(x_1, 0, sizeof(x_1));    // x_1 = 9
        x_1[0] = 9;
    }

    // Check that "x" is within the range of the modulo field.
    // We can do this with a reduction - if there was no borrow
    // then the value of "x" was out of range. Timing is sensitive
    // here so that we don't reveal anything about the value of "x".
    // If there was a reduction, then continue executing the rest
    // of this function with the (now) in-range "x" value and
    // report the failure at the end.
    retval = (bool)(reduceQuick(x_1) & 0x01);

    // Initialize the other temporary variables.
    memset(x_2, 0, sizeof(x_2));        // x_2 = 1
    x_2[0] = 1;
    memset(z_2, 0, sizeof(z_2));        // z_2 = 0
    memcpy(x_3, x_1, sizeof(x_1));      // x_3 = x
    memcpy(z_3, x_2, sizeof(x_2));      // z_3 = 1

    // Iterate over all 255 bits of "s" from the highest to the lowest.
    // We ignore the high bit of the 256-bit representation of "s".
    mask = 0x40;
    sposn = 31;
    swap = 0;
    for (uint8_t t = 255; t > 0; --t) {
        // Conditional swaps on entry to this bit but only if we
        // didn't swap on the previous bit.
        select = s[sposn] & mask;
        swap ^= select;
        cswap(swap, x_2, x_3);
        cswap(swap, z_2, z_3);

        // Evaluate the curve.
        add(A, x_2, z_2);               // A = x_2 + z_2
        square(AA, A);                  // AA = A^2
        sub(B, x_2, z_2);               // B = x_2 - z_2
        square(BB, B);                  // BB = B^2
        sub(E, AA, BB);                 // E = AA - BB
        add(C, x_3, z_3);               // C = x_3 + z_3
        sub(D, x_3, z_3);               // D = x_3 - z_3
        mul(DA, D, A);                  // DA = D * A
        mul(CB, C, B);                  // CB = C * B
        add(x_3, DA, CB);               // x_3 = (DA + CB)^2
        square(x_3, x_3);
        sub(z_3, DA, CB);               // z_3 = x_1 * (DA - CB)^2
        square(z_3, z_3);
        mul(z_3, z_3, x_1);
        mul(x_2, AA, BB);               // x_2 = AA * BB
        mulA24(z_2, E);                 // z_2 = E * (AA + a24 * E)
        add(z_2, z_2, AA);
        mul(z_2, z_2, E);

        // Move onto the next lower bit of "s".
        // "swap" is re-seeded with the bit just processed, shifted to the
        // position that "mask" now selects, so that the XOR at the top of
        // the next iteration is non-zero exactly when the two consecutive
        // bits of "s" differ.
        mask >>= 1;
        if (!mask) {
            // Ran off the bottom of this byte: continue with the top bit
            // of the next lower byte.
            --sposn;
            mask = 0x80;
            swap = select << 7;
        } else {
            swap = select >> 1;
        }
    }

    // Final conditional swaps.
    cswap(swap, x_2, x_3);
    cswap(swap, z_2, z_3);

    // Compute x_2 * (z_2 ^ (p - 2)) where p = 2^255 - 19.
    recip(z_3, z_2);
    mul(x_2, x_2, z_3);

    // Pack the result into the return array.
    BigNumberUtil::packLE(result, 32, x_2, NUM_LIMBS_256BIT);

    // Clean up and exit.
    clean(x_1);
    clean(x_2);
    clean(x_3);
    clean(z_2);
    clean(z_3);
    clean(A);
    clean(B);
    clean(C);
    clean(D);
    clean(E);
    clean(AA);
    clean(BB);
    clean(DA);
    clean(CB);
    return retval;
}

/**
 * \brief Performs phase 1 of a Diffie-Hellman key exchange using Curve25519.
 *
 * \param k The key value to send to the other party as part of the exchange.
 * \param f The generated secret value for this party. This must not be
 * transmitted to any party or stored in permanent storage. It only needs
 * to be kept in memory until dh2() is called.
+ * + * The \a f value is generated with \link RNGClass::rand() RNG.rand()\endlink. + * It is the caller's responsibility to ensure that the global random number + * pool has sufficient entropy to generate the 32 bytes of \a f safely + * before calling this function. + * + * The following example demonstrates how to perform a full Diffie-Hellman + * key exchange using dh1() and dh2(): + * + * \code + * uint8_t f[32]; + * uint8_t k[32]; + * + * // Generate the secret value "f" and the public value "k". + * Curve25519::dh1(k, f); + * + * // Send "k" to the other party. + * ... + * + * // Read the "k" value that the other party sent to us. + * ... + * + * // Generate the shared secret in "k" using the previous secret value "f". + * if (!Curve25519::dh2(k, f)) { + * // The received "k" value was invalid - abort the session. + * ... + * } + * + * // The "k" value can now be used to generate session keys for encryption. + * ... + * \endcode + * + * Reference: RFC 7748 + * + * \sa dh2() + */ +void Curve25519::dh1(uint8_t k[32], uint8_t f[32]) +{ + do { + // Generate a random "f" value and then adjust the value to make + // it valid as an "s" value for eval(). According to the specification + // we need to mask off the 3 right-most bits of f[0], mask off the + // left-most bit of f[31], and set the second to left-most bit of f[31]. + RNG.rand(f, 32); + f[0] &= 0xF8; + f[31] = (f[31] & 0x7F) | 0x40; + + // Evaluate the curve function: k = Curve25519::eval(f, 9). + // We pass NULL to eval() to indicate the value 9. There is no + // need to check the return value from eval() because we know + // that 9 is a valid field element. + eval(k, f, 0); + + // If "k" is weak for contributory behaviour then reject it, + // generate another "f" value, and try again. This case is + // highly unlikely but we still perform the check just in case. + } while (isWeakPoint(k)); +} + +/** + * \brief Performs phase 2 of a Diffie-Hellman key exchange using Curve25519. 
 *
 * \param k On entry, this is the key value that was received from the other
 * party as part of the exchange. On exit, this will be the shared secret.
 * \param f The secret value for this party that was generated by dh1().
 * The \a f value will be destroyed by this function.
 *
 * \return Returns true if the key exchange was successful, or false if
 * the \a k value is invalid.
 *
 * Reference: RFC 7748
 *
 * \sa dh1()
 */
bool Curve25519::dh2(uint8_t k[32], uint8_t f[32])
{
    uint8_t weak;

    // Evaluate the curve function: k = Curve25519::eval(f, k).
    // If "k" is weak for contributory behaviour before or after
    // the curve evaluation, then fail the exchange. For safety
    // we perform every phase of the weak checks even if we could
    // bail out earlier so that the execution takes the same
    // amount of time for weak and non-weak "k" values.
    weak = isWeakPoint(k);                      // Is "k" weak before?
    // eval() returns false when "k" is out of range; the XOR with 1
    // turns that failure into a set "weak" bit.
    weak |= ((eval(k, f, k) ^ 0x01) & 0x01);    // Is "k" weak during?
    weak |= isWeakPoint(k);                     // Is "k" weak after?
    // The secret is wiped on every path, whether or not the exchange succeeded.
    clean(f, 32);
    // Invert the low bit: success means no weak condition was flagged.
    return (bool)((weak ^ 0x01) & 0x01);
}

/**
 * \brief Determines if a Curve25519 point is weak for contributory behaviour.
 *
 * \param k The point to check.
 * \return Returns 1 if \a k is weak for contributory behavior or
 * returns zero if \a k is not weak.
 */
uint8_t Curve25519::isWeakPoint(const uint8_t k[32])
{
    // List of weak points from http://cr.yp.to/ecdh.html
    // That page lists some others but they are variants on these
    // of the form "point + i * (2^255 - 19)" for i = 0, 1, 2.
    // Here we mask off the high bit and eval() catches the rest.
    static const uint8_t points[5][32] PROGMEM = {
        {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
        {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
        {0xE0, 0xEB, 0x7A, 0x7C, 0x3B, 0x41, 0xB8, 0xAE,
         0x16, 0x56, 0xE3, 0xFA, 0xF1, 0x9F, 0xC4, 0x6A,
         0xDA, 0x09, 0x8D, 0xEB, 0x9C, 0x32, 0xB1, 0xFD,
         0x86, 0x62, 0x05, 0x16, 0x5F, 0x49, 0xB8, 0x00},
        {0x5F, 0x9C, 0x95, 0xBC, 0xA3, 0x50, 0x8C, 0x24,
         0xB1, 0xD0, 0xB1, 0x55, 0x9C, 0x83, 0xEF, 0x5B,
         0x04, 0x44, 0x5C, 0xC4, 0x58, 0x1C, 0x8E, 0x86,
         0xD8, 0x22, 0x4E, 0xDD, 0xD0, 0x9F, 0x11, 0x57},
        {0xEC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F}
    };

    // Check each of the weak points in turn. We perform the
    // comparisons carefully so as not to reveal the value of "k"
    // in the instruction timing. If "k" is indeed weak then
    // we still check everything so as not to reveal which
    // weak point it is.
    uint8_t result = 0;
    for (uint8_t posn = 0; posn < 5; ++posn) {
        const uint8_t *point = points[posn];
        // Byte 31 is compared with its high bit masked off; bytes 30..0
        // are then OR-ed in so that "check" is zero only on a full match.
        // The table is declared PROGMEM, hence the pgm_read_byte() reads.
        uint8_t check = (pgm_read_byte(&(point[31])) ^ k[31]) & 0x7F;
        for (uint8_t index = 31; index > 0; --index)
            check |= (pgm_read_byte(&(point[index - 1])) ^ k[index - 1]);
        // (0x0100 - check) >> 8 is 1 when check is zero and 0 otherwise.
        result |= (uint8_t)((((uint16_t)0x0100) - check) >> 8);
    }

    // The "result" variable will be non-zero if there was a match.
    return result;
}

/**
 * \brief Reduces a number modulo 2^255 - 19.
 *
 * \param result The array that will contain the result when the
 * function exits. Must be NUM_LIMBS_256BIT limbs in size.
 * \param x The number to be reduced, which must be NUM_LIMBS_512BIT
 * limbs in size and less than or equal to square(2^255 - 19 - 1).
 * This array will be modified by the reduction process.
 * \param size The size of the high order half of \a x. This indicates
 * the size of \a x in limbs. If it is shorter than NUM_LIMBS_256BIT
 * then the reduction can be performed quicker.
 */
void Curve25519::reduce(limb_t *result, limb_t *x, uint8_t size)
{
    /*
    Note: This explanation is best viewed with a UTF-8 text viewer.

    To help explain what this function is doing, the following describes
    how to efficiently compute reductions modulo a base of the form (2ⁿ - b)
    where b is greater than zero and (b + 1)² <= 2ⁿ.

    Here we are interested in reducing the result of multiplying two
    numbers that are less than or equal to (2ⁿ - b - 1). That is,
    multiplying numbers that have already been reduced.

    Given some x less than or equal to (2ⁿ - b - 1)², we want to find a
    y less than (2ⁿ - b) such that:

        y ≡ x mod (2ⁿ - b)

    We know that for all integer values of k >= 0:

        y ≡ x - k * (2ⁿ - b)
          ≡ x - k * 2ⁿ + k * b

    In our case we choose k = ⌊x / 2ⁿ⌋ and then let:

        w = (x mod 2ⁿ) + ⌊x / 2ⁿ⌋ * b

    The value w will either be the answer y or y can be obtained by
    repeatedly subtracting (2ⁿ - b) from w until it is less than (2ⁿ - b).
    At most b subtractions will be required.

    In our case b is 19 which is more subtractions than we would like to do,
    but we can handle that by performing the above reduction twice and then
    performing a single trial subtraction:

        w = (x mod 2ⁿ) + ⌊x / 2ⁿ⌋ * b
        y = (w mod 2ⁿ) + ⌊w / 2ⁿ⌋ * b
        if y >= (2ⁿ - b)
            y -= (2ⁿ - b)

    The value y is the answer we want for reducing x modulo (2ⁿ - b).
    */

    // Portable C implementation; the AVR inline assembly version below is
    // only compiled when CURVE25519_ASM_AVR is defined.
#if !defined(CURVE25519_ASM_AVR)
    dlimb_t carry;
    uint8_t posn;

    // Calculate (x mod 2^255) + ((x / 2^255) * 19) which will
    // either produce the answer we want or it will produce a
    // value of the form "answer + j * (2^255 - 19)".
    // The top bit of the low half has weight 2^255 and is folded in as 19;
    // the limbs of the high half start at weight 2^256 and are folded in
    // as 38 (= 2 * 19).
    carry = ((dlimb_t)(x[NUM_LIMBS_256BIT - 1] >> (LIMB_BITS - 1))) * 19U;
    x[NUM_LIMBS_256BIT - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
    for (posn = 0; posn < size; ++posn) {
        carry += ((dlimb_t)(x[posn + NUM_LIMBS_256BIT])) * 38U;
        carry += x[posn];
        x[posn] = (limb_t)carry;
        carry >>= LIMB_BITS;
    }
    if (size < NUM_LIMBS_256BIT) {
        // The high order half of the number is short; e.g. for mulA24().
        // Propagate the carry through the rest of the low order part.
        for (posn = size; posn < NUM_LIMBS_256BIT; ++posn) {
            carry += x[posn];
            x[posn] = (limb_t)carry;
            carry >>= LIMB_BITS;
        }
    }

    // The "j" value may still be too large due to the final carry-out.
    // We must repeat the reduction. If we already have the answer,
    // then this won't do any harm but we must still do the calculation
    // to preserve the overall timing.
    carry *= 38U;
    carry += ((dlimb_t)(x[NUM_LIMBS_256BIT - 1] >> (LIMB_BITS - 1))) * 19U;
    x[NUM_LIMBS_256BIT - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
    for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
        carry += x[posn];
        x[posn] = (limb_t)carry;
        carry >>= LIMB_BITS;
    }

    // At this point "x" will either be the answer or it will be the
    // answer plus (2^255 - 19). Perform a trial subtraction which
    // is equivalent to adding 19 and subtracting 2^255. We put the
    // trial answer into the top-most limbs of the original "x" array.
    // We add 19 here; the subtraction of 2^255 occurs in the next step.
    carry = 19U;
    for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
        carry += x[posn];
        x[posn + NUM_LIMBS_256BIT] = (limb_t)carry;
        carry >>= LIMB_BITS;
    }

    // If there was a borrow, then the bottom-most limbs of "x" are the
    // correct answer. If there was no borrow, then the top-most limbs
    // of "x" are the correct answer. Select the correct answer but do
    // it in a way that instruction timing will not reveal which value
    // was selected. Borrow will occur if the high bit of the previous
    // result is 0: turn the high bit into a selection mask.
    // The signed right shift replicates the top bit across the whole limb.
    limb_t mask = (limb_t)(((slimb_t)(x[NUM_LIMBS_512BIT - 1])) >> (LIMB_BITS - 1));
    limb_t nmask = ~mask;
    x[NUM_LIMBS_512BIT - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
    for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
        result[posn] = (x[posn] & nmask) | (x[posn + NUM_LIMBS_256BIT] & mask);
    }
#else
    __asm__ __volatile__ (
        // Calculate (x mod 2^255) + ((x / 2^255) * 19) which will
        // either produce the answer we want or it will produce a
        // value of the form "answer + j * (2^255 - 19)".
        "ldd r24,Z+31\n"            // Extract the high bit of x[31]
        "mov r25,r24\n"             // and mask it off
        "andi r25,0x7F\n"
        "std Z+31,r25\n"
        "lsl r24\n"                 // carry = high bit * 19
        "mov r24,__zero_reg__\n"
        "sbc r24,__zero_reg__\n"
        "andi r24,19\n"

        "mov r25,%1\n"              // load "size" into r25
        "ldi r23,38\n"              // r23 = 38
        "mov r22,__zero_reg__\n"    // r22 = 0 (we're about to destroy r1)
        "1:\n"
        "ld r16,Z\n"                // r16 = x[0]
        "ldd r17,Z+32\n"            // r17 = x[32]
        "mul r17,r23\n"             // r0:r1 = r17 * 38
        "add r0,r24\n"              // r0:r1 += carry
        "adc r1,r22\n"
        "add r0,r16\n"              // r0:r1 += r16
        "adc r1,r22\n"
        "st Z+,r0\n"                // *x++ = r0
        "mov r24,r1\n"              // carry = r1
        "dec r25\n"                 // if (--r25 != 0) loop
        "brne 1b\n"

        // If the size is short, then we need to continue propagating carries.
        "ldi r25,32\n"
        "cp %1,r25\n"
        "breq 3f\n"
        "sub r25,%1\n"
        "ld __tmp_reg__,Z\n"
        "add __tmp_reg__,r24\n"
        "st Z+,__tmp_reg__\n"
        "dec r25\n"
        "2:\n"
        "ld __tmp_reg__,Z\n"        // *x++ += carry
        "adc __tmp_reg__,r22\n"
        "st Z+,__tmp_reg__\n"
        "dec r25\n"
        "brne 2b\n"
        "mov r24,r22\n"             // put the carry back into r24
        "adc r24,r22\n"
        "3:\n"
        "sbiw r30,32\n"             // Point Z back to the start of "x"

        // The "j" value may still be too large due to the final carry-out.
        // We must repeat the reduction. If we already have the answer,
        // then this won't do any harm but we must still do the calculation
        // to preserve the overall timing.
        "mul r24,r23\n"             // carry *= 38
        "ldd r24,Z+31\n"            // Extract the high bit of x[31]
        "mov r25,r24\n"             // and mask it off
        "andi r25,0x7F\n"
        "std Z+31,r25\n"
        "lsl r24\n"                 // carry += high bit * 19
        "mov r24,r22\n"
        "sbc r24,r22\n"
        "andi r24,19\n"
        "add r0,r24\n"
        "adc r1,r22\n"              // 9-bit carry is now in r0:r1

        // Propagate the carry through the rest of x.
        "ld r24,Z\n"                // x[0]
        "add r0,r24\n"
        "adc r1,r22\n"
        "st Z+,r0\n"
        "ld r24,Z\n"                // x[1]
        "add r1,r24\n"
        "st Z+,r1\n"
        "ldi r25,30\n"              // x[2..31]
        "4:\n"
        "ld r24,Z\n"
        "adc r24,r22\n"
        "st Z+,r24\n"
        "dec r25\n"
        "brne 4b\n"
        "sbiw r30,32\n"             // Point Z back to the start of "x"

        // We destroyed __zero_reg__ (r1) above, so restore its zero value.
        "mov __zero_reg__,r22\n"

        // At this point "x" will either be the answer or it will be the
        // answer plus (2^255 - 19). Perform a trial subtraction which
        // is equivalent to adding 19 and subtracting 2^255. We put the
        // trial answer into the top-most limbs of the original "x" array.
        // We add 19 here; the subtraction of 2^255 occurs in the next step.
        "ldi r24,8\n"               // Loop counter.
        "ldi r25,19\n"              // carry = 19
        "5:\n"
        "ld r16,Z+\n"               // r16:r19:carry = *xx++ + carry
        "ld r17,Z+\n"
        "ld r18,Z+\n"
        "ld r19,Z+\n"
        "add r16,r25\n"             // r16:r19:carry += carry
        "adc r17,__zero_reg__\n"
        "adc r18,__zero_reg__\n"
        "adc r19,__zero_reg__\n"
        "mov r25,__zero_reg__\n"
        "adc r25,r25\n"
        "std Z+28,r16\n"            // *tt++ = r16:r19
        "std Z+29,r17\n"
        "std Z+30,r18\n"
        "std Z+31,r19\n"
        "dec r24\n"
        "brne 5b\n"

        // Subtract 2^255 from x[32..63] which is equivalent to extracting
        // the top bit and then masking it off. If the top bit is zero
        // then a borrow has occurred and this isn't the answer we want.
        "mov r25,r19\n"
        "andi r19,0x7F\n"
        "std Z+31,r19\n"
        "lsl r25\n"
        "mov r25,__zero_reg__\n"
        "sbc r25,__zero_reg__\n"

        // At this point, r25 is 0 if the original x[0..31] is the answer
        // we want, or 0xFF if x[32..63] is the answer we want. Essentially
        // we need to do a conditional move of either x[0..31] or x[32..63]
        // into "result".
        "sbiw r30,32\n"             // Point Z back to x[0].
        "ldi r24,8\n"
        "6:\n"
        "ldd r16,Z+32\n"
        "ldd r17,Z+33\n"
        "ldd r18,Z+34\n"
        "ldd r19,Z+35\n"
        "ld r20,Z+\n"
        "ld r21,Z+\n"
        "ld r22,Z+\n"
        "ld r23,Z+\n"
        "eor r16,r20\n"
        "eor r17,r21\n"
        "eor r18,r22\n"
        "eor r19,r23\n"
        "and r16,r25\n"
        "and r17,r25\n"
        "and r18,r25\n"
        "and r19,r25\n"
        "eor r20,r16\n"
        "eor r21,r17\n"
        "eor r22,r18\n"
        "eor r23,r19\n"
        "st X+,r20\n"
        "st X+,r21\n"
        "st X+,r22\n"
        "st X+,r23\n"
        "dec r24\n"
        "brne 6b\n"

        : : "z"(x), "r"((uint8_t)(size * sizeof(limb_t))), "x"(result)
        : "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
          "r24", "r25"
    );
#endif
}

/**
 * \brief Quickly reduces a number modulo 2^255 - 19.
 *
 * \param x The number to be reduced, which must be NUM_LIMBS_256BIT
 * limbs in size and less than or equal to 2 * (2^255 - 19 - 1).
 * \return Zero if \a x was greater than or equal to (2^255 - 19).
+ * + * The answer is also put into \a x and will consist of NUM_LIMBS_256BIT limbs. + * + * This function is intended for reducing the result of additions where + * the caller knows that \a x is within the described range. A single + * trial subtraction is all that is needed to reduce the number. + */ +limb_t Curve25519::reduceQuick(limb_t *x) +{ +#if !defined(CURVE25519_ASM_AVR) + limb_t temp[NUM_LIMBS_256BIT]; + dlimb_t carry; + uint8_t posn; + limb_t *xx; + limb_t *tt; + + // Perform a trial subtraction of (2^255 - 19) from "x" which is + // equivalent to adding 19 and subtracting 2^255. We add 19 here; + // the subtraction of 2^255 occurs in the next step. + carry = 19U; + xx = x; + tt = temp; + for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) { + carry += *xx++; + *tt++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // If there was a borrow, then the original "x" is the correct answer. + // If there was no borrow, then "temp" is the correct answer. Select the + // correct answer but do it in a way that instruction timing will not + // reveal which value was selected. Borrow will occur if the high bit + // of "temp" is 0: turn the high bit into a selection mask. + limb_t mask = (limb_t)(((slimb_t)(temp[NUM_LIMBS_256BIT - 1])) >> (LIMB_BITS - 1)); + limb_t nmask = ~mask; + temp[NUM_LIMBS_256BIT - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1); + xx = x; + tt = temp; + for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) { + *xx = ((*xx) & nmask) | ((*tt++) & mask); + ++xx; + } + + // Clean up "temp". + strict_clean(temp); + + // Return a zero value if we actually subtracted (2^255 - 19) from "x". + return nmask; +#else // CURVE25519_ASM_AVR + limb_t temp[NUM_LIMBS_256BIT]; + uint8_t result; + __asm__ __volatile__ ( + // Subtract (2^255 - 19) from "x", which is the same as adding 19 + // and then subtracting 2^255. + "ldi r24,8\n" // Loop counter. 
+ "ldi r25,19\n" // carry = 19 + "1:\n" + "ld r16,Z+\n" // r16:r19:carry = *xx++ + carry + "ld r17,Z+\n" + "ld r18,Z+\n" + "ld r19,Z+\n" + "add r16,r25\n" // r16:r19:carry += carry + "adc r17,__zero_reg__\n" + "adc r18,__zero_reg__\n" + "adc r19,__zero_reg__\n" + "mov r25,__zero_reg__\n" + "adc r25,r25\n" + "st X+,r16\n" // *tt++ = r16:r19 + "st X+,r17\n" + "st X+,r18\n" + "st X+,r19\n" + "dec r24\n" + "brne 1b\n" + + // Subtract 2^255 from "temp" which is equivalent to extracting + // the top bit and then masking it off. If the top bit is zero + // then a borrow has occurred and this isn't the answer we want. + "mov r25,r19\n" + "andi r19,0x7F\n" + "st -X,r19\n" + "lsl r25\n" + "mov r25,__zero_reg__\n" + "sbc r25,__zero_reg__\n" + + // At this point, r25 is 0 if the original "x" is the answer + // we want, or 0xFF if "temp" is the answer we want. Essentially + // we need to do a conditional move of "temp" into "x". + "sbiw r26,31\n" // Point X back to the start of "temp". + "sbiw r30,32\n" // Point Z back to the start of "x". + "ldi r24,8\n" + "2:\n" + "ld r16,X+\n" + "ld r17,X+\n" + "ld r18,X+\n" + "ld r19,X+\n" + "ld r20,Z\n" + "ldd r21,Z+1\n" + "ldd r22,Z+2\n" + "ldd r23,Z+3\n" + "eor r16,r20\n" + "eor r17,r21\n" + "eor r18,r22\n" + "eor r19,r23\n" + "and r16,r25\n" + "and r17,r25\n" + "and r18,r25\n" + "and r19,r25\n" + "eor r20,r16\n" + "eor r21,r17\n" + "eor r22,r18\n" + "eor r23,r19\n" + "st Z+,r20\n" + "st Z+,r21\n" + "st Z+,r22\n" + "st Z+,r23\n" + "dec r24\n" + "brne 2b\n" + "mov %0,r25\n" + : "=r"(result) + : "x"(temp), "z"(x) + : "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25" + ); + strict_clean(temp); + return result; +#endif // CURVE25519_ASM_AVR +} + +/** + * \brief Multiplies two 256-bit values to produce a 512-bit result. + * + * \param result The result, which must be NUM_LIMBS_512BIT limbs in size + * and must not overlap with \a x or \a y. 
+ * \param x The first value to multiply, which must be NUM_LIMBS_256BIT + * limbs in size. + * \param y The second value to multiply, which must be NUM_LIMBS_256BIT + * limbs in size. + * + * \sa mul() + */ +void Curve25519::mulNoReduce(limb_t *result, const limb_t *x, const limb_t *y) +{ +#if !defined(CURVE25519_ASM_AVR) + uint8_t i, j; + dlimb_t carry; + limb_t word; + const limb_t *yy; + limb_t *rr; + + // Multiply the lowest word of x by y. + carry = 0; + word = x[0]; + yy = y; + rr = result; + for (i = 0; i < NUM_LIMBS_256BIT; ++i) { + carry += ((dlimb_t)(*yy++)) * word; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + *rr = (limb_t)carry; + + // Multiply and add the remaining words of x by y. + for (i = 1; i < NUM_LIMBS_256BIT; ++i) { + word = x[i]; + carry = 0; + yy = y; + rr = result + i; + for (j = 0; j < NUM_LIMBS_256BIT; ++j) { + carry += ((dlimb_t)(*yy++)) * word; + carry += *rr; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + *rr = (limb_t)carry; + } +#else + __asm__ __volatile__ ( + // Save Y and copy the "result" pointer into it. + "push r28\n" + "push r29\n" + "mov r28,%A2\n" + "mov r29,%B2\n" + + // Multiply the first byte of "x" by y[0..31]. 
+ "ldi r25,8\n" // loop 8 times: 4 bytes of y each time + "clr r24\n" // carry = 0 + "clr r22\n" // r22 = 0 to replace __zero_reg__ + "ld r23,X+\n" // r23 = *x++ + "1:\n" + "ld r16,Z\n" // r16 = y[0] + "mul r16,r23\n" // r8:r9 = y[0] * r23 + "movw r8,r0\n" + "ldd r16,Z+2\n" // r16 = y[2] + "mul r16,r23\n" // r10:r11 = y[2] * r23 + "movw r10,r0\n" + "ldd r16,Z+1\n" // r16 = y[1] + "mul r16,r23\n" // r9:r10:r11 += y[1] * r23 + "add r9,r0\n" + "adc r10,r1\n" + "adc r11,r22\n" + "ldd r16,Z+3\n" // r16 = y[3] + "mul r16,r23\n" // r11:r1 += y[3] * r23 + "add r11,r0\n" + "adc r1,r22\n" + "add r8,r24\n" // r8:r9:r10:r11:r1 += carry + "adc r9,r22\n" + "adc r10,r22\n" + "adc r11,r22\n" + "adc r1,r22\n" + "mov r24,r1\n" // carry = r1 + "st Y+,r8\n" // *rr++ = r8:r9:r10:r11 + "st Y+,r9\n" + "st Y+,r10\n" + "st Y+,r11\n" + "adiw r30,4\n" + "dec r25\n" + "brne 1b\n" + "st Y+,r24\n" // *rr++ = carry + "sbiw r28,32\n" // rr -= 32 + "sbiw r30,32\n" // Point Z back to the start of y + + // Multiply and add the remaining bytes of "x" by y[0..31]. + "ldi r21,31\n" // 31 more bytes of x to go. 
+ "2:\n" + "ldi r25,8\n" // loop 8 times: 4 bytes of y each time + "clr r24\n" // carry = 0 + "ld r23,X+\n" // r23 = *x++ + "3:\n" + "ld r16,Z\n" // r16 = y[0] + "mul r16,r23\n" // r8:r9 = y[0] * r23 + "movw r8,r0\n" + "ldd r16,Z+2\n" // r16 = y[2] + "mul r16,r23\n" // r10:r11 = y[2] * r23 + "movw r10,r0\n" + "ldd r16,Z+1\n" // r16 = y[1] + "mul r16,r23\n" // r9:r10:r11 += y[1] * r23 + "add r9,r0\n" + "adc r10,r1\n" + "adc r11,r22\n" + "ldd r16,Z+3\n" // r16 = y[3] + "mul r16,r23\n" // r11:r1 += y[3] * r23 + "add r11,r0\n" + "adc r1,r22\n" + "add r8,r24\n" // r8:r9:r10:r11:r1 += carry + "adc r9,r22\n" + "adc r10,r22\n" + "adc r11,r22\n" + "adc r1,r22\n" + "ld r16,Y\n" // r8:r9:r10:r11:r1 += rr[0..3] + "add r8,r16\n" + "ldd r16,Y+1\n" + "adc r9,r16\n" + "ldd r16,Y+2\n" + "adc r10,r16\n" + "ldd r16,Y+3\n" + "adc r11,r16\n" + "adc r1,r22\n" + "mov r24,r1\n" // carry = r1 + "st Y+,r8\n" // *rr++ = r8:r9:r10:r11 + "st Y+,r9\n" + "st Y+,r10\n" + "st Y+,r11\n" + "adiw r30,4\n" + "dec r25\n" + "brne 3b\n" + "st Y+,r24\n" // *r++ = carry + "sbiw r28,32\n" // rr -= 32 + "sbiw r30,32\n" // Point Z back to the start of y + "dec r21\n" + "brne 2b\n" + + // Restore Y and __zero_reg__. + "pop r29\n" + "pop r28\n" + "clr __zero_reg__\n" + : : "x"(x), "z"(y), "r"(result) + : "r8", "r9", "r10", "r11", "r16", "r20", "r21", "r22", + "r23", "r24", "r25" + ); +#endif +} + +/** + * \brief Multiplies two values and then reduces the result modulo 2^255 - 19. + * + * \param result The result, which must be NUM_LIMBS_256BIT limbs in size + * and can be the same array as \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS_256BIT limbs + * in size and less than 2^255 - 19. + * \param y The second value to multiply, which must be NUM_LIMBS_256BIT limbs + * in size and less than 2^255 - 19. This can be the same array as \a x. 
+ */ +void Curve25519::mul(limb_t *result, const limb_t *x, const limb_t *y) +{ + limb_t temp[NUM_LIMBS_512BIT]; + mulNoReduce(temp, x, y); + reduce(result, temp, NUM_LIMBS_256BIT); + strict_clean(temp); + crypto_feed_watchdog(); +} + +/** + * \fn void Curve25519::square(limb_t *result, const limb_t *x) + * \brief Squares a value and then reduces it modulo 2^255 - 19. + * + * \param result The result, which must be NUM_LIMBS_256BIT limbs in size and + * can be the same array as \a x. + * \param x The value to square, which must be NUM_LIMBS_256BIT limbs in size + * and less than 2^255 - 19. + */ + +/** + * \brief Multiplies a value by the a24 constant and then reduces the result + * modulo 2^255 - 19. + * + * \param result The result, which must be NUM_LIMBS_256BIT limbs in size + * and can be the same array as \a x. + * \param x The value to multiply by a24, which must be NUM_LIMBS_256BIT + * limbs in size and less than 2^255 - 19. + */ +void Curve25519::mulA24(limb_t *result, const limb_t *x) +{ +#if !defined(CURVE25519_ASM_AVR) + // The constant a24 = 121665 (0x1DB41) as a limb array. +#if BIGNUMBER_LIMB_8BIT + static limb_t const a24[3] PROGMEM = {0x41, 0xDB, 0x01}; +#elif BIGNUMBER_LIMB_16BIT + static limb_t const a24[2] PROGMEM = {0xDB41, 0x0001}; +#elif BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT + static limb_t const a24[1] PROGMEM = {0x0001DB41}; +#else + #error "limb_t must be 8, 16, 32, or 64 bits in size" +#endif + #define NUM_A24_LIMBS (sizeof(a24) / sizeof(limb_t)) + + // Multiply the lowest limb of a24 by x and zero-extend into the result. + limb_t temp[NUM_LIMBS_512BIT]; + uint8_t i, j; + dlimb_t carry = 0; + limb_t word = pgm_read_limb(&(a24[0])); + const limb_t *xx = x; + limb_t *tt = temp; + for (i = 0; i < NUM_LIMBS_256BIT; ++i) { + carry += ((dlimb_t)(*xx++)) * word; + *tt++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + *tt = (limb_t)carry; + + // Multiply and add the remaining limbs of a24. 
+ for (i = 1; i < NUM_A24_LIMBS; ++i) { + word = pgm_read_limb(&(a24[i])); + carry = 0; + xx = x; + tt = temp + i; + for (j = 0; j < NUM_LIMBS_256BIT; ++j) { + carry += ((dlimb_t)(*xx++)) * word; + carry += *tt; + *tt++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + *tt = (limb_t)carry; + } +#else + limb_t temp[NUM_LIMBS_512BIT]; + #define NUM_A24_LIMBS ((3 + sizeof(limb_t) - 1) / sizeof(limb_t)) + __asm__ __volatile__ ( + // Load the two low bytes of a24 into r16 and r17. + // The third byte is 0x01 which we can deal with implicitly. + "ldi r16,0x41\n" + "ldi r17,0xDB\n" + + // Iterate over the bytes of "x" and multiply each with a24. + "ldi r25,32\n" // 32 bytes in "x" + "clr r22\n" // r22 = 0 + "clr r18\n" // r18:r19:r11 = 0 (carry) + "clr r19\n" + "clr r11\n" + "1:\n" + "ld r21,X+\n" // r21 = *x++ + "mul r21,r16\n" // r8:r9 = r21 * a24[0] + "movw r8,r0\n" + "mul r21,r17\n" // r9:r1 += r21 * a24[1] + "add r9,r0\n" + "adc r1,r21\n" // r1:r10 += r21 * a24[2] (implicitly 1) + "mov r10,r22\n" + "adc r10,r22\n" + "add r8,r18\n" // r8:r9:r1:r10 += carry + "adc r9,r19\n" + "adc r1,r11\n" + "adc r10,r22\n" + "st Z+,r8\n" // *tt++ = r8 + "mov r18,r9\n" // carry = r9:r1:r10 + "mov r19,r1\n" + "mov r11,r10\n" + "dec r25\n" + "brne 1b\n" + "st Z,r18\n" // *tt = carry + "std Z+1,r19\n" + "std Z+2,r11\n" +#if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT + "std Z+3,r22\n" // Zero pad to a limb boundary +#endif + + // Restore __zero_reg__ + "clr __zero_reg__\n" + + : : "x"(x), "z"(temp) + : "r8", "r9", "r10", "r11", "r16", "r17", "r18", "r19", + "r20", "r21", "r22", "r25" + ); +#endif + + // Reduce the intermediate result modulo 2^255 - 19. + reduce(result, temp, NUM_A24_LIMBS); + strict_clean(temp); +} + +/** + * \brief Multiplies two values and then reduces the result modulo 2^255 - 19, + * where one of the values is in program memory. + * + * \param result The result, which must be NUM_LIMBS_256BIT limbs in size + * and can be the same array as \a x or \a y. 
+ * \param x The first value to multiply, which must be NUM_LIMBS_256BIT limbs + * in size and less than 2^255 - 19. + * \param y The second value to multiply, which must be NUM_LIMBS_256BIT limbs + * in size and less than 2^255 - 19. This array must be in program memory. + */ +void Curve25519::mul_P(limb_t *result, const limb_t *x, const limb_t *y) +{ + limb_t temp[NUM_LIMBS_512BIT]; + uint8_t i, j; + dlimb_t carry; + limb_t word; + const limb_t *xx; + limb_t *tt; + + // Multiply the lowest word of y by x. + carry = 0; + word = pgm_read_limb(&(y[0])); + xx = x; + tt = temp; + for (i = 0; i < NUM_LIMBS_256BIT; ++i) { + carry += ((dlimb_t)(*xx++)) * word; + *tt++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + *tt = (limb_t)carry; + + // Multiply and add the remaining words of y by x. + for (i = 1; i < NUM_LIMBS_256BIT; ++i) { + word = pgm_read_limb(&(y[i])); + carry = 0; + xx = x; + tt = temp + i; + for (j = 0; j < NUM_LIMBS_256BIT; ++j) { + carry += ((dlimb_t)(*xx++)) * word; + carry += *tt; + *tt++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + *tt = (limb_t)carry; + } + + // Reduce the intermediate result modulo 2^255 - 19. + reduce(result, temp, NUM_LIMBS_256BIT); + strict_clean(temp); +} + +/** + * \brief Adds two values and then reduces the result modulo 2^255 - 19. + * + * \param result The result, which must be NUM_LIMBS_256BIT limbs in size + * and can be the same array as \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS_256BIT + * limbs in size and less than 2^255 - 19. + * \param y The second value to multiply, which must be NUM_LIMBS_256BIT + * limbs in size and less than 2^255 - 19. + */ +void Curve25519::add(limb_t *result, const limb_t *x, const limb_t *y) +{ +#if !defined(CURVE25519_ASM_AVR) + dlimb_t carry = 0; + uint8_t posn; + limb_t *rr = result; + + // Add the two arrays to obtain the intermediate result. 
+ for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) { + carry += *x++; + carry += *y++; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } +#else // CURVE25519_ASM_AVR + __asm__ __volatile__ ( + // Save Y and copy the "result" pointer into it. + "push r28\n" + "push r29\n" + "mov r28,%A2\n" + "mov r29,%B2\n" + + // Unroll the loop to operate on 4 bytes at a time (8 iterations). + "ldi r24,8\n" // Loop counter. + "clr r25\n" // carry = 0 + "1:\n" + "ld r16,X+\n" // r16:r19 = *x++ + "ld r17,X+\n" + "ld r18,X+\n" + "ld r19,X+\n" + "ld r20,Z+\n" // r20:r23 = *y++ + "ld r21,Z+\n" + "ld r22,Z+\n" + "ld r23,Z+\n" + "add r16,r25\n" // r16:r19:carry += carry + "adc r17,__zero_reg__\n" + "adc r18,__zero_reg__\n" + "adc r19,__zero_reg__\n" + "mov r25,__zero_reg__\n" + "adc r25,r25\n" + "add r16,r20\n" // r16:r19:carry += r20:r23 + "adc r17,r21\n" + "adc r18,r22\n" + "adc r19,r23\n" + "adc r25,__zero_reg__\n" + "st Y+,r16\n" // *rr++ = r16:r23 + "st Y+,r17\n" + "st Y+,r18\n" + "st Y+,r19\n" + "dec r24\n" + "brne 1b\n" + + // Restore Y. + "pop r29\n" + "pop r28\n" + : : "x"(x), "z"(y), "r"(result) + : "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25" + ); +#endif // CURVE25519_ASM_AVR + + // Reduce the result using the quick trial subtraction method. + reduceQuick(result); +} + +/** + * \brief Subtracts two values and then reduces the result modulo 2^255 - 19. + * + * \param result The result, which must be NUM_LIMBS_256BIT limbs in size + * and can be the same array as \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS_256BIT + * limbs in size and less than 2^255 - 19. + * \param y The second value to multiply, which must be NUM_LIMBS_256BIT + * limbs in size and less than 2^255 - 19. + */ +void Curve25519::sub(limb_t *result, const limb_t *x, const limb_t *y) +{ +#if !defined(CURVE25519_ASM_AVR) + dlimb_t borrow; + uint8_t posn; + limb_t *rr = result; + + // Subtract y from x to generate the intermediate result. 
+ borrow = 0; + for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) { + borrow = ((dlimb_t)(*x++)) - (*y++) - ((borrow >> LIMB_BITS) & 0x01); + *rr++ = (limb_t)borrow; + } + + // If we had a borrow, then the result has gone negative and we + // have to add 2^255 - 19 to the result to make it positive again. + // The top bits of "borrow" will be all 1's if there is a borrow + // or it will be all 0's if there was no borrow. Easiest is to + // conditionally subtract 19 and then mask off the high bit. + rr = result; + borrow = (borrow >> LIMB_BITS) & 19U; + borrow = ((dlimb_t)(*rr)) - borrow; + *rr++ = (limb_t)borrow; + for (posn = 1; posn < NUM_LIMBS_256BIT; ++posn) { + borrow = ((dlimb_t)(*rr)) - ((borrow >> LIMB_BITS) & 0x01); + *rr++ = (limb_t)borrow; + } + *(--rr) &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1); +#else // CURVE25519_ASM_AVR + __asm__ __volatile__ ( + // Save Y and copy the "result" pointer into it. + "push r28\n" + "push r29\n" + "mov r28,%A2\n" + "mov r29,%B2\n" + + // Unroll the sub loop to operate on 4 bytes at a time (8 iterations). + "ldi r24,8\n" // Loop counter. + "clr r25\n" // borrow = 0 + "1:\n" + "ld r16,X+\n" // r16:r19 = *x++ + "ld r17,X+\n" + "ld r18,X+\n" + "ld r19,X+\n" + "ld r20,Z+\n" // r20:r23 = *y++ + "ld r21,Z+\n" + "ld r22,Z+\n" + "ld r23,Z+\n" + "sub r16,r25\n" // r16:r19:borrow -= borrow + "sbc r17,__zero_reg__\n" + "sbc r18,__zero_reg__\n" + "sbc r19,__zero_reg__\n" + "mov r25,__zero_reg__\n" + "sbc r25,__zero_reg__\n" + "sub r16,r20\n" // r16:r19:borrow -= r20:r23 + "sbc r17,r21\n" + "sbc r18,r22\n" + "sbc r19,r23\n" + "sbc r25,__zero_reg__\n" + "st Y+,r16\n" // *rr++ = r16:r23 + "st Y+,r17\n" + "st Y+,r18\n" + "st Y+,r19\n" + "andi r25,1\n" // Only need the bottom bit of the borrow + "dec r24\n" + "brne 1b\n" + + // If there was a borrow, then we need to add 2^255 - 19 back. + // We conditionally subtract 19 and then mask off the high bit. 
+ "neg r25\n" // borrow = mask(borrow) & 19 + "andi r25,19\n" + "sbiw r28,32\n" // Point Y back to the start of "result" + "ldi r24,8\n" + "2:\n" + "ld r16,Y\n" // r16:r19 = *rr + "ldd r17,Y+1\n" + "ldd r18,Y+2\n" + "ldd r19,Y+3\n" + "sub r16,r25\n" + "sbc r17,__zero_reg__\n" // r16:r19:borrow -= borrow + "sbc r18,__zero_reg__\n" + "sbc r19,__zero_reg__\n" + "mov r25,__zero_reg__\n" + "sbc r25,__zero_reg__\n" + "andi r25,1\n" + "st Y+,r16\n" // *r++ = r16:r19 + "st Y+,r17\n" + "st Y+,r18\n" + "st Y+,r19\n" + "dec r24\n" + "brne 2b\n" + "andi r19,0x7F\n" // Mask off the high bit in the last byte + "sbiw r28,1\n" + "st Y,r19\n" + + // Restore Y. + "pop r29\n" + "pop r28\n" + : : "x"(x), "z"(y), "r"(result) + : "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25" + ); +#endif // CURVE25519_ASM_AVR +} + +/** + * \brief Conditionally swaps two values if a selection value is non-zero. + * + * \param select Non-zero to swap \a x and \a y, zero to leave them unchanged. + * \param x The first value to conditionally swap. + * \param y The second value to conditionally swap. + * + * The swap is performed in a way that it should take the same amount of + * time irrespective of the value of \a select. + * + * \sa cmove() + */ +void Curve25519::cswap(limb_t select, limb_t *x, limb_t *y) +{ +#if !defined(CURVE25519_ASM_AVR) + uint8_t posn; + limb_t dummy; + limb_t sel; + + // Turn "select" into an all-zeroes or all-ones mask. We don't care + // which bit or bits is set in the original "select" value. + sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS); + --sel; + + // Swap the two values based on "select". 
Algorithm from: + // http://tools.ietf.org/html/rfc7748 + for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) { + dummy = sel & (x[posn] ^ y[posn]); + x[posn] ^= dummy; + y[posn] ^= dummy; + } +#else // CURVE25519_ASM_AVR + __asm__ __volatile__ ( + // Combine all bytes from "select" into one and then turn + // that byte into the "sel" mask in r24. + "clr r24\n" +#if BIGNUMBER_LIMB_8BIT + "sub r24,%2\n" +#elif BIGNUMBER_LIMB_16BIT + "or %A2,%B2\n" + "sub r24,%A2\n" +#elif BIGNUMBER_LIMB_32BIT + "or %A2,%B2\n" + "or %A2,%C2\n" + "or %A2,%D2\n" + "sub r24,%A2\n" +#endif + "mov r24,__zero_reg__\n" + "sbc r24,r24\n" + + // Perform the conditional swap 4 bytes at a time. + "ldi r25,8\n" + "1:\n" + "ld r16,X+\n" // r16:r19 = *x + "ld r17,X+\n" + "ld r18,X+\n" + "ld r19,X\n" + "ld r20,Z\n" // r20:r23 = *y + "ldd r21,Z+1\n" + "ldd r22,Z+2\n" + "ldd r23,Z+3\n" + "mov r12,r16\n" // r12:r15 = (r16:r19 ^ r20:r23) & sel + "mov r13,r17\n" + "mov r14,r18\n" + "mov r15,r19\n" + "eor r12,r20\n" + "eor r13,r21\n" + "eor r14,r22\n" + "eor r15,r23\n" + "and r12,r24\n" + "and r13,r24\n" + "and r14,r24\n" + "and r15,r24\n" + "eor r16,r12\n" // r16:r19 ^= r12:r15 + "eor r17,r13\n" + "eor r18,r14\n" + "eor r19,r15\n" + "eor r20,r12\n" // r20:r23 ^= r12:r15 + "eor r21,r13\n" + "eor r22,r14\n" + "eor r23,r15\n" + "st X,r19\n" // *x++ = r16:r19 + "st -X,r18\n" + "st -X,r17\n" + "st -X,r16\n" + "adiw r26,4\n" + "st Z+,r20\n" // *y++ = r20:r23 + "st Z+,r21\n" + "st Z+,r22\n" + "st Z+,r23\n" + "dec r25\n" + "brne 1b\n" + + : : "x"(x), "z"(y), "r"(select) + : "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", + "r20", "r21", "r22", "r23", "r24", "r25" + ); +#endif // CURVE25519_ASM_AVR +} + +/** + * \brief Conditionally moves \a y into \a x if a selection value is non-zero. + * + * \param select Non-zero to move \a y into \a x, zero to leave \a x unchanged. + * \param x The destination to move into. + * \param y The value to conditionally move. 
+ * + * The move is performed in a way that it should take the same amount of + * time irrespective of the value of \a select. + * + * \sa cswap() + */ +void Curve25519::cmove(limb_t select, limb_t *x, const limb_t *y) +{ +#if !defined(CURVE25519_ASM_AVR) + uint8_t posn; + limb_t dummy; + limb_t sel; + + // Turn "select" into an all-zeroes or all-ones mask. We don't care + // which bit or bits is set in the original "select" value. + sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS); + --sel; + + // Move y into x based on "select". Similar to conditional swap above. + for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) { + dummy = sel & (x[posn] ^ y[posn]); + x[posn] ^= dummy; + } +#else // CURVE25519_ASM_AVR + __asm__ __volatile__ ( + // Combine all bytes from "select" into one and then turn + // that byte into the "sel" mask in r24. + "clr r24\n" +#if BIGNUMBER_LIMB_8BIT + "sub r24,%2\n" +#elif BIGNUMBER_LIMB_16BIT + "or %A2,%B2\n" + "sub r24,%A2\n" +#elif BIGNUMBER_LIMB_32BIT + "or %A2,%B2\n" + "or %A2,%C2\n" + "or %A2,%D2\n" + "sub r24,%A2\n" +#endif + "mov r24,__zero_reg__\n" + "sbc r24,r24\n" + + // Perform the conditional move 4 bytes at a time. + "ldi r25,8\n" + "1:\n" + "ld r16,X+\n" // r16:r19 = *x + "ld r17,X+\n" + "ld r18,X+\n" + "ld r19,X\n" + "ld r20,Z+\n" // r20:r23 = *y++ + "ld r21,Z+\n" + "ld r22,Z+\n" + "ld r23,Z+\n" + "eor r20,r16\n" // r20:r23 = (r16:r19 ^ r20:r23) & sel + "eor r21,r17\n" + "eor r22,r18\n" + "eor r23,r19\n" + "and r20,r24\n" + "and r21,r24\n" + "and r22,r24\n" + "and r23,r24\n" + "eor r16,r20\n" // r16:r19 ^= r20:r23 + "eor r17,r21\n" + "eor r18,r22\n" + "eor r19,r23\n" + "st X,r19\n" // *x++ = r16:r19 + "st -X,r18\n" + "st -X,r17\n" + "st -X,r16\n" + "adiw r26,4\n" + "dec r25\n" + "brne 1b\n" + + : : "x"(x), "z"(y), "r"(select) + : "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25" + ); +#endif // CURVE25519_ASM_AVR +} + +/** + * \brief Raise x to the power of (2^250 - 1). 
+ * + * \param result The result array, which must be NUM_LIMBS_256BIT limbs in size. + * \param x The value to raise. + */ +void Curve25519::pow250(limb_t *result, const limb_t *x) +{ + limb_t t1[NUM_LIMBS_256BIT]; + uint8_t i, j; + + // The big-endian hexadecimal expansion of (2^250 - 1) is: + // 03FFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF + // + // The naive implementation needs to do 2 multiplications per 1 bit and + // 1 multiplication per 0 bit. We can improve upon this by creating a + // pattern 0000000001 ... 0000000001. If we square and multiply the + // pattern by itself we can turn the pattern into the partial results + // 0000000011 ... 0000000011, 0000000111 ... 0000000111, etc. + // This averages out to about 1.1 multiplications per 1 bit instead of 2. + + // Build a pattern of 250 bits in length of repeated copies of 0000000001. + #define RECIP_GROUP_SIZE 10 + #define RECIP_GROUP_BITS 250 // Must be a multiple of RECIP_GROUP_SIZE. + square(t1, x); + for (j = 0; j < (RECIP_GROUP_SIZE - 1); ++j) + square(t1, t1); + mul(result, t1, x); + for (i = 0; i < ((RECIP_GROUP_BITS / RECIP_GROUP_SIZE) - 2); ++i) { + for (j = 0; j < RECIP_GROUP_SIZE; ++j) + square(t1, t1); + mul(result, result, t1); + } + + // Multiply bit-shifted versions of the 0000000001 pattern into + // the result to "fill in" the gaps in the pattern. + square(t1, result); + mul(result, result, t1); + for (j = 0; j < (RECIP_GROUP_SIZE - 2); ++j) { + square(t1, t1); + mul(result, result, t1); + } + + // Clean up and exit. + clean(t1); +} + +/** + * \brief Computes the reciprocal of a number modulo 2^255 - 19. + * + * \param result The result as a array of NUM_LIMBS_256BIT limbs in size. + * This cannot be the same array as \a x. + * \param x The number to compute the reciprocal for. + */ +void Curve25519::recip(limb_t *result, const limb_t *x) +{ + // The reciprocal is the same as x ^ (p - 2) where p = 2^255 - 19. 
+ // The big-endian hexadecimal expansion of (p - 2) is: + // 7FFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFEB + // Start with the 250 upper bits of the expansion of (p - 2). + pow250(result, x); + + // Deal with the 5 lowest bits of (p - 2), 01011, from highest to lowest. + square(result, result); + square(result, result); + mul(result, result, x); + square(result, result); + square(result, result); + mul(result, result, x); + square(result, result); + mul(result, result, x); +} + +/** + * \brief Computes the square root of a number modulo 2^255 - 19. + * + * \param result The result as a array of NUM_LIMBS_256BIT limbs in size. + * This must not overlap with \a x. + * \param x The number to compute the square root for. + * + * For any number \a x, there are two square roots: positive and negative. + * For example, both 2 and -2 are square roots of 4 because 2 * 2 = -2 * -2. + * This function will return one or the other. Callers must determine which + * square root they are interested in and invert the result as necessary. + * + * \note This function is not constant time so it should only be used + * on publicly-known values. + */ +bool Curve25519::sqrt(limb_t *result, const limb_t *x) +{ + // sqrt(-1) mod (2^255 - 19). + static limb_t const numSqrtM1[NUM_LIMBS_256BIT] PROGMEM = { + LIMB_PAIR(0x4A0EA0B0, 0xC4EE1B27), LIMB_PAIR(0xAD2FE478, 0x2F431806), + LIMB_PAIR(0x3DFBD7A7, 0x2B4D0099), LIMB_PAIR(0x4FC1DF0B, 0x2B832480) + }; + limb_t y[NUM_LIMBS_256BIT]; + + // Algorithm from: http://tools.ietf.org/html/rfc7748 + + // Compute a candidate root: result = x^((p + 3) / 8) mod p. + // (p + 3) / 8 = (2^252 - 2) which is 251 one bits followed by a zero: + // 0FFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE + pow250(result, x); + square(result, result); + mul(result, result, x); + square(result, result); + + // Did we get the square root immediately? 
+ square(y, result); + if (memcmp(x, y, sizeof(y)) == 0) { + clean(y); + return true; + } + + // Multiply the result by sqrt(-1) and check again. + mul_P(result, result, numSqrtM1); + square(y, result); + if (memcmp(x, y, sizeof(y)) == 0) { + clean(y); + return true; + } + + // The number does not have a square root. + clean(y); + return false; +} diff --git a/src/Curve25519.h b/src/Curve25519.h new file mode 100644 index 0000000..b95a9fa --- /dev/null +++ b/src/Curve25519.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_CURVE25519_h +#define CRYPTO_CURVE25519_h + +#include "BigNumberUtil.h" + +class Ed25519; + +class Curve25519 +{ +public: + static bool eval(uint8_t result[32], const uint8_t s[32], const uint8_t x[32]); + + static void dh1(uint8_t k[32], uint8_t f[32]); + static bool dh2(uint8_t k[32], uint8_t f[32]); + +#if defined(TEST_CURVE25519_FIELD_OPS) +public: +#else +private: +#endif + static uint8_t isWeakPoint(const uint8_t k[32]); + + static void reduce(limb_t *result, limb_t *x, uint8_t size); + static limb_t reduceQuick(limb_t *x); + + static void mulNoReduce(limb_t *result, const limb_t *x, const limb_t *y); + + static void mul(limb_t *result, const limb_t *x, const limb_t *y); + static void square(limb_t *result, const limb_t *x) + { + mul(result, x, x); + } + + static void mulA24(limb_t *result, const limb_t *x); + + static void mul_P(limb_t *result, const limb_t *x, const limb_t *y); + + static void add(limb_t *result, const limb_t *x, const limb_t *y); + static void sub(limb_t *result, const limb_t *x, const limb_t *y); + + static void cswap(limb_t select, limb_t *x, limb_t *y); + static void cmove(limb_t select, limb_t *x, const limb_t *y); + + static void pow250(limb_t *result, const limb_t *x); + static void recip(limb_t *result, const limb_t *x); + static bool sqrt(limb_t *result, const limb_t *x); + + // Constructor and destructor are private - cannot instantiate this class. + Curve25519() {} + ~Curve25519() {} + + friend class Ed25519; +}; + +#endif diff --git a/src/EAX.cpp b/src/EAX.cpp new file mode 100644 index 0000000..d5b3cff --- /dev/null +++ b/src/EAX.cpp @@ -0,0 +1,293 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "EAX.h" +#include "Crypto.h" +#include + +/** + * \class EAXCommon EAX.h + * \brief Concrete base class to assist with implementing EAX for + * 128-bit block ciphers. + * + * References: https://en.wikipedia.org/wiki/EAX_mode, + * http://web.cs.ucdavis.edu/~rogaway/papers/eax.html + * + * \sa EAX + */ + +/** + * \brief Constructs a new cipher in EAX mode. + * + * This constructor must be followed by a call to setBlockCipher(). + */ +EAXCommon::EAXCommon() +{ + state.encPosn = 0; + state.authMode = 0; +} + +EAXCommon::~EAXCommon() +{ + clean(state); +} + +size_t EAXCommon::keySize() const +{ + return omac.blockCipher()->keySize(); +} + +size_t EAXCommon::ivSize() const +{ + // Can use any size but 16 is recommended. + return 16; +} + +size_t EAXCommon::tagSize() const +{ + // Tags can be up to 16 bytes in length. 
+ return 16; +} + +bool EAXCommon::setKey(const uint8_t *key, size_t len) +{ + return omac.blockCipher()->setKey(key, len); +} + +bool EAXCommon::setIV(const uint8_t *iv, size_t len) +{ + // Must have at least 1 byte for the IV. + if (!len) + return false; + + // Hash the IV to create the initial nonce for CTR mode. Also creates B. + omac.initFirst(state.counter); + omac.update(state.counter, iv, len); + omac.finalize(state.counter); + + // The tag is initially the nonce value. Will be XOR'ed with + // the hash of the authenticated and encrypted data later. + memcpy(state.tag, state.counter, 16); + + // Start the hashing context for the authenticated data. + omac.initNext(state.hash, 1); + state.encPosn = 16; + state.authMode = 1; + + // The EAX context is ready to go. + return true; +} + +void EAXCommon::encrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + if (state.authMode) + closeAuthData(); + encryptCTR(output, input, len); + omac.update(state.hash, output, len); +} + +void EAXCommon::decrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + if (state.authMode) + closeAuthData(); + omac.update(state.hash, input, len); + encryptCTR(output, input, len); +} + +void EAXCommon::addAuthData(const void *data, size_t len) +{ + if (state.authMode) + omac.update(state.hash, (const uint8_t *)data, len); +} + +void EAXCommon::computeTag(void *tag, size_t len) +{ + closeTag(); + if (len > 16) + len = 16; + memcpy(tag, state.tag, len); +} + +bool EAXCommon::checkTag(const void *tag, size_t len) +{ + // Can never match if the expected tag length is too long. + if (len > 16) + return false; + + // Compute the final tag and check it. + closeTag(); + return secure_compare(state.tag, tag, len); +} + +void EAXCommon::clear() +{ + clean(state); +} + +/** + * \brief Closes the authenticated data portion of the session and + * starts encryption or decryption. + */ +void EAXCommon::closeAuthData() +{ + // Finalise the OMAC hash and XOR it with the final tag. 
+ omac.finalize(state.hash); + for (uint8_t index = 0; index < 16; ++index) + state.tag[index] ^= state.hash[index]; + state.authMode = 0; + + // Initialise the hashing context for the ciphertext data. + omac.initNext(state.hash, 2); +} + +/** + * \brief Encrypts or decrypts a region using the block cipher in CTR mode. + * + * \param output The output buffer to write to, which may be the same + * buffer as \a input. The \a output buffer must have at least as many + * bytes as the \a input buffer. + * \param input The input buffer to read from. + * \param len The number of bytes to process. + */ +void EAXCommon::encryptCTR(uint8_t *output, const uint8_t *input, size_t len) +{ + while (len > 0) { + // Do we need to start a new block? + if (state.encPosn == 16) { + // Encrypt the counter to create the next keystream block. + omac.blockCipher()->encryptBlock(state.stream, state.counter); + state.encPosn = 0; + + // Increment the counter, taking care not to reveal + // any timing information about the starting value. + // We iterate through the entire counter region even + // if we could stop earlier because a byte is non-zero. + uint16_t temp = 1; + uint8_t index = 16; + while (index > 0) { + --index; + temp += state.counter[index]; + state.counter[index] = (uint8_t)temp; + temp >>= 8; + } + } + + // Encrypt/decrypt the current input block. + uint8_t size = 16 - state.encPosn; + if (size > len) + size = (uint8_t)len; + for (uint8_t index = 0; index < size; ++index) + output[index] = input[index] ^ state.stream[(state.encPosn)++]; + + // Move onto the next block. + len -= size; + input += size; + output += size; + } +} + +void EAXCommon::closeTag() +{ + // If we were only authenticating, then close off auth mode. + if (state.authMode) + closeAuthData(); + + // Finalise the hash over the ciphertext and XOR with the final tag. 
+ omac.finalize(state.hash); + for (uint8_t index = 0; index < 16; ++index) + state.tag[index] ^= state.hash[index]; +} + +/** + * \fn void EAXCommon::setBlockCipher(BlockCipher *cipher) + * \brief Sets the block cipher to use for this EAX object. + * + * \param cipher The block cipher to use to implement EAX mode. + * This object must have a block size of 128 bits (16 bytes). + */ + +/** + * \class EAX EAX.h + * \brief Implementation of the EAX authenticated cipher. + * + * EAX mode converts a block cipher into an authenticated cipher + * that uses the block cipher T to encrypt and authenticate. + * + * The size of the key is determined by the underlying block cipher T. + * The IV is recommended to be 128 bits (16 bytes) in length, but other + * lengths are supported as well. The default tagSize() is 128 bits + * (16 bytes) but the EAX specification does allow smaller tag sizes. + * + * The template parameter T must be a concrete subclass of BlockCipher + * indicating the specific block cipher to use. The block cipher must + * have a block size of 128 bits. For example, the following creates a + * EAX object using AES256 as the underlying cipher and then uses it + * to encrypt and authenticate a \c plaintext block: + * + * \code + * EAX eax; + * eax.setKey(key, sizeof(key)); + * eax.setIV(iv, sizeof(iv)); + * eax.addAuthData(adata, sizeof(adata)); + * eax.encrypt(ciphertext, plaintext, sizeof(plaintext)); + * eax.computeTag(tag, sizeof(tag)); + * \endcode + * + * The decryption process is almost identical to convert a \c ciphertext and + * \a tag back into plaintext and then check the tag: + * + * \code + * EAX eax; + * eax.setKey(key, sizeof(key)); + * eax.setIV(iv, sizeof(iv)); + * eax.addAuthData(adata, sizeof(adata)); + * eax.decrypt(ciphertext, plaintext, sizeof(plaintext)); + * if (!eax.checkTag(tag, sizeof(tag))) { + * // The data was invalid - do not use it. + * ... 
+ * } + * \endcode + * + * The EAX class can also be used to implement message authentication + * by omitting the plaintext: + * + * \code + * EAX eax; + * eax.setKey(key, sizeof(key)); + * eax.setIV(iv, sizeof(iv)); + * eax.addAuthData(adata1, sizeof(adata1)); + * eax.addAuthData(adata2, sizeof(adata1)); + * ... + * eax.addAuthData(adataN, sizeof(adataN)); + * eax.computeTag(tag, sizeof(tag)); + * \endcode + * + * References: https://en.wikipedia.org/wiki/EAX_mode, + * http://web.cs.ucdavis.edu/~rogaway/papers/eax.html + * + * \sa EAXCommon, GCM + */ + +/** + * \fn EAX::EAX() + * \brief Constructs a new EAX object for the block cipher T. + */ diff --git a/src/EAX.h b/src/EAX.h new file mode 100644 index 0000000..1511a5d --- /dev/null +++ b/src/EAX.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_EAX_h +#define CRYPTO_EAX_h + +#include "AuthenticatedCipher.h" +#include "BlockCipher.h" +#include "OMAC.h" + +class EAXCommon : public AuthenticatedCipher +{ +public: + virtual ~EAXCommon(); + + size_t keySize() const; + size_t ivSize() const; + size_t tagSize() const; + + bool setKey(const uint8_t *key, size_t len); + bool setIV(const uint8_t *iv, size_t len); + + void encrypt(uint8_t *output, const uint8_t *input, size_t len); + void decrypt(uint8_t *output, const uint8_t *input, size_t len); + + void addAuthData(const void *data, size_t len); + + void computeTag(void *tag, size_t len); + bool checkTag(const void *tag, size_t len); + + void clear(); + +protected: + EAXCommon(); + void setBlockCipher(BlockCipher *cipher) + { + omac.setBlockCipher(cipher); + } + +private: + struct { + uint8_t counter[16]; + uint8_t stream[16]; + uint8_t tag[16]; + uint8_t hash[16]; + uint8_t encPosn; + uint8_t authMode; + } state; + OMAC omac; + + void closeAuthData(); + void encryptCTR(uint8_t *output, const uint8_t *input, size_t len); + void closeTag(); +}; + +template +class EAX : public EAXCommon +{ +public: + EAX() { setBlockCipher(&cipher); } + +private: + T cipher; +}; + +#endif diff --git a/src/ECCX08.cpp b/src/ECCX08.cpp index 8bb1a9a..2f20d07 100644 --- a/src/ECCX08.cpp +++ b/src/ECCX08.cpp @@ -18,8 +18,9 @@ */ #include - #include "ECCX08.h" +#include +#include const uint32_t ECCX08Class::_wakeupFrequency = 100000u; // 100 kHz #ifdef __AVR__ @@ -28,6 +29,9 @@ const uint32_t ECCX08Class::_normalFrequency = 400000u; // 400 kHz const uint32_t ECCX08Class::_normalFrequency = 1000000u; // 1 MHz #endif +const int aesInputLength = 16; +const int gfmInputLength = 32; + ECCX08Class::ECCX08Class(TwoWire& wire, uint8_t address) : _wire(&wire), _address(address) @@ -41,8 +45,9 @@ ECCX08Class::~ECCX08Class() int ECCX08Class::begin() { _wire->begin(); - + wakeup(); + idle(); long ver = version() & 0x0F00000; @@ -50,7 +55,6 @@ int ECCX08Class::begin() if (ver 
!= 0x0500000 && ver != 0x0600000) { return 0; } - return 1; } @@ -164,53 +168,197 @@ int ECCX08Class::random(byte data[], size_t length) return 1; } - +/*function generatePrivateKey creates an ECDH key in slot and a corresponding public Key in publicKey + * (in) slot + * (out) publicKey + * + */ int ECCX08Class::generatePrivateKey(int slot, byte publicKey[]) { + byte mode = 0b00000100;//generates a random private key and puts it in the keyslot indicated by slot + int publicKeySize = 64; if (!wakeup()) { - return 0; + return 2; } + + if (!sendCommand(0x40, mode, slot)) { + delay(62); + idle(); + return 3; + } + + delay(62);//3+max time on clock division 00 for the A and B - if (!sendCommand(0x40, 0x04, slot)) { - return 0; + int response = receiveResponseWithErrorCode(publicKey, publicKeySize); + if (response != 1) { + delay(26); + idle(); + return response +10000; } + - delay(115); + delay(26); + + idle(); - if (!receiveResponse(publicKey, 64)) { + return 1; +} +/* A function which uses the GenKey command to derive a public key given a slot with an ECC private key in it. 
+ * parameters: + * slot(in) the slot number of the relevant private key + * publicKey (out) 64 bytes of public key goodness + */ +int ECCX08Class::generatePublicKey(int slot, byte publicKey[]) +{ + int publicKeySize = 64; + byte mode = 0b00000000;//generate a public key based on the keyslot + if (!wakeup()) { return 0; } - delay(1); + if (!sendCommand(0x40, mode, slot)) { + return 3; + } + + delay(115);//3+max time on clock division 00 for the A and B + + + int response = receiveResponseWithErrorCode(publicKey, publicKeySize); + if (response != 1) { + return response +10000; + } + + delay(26); idle(); return 1; } - -int ECCX08Class::generatePublicKey(int slot, byte publicKey[]) +/*A function which gernerates a key and sticks it in tempkey, + * to wipe out old things in tempkey + * + */ +int ECCX08Class::refreshTempKey(byte publicKey[]) { + int slot = 0xFFFF; + int publicKeySize = 64; + byte mode = 0b00000010;//generate a private key in tempkey if (!wakeup()) { return 0; } - if (!sendCommand(0x40, 0x00, slot)) { - return 0; + if (!sendCommand(0x40, mode, slot)) { + return 3; } - delay(115); + delay(62);//3+max time on clock division 00 for the A and B - if (!receiveResponse(publicKey, 64)) { - return 0; + + int response = receiveResponseWithErrorCode(publicKey, publicKeySize); + if (response != 1) { + return response +10000; } - delay(1); + delay(26); idle(); return 1; } + +int ECCX08Class::readPublicKey(uint8_t publicKeySlot, uint8_t publicKey[64]){ + + uint8_t sectionOne[32]; + uint8_t sectionTwo[32]; + uint8_t sectionThree[32]; + uint8_t Zone = 0b10000010; //where bit[7], (1) is read 32 bytes, and bits[1:0] are the Data Zone (10) + int initialAddress; + if (publicKeySlot == 8){ + initialAddress = 0x0040; + } else if(publicKeySlot == 9){ + initialAddress = 0x0048; + } else if(publicKeySlot == 10){ + initialAddress = 0x0050; + } else if(publicKeySlot == 11){ + initialAddress = 0x0058; + } else if(publicKeySlot == 12){ + initialAddress = 0x0060; + } else 
if(publicKeySlot == 13){ + initialAddress = 0x0068; + } else if(publicKeySlot == 14){ + initialAddress = 0x0070; + } else if(publicKeySlot == 15){ + initialAddress = 0x0078; + } else{ + return -1; + } + + if(!wakeup()){ + return 2; + } + uint8_t temp[32]; + for(int block = 0; block < 3; block+=1){ + uint8_t address = initialAddress+(block*0x0100); //see section 10.5 of the datasheet + + if(!sendCommand(0x2, Zone, address)){ + return 3; + } + delay(26); + receiveResponse(temp, 32); + Serial.printf("\n public Key Section %d: ", block); + for(int index =0; index < 32; index++){ + Serial.printf(" %x",temp[index]); + } + bool test = block < 3; + Serial.printf("\n\n test, block is less than three? %d", test); + } + return 1; +} + +int ECCX08Class::ecdhKeyGen(uint8_t mode, uint8_t keyID, byte publicKey[]) +{ + int publicKeyLength = 64; + if (!wakeup()) { + return 2; + } + if (!sendCommand(0x43, mode, keyID, publicKey, publicKeyLength)) { + return 0; + } + delay(96); //ATECC608A divider 5 timing, 0 is 28, D is 370. + //delay(something)//ATECC608B divider timing. + if (mode ==0b00001100){ //the key goes to the output buffer, output is bigger + + byte output[32]; + int responseResult = receiveResponseWithErrorCode(output, 32); + if (responseResult != 1) { + delay(1); + idle(); + return 2000 + responseResult; + } else{ + receiveResponse(output, 32); + } + } else { + //output is what we get from Receive response, responseResult is the error code + uint8_t output[1]; //in the case that mode bits 3-2 are not 11, the output will be a success or an error + int responseResult = receiveResponse(output, 1); + + //if ecdh gives back 0x00, we are in the clear. Otherwise, it is responding + if (output[0] != 0x00){//oh, do so dereference that pointers made a few lines above to get the nut inside + //return with an error code, otherwise it works and passes 0 as success. 
+ delay(1); + idle(); + + return (int)*output + 3000; + } + } + + delay(1); + idle(); + + return 1; +} + int ECCX08Class::ecdsaVerify(const byte message[], const byte signature[], const byte pubkey[]) { if (!challenge(message)) { @@ -243,6 +391,101 @@ int ECCX08Class::ecSign(int slot, const byte message[], byte signature[]) return 1; } + +/* breif nonce: a function which tells the chip to generate a nonce for the TempKey. + * parameters + * rngMode [in] 0x00: Target is the Temp Key. 0x01: Target is Message Digest Buffer, 0x10 Target is alternate Key buffer + * 0utputMode [in] bool, 1 to replace RandOut with Tempkey or 0 for outputLocation to be where the RNG gets put, + * this is transformed into a two byte array for reasons to do with command formatting on the chip. + * inputNum [in] 20, 32, or 64 bit random number + * sizeOfInput number of bytes in the input + * [out] OutputLocation pointer to where to put the output. + */ +int ECCX08Class::nonce(byte rngMode, bool outputMode, byte inputNum[], int sizeOfInput){ + if (outputMode == 0){ + byte responseType[2] = {0x00,0x00}; + } + else if(outputMode ==1 ){ + byte responseType[2] = {0x00,0x01}; + } + + byte outputData[sizeOfInput]; + + + if(!wakeup()){ + return 2; + } + + + if (!sendCommand(0x16, rngMode, outputMode, inputNum, *outputData)){ + return 3; + } + + delay(26); + + idle(); + return 1; + +} + +int ECCX08Class::aesEncryptECB(uint16_t slot, const byte input[], byte result[]) +{ + // mode: 000 aes-ECB-encrypt + //bits 7 and 6 of the mode are the four possible 16 bit units of a 32 or 64 bit range to check in the keyslot + //presuming 00 lowest and 11 highest, so 01 to 01 for the case that its only 32 bits, but the datasheet is opaque. 
+ return aes(0b00000000, slot, input, aesInputLength, result); +} + +int ECCX08Class::aesDecryptECB(uint16_t slot, const byte input[], byte result[]) +{ + // mode: 001 aes-ECB-decrypt + return aes(0b00000001, slot, input, aesInputLength, result); +} + +// aesMultiply Datasheet Section 11.1 +int ECCX08Class::aesMultiply(uint16_t slot, const byte input[], const byte h[], byte result[]) +{ + //contains H, then input + byte data[gfmInputLength]; + memcpy(data, h, aesInputLength); + memcpy(data + aesInputLength, input, aesInputLength); + // mode: 011 calculate Galois Field Multiple(GFM) on the input data + + return aes(0b00000011, slot, data, gfmInputLength, result); +} + +/* aes Funtion that sends the command to do AES transforms on data. + * parameters: + * mode (in) 0b00000000 for encryption. 0b00000001 for decryption, 0b00000011 for GFM + * slot (in) where the encryption key is. 0xFFFF will look in the tempkey, + * input (in) the input in a byte array + * inputLength (in) the number of bytes in the input. + * result (out) place to put the encrypted or decrypted data + */ +int ECCX08Class::aes(byte mode, uint16_t slot, const byte input[], int inputLength, byte result[]) +{ + if (!wakeup()) { + //block below waits half a second and tries again 5 times. 
+ return 2; + } + + if (!sendCommand(0x51, mode, slot, input, inputLength)) { + return 3; + } + delay(26); + + int response = receiveResponseWithErrorCode(result, aesInputLength); + if (response != 1) { + return response + 10000; + } + + delay(1); + idle(); + + + return 1; +} + int ECCX08Class::beginSHA256() { uint8_t status; @@ -351,6 +594,59 @@ int ECCX08Class::readSlot(int slot, byte data[], int length) return 1; } + + +int ECCX08Class::writePubKey(int slot, const byte pubKey[], WritePubKeyErrors *error){ + //step 1: build the address + //addrs of 2nd and 3rd blocks are +100 each, so 140, 240 + int initialAddress; + if (slot == 8){ + initialAddress = 0x0040; + } else if(slot == 9){ + initialAddress = 0x0048; + } else if(slot == 10){ + initialAddress = 0x0050; + } else if(slot == 11){ + initialAddress = 0x0058; + } else if(slot == 12){ + initialAddress = 0x0060; + } else if(slot == 13){ + initialAddress = 0x0068; + } else if(slot == 14){ + initialAddress = 0x0070; + } else if(slot == 15){ + initialAddress = 0x0078; + } else{ + return -1; + } + //Step 2 pad the public key + // The 64 byte P256 public key gets written to a 72 byte slot in the following pattern + // | Block 1 | Block 2 | Block 3 | + // | Pad: 4 Bytes | PubKey[0:27] | PubKey[28:31] | Pad: 4 Bytes | PubKey[32:55] | PubKey[56:63] | + uint8_t publicKeyPadded[72]; + + memset(publicKeyPadded, 0, sizeof(publicKeyPadded)); + memcpy(&publicKeyPadded[4], &pubKey[0], 32); // Move X to padded position + memcpy(&publicKeyPadded[40], &pubKey[32], 32); // Move Y to padded position + + //then write the padded public key in three steps( you are allowed to write 32 even if there's not space, the chip will not bit-overflow) + for(int block = 0; block < 3; block++){ + uint16_t address = initialAddress+(block*0x0100); + int writeReport = write(2, address, &publicKeyPadded[block*32], 32); + error->success = true; + if (writeReport !=0) { + int complexWriteError = (((block+1)*1000)+writeReport); + error->success = false; 
+ error->loopErrors[block] = complexWriteError; //1000, 2000, or 3000 for which step of write failed + } + delay(22);//delay max 20 at 0 cycle splitting, added 2 for margin. + } + + delay(1); + idle(); + return 1; +} + int ECCX08Class::writeSlot(int slot, const byte data[], int length) { if (slot < 0 || slot > 15) { @@ -358,6 +654,7 @@ int ECCX08Class::writeSlot(int slot, const byte data[], int length) } if (length % 4 != 0) { + Serial.printf("\n lenght: %d, lenth mod 4: %d", length, length %4); return 0; } @@ -369,7 +666,7 @@ int ECCX08Class::writeSlot(int slot, const byte data[], int length) } if (!write(2, addressForSlotOffset(slot, i), &data[i], chunkSize)) { - return 0; + return 2; } } @@ -391,8 +688,18 @@ int ECCX08Class::locked() return 0; } + + int ECCX08Class::writeConfiguration(const byte data[]) { + + if (!wakeup()) { + //block below waits half a second and tries again 5 times. + return 2; + } + + + // skip first 16 bytes, they are not writable for (int i = 16; i < 128; i += 4) { if (i == 84) { @@ -400,14 +707,29 @@ int ECCX08Class::writeConfiguration(const byte data[]) continue; } - if (!write(0, i / 4, &data[i], 4)) { - return 0; + int zone = 0b00000000; //zone[6](encrypted input) not set, otherwise all others 0. + int writeSuccess = sendCommand(0x12, zone, i/4, &data[i],4); + if (writeSuccess != 1){ + return writeSuccess; } + delay(46); } - + delay(1); + idle(); return 1; } +int ECCX08Class::writeConfigChunk(int configIndexNum, byte data[]){ + + if (!wakeup()) { + //block below waits half a second and tries again 5 times. + return 0; + } + + int success = sendCommand(0x12, 0, configIndexNum, data, 4); + return success; +} + int ECCX08Class::readConfiguration(byte data[]) { for (int i = 0; i < 128; i += 32) { @@ -419,8 +741,11 @@ int ECCX08Class::readConfiguration(byte data[]) return 1; } +/*lock will lock the data and config zones of the memory +*/ int ECCX08Class::lock() { + //Public Class lock. 
// lock config if (!lock(0)) { return 0; @@ -435,21 +760,30 @@ int ECCX08Class::lock() } int ECCX08Class::wakeup() -{ - _wire->setClock(_wakeupFrequency); - _wire->beginTransmission(0x00); - _wire->endTransmission(); - - delayMicroseconds(1500); - - byte response; - - if (!receiveResponse(&response, sizeof(response)) || response != 0x11) { +{ + int count = 0; + while(count < 5){//make wakeup try again if it fails. + count +=1; + _wire->setClock(_wakeupFrequency); + _wire->beginTransmission(0x00); + _wire->endTransmission(); + + delayMicroseconds(1500); + + byte response; + + if (!receiveResponse(&response, sizeof(response)) || response != 0x11) { + + } else { + break; + } + delay(46); + } + if(count == 5){ return 0; } _wire->setClock(_normalFrequency); - return 1; } @@ -480,6 +814,52 @@ int ECCX08Class::idle() return 1; } +/*WIP, Key slot info ought use the info command and a keyslot number +* to tell the user what is in the keyslot. +*/ +int ECCX08Class::keySlotInfo(uint8_t keySlotNum) +{ + if (!wakeup()) { + return 0; + } + uint8_t info; + if (!sendCommand(0x30, 0x01, keySlotNum)) { + return 0; + } + + delay(2); + + if (!receiveResponse(&info, sizeof(info))) { + return 0; + } + + delay(1); + idle(); + return info; +} +/*Wip, tempKeyInfo ought output the information about the state of the information in the tempkey +*/ +int ECCX08Class::tempKeyInfo() +{ + if (!wakeup()) { + return 0; + } + uint8_t info[2]; + if (!sendCommand(0x30, 0x02, 0x0000)) { + return 0; + } + + delay(2); + + if (!receiveResponse(&info, sizeof(info))) { + return 0; + } + + delay(1); + idle(); + return 1; +} + long ECCX08Class::version() { @@ -620,36 +1000,50 @@ int ECCX08Class::read(int zone, int address, byte buffer[], int length) } int ECCX08Class::write(int zone, int address, const byte buffer[], int length) +/* Return success = 1 + * return errors: + * 0: status is bad?!?!? + * 2: device did not wake + * 3: SendCommand failed. 
+ * 4: input was the wrong length + * 5: receive response was bad + */ { uint8_t status; if (!wakeup()) { - return 0; + return 2; } if (length != 4 && length != 32) { - return 0; + return 4; } if (length == 32) { zone |= 0x80; } + Serial.printf("\n :write buffer length: %d", length); + Serial.print("\n :write buffer: "); + for(int bytes =0; bytes beginTransmission(_address); _wire->write(command, commandLength); - if (_wire->endTransmission() != 0) { - return 0; + int endTransmissionReport = _wire->endTransmission(); + if (endTransmissionReport != 0) { + return 100+endTransmissionReport; } - return 1; } @@ -745,6 +1167,47 @@ int ECCX08Class::receiveResponse(void* response, size_t length) return 1; } +/* receiveResponseWithErrorCode checks to see if the response is the expected length, and + * if not, reads the error code from the second byte read back from the chip. + * chip response bytes are Count, 1 to N bytes packet, and 2 bytes checksum. + * When there is an error, the response is 4 bytes long, so this is a function designed + * specifically to work with the responses of the aes command, as anything that responds with + * 1 byte as a successful command will pass right through it. + * + */ +int ECCX08Class::receiveResponseWithErrorCode(void* response, size_t length) +{ + int retries = 20; + size_t responseSize = length + 3; // 1 for length header, 2 for CRC + byte responseBuffer[responseSize]; + + while (_wire->requestFrom((uint8_t)_address, (size_t)responseSize, (bool)true) != responseSize && retries--); + + responseBuffer[0] = _wire->read(); + + // make sure length matches responseBuffer[0] ought be the lenght of the code in the response. + if (responseBuffer[0] != responseSize) { + //if the lenght is not what we expected, we expect a 4 byte error message where the second byte is the error + return (int)_wire->read() + responseBuffer[0]*100;//wire-read will be the second slot because it has already been triggered once. 
+ //lenght*100 clears the first two digits for the error code. + + } + + for (size_t i = 1; _wire->available(); i++) { + responseBuffer[i] = _wire->read(); + } + + // verify CRC + uint16_t responseCrc = responseBuffer[length + 1] | (responseBuffer[length + 2] << 8); + + if (responseCrc != crc16(responseBuffer, responseSize - 2)) { + return 3000; + } + + memcpy(response, &responseBuffer[1], length); + return 1;//no problems detected +} + uint16_t ECCX08Class::crc16(const byte data[], size_t length) { if (data == NULL || length == 0) { @@ -778,4 +1241,4 @@ uint16_t ECCX08Class::crc16(const byte data[], size_t length) ECCX08Class ECCX08(CRYPTO_WIRE, 0x60); #else ECCX08Class ECCX08(Wire, 0x60); -#endif \ No newline at end of file +#endif diff --git a/src/ECCX08.h b/src/ECCX08.h index 50490a5..047778e 100644 --- a/src/ECCX08.h +++ b/src/ECCX08.h @@ -23,6 +23,12 @@ #include #include +struct WritePubKeyErrors +{ + bool success; + int loopErrors[3]; +}; + class ECCX08Class { public: @@ -41,10 +47,21 @@ class ECCX08Class int generatePrivateKey(int slot, byte publicKey[]); int generatePublicKey(int slot, byte publicKey[]); - + int refreshTempKey(byte publicKey[]); + int readPublicKey(uint8_t publicKeySlot, uint8_t publicKey[64]); + int ecdhKeyGen(uint8_t mode, uint8_t keyID, byte publicKey[]); + //byte[] ecdhKeyGen(int slot, byte mode, byte keyID[], byte dataX[], byte dataY[]); + int ecdsaVerify(const byte message[], const byte signature[], const byte pubkey[]); int ecSign(int slot, const byte message[], byte signature[]); + int nonce(byte rngMode, bool outputMode, byte inputNum[], int sizeOfInput); + //input is plaintext. 
this function writes ciphertext to result + int aes(byte mode, uint16_t slot, const byte input[], int inputLength, byte result[]); + int aesEncryptECB(uint16_t slot, const byte input[], byte result[]); + int aesDecryptECB(uint16_t slot, const byte input[], byte result[]); + int aesMultiply(uint16_t slot, const byte input[], const byte h[], byte result[]); + int beginSHA256(); int updateSHA256(const byte data[]); // 64 bytes int endSHA256(byte result[]); @@ -54,9 +71,14 @@ class ECCX08Class int writeSlot(int slot, const byte data[], int length); int locked(); + int lockSlot(uint8_t mode); int writeConfiguration(const byte data[]); + int writeConfigChunk(int configIndexNum, byte data[]); + int writePubKey(int slot, const byte pubKey[],WritePubKeyErrors *error); int readConfiguration(byte data[]); int lock(); + int tempKeyInfo(); + int keySlotInfo(uint8_t keySlotNum); private: int wakeup(); @@ -68,6 +90,7 @@ class ECCX08Class int verify(const byte signature[], const byte pubkey[]); int sign(int slot, byte signature[]); + int read(int zone, int address, byte buffer[], int length); int write(int zone, int address, const byte buffer[], int length); int lock(int zone); @@ -76,6 +99,7 @@ class ECCX08Class int sendCommand(uint8_t opcode, uint8_t param1, uint16_t param2, const byte data[] = NULL, size_t dataLength = 0); int receiveResponse(void* response, size_t length); + int receiveResponseWithErrorCode(void* response, size_t length); uint16_t crc16(const byte data[], size_t length); private: diff --git a/src/Ed25519.cpp b/src/Ed25519.cpp new file mode 100644 index 0000000..5732e73 --- /dev/null +++ b/src/Ed25519.cpp @@ -0,0 +1,642 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "Ed25519.h" +#include "Curve25519.h" +#include "Crypto.h" +#include "RNG.h" +#include "utility/LimbUtil.h" +#include + +/** + * \class Ed25519 Ed25519.h + * \brief Digital signatures based on the elliptic curve modulo 2^255 - 19. + * + * The first step in creating a digital signature with Ed25519 is to + * generate a key pair: + * + * \code + * uint8_t privateKey[32]; + * uint8_t publicKey[32]; + * + * Ed25519::generatePrivateKey(privateKey); + * Ed25519::derivePublicKey(publicKey, privateKey); + * \endcode + * + * The application can store both the private and public key for later + * signing operations. Or it can store just the private key and then + * derive the public key at the point where signing is to occur. 
+ * + * Message signing produces a 64-byte signature as follows: + * + * \code + * uint8_t message[N]; + * uint8_t signature[64]; + * + * Ed25519::sign(signature, privateKey, publicKey, message, N); + * \endcode + * + * And then to verify the signature: + * + * \code + * if (!Ed25519::verify(signature, publicKey, message, N)) { + * // The signature is invalid. + * ... + * } + * \endcode + * + * \note The public functions in this class need a substantial amount of + * stack space to store intermediate results while the curve function is + * being evaluated. About 1.5k of free stack space is recommended for safety. + * + * References: https://tools.ietf.org/html/draft-irtf-cfrg-eddsa-05 + * + * \sa Curve25519 + */ + +/** @cond */ + +// 37095705934669439343138083508754565189542113879843219016388785533085940283555 +static limb_t const numD[NUM_LIMBS_256BIT] PROGMEM = { + LIMB_PAIR(0x135978A3, 0x75EB4DCA), LIMB_PAIR(0x4141D8AB, 0x00700A4D), + LIMB_PAIR(0x7779E898, 0x8CC74079), LIMB_PAIR(0x2B6FFE73, 0x52036CEE) +}; + +// d * 2 +static limb_t const numDx2[NUM_LIMBS_256BIT] PROGMEM = { + LIMB_PAIR(0x26B2F159, 0xEBD69B94), LIMB_PAIR(0x8283B156, 0x00E0149A), + LIMB_PAIR(0xEEF3D130, 0x198E80F2), LIMB_PAIR(0x56DFFCE7, 0x2406D9DC) +}; + +// Extended homogenous co-ordinates for the base point. 
+static limb_t const numBx[NUM_LIMBS_256BIT] PROGMEM = { + LIMB_PAIR(0x8F25D51A, 0xC9562D60), LIMB_PAIR(0x9525A7B2, 0x692CC760), + LIMB_PAIR(0xFDD6DC5C, 0xC0A4E231), LIMB_PAIR(0xCD6E53FE, 0x216936D3) +}; +static limb_t const numBy[NUM_LIMBS_256BIT] PROGMEM = { + LIMB_PAIR(0x66666658, 0x66666666), LIMB_PAIR(0x66666666, 0x66666666), + LIMB_PAIR(0x66666666, 0x66666666), LIMB_PAIR(0x66666666, 0x66666666) +}; +static limb_t const numBz[NUM_LIMBS_256BIT] PROGMEM = { + LIMB_PAIR(0x00000001, 0x00000000), LIMB_PAIR(0x00000000, 0x00000000), + LIMB_PAIR(0x00000000, 0x00000000), LIMB_PAIR(0x00000000, 0x00000000) +}; +static limb_t const numBt[NUM_LIMBS_256BIT] PROGMEM = { + LIMB_PAIR(0xA5B7DDA3, 0x6DDE8AB3), LIMB_PAIR(0x775152F5, 0x20F09F80), + LIMB_PAIR(0x64ABE37D, 0x66EA4E8E), LIMB_PAIR(0xD78B7665, 0x67875F0F) +}; + +// 2^252 + 27742317777372353535851937790883648493 +static limb_t const numQ[NUM_LIMBS_256BIT] PROGMEM = { + LIMB_PAIR(0x5CF5D3ED, 0x5812631A), LIMB_PAIR(0xA2F79CD6, 0x14DEF9DE), + LIMB_PAIR(0x00000000, 0x00000000), LIMB_PAIR(0x00000000, 0x10000000) +}; + +/** @endcond */ + +/** + * \brief Signs a message using a specific Ed25519 private key. + * + * \param signature The signature value. + * \param privateKey The private key to use to sign the message. + * \param publicKey The public key corresponding to \a privateKey. + * \param message Points to the message to be signed. + * \param len The length of the \a message to be signed. + * + * \sa verify(), derivePublicKey() + */ +void Ed25519::sign(uint8_t signature[64], const uint8_t privateKey[32], + const uint8_t publicKey[32], const void *message, size_t len) +{ + SHA512 hash; + uint8_t *buf = (uint8_t *)(hash.state.w); // Reuse hash buffer to save memory. + limb_t a[NUM_LIMBS_256BIT]; + limb_t r[NUM_LIMBS_256BIT]; + limb_t k[NUM_LIMBS_256BIT]; + limb_t t[NUM_LIMBS_512BIT + 1]; + Point rB; + + // Derive the secret scalar a and the message prefix from the private key. 
+ deriveKeys(&hash, a, privateKey); + + // Hash the prefix and the message to derive r. + hash.reset(); + hash.update(buf + 32, 32); + hash.update(message, len); + hash.finalize(buf, 0); + reduceQFromBuffer(r, buf, t); + + // Encode rB into the first half of the signature buffer as R. + mul(rB, r); + encodePoint(signature, rB); + + // Hash R, A, and the message to get k. + hash.reset(); + hash.update(signature, 32); // R + hash.update(publicKey, 32); // A + hash.update(message, len); + hash.finalize(buf, 0); + reduceQFromBuffer(k, buf, t); + + // Compute s = (r + k * a) mod q. + Curve25519::mulNoReduce(t, k, a); + t[NUM_LIMBS_512BIT] = 0; + reduceQ(t, t); + BigNumberUtil::add(t, t, r, NUM_LIMBS_256BIT); + BigNumberUtil::reduceQuick_P(t, t, numQ, NUM_LIMBS_256BIT); + BigNumberUtil::packLE(signature + 32, 32, t, NUM_LIMBS_256BIT); + + // Clean up. + clean(a); + clean(r); + clean(k); + clean(t); + clean(rB); +} + +/** + * \brief Verifies a signature using a specific Ed25519 public key. + * + * \param signature The signature value to be verified. + * \param publicKey The public key to use to verify the signature. + * \param message The message whose signature is to be verified. + * \param len The length of the \a message to be verified. + * + * \return Returns true if the \a signature is valid for \a message; + * or false if the \a signature is not valid. + * + * \sa sign() + */ +bool Ed25519::verify(const uint8_t signature[64], const uint8_t publicKey[32], + const void *message, size_t len) +{ + SHA512 hash; + Point A; + Point R; + Point sB; + Point kA; + uint8_t *k = (uint8_t *)(hash.state.w); // Reuse hash buffer to save memory. + bool result = false; + + // Decode the public key and the R component of the signature. + if (decodePoint(A, publicKey) && decodePoint(R, signature)) { + // Reconstruct the k value from the signing step. 
+ hash.reset(); + hash.update(signature, 32); + hash.update(publicKey, 32); + hash.update(message, len); + hash.finalize(k, 0); + + // Calculate s * B. The s value is stored temporarily in kA.t. + BigNumberUtil::unpackLE(kA.t, NUM_LIMBS_256BIT, signature + 32, 32); + mul(sB, kA.t, false); + + // Calculate R + k * A. We don't need sB.t in equal() below, + // so we reuse that as a temporary buffer when reducing k. + reduceQFromBuffer(sB.t, k, kA.x); + mul(kA, sB.t, A, false); + add(R, kA); + + // Compare s * B and R + k * A for equality. + result = equal(sB, R); + } + + // Clean up and exit. + clean(A); + clean(R); + clean(sB); + clean(kA); + return result; +} + +/** + * \brief Generates a private key for Ed25519 signing operations. + * + * \param privateKey The resulting private key. + * + * The private key is generated with \link RNGClass::rand() RNG.rand()\endlink. + * It is the caller's responsibility to ensure that the global random number + * pool has sufficient entropy to generate the 32 bytes of the key safely + * before calling this function. + * + * \sa derivePublicKey() + */ +void Ed25519::generatePrivateKey(uint8_t privateKey[32]) +{ + RNG.rand(privateKey, 32); +} + +/** + * \brief Derives the public key from a private key. + * + * \param publicKey The public key. + * \param privateKey The private key. + * + * \sa generatePrivateKey() + */ +void Ed25519::derivePublicKey(uint8_t publicKey[32], const uint8_t privateKey[32]) +{ + SHA512 hash; + limb_t a[NUM_LIMBS_256BIT]; + Point ptA; + + // Derive the secret scalar a from the private key. + deriveKeys(&hash, a, privateKey); + + // Compute the point A = aB and encode it. + mul(ptA, a); + encodePoint(publicKey, ptA); + + // Clean up and exit. + clean(a); + clean(ptA); +} + +/** + * \brief Reduces a number modulo q that was specified in a 512 bit buffer. + * + * \param result The result array, which must be NUM_LIMBS_256BIT limbs in size. 
+ * \param buf The buffer containing the value to reduce in little-endian order. + * \param temp A temporary buffer of at least NUM_LIMBS_512BIT + 1 in size. + * + * \sa reduceQ() + */ +void Ed25519::reduceQFromBuffer(limb_t *result, const uint8_t buf[64], limb_t *temp) +{ + BigNumberUtil::unpackLE(temp, NUM_LIMBS_512BIT, buf, 64); + temp[NUM_LIMBS_512BIT] = 0; + reduceQ(result, temp); +} + +/** + * \brief Reduces a number modulo q. + * + * \param result The result array, which must be NUM_LIMBS_256BIT limbs in size. + * \param r The value to reduce, which must be NUM_LIMBS_512BIT + 1 + * limbs in size. + * + * The \a r array will be modified by this function as a side effect of + * the division. It is allowed for \a result to be the same as \a r. + * + * \sa reduceQFromBuffer() + */ +void Ed25519::reduceQ(limb_t *result, limb_t *r) +{ + // Algorithm from: http://en.wikipedia.org/wiki/Barrett_reduction + // + // We assume that r is less than or equal to (q - 1)^2. + // + // We want to compute result = r mod q. Find the smallest k such + // that 2^k > q. In our case, k = 253. Then set m = floor(4^k / q) + // and let r = r - q * floor(m * r / 4^k). This will be the result + // or it will be at most one subtraction of q away from the result. + // + // Note: 4^k = 4^253 = 2^506 = 2^512/2^6. We can more easily compute + // the result we want if we set m = floor(4^k * 2^6 / q) instead and + // then r = r - q * floor(m * r / 2^512). Because the slight extra + // precision in m, r is at most two subtractions of q away from the + // final result. + static limb_t const numM[NUM_LIMBS_256BIT + 1] PROGMEM = { + LIMB_PAIR(0x0A2C131B, 0xED9CE5A3), LIMB_PAIR(0x086329A7, 0x2106215D), + LIMB_PAIR(0xFFFFFFEB, 0xFFFFFFFF), LIMB_PAIR(0xFFFFFFFF, 0xFFFFFFFF), + 0x0F + }; + limb_t temp[NUM_LIMBS_512BIT + NUM_LIMBS_256BIT + 1]; + + // Multiply r by m. 
+ BigNumberUtil::mul_P(temp, r, NUM_LIMBS_512BIT, numM, NUM_LIMBS_256BIT + 1); + + // Multiply (m * r) / 2^512 by q and subtract it from r. + // We can ignore the high words of the subtraction result + // because they will all turn into zero after the subtraction. + BigNumberUtil::mul_P(temp, temp + NUM_LIMBS_512BIT, NUM_LIMBS_256BIT + 1, + numQ, NUM_LIMBS_256BIT); + BigNumberUtil::sub(r, r, temp, NUM_LIMBS_256BIT); + + // Perform two subtractions of q from the result to reduce it. + BigNumberUtil::reduceQuick_P(result, r, numQ, NUM_LIMBS_256BIT); + BigNumberUtil::reduceQuick_P(result, result, numQ, NUM_LIMBS_256BIT); + + // Clean up and exit. + clean(temp); +} + +/** + * \brief Multiplies a value by a curve point. + * + * \param result The result of the multiplication. + * \param s The value, which must be NUM_LIMBS_256BIT limbs in size. + * \param p The curve point, which will be modified by this function. + * \param constTime Set to true if the evaluation must be constant-time + * because \a s is a secret value. + */ +void Ed25519::mul(Point &result, const limb_t *s, Point &p, bool constTime) +{ + Point q; + limb_t A[NUM_LIMBS_256BIT]; + limb_t B[NUM_LIMBS_256BIT]; + limb_t C[NUM_LIMBS_256BIT]; + limb_t D[NUM_LIMBS_256BIT]; + limb_t mask, select; + uint8_t sposn, t; + + // Initialize the result to (0, 1, 1, 0). + memset(&result, 0, sizeof(Point)); + result.y[0] = 1; + result.z[0] = 1; + + // Iterate over the 255 bits of "s" to calculate "s * p". + mask = 1; + sposn = 0; + for (t = 255; t > 0; --t) { + // Add p to the result to produce q. The specification refers + // to temporary variables A to H. We can dispense with E to H + // by using B, D, q.z, and q.t to hold those values temporarily. 
+ select = s[sposn] & mask; + if (constTime || select) { + Curve25519::sub(A, result.y, result.x); + Curve25519::sub(C, p.y, p.x); + Curve25519::mul(A, A, C); + Curve25519::add(B, result.y, result.x); + Curve25519::add(C, p.y, p.x); + Curve25519::mul(B, B, C); + Curve25519::mul(C, result.t, p.t); + Curve25519::mul_P(C, C, numDx2); + Curve25519::mul(D, result.z, p.z); + Curve25519::add(D, D, D); + Curve25519::sub(q.t, B, A); // E = B - A + Curve25519::sub(q.z, D, C); // F = D - C + Curve25519::add(D, D, C); // G = D + C + Curve25519::add(B, B, A); // H = B + A + if (constTime) { + // Put the intermediate value into q. + Curve25519::mul(q.x, q.t, q.z); // q.x = E * F + Curve25519::mul(q.y, D, B); // q.y = G * H + Curve25519::mul(q.z, q.z, D); // q.z = F * G + Curve25519::mul(q.t, q.t, B); // q.t = E * H + + // Copy q into the result if the current bit of s is 1. + Curve25519::cmove(select, result.x, q.x); + Curve25519::cmove(select, result.y, q.y); + Curve25519::cmove(select, result.z, q.z); + Curve25519::cmove(select, result.t, q.t); + } else { + // Put the intermediate value directly into the result. + Curve25519::mul(result.x, q.t, q.z); // q.x = E * F + Curve25519::mul(result.y, D, B); // q.y = G * H + Curve25519::mul(result.z, q.z, D); // q.z = F * G + Curve25519::mul(result.t, q.t, B); // q.t = E * H + } + } + + // Double p for the next iteration. 
+ Curve25519::sub(A, p.y, p.x); + Curve25519::square(A, A); + Curve25519::add(B, p.y, p.x); + Curve25519::square(B, B); + Curve25519::square(C, p.t); + Curve25519::mul_P(C, C, numDx2); + Curve25519::square(D, p.z); + Curve25519::add(D, D, D); + Curve25519::sub(p.t, B, A); // E = B - A + Curve25519::sub(p.z, D, C); // F = D - C + Curve25519::add(D, D, C); // G = D + C + Curve25519::add(B, B, A); // H = B + A + Curve25519::mul(p.x, p.t, p.z); // p.x = E * F + Curve25519::mul(p.y, D, B); // p.y = G * H + Curve25519::mul(p.z, p.z, D); // p.z = F * G + Curve25519::mul(p.t, p.t, B); // p.t = E * H + + // Move onto the next bit of s from lowest to highest. + if (mask != (((limb_t)1) << (LIMB_BITS - 1))) { + mask <<= 1; + } else { + ++sposn; + mask = 1; + } + } + + // Clean up. + clean(q); + clean(A); + clean(B); + clean(C); + clean(D); +} + +/** + * \brief Multiplies a value by the base point of the curve. + * + * \param result The result of the multiplication. + * \param s The value, which must be NUM_LIMBS_256BIT limbs in size. + * \param constTime Set to true if the evaluation must be constant-time + * because \a s is a secret values. + */ +void Ed25519::mul(Point &result, const limb_t *s, bool constTime) +{ + Point P; + memcpy_P(P.x, numBx, sizeof(P.x)); + memcpy_P(P.y, numBy, sizeof(P.y)); + memcpy_P(P.z, numBz, sizeof(P.z)); + memcpy_P(P.t, numBt, sizeof(P.t)); + mul(result, s, P, constTime); + clean(P); +} + +/** + * \brief Adds two curve points. + * + * \param p The first point and the result. + * \param q The second point. 
+ */ +void Ed25519::add(Point &p, const Point &q) +{ + limb_t A[NUM_LIMBS_256BIT]; + limb_t B[NUM_LIMBS_256BIT]; + limb_t C[NUM_LIMBS_256BIT]; + limb_t D[NUM_LIMBS_256BIT]; + + Curve25519::sub(A, p.y, p.x); + Curve25519::sub(C, q.y, q.x); + Curve25519::mul(A, A, C); + Curve25519::add(B, p.y, p.x); + Curve25519::add(C, q.y, q.x); + Curve25519::mul(B, B, C); + Curve25519::mul(C, p.t, q.t); + Curve25519::mul_P(C, C, numDx2); + Curve25519::mul(D, p.z, q.z); + Curve25519::add(D, D, D); + Curve25519::sub(p.t, B, A); // E = B - A + Curve25519::sub(p.z, D, C); // F = D - C + Curve25519::add(D, D, C); // G = D + C + Curve25519::add(B, B, A); // H = B + A + Curve25519::mul(p.x, p.t, p.z); // p.x = E * F + Curve25519::mul(p.y, D, B); // p.y = G * H + Curve25519::mul(p.z, p.z, D); // p.z = F * G + Curve25519::mul(p.t, p.t, B); // p.t = E * H + + clean(A); + clean(B); + clean(C); + clean(D); +} + +/** + * \brief Determine if two curve points are equal. + * + * \param p The first curve point. + * \param q The second curve point. + * + * \return Returns true if \a p and \a q are equal; false otherwise. + */ +bool Ed25519::equal(const Point &p, const Point &q) +{ + limb_t a[NUM_LIMBS_256BIT]; + limb_t b[NUM_LIMBS_256BIT]; + bool result = true; + + Curve25519::mul(a, p.x, q.z); + Curve25519::mul(b, q.x, p.z); + result &= secure_compare(a, b, sizeof(a)); + + Curve25519::mul(a, p.y, q.z); + Curve25519::mul(b, q.y, p.z); + result &= secure_compare(a, b, sizeof(a)); + + clean(a); + clean(b); + return result; +} + +/** + * \brief Encodes a curve point into a 32-byte buffer. + * + * \param buf The buffer to encode into. + * \param point The curve point to encode. This value will be modified + * the function and effectively destroyed. 
+ * + * \sa decodePoint() + */ +void Ed25519::encodePoint(uint8_t *buf, Point &point) +{ + // Convert the homogeneous coordinates into plain (x, y) coordinates: + // zinv = z^(-1) mod p + // x = x * zinv mod p + // y = y * zinv mod p + // We don't need the t coordinate, so use that to store zinv temporarily. + Curve25519::recip(point.t, point.z); + Curve25519::mul(point.x, point.x, point.t); + Curve25519::mul(point.y, point.y, point.t); + + // Copy the lowest bit of x to the highest bit of y. + point.y[NUM_LIMBS_256BIT - 1] |= (point.x[0] << (LIMB_BITS - 1)); + + // Convert y into little-endian in the return buffer. + BigNumberUtil::packLE(buf, 32, point.y, NUM_LIMBS_256BIT); +} + +/** + * \brief Decodes a curve point from a 32-byte buffer. + * + * \param point The curve point that was decoded from the buffer. + * \param buf The buffer to decode. + * + * \return Returns true if the point was decoded or false if the contents + * of the buffer do not correspond to a legitimate curve point. + * + * \note This function is not constant time so it should only be used + * on publicly-known values. + */ +bool Ed25519::decodePoint(Point &point, const uint8_t *buf) +{ + limb_t temp[NUM_LIMBS_256BIT]; + + // Convert the input buffer from little-endian into the limbs of y. + BigNumberUtil::unpackLE(point.y, NUM_LIMBS_256BIT, buf, 32); + + // The high bit of y is the sign bit for x. + limb_t sign = point.y[NUM_LIMBS_256BIT - 1] >> (LIMB_BITS - 1); + point.y[NUM_LIMBS_256BIT - 1] &= ~(((limb_t)1) << (LIMB_BITS - 1)); + + // Set z to 1. + memcpy_P(point.z, numBz, sizeof(point.z)); + + // Compute t = (y * y - 1) * modinv(d * y * y + 1). + Curve25519::square(point.t, point.y); + Curve25519::sub(point.x, point.t, point.z); + Curve25519::mul_P(point.t, point.t, numD); + Curve25519::add(point.t, point.t, point.z); + Curve25519::recip(temp, point.t); + Curve25519::mul(point.t, point.x, temp); + clean(temp); + + // Check for t = 0. 
+ limb_t check = point.t[0]; + for (uint8_t posn = 1; posn < NUM_LIMBS_256BIT; ++posn) + check |= point.t[posn]; + if (!check) { + // If the sign bit is set, then decoding has failed. + // Otherwise x is zero and we're done. + if (sign) + return false; + memset(point.x, 0, sizeof(point.x)); + return true; + } + + // Recover x by taking the sqrt of t and flipping the sign if necessary. + if (!Curve25519::sqrt(point.x, point.t)) + return false; + if (sign != (point.x[0] & ((limb_t)1))) { + // The signs are different so we want the other square root. + memset(point.t, 0, sizeof(point.t)); + Curve25519::sub(point.x, point.t, point.x); + } + + // Finally, t = x * y. + Curve25519::mul(point.t, point.x, point.y); + return true; +} + +/** + * \brief Derive key material from a 32-byte private key. + * + * \param hash SHA512 hash object from the caller for use in this function. + * The 64-byte output buffer within this hash object will contain the + * hash prefix on exit. + * \param a The secret scalar derived from \a privateKey. This must be + * NUM_LIMBS_256BIT limbs in size. + * \param privateKey The 32-byte private key to derive all other values from. + */ +void Ed25519::deriveKeys(SHA512 *hash, limb_t *a, const uint8_t privateKey[32]) +{ + // Hash the private key to get the "a" scalar and the message prefix. + uint8_t *buf = (uint8_t *)(hash->state.w); // Reuse hash buffer to save memory. + hash->reset(); + hash->update(privateKey, 32); + hash->finalize(buf, 0); + buf[0] &= 0xF8; + buf[31] &= 0x7F; + buf[31] |= 0x40; + + // Unpack the first half of the hash value into "a". + BigNumberUtil::unpackLE(a, NUM_LIMBS_256BIT, buf, 32); +} diff --git a/src/Ed25519.h b/src/Ed25519.h new file mode 100644 index 0000000..bbdd9f2 --- /dev/null +++ b/src/Ed25519.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_ED25519_h +#define CRYPTO_ED25519_h + +#include "BigNumberUtil.h" +#include "SHA512.h" + +class Ed25519 +{ +public: + static void sign(uint8_t signature[64], const uint8_t privateKey[32], + const uint8_t publicKey[32], const void *message, + size_t len); + static bool verify(const uint8_t signature[64], const uint8_t publicKey[32], + const void *message, size_t len); + + static void generatePrivateKey(uint8_t privateKey[32]); + static void derivePublicKey(uint8_t publicKey[32], const uint8_t privateKey[32]); + +private: + // Constructor and destructor are private - cannot instantiate this class. + Ed25519(); + ~Ed25519(); + + // Curve point represented in extended homogeneous coordinates. 
+ struct Point + { + limb_t x[32 / sizeof(limb_t)]; + limb_t y[32 / sizeof(limb_t)]; + limb_t z[32 / sizeof(limb_t)]; + limb_t t[32 / sizeof(limb_t)]; + }; + + static void reduceQFromBuffer(limb_t *result, const uint8_t buf[64], limb_t *temp); + static void reduceQ(limb_t *result, limb_t *r); + + static void mul(Point &result, const limb_t *s, Point &p, bool constTime = true); + static void mul(Point &result, const limb_t *s, bool constTime = true); + + static void add(Point &p, const Point &q); + + static bool equal(const Point &p, const Point &q); + + static void encodePoint(uint8_t *buf, Point &point); + static bool decodePoint(Point &point, const uint8_t *buf); + + static void deriveKeys(SHA512 *hash, limb_t *a, const uint8_t privateKey[32]); +}; + +#endif diff --git a/src/GCM.cpp b/src/GCM.cpp new file mode 100644 index 0000000..a7ec27e --- /dev/null +++ b/src/GCM.cpp @@ -0,0 +1,519 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "GCM.h" +#include "Crypto.h" +#include "utility/EndianUtil.h" +#include "AES.h" +#include +#include +#include + +/* gcmEncrypt encrypts a plaintext to return a ciphertext using AES-GCM-256 + * Parameters: + * uint8_t publicKey[64] - the public key for the intended recipient, 64 bytes + * uint8_t *plainText - the text to be encrypted, N bytes + * int plainTextLength - the size of the plaintext message, the integer N that represents the number of bytes in plainText + * uint8_t *cipherTextOutput - the output buffer for the encrypted bytes, C bytes + * int cipherTextOutputLength - the size of the output buffer, the integer C that represents the number of bytes in cipherTextOutput + * + * The correct size for C is N + overhead, where overhead is the GCM IV size of 12 plus the GCM tag size of 16. 12 + 16 = 28. + * Therefore, cipherTextOutput should be exactly (N + 28) bytes long and cipherTextOutputLength should be (N + 28). 
+ * + * Returns: + * 1 - success + * -1 - cipherTextOutputLength was wrong size + * -2 - unused (no code path in this function returns it) + * -3 - ECDH operation was unsuccessful + * Creates a ciphertext in the format of twelve bytes of IV, N bytes of ciphertext, and sixteen bytes + * of GCM tag appended to one another + */ +int gcmEncrypt(uint8_t publicKey[64], uint8_t *plainText, int plainTextLength, uint8_t *cipherTextOutput, int cipherTextOutputLength) +{ + const int ivSize = 12; + const int tagSize = 16; + + uint8_t iv[ivSize]; + uint8_t tag[tagSize]; + + int tempKeySlot = 0xFFFF; + + uint8_t encrypted[plainTextLength]; + + + if(cipherTextOutputLength != (plainTextLength + ivSize + tagSize)) + { + return -1; + } + + // Generate the shared secret using the given public key and the private key from the crypto chip and save the result in the crypto key's TempKey slot + int ecdhSuccess = ECCX08.ecdhKeyGen(0b00001000, 1, publicKey); + delay(26); // Give the crypto chip time to finish its internal processes so that future operations don't fail.
+ if(ecdhSuccess != 1) + { + return -3; + } + + // Make the IV: one ECCX08.random() draw per IV byte + for (int index = 0; index < 12; index++) + { + int random = ECCX08.random(0x100); // upper bound is exclusive, so 0x100 (not 0xFF) lets a byte take every value 0x00-0xFF + iv[index] = random; + } + + // run the gcm encryption + GCM gcm; + gcm.setIV(iv, ivSize); + gcm.encrypt(encrypted, plainText, plainTextLength); + gcm.computeTag(tag, tagSize); + // Output layout: IV, then ciphertext, then tag + memcpy(cipherTextOutput, iv, ivSize); + memcpy(&cipherTextOutput[ivSize], encrypted, plainTextLength); + memcpy(&cipherTextOutput[ivSize+plainTextLength], tag, tagSize); + + return 1; +} + +/* gcmDecrypt decrypts a ciphertext with AES-GCM-256 to return the plaintext + * Parameters: + * uint8_t publicKey[64] - the public key of the sender, 64 bytes + * uint8_t *ciphertext - the input buffer holding the encrypted bytes (IV, then ciphertext, then tag), C bytes + * int ciphertextLength - the size of the input buffer, the integer C that represents the number of bytes in ciphertext + * uint8_t *plaintextOutput - the output buffer for the decrypted text, N bytes + * int plaintextOutputLength - the size of the output buffer, the integer N that represents the number of bytes in plaintextOutput + * + * The correct size for C is N + overhead, where overhead is the GCM IV size of 12 plus the GCM tag size of 16. 12 + 16 = 28. + * Therefore, ciphertext should be exactly (N + 28) bytes long and ciphertextLength should be (N + 28).
+ * + * Returns: + * 1 - success + * -1 - ciphertextLength was not plaintextOutputLength + 28 + * -2 - ECDH operation was unsuccessful + * -3 - AEAD tag check failed + * Expects a ciphertext in the format of twelve bytes of IV, N bytes of ciphertext, and sixteen bytes + * of GCM tag appended to one another + */ +int gcmDecrypt(uint8_t publicKey[64], uint8_t *ciphertext, int ciphertextLength, uint8_t *plaintextOutput, int plaintextOutputLength) +{ + int tempKeySlot = 0xFFFF; + const int ivSize = 12; + const int tagSize = 16; + + uint8_t iv[ivSize]; + uint8_t tag[tagSize]; + uint8_t encrypted[plaintextOutputLength]; + uint8_t decrypted[plaintextOutputLength]; + + if(ciphertextLength != (plaintextOutputLength + ivSize + tagSize)) + { + return -1; + } + + memcpy(iv, ciphertext, ivSize); + memcpy(encrypted, &ciphertext[ivSize], plaintextOutputLength); + memcpy(tag, &ciphertext[ivSize+plaintextOutputLength], tagSize); + + // Generate the shared secret using the given public key and the private key from the crypto chip and save the result in the crypto key's TempKey slot + int ecdhSuccess = ECCX08.ecdhKeyGen(0b00001000, 1, publicKey); + delay(26); // Give the crypto chip time to finish its internal processes so that future operations don't fail.
+ if(ecdhSuccess != 1) + { + return -2; + } + + GCM gcm; + + Serial.print("Ciphertext: "); + gcm.printHex(ciphertext, ciphertextLength); + Serial.printf("Ciphertext length is %i\r\n", ciphertextLength); + + gcm.setIV(iv, ivSize); + gcm.decrypt(decrypted, encrypted, plaintextOutputLength); + + if (!gcm.checkTag(tag, tagSize)) + { + Serial.println("Public Key: "); + gcm.printHex(publicKey, 64); + Serial.println("Key Size is 64"); + + Serial.println("Decrypt IV:"); + gcm.printHex(iv, ivSize); + Serial.printf("IV size is %i\r\n", ivSize); + + Serial.println("EncryptedBytes: "); + gcm.printHex(encrypted, plaintextOutputLength); + Serial.printf("Encrypted Message Size is %i\r\n", plaintextOutputLength); + + Serial.println("Decrypt Tag:"); + gcm.printHex(tag, tagSize); + Serial.printf("Tag size is %i\r\n", tagSize); + + Serial.print("Decrypted bytes: "); + gcm.printHex(decrypted, plaintextOutputLength); + Serial.printf("Decrypted Message Size is %i\r\n", plaintextOutputLength); + + return -3; + } + + memcpy(plaintextOutput, decrypted, plaintextOutputLength); + + return 1; +} + +/** + * \class GCMCommon GCM.h + * \brief Concrete base class to assist with implementing GCM for + * 128-bit block ciphers. + * + * References: NIST SP 800-38D, + * http://en.wikipedia.org/wiki/Galois/Counter_Mode + * + * \sa GCM + */ + +/** + * \brief Constructs a new cipher in GCM mode. + * + * This constructor must be followed by a call to setBlockCipher(). + */ +GCMCommon::GCMCommon() + : blockCipher(0) +{ + state.authSize = 0; + state.dataSize = 0; + state.dataStarted = false; + state.posn = 16; +} + +/** + * \brief Destroys this cipher object after clearing sensitive information. + */ +GCMCommon::~GCMCommon() +{ + clean(state); +} + +size_t GCMCommon::keySize() const +{ + return blockCipher->keySize(); +} + +size_t GCMCommon::ivSize() const +{ + // The GCM specification recommends an IV size of 96 bits. 
+ return 12; +} + +size_t GCMCommon::tagSize() const +{ + return 16; +} + +bool GCMCommon::setKey(const uint8_t *key, size_t len) +{ + // Set the encryption key for the block cipher. + return blockCipher->setKey(key, len); +} + +bool GCMCommon::setIV(const uint8_t *iv, size_t len) +{ + // Format the counter block from the IV. + if (len == 12) { + // IV's of exactly 96 bits are used directly as the counter block. + memcpy(state.counter, iv, 12); + state.counter[12] = 0; + state.counter[13] = 0; + state.counter[14] = 0; + state.counter[15] = 1; + } else { + // IV's of other sizes are hashed to produce the counter block + + ghash.reset();//calls mulInit, encrypts zeroblock, a 16 byte zero + ghash.update(iv, len); + ghash.pad(); + uint64_t sizes[2] = {0, htobe64(((uint64_t)len) * 8)}; + ghash.update(sizes, sizeof(sizes)); + clean(sizes); + ghash.finalize(state.counter, 16); + } + + // Reset the GCM object ready to process auth or payload data. + state.authSize = 0; + state.dataSize = 0; + state.dataStarted = false; + state.posn = 16; + + // Construct the hashing key by encrypting a zero block. + memset(state.nonce, 0, 16); + blockCipher->encryptBlock(state.nonce,state.nonce); + ghash.reset(); + + // Replace the hash key in "nonce" with the encrypted counter. + // This value will be XOR'ed with the final authentication hash + // value in computeTag(). + blockCipher->encryptBlock(state.nonce, state.counter); + return true; +} + +/** + * \brief Increments the least significant 32 bits of the counter block. + * + * \param counter The counter block to increment. 
+ */ +static inline void increment(uint8_t counter[16]) +{ + uint16_t carry = 1; + carry += counter[15]; + counter[15] = (uint8_t)carry; + carry = (carry >> 8) + counter[14]; + counter[14] = (uint8_t)carry; + carry = (carry >> 8) + counter[13]; + counter[13] = (uint8_t)carry; + carry = (carry >> 8) + counter[12]; + counter[12] = (uint8_t)carry; +} + +void GCMCommon::encrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + // Finalize the authenticated data if necessary. + if (!state.dataStarted) { + ghash.pad(); + state.dataStarted = true; + } + + // Encrypt the plaintext using the block cipher in counter mode. + uint8_t *out = output; + size_t size = len; + int tempkey = 0xFFFF; + while (size > 0) { + if (state.posn >= 16) { + increment(state.counter); + blockCipher->encryptBlockWithSlot(tempkey, state.stream, state.counter); + state.posn = 0; + } + + uint8_t temp = 16 - state.posn; + if (temp > size) + temp = size;//catch overflow? + uint8_t *stream = state.stream + state.posn; + state.posn += temp; + size -= temp; + // Encrypt as many bytes as we can using the keystream block. + while (temp > 0) { + uint8_t xorA = *input++; + uint8_t xorB = *stream++; + *out++ = xorA ^ xorB; + --temp; + + } + } + + //Feed the ciphertext into the hash. + //output because in the encrypt step, the xoring happens before hashing. + ghash.update(output, len); + + state.dataSize += len; +} + +void GCMCommon::decrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + uint8_t *out = output; + int tempkey = 0xFFFF; + + // Finalize the authenticated data if necessary. + if (!state.dataStarted) { + ghash.pad(); + state.dataStarted = true; + } + + // Feed the ciphertext into the hash before we decrypt it. + //input in this case because the encryption and xoring will happen afterwards. + ghash.update(input, len); + state.dataSize += len; + // Decrypt the plaintext using the block cipher in counter mode. + while (len > 0) { + // Create a new keystream block if necessary. 
+ if (state.posn >= 16) { + increment(state.counter); + blockCipher->encryptBlockWithSlot(tempkey,state.stream, state.counter); + state.posn = 0; + } + + // Decrypt as many bytes as we can using the keystream block. + uint8_t temp = 16 - state.posn; + if (temp > len) + temp = len; + uint8_t *stream = state.stream + state.posn; + state.posn += temp; + len -= temp; + while (temp > 0) { + uint8_t xorA = *input++; + uint8_t xorB = *stream++; + *output++ = xorA ^ xorB; + --temp; + } + } +} + +void GCMCommon::addAuthData(const void *data, size_t len) +{ + if (!state.dataStarted) { + ghash.update(data, len); + state.authSize += len; + } +} + +void GCMCommon::computeTag(void *tag, size_t len) +{ + // Pad the hashed data and add the sizes. + ghash.pad(); + uint64_t sizes[2] = { + htobe64(state.authSize * 8), + htobe64(state.dataSize * 8) + }; + ghash.update(sizes, sizeof(sizes)); + clean(sizes); + + // Get the finalized hash, encrypt it with the nonce, and return the tag. + ghash.finalize(state.stream, 16); + for (uint8_t posn = 0; posn < 16; ++posn) + state.stream[posn] ^= state.nonce[posn]; + if (len > 16) + len = 16; + memcpy(tag, state.stream, len); +} + +bool GCMCommon::checkTag(const void *tag, size_t len) +{ + // Can never match if the expected tag length is too long. + if (len > 16) + { + printf("Expected tag length is too long: %i\r\n", len); + return false; + } + + // Compute the tag and check it. + computeTag(state.counter, 16); + return secure_compare(state.counter, tag, len); +} + +void GCMCommon::clear() +{ + blockCipher->clear(); + ghash.clear(); + clean(state); + state.posn = 16; +} + +/** + * \fn void GCMCommon::setBlockCipher(BlockCipher *cipher) + * \brief Sets the block cipher to use for this GCM object. + * + * \param cipher The block cipher to use to implement GCM mode. + * This object must have a block size of 128 bits (16 bytes). + */ + +/** + * \class GCM GCM.h + * \brief Implementation of the Galois Counter Mode (GCM). 
+ * + * GCM mode converts a block cipher into an authenticated cipher + * that uses the block cipher T to encrypt and GHASH to authenticate. + * + * The size of the key is determined by the underlying block cipher T. + * The IV is recommended to be 96 bits (12 bytes) in length, but other + * lengths are supported as well. The default tagSize() is 128 bits + * (16 bytes) but the GCM specification does allow other tag sizes: + * 32, 64, 96, 104, 112, 120, or 128 bits (4, 8, 12, 13, 14, 15, or 16 bytes). + * + * The template parameter T must be a concrete subclass of BlockCipher + * indicating the specific block cipher to use. The block cipher must + * have a block size of 128 bits. For example, the following creates a + * GCM object using AES256 as the underlying cipher and then uses it + * to encrypt and authenticate a \c plaintext block: + * + * \code + * GCM<AES256> gcm; + * gcm.setKey(key, sizeof(key)); + * gcm.setIV(iv, sizeof(iv)); + * gcm.addAuthData(adata, sizeof(adata)); + * gcm.encrypt(ciphertext, plaintext, sizeof(plaintext)); + * gcm.computeTag(tag, sizeof(tag)); + * \endcode + * + * The decryption process is almost identical to convert a \c ciphertext and + * \a tag back into plaintext and then check the tag: + * + * \code + * GCM<AES256> gcm; + * gcm.setKey(key, sizeof(key)); + * gcm.setIV(iv, sizeof(iv)); + * gcm.addAuthData(adata, sizeof(adata)); + * gcm.decrypt(plaintext, ciphertext, sizeof(ciphertext)); + * if (!gcm.checkTag(tag, sizeof(tag))) { + * // The data was invalid - do not use it. + * ... + * } + * \endcode + * + * The GCM class can also be used to implement GMAC message authentication + * by omitting the plaintext: + * + * \code + * GCM<AES256> gcm; + * gcm.setKey(key, sizeof(key)); + * gcm.setIV(iv, sizeof(iv)); + * gcm.addAuthData(adata1, sizeof(adata1)); + * gcm.addAuthData(adata2, sizeof(adata2)); + * ...
+ * gcm.addAuthData(adataN, sizeof(adataN)); + * gcm.computeTag(tag, sizeof(tag)); + * \endcode + * + * References: NIST SP 800-38D, + * http://en.wikipedia.org/wiki/Galois/Counter_Mode + * + * \sa GCMCommon, GHASH + */ + +/** + * \fn GCM::GCM() + * \brief Constructs a new GCM object for the block cipher T. + */ + +void GCMCommon::printHex(uint8_t *data, int dataSize) +{ + // hex strings are two per byte, plus 1 for our null termination + int bufferSize = (dataSize * 2) + 1; + int lastCharIndex = bufferSize - 1; + char buffer[bufferSize]; + + for(int j = 0; j < dataSize; j++) + { + sprintf(&buffer[2*j], "%02X", data[j]); + } + + // Add null termination + buffer[lastCharIndex] = 0; + + Serial.println(buffer); +} \ No newline at end of file diff --git a/src/GCM.h b/src/GCM.h new file mode 100644 index 0000000..fa02651 --- /dev/null +++ b/src/GCM.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_GCM_h +#define CRYPTO_GCM_h + +#include "AuthenticatedCipher.h" +#include "BlockCipher.h" +#include "GHASH.h" +#include + +int gcmEncrypt(uint8_t publicKey[64], uint8_t *plainText, int plainTextLength, uint8_t *cipherTextOutput, int cipherTextOutputLength); +int gcmDecrypt(uint8_t publicKey[64], uint8_t *ciphertext, int ciphertextLength, uint8_t *plaintextOutput, int plaintextOutputLength); + +class GCMCommon : public AuthenticatedCipher +{ +public: + virtual ~GCMCommon(); + + size_t keySize() const; + size_t ivSize() const; + size_t tagSize() const; + + bool setKey(const uint8_t *key, size_t len); + bool setIV(const uint8_t *iv, size_t len); + + void encrypt(uint8_t *output, const uint8_t *input, size_t len); + void decrypt(uint8_t *output, const uint8_t *input, size_t len); + + void addAuthData(const void *data, size_t len); + + void computeTag(void *tag, size_t len); + bool checkTag(const void *tag, size_t len); + + void printHex(uint8_t *data, int dataSize); + + void clear(); + +protected: + GCMCommon(); + void setBlockCipher(BlockCipher *cipher) { blockCipher = cipher; } + +private: + BlockCipher *blockCipher; + GHASH ghash; + struct { + uint8_t counter[16]; + uint8_t stream[16]; + uint8_t nonce[16]; + uint64_t authSize; + uint64_t dataSize; + bool dataStarted; + uint8_t posn; + } state; +}; + +template +class GCM : public GCMCommon +{ +public: + GCM() { setBlockCipher(&cipher); } + +private: + T cipher; +}; + +#endif diff --git a/src/GF128.cpp b/src/GF128.cpp new file mode 100644 index 0000000..7a1885d --- /dev/null +++ b/src/GF128.cpp @@ -0,0 +1,692 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#include "GF128.h" +#include "utility/EndianUtil.h" +#include +#include "Crypto.h" +#include +#include +#include "AES.h" + +/** + * \class GF128 GF128.h + * \brief Operations in the Galois field GF(2^128). + * + * This class contains helper functions for performing operations in + * the Galois field GF(2^128) which is used as the basis of GCM and GHASH. + * These functions are provided for use by other cryptographic protocols + * that make use of GF(2^128). + * + * Most of the functions in this class use the field, polynomial, and + * byte ordering conventions described in NIST SP 800-38D (GCM). The one + * exception is dblEAX() which uses the conventions of EAX mode instead. + * + * References: NIST SP 800-38D + * + * \sa GCM, GHASH + */ + + + +/* A function that slices the bytes from an array of uint32_t to a byte array. 
/**
 * \brief Serialises four 32-bit words into 16 bytes, most significant
 * byte of each word first.
 *
 * \param N The four words to serialise (in).
 * \param byteN Receives the bytes; word i is written to
 * byteN[4*i] .. byteN[4*i + 3] (out).
 *
 * The result depends only on the numeric value of each word, never on the
 * byte order of the host.  The buffer type is spelled uint8_t rather than
 * the Arduino \c byte alias; the two name the same type on the Arduino
 * cores, so existing callers are unaffected.
 */
void uint32toByte(const uint32_t N[4], uint8_t byteN[16])
{
    for (int word = 0; word < 4; ++word) {
        const uint32_t value = N[word];
        uint8_t *dst = byteN + 4 * word;
        // Shift first, then narrow: the arithmetic stays unsigned and 32 bits
        // wide, so it is also correct where int is only 16 bits.
        dst[0] = static_cast<uint8_t>(value >> 24);
        dst[1] = static_cast<uint8_t>(value >> 16);
        dst[2] = static_cast<uint8_t>(value >> 8);
        dst[3] = static_cast<uint8_t>(value);
    }
}

/**
 * \brief Packs 16 bytes into four 32-bit words, treating every group of
 * four bytes as one big-endian integer.
 *
 * \param byteN The 16 bytes to pack (in).
 * \param out Caller-provided array of four words that receives the
 * result (out).
 *
 * This is the exact inverse of uint32toByte().
 */
void bytesToUint32(uint8_t byteN[16], uint32_t *out)
{
    for (int word = 0; word < 4; ++word) {
        const uint8_t *src = byteN + 4 * word;
        // Widen each byte to 32 bits BEFORE shifting.  Shifting the promoted
        // int by 24 overflows when the top bit of the byte is set and loses
        // the value entirely on targets with a 16-bit int.
        out[word] = (static_cast<uint32_t>(src[0]) << 24)
                  | (static_cast<uint32_t>(src[1]) << 16)
                  | (static_cast<uint32_t>(src[2]) << 8)
                  |  static_cast<uint32_t>(src[3]);
    }
}

/**
 * \brief Initialize multiplication in the GF(2^128) field.
 *
 * \param H The hash state to be initialized.
 */
+ * \param key Points to the 16 byte authentication key which is assumed + * to be in big-endian byte order. + * + * This function and the companion mul() are intended for use by other + * classes that need access to the raw GF(2^128) field multiplication of + * GHASH without the overhead of GHASH itself. + * + * \sa mul(), dbl() + */ +void GF128::mulInit(uint32_t H[4]) +{ +#if defined(__AVR__) + // Copy the key into H but leave it in big endian order because + // we can correct for the byte order in mul() below. + memcpy(H, key, 16); +#else + // Copy the key into H and convert from big endian to host order.????? + uint8_t zeroblock[16] ={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + uint8_t HByte[16]; + + //aesEncryptECB(uint16_t slot, const byte input[], byte result[] + ECCX08.aesEncryptECB(0xFFFF, zeroblock, HByte); + + + + bytesToUint32(HByte, H); + //memcpy(H, key, 16); +#if defined(CRYPTO_LITTLE_ENDIAN) + H[0] = be32toh(H[0]); + H[1] = be32toh(H[1]); + H[2] = be32toh(H[2]); + H[3] = be32toh(H[3]); +#endif +#endif +} + +/** + * \brief Calls the ECCX08 chip to preform a Galois Feild multiply. + * + * \param Y (in/out) The first value to multiply, and the result. This array is + * assumed to be in big-endian order on entry and exit. + * \param H The second value to multiply, which must have been initialized + * by the mulInit() function. + + * + * \sa mulInit(), dbl() + */ +void GF128::aesMul(uint32_t Y[4], const uint32_t H[4]){ + byte aesMultiplyResult[16]; + byte byteH[16]; + byte byteY[16]; + + uint32toByte(Y, byteY); + uint32toByte(H, byteH); + //passing 0xFFFF in to maintain formatting, even though multiply ignores the slot + ECCX08.aesMultiply(0xFFFF, byteY, byteH, aesMultiplyResult); + + bytesToUint32(aesMultiplyResult, Y); +} + + +/** + * \brief Perform a multiplication in the GF(2^128) field. + * + * \param Y The first value to multiply, and the result. This array is + * assumed to be in big-endian order on entry and exit. 
+ * \param H The second value to multiply, which must have been initialized + * by the mulInit() function. + * + * This function and the companion mulInit() are intended for use by other + * classes that need access to the raw GF(2^128) field multiplication of + * GHASH without the overhead of GHASH itself. + * + * \sa mulInit(), dbl() + */ +void GF128::mul(uint32_t Y[4], const uint32_t H[4]) +{ + #if defined(__AVR__) + uint32_t Z[4] = {0, 0, 0, 0}; // Z = 0 + uint32_t V0 = H[0]; // V = H + uint32_t V1 = H[1]; + uint32_t V2 = H[2]; + uint32_t V3 = H[3]; + + // Multiply Z by V for the set bits in Y, starting at the top. + // This is a very simple bit by bit version that may not be very + // fast but it should be resistant to cache timing attacks. + for (uint8_t posn = 0; posn < 16; ++posn) { + uint8_t value = ((const uint8_t *)Y)[posn]; + for (uint8_t bit = 0; bit < 8; ++bit) { + __asm__ __volatile__ ( + // Extract the high bit of "value" and turn it into a mask. + "ldd r24,%8\n" + "lsl r24\n" + "std %8,r24\n" + "mov __tmp_reg__,__zero_reg__\n" + "sbc __tmp_reg__,__zero_reg__\n" + + // XOR V with Z if the bit is 1. 
+ "mov r24,%D0\n" // Z0 ^= (V0 & mask) + "and r24,__tmp_reg__\n" + "ldd r25,%D4\n" + "eor r25,r24\n" + "std %D4,r25\n" + "mov r24,%C0\n" + "and r24,__tmp_reg__\n" + "ldd r25,%C4\n" + "eor r25,r24\n" + "std %C4,r25\n" + "mov r24,%B0\n" + "and r24,__tmp_reg__\n" + "ldd r25,%B4\n" + "eor r25,r24\n" + "std %B4,r25\n" + "mov r24,%A0\n" + "and r24,__tmp_reg__\n" + "ldd r25,%A4\n" + "eor r25,r24\n" + "std %A4,r25\n" + "mov r24,%D1\n" // Z1 ^= (V1 & mask) + "and r24,__tmp_reg__\n" + "ldd r25,%D5\n" + "eor r25,r24\n" + "std %D5,r25\n" + "mov r24,%C1\n" + "and r24,__tmp_reg__\n" + "ldd r25,%C5\n" + "eor r25,r24\n" + "std %C5,r25\n" + "mov r24,%B1\n" + "and r24,__tmp_reg__\n" + "ldd r25,%B5\n" + "eor r25,r24\n" + "std %B5,r25\n" + "mov r24,%A1\n" + "and r24,__tmp_reg__\n" + "ldd r25,%A5\n" + "eor r25,r24\n" + "std %A5,r25\n" + "mov r24,%D2\n" // Z2 ^= (V2 & mask) + "and r24,__tmp_reg__\n" + "ldd r25,%D6\n" + "eor r25,r24\n" + "std %D6,r25\n" + "mov r24,%C2\n" + "and r24,__tmp_reg__\n" + "ldd r25,%C6\n" + "eor r25,r24\n" + "std %C6,r25\n" + "mov r24,%B2\n" + "and r24,__tmp_reg__\n" + "ldd r25,%B6\n" + "eor r25,r24\n" + "std %B6,r25\n" + "mov r24,%A2\n" + "and r24,__tmp_reg__\n" + "ldd r25,%A6\n" + "eor r25,r24\n" + "std %A6,r25\n" + "mov r24,%D3\n" // Z3 ^= (V3 & mask) + "and r24,__tmp_reg__\n" + "ldd r25,%D7\n" + "eor r25,r24\n" + "std %D7,r25\n" + "mov r24,%C3\n" + "and r24,__tmp_reg__\n" + "ldd r25,%C7\n" + "eor r25,r24\n" + "std %C7,r25\n" + "mov r24,%B3\n" + "and r24,__tmp_reg__\n" + "ldd r25,%B7\n" + "eor r25,r24\n" + "std %B7,r25\n" + "mov r24,%A3\n" + "and r24,__tmp_reg__\n" + "ldd r25,%A7\n" + "eor r25,r24\n" + "std %A7,r25\n" + + // Rotate V right by 1 bit. 
+ "lsr %A0\n" + "ror %B0\n" + "ror %C0\n" + "ror %D0\n" + "ror %A1\n" + "ror %B1\n" + "ror %C1\n" + "ror %D1\n" + "ror %A2\n" + "ror %B2\n" + "ror %C2\n" + "ror %D2\n" + "ror %A3\n" + "ror %B3\n" + "ror %C3\n" + "ror %D3\n" + "mov r24,__zero_reg__\n" + "sbc r24,__zero_reg__\n" + "andi r24,0xE1\n" + "eor %A0,r24\n" + : "+r"(V0), "+r"(V1), "+r"(V2), "+r"(V3) + : "Q"(Z[0]), "Q"(Z[1]), "Q"(Z[2]), "Q"(Z[3]), "Q"(value) + : "r24", "r25" + ); + } + } + + // We have finished the block so copy Z into Y and byte-swap. + __asm__ __volatile__ ( + "ldd __tmp_reg__,%A0\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%B0\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%C0\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%D0\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%A1\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%B1\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%C1\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%D1\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%A2\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%B2\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%C2\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%D2\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%A3\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%B3\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%C3\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%D3\n" + "st X,__tmp_reg__\n" + : : "Q"(Z[0]), "Q"(Z[1]), "Q"(Z[2]), "Q"(Z[3]), "x"(Y) + ); +#else // !__AVR__ + uint32_t Z0 = 0; // Z = 0 + uint32_t Z1 = 0; + uint32_t Z2 = 0; + uint32_t Z3 = 0; + uint32_t V0 = H[0]; // V = H + uint32_t V1 = H[1]; + uint32_t V2 = H[2]; + uint32_t V3 = H[3]; + + // Multiply Z by V for the set bits in Y, starting at the top. + // This is a very simple bit by bit version that may not be very + // fast but it should be resistant to cache timing attacks. 
+ for (uint8_t posn = 0; posn < 16; ++posn) { + uint8_t value = ((const uint8_t *)Y)[posn]; + for (uint8_t bit = 0; bit < 8; ++bit, value <<= 1) { + // Extract the high bit of "value" and turn it into a mask. + uint32_t mask = (~((uint32_t)(value >> 7))) + 1; + + // XOR V with Z if the bit is 1. + Z0 ^= (V0 & mask); + Z1 ^= (V1 & mask); + Z2 ^= (V2 & mask); + Z3 ^= (V3 & mask); + + // Rotate V right by 1 bit. + mask = ((~(V3 & 0x01)) + 1) & 0xE1000000; + V3 = (V3 >> 1) | (V2 << 31); + V2 = (V2 >> 1) | (V1 << 31); + V1 = (V1 >> 1) | (V0 << 31); + V0 = (V0 >> 1) ^ mask; + } + } + + // We have finished the block so copy Z into Y and byte-swap. + Y[0] = htobe32(Z0); + Y[1] = htobe32(Z1); + Y[2] = htobe32(Z2); + Y[3] = htobe32(Z3); +#endif // !__AVR__ +} + +/** + * \brief Doubles a value in the GF(2^128) field. + * + * \param V The value to double, and the result. This array is + * assumed to be in big-endian order on entry and exit. + * + * Block cipher modes such as XEX + * are similar to CTR mode but instead of incrementing the nonce every + * block, the modes multiply the nonce by 2 in the GF(2^128) field every + * block. This function is provided to help with implementing such modes. 
+ * + * \sa dblEAX(), dblXTS(), mul() + */ +void GF128::dbl(uint32_t V[4]) +{ +#if defined(__AVR__) + __asm__ __volatile__ ( + "ld r16,Z\n" + "ldd r17,Z+1\n" + "ldd r18,Z+2\n" + "ldd r19,Z+3\n" + "lsr r16\n" + "ror r17\n" + "ror r18\n" + "ror r19\n" + "std Z+1,r17\n" + "std Z+2,r18\n" + "std Z+3,r19\n" + "ldd r17,Z+4\n" + "ldd r18,Z+5\n" + "ldd r19,Z+6\n" + "ldd r20,Z+7\n" + "ror r17\n" + "ror r18\n" + "ror r19\n" + "ror r20\n" + "std Z+4,r17\n" + "std Z+5,r18\n" + "std Z+6,r19\n" + "std Z+7,r20\n" + "ldd r17,Z+8\n" + "ldd r18,Z+9\n" + "ldd r19,Z+10\n" + "ldd r20,Z+11\n" + "ror r17\n" + "ror r18\n" + "ror r19\n" + "ror r20\n" + "std Z+8,r17\n" + "std Z+9,r18\n" + "std Z+10,r19\n" + "std Z+11,r20\n" + "ldd r17,Z+12\n" + "ldd r18,Z+13\n" + "ldd r19,Z+14\n" + "ldd r20,Z+15\n" + "ror r17\n" + "ror r18\n" + "ror r19\n" + "ror r20\n" + "std Z+12,r17\n" + "std Z+13,r18\n" + "std Z+14,r19\n" + "std Z+15,r20\n" + "mov r17,__zero_reg__\n" + "sbc r17,__zero_reg__\n" + "andi r17,0xE1\n" + "eor r16,r17\n" + "st Z,r16\n" + : : "z"(V) + : "r16", "r17", "r18", "r19", "r20" + ); +#else + uint32_t V0 = be32toh(V[0]); + uint32_t V1 = be32toh(V[1]); + uint32_t V2 = be32toh(V[2]); + uint32_t V3 = be32toh(V[3]); + uint32_t mask = ((~(V3 & 0x01)) + 1) & 0xE1000000; + V3 = (V3 >> 1) | (V2 << 31); + V2 = (V2 >> 1) | (V1 << 31); + V1 = (V1 >> 1) | (V0 << 31); + V0 = (V0 >> 1) ^ mask; + V[0] = htobe32(V0); + V[1] = htobe32(V1); + V[2] = htobe32(V2); + V[3] = htobe32(V3); +#endif +} + +/** + * \brief Doubles a value in the GF(2^128) field using EAX conventions. + * + * \param V The value to double, and the result. This array is + * assumed to be in big-endian order on entry and exit. + * + * This function differs from dbl() that it uses the conventions of EAX mode + * instead of those of NIST SP 800-38D (GCM). The two operations have + * equivalent security but the bits are ordered differently with the + * value shifted left instead of right. 
+ * + * References: https://en.wikipedia.org/wiki/EAX_mode, + * http://web.cs.ucdavis.edu/~rogaway/papers/eax.html + * + * \sa dbl(), dblXTS(), mul() + */ +void GF128::dblEAX(uint32_t V[4]) +{ +#if defined(__AVR__) + __asm__ __volatile__ ( + "ldd r16,Z+15\n" + "ldd r17,Z+14\n" + "ldd r18,Z+13\n" + "ldd r19,Z+12\n" + "lsl r16\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "std Z+14,r17\n" + "std Z+13,r18\n" + "std Z+12,r19\n" + "ldd r17,Z+11\n" + "ldd r18,Z+10\n" + "ldd r19,Z+9\n" + "ldd r20,Z+8\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "std Z+11,r17\n" + "std Z+10,r18\n" + "std Z+9,r19\n" + "std Z+8,r20\n" + "ldd r17,Z+7\n" + "ldd r18,Z+6\n" + "ldd r19,Z+5\n" + "ldd r20,Z+4\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "std Z+7,r17\n" + "std Z+6,r18\n" + "std Z+5,r19\n" + "std Z+4,r20\n" + "ldd r17,Z+3\n" + "ldd r18,Z+2\n" + "ldd r19,Z+1\n" + "ld r20,Z\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "std Z+3,r17\n" + "std Z+2,r18\n" + "std Z+1,r19\n" + "st Z,r20\n" + "mov r17,__zero_reg__\n" + "sbc r17,__zero_reg__\n" + "andi r17,0x87\n" + "eor r16,r17\n" + "std Z+15,r16\n" + : : "z"(V) + : "r16", "r17", "r18", "r19", "r20" + ); +#else + uint32_t V0 = be32toh(V[0]); + uint32_t V1 = be32toh(V[1]); + uint32_t V2 = be32toh(V[2]); + uint32_t V3 = be32toh(V[3]); + uint32_t mask = ((~(V0 >> 31)) + 1) & 0x00000087; + V0 = (V0 << 1) | (V1 >> 31); + V1 = (V1 << 1) | (V2 >> 31); + V2 = (V2 << 1) | (V3 >> 31); + V3 = (V3 << 1) ^ mask; + V[0] = htobe32(V0); + V[1] = htobe32(V1); + V[2] = htobe32(V2); + V[3] = htobe32(V3); +#endif +} + +/** + * \brief Doubles a value in the GF(2^128) field using XTS conventions. + * + * \param V The value to double, and the result. This array is + * assumed to be in littlen-endian order on entry and exit. + * + * This function differs from dbl() that it uses the conventions of XTS mode + * instead of those of NIST SP 800-38D (GCM). 
The two operations have + * equivalent security but the bits are ordered differently with the + * value shifted left instead of right. + * + * References: IEEE Std. 1619-2007, XTS-AES + * + * \sa dbl(), dblEAX(), mul() + */ +void GF128::dblXTS(uint32_t V[4]) +{ +#if defined(__AVR__) + __asm__ __volatile__ ( + "ld r16,Z\n" + "ldd r17,Z+1\n" + "ldd r18,Z+2\n" + "ldd r19,Z+3\n" + "lsl r16\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "std Z+1,r17\n" + "std Z+2,r18\n" + "std Z+3,r19\n" + "ldd r17,Z+4\n" + "ldd r18,Z+5\n" + "ldd r19,Z+6\n" + "ldd r20,Z+7\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "std Z+4,r17\n" + "std Z+5,r18\n" + "std Z+6,r19\n" + "std Z+7,r20\n" + "ldd r17,Z+8\n" + "ldd r18,Z+9\n" + "ldd r19,Z+10\n" + "ldd r20,Z+11\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "std Z+8,r17\n" + "std Z+9,r18\n" + "std Z+10,r19\n" + "std Z+11,r20\n" + "ldd r17,Z+12\n" + "ldd r18,Z+13\n" + "ldd r19,Z+14\n" + "ldd r20,Z+15\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "std Z+12,r17\n" + "std Z+13,r18\n" + "std Z+14,r19\n" + "std Z+15,r20\n" + "mov r17,__zero_reg__\n" + "sbc r17,__zero_reg__\n" + "andi r17,0x87\n" + "eor r16,r17\n" + "st Z,r16\n" + : : "z"(V) + : "r16", "r17", "r18", "r19", "r20" + ); +#else + uint32_t V0 = le32toh(V[0]); + uint32_t V1 = le32toh(V[1]); + uint32_t V2 = le32toh(V[2]); + uint32_t V3 = le32toh(V[3]); + uint32_t mask = ((~(V3 >> 31)) + 1) & 0x00000087; + V3 = (V3 << 1) | (V2 >> 31); + V2 = (V2 << 1) | (V1 >> 31); + V1 = (V1 << 1) | (V0 >> 31); + V0 = (V0 << 1) ^ mask; + V[0] = htole32(V0); + V[1] = htole32(V1); + V[2] = htole32(V2); + V[3] = htole32(V3); +#endif +} \ No newline at end of file diff --git a/src/GF128.h b/src/GF128.h new file mode 100644 index 0000000..a1f3dcd --- /dev/null +++ b/src/GF128.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_GF128_h +#define CRYPTO_GF128_h + +#include +#include + +class GF128 +{ +private: + GF128() {} + ~GF128() {} + +public: + static void mulInit(uint32_t H[4]); + static void mul(uint32_t Y[4], const uint32_t H[4]); + static void aesMul(uint32_t Y[4], const uint32_t H[4]); + static void dbl(uint32_t V[4]); + static void dblEAX(uint32_t V[4]); + static void dblXTS(uint32_t V[4]); +}; + +#endif diff --git a/src/GHASH.cpp b/src/GHASH.cpp new file mode 100644 index 0000000..5b6b4ce --- /dev/null +++ b/src/GHASH.cpp @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "GHASH.h" +#include "GF128.h" +#include "Crypto.h" +#include "AES.h" +#include +#include + +/** + * \class GHASH GHASH.h + * \brief Implementation of the GHASH message authenticator. + * + * GHASH is the message authentication part of Galois Counter Mode (GCM). + * + * \note GHASH is not the same as GMAC. GHASH implements the low level + * hashing primitive that is used by both GCM and GMAC. GMAC can be + * simulated using GCM and an empty plaintext/ciphertext. + * + * References: NIST SP 800-38D, + * http://en.wikipedia.org/wiki/Galois/Counter_Mode + * + * \sa GCM + */ + +/** + * \brief Constructs a new GHASH message authenticator. + */ +GHASH::GHASH() +{ + state.posn = 0; +} + +/** + * \brief Destroys this GHASH message authenticator. + */ +GHASH::~GHASH() +{ + clean(state); +} + +/** + * \brief Resets the GHASH message authenticator for a new session. 
+ * + * + * + * \sa update(), finalize() + */ +void GHASH::reset() +{ + GF128::mulInit(state.H); + memset(state.Y, 0, sizeof(state.Y)); + state.posn = 0; +} + +/* for some reason, the 32_t is not the in the correct endian-ness, such that converting from uint8_t to uin32_t is scrambling our data + * lines that ought be passed to the chip as bits 0123456789abcdef are being passed as 32107654ba98fecd + * this un-shifts the jumbled up text + * parameters + * jumbled(in) array to be unjumbled + * out (out) fixed array. + */ +void unjumble(uint32_t *jumbled){ + for(int parts = 0;parts<4; parts++){ + int a = (jumbled[parts] & 0x000000ff)<<24; + int b = (jumbled[parts] & 0x0000ff00)<<8; + int c = (jumbled[parts] & 0x00ff0000)>>8; + int d = (jumbled[parts] & 0xff000000)>>24; + uint32_t rightEndian =a^b^c^d; + uint32_t *rEptr = &rightEndian; + memcpy(jumbled+(parts), rEptr, 4); + } + } + + +/** + * \brief Updates the message authenticator with more data. + * + * \param data Data to be hashed. + * \param len Number of bytes of data to be hashed. + * + * If finalize() has already been called, then the behavior of update() will + * be undefined. Call reset() first to start a new authentication process. + * + * \sa pad(), reset(), finalize() + */ +void GHASH::update(const void *data, size_t len) +{ + // XOR the input with state.Y in 16-byte chunks and process them. 
+ const uint8_t *d = (const uint8_t *)data; + + while (len > 0) { + uint8_t size = 16 - state.posn; + if (size > len) + size = len; + uint8_t *y = ((uint8_t *)state.Y) + state.posn; + uint8_t ytemp[16] ={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + int grab; + int count =0; + int correctOrder; + int offset = 5; + for (uint8_t i = 0; i < size; ++i){ + offset-=2; + correctOrder = i + offset; + if(offset == -3){ + offset = 5; + } + ytemp[i] =d[i] ^ y[i]; + } + len -= size; + state.posn += size; + d += size; + + memcpy(state.Y, &ytemp, sizeof(ytemp)); + unjumble(state.Y); + if (state.posn == 16) { + GF128::aesMul(state.Y, state.H); + unjumble(state.Y); + state.posn = 0; + } + } +} + + + +/** + * \brief Finalizes the authentication process and returns the token. + * + * \param token The buffer to return the token value in. + * \param len The length of the \a token buffer between 0 and 16. + * + * If \a len is less than 16, then the token value will be truncated to + * the first \a len bytes. If \a len is greater than 16, then the remaining + * bytes will left unchanged. + * + * If finalize() is called again, then the returned \a token value is + * undefined. Call reset() first to start a new authentication process. + * + * \sa reset(), update() + */ +void GHASH::finalize(void *token, size_t len) +{ + // Pad with zeroes to a multiple of 16 bytes. + pad(); + + // The token is the current value of Y. + if (len > 16) + len = 16; + memcpy(token, state.Y, len); +} + +/** + * \brief Pads the input stream with zero bytes to a multiple of 16. + * + * \sa update() + */ +void GHASH::pad() +{ + if (state.posn != 0) { + // Padding involves XOR'ing the rest of state.Y with zeroes, + // which does nothing. Immediately process the next chunk. + GF128::aesMul(state.Y, state.H); + unjumble(state.Y); + state.posn = 0; + } +} + +/** + * \brief Clears the authenticator's state, removing all sensitive data. 
+ */ +void GHASH::clear() +{ + clean(state); +} + diff --git a/src/GHASH.h b/src/GHASH.h new file mode 100644 index 0000000..f121462 --- /dev/null +++ b/src/GHASH.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_GHASH_h +#define CRYPTO_GHASH_h + +#include +#include + +class GHASH +{ +public: + GHASH(); + ~GHASH(); + + void reset(); + void update(const void *data, size_t len); + void finalize(void *token, size_t len); + + void pad(); + + void clear(); + +private: + struct { + uint32_t H[4]; + uint32_t Y[4]; + uint8_t posn; + } state; +}; + +#endif diff --git a/src/Hash.cpp b/src/Hash.cpp new file mode 100644 index 0000000..ef13257 --- /dev/null +++ b/src/Hash.cpp @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "Hash.h" +#include + +/** + * \class Hash Hash.h + * \brief Abstract base class for cryptographic hash algorithms. + * + * \sa SHA256, SHA3_256, BLAKE2s + */ + +/** + * \brief Constructs a new hash object. + */ +Hash::Hash() +{ +} + +/** + * \brief Destroys this hash object. + * + * \note Subclasses are responsible for clearing any sensitive data + * that remains in the hash object when it is destroyed. + * + * \sa clear() + */ +Hash::~Hash() +{ +} + +/** + * \fn size_t Hash::hashSize() const + * \brief Size of the hash result from finalize(). + * + * \sa finalize(), blockSize() + */ + +/** + * \fn size_t Hash::blockSize() const + * \brief Size of the internal block used by the hash algorithm. + * + * \sa update(), hashSize() + */ + +/** + * \fn void Hash::reset() + * \brief Resets the hash ready for a new hashing process. 
+ * + * \sa update(), finalize(), resetHMAC() + */ + +/** + * \fn void Hash::update(const void *data, size_t len) + * \brief Updates the hash with more data. + * + * \param data Data to be hashed. + * \param len Number of bytes of data to be hashed. + * + * If finalize() has already been called, then the behavior of update() will + * be undefined. Call reset() first to start a new hashing process. + * + * \sa reset(), finalize() + */ + +/** + * \fn void Hash::finalize(void *hash, size_t len) + * \brief Finalizes the hashing process and returns the hash. + * + * \param hash The buffer to return the hash value in. + * \param len The length of the \a hash buffer, normally hashSize(). + * + * If \a len is less than hashSize(), then the hash value will be + * truncated to the first \a len bytes. If \a len is greater than + * hashSize(), then the remaining bytes will left unchanged. + * + * If finalize() is called again, then the returned \a hash value is + * undefined. Call reset() first to start a new hashing process. + * + * \sa reset(), update(), finalizeHMAC() + */ + +/** + * \fn void Hash::clear() + * \brief Clears the hash state, removing all sensitive data, and then + * resets the hash ready for a new hashing process. + * + * \sa reset() + */ + +/** + * \fn void Hash::resetHMAC(const void *key, size_t keyLen) + * \brief Resets the hash ready for a new HMAC hashing process. + * + * \param key Points to the HMAC key for the hashing process. + * \param keyLen Size of the HMAC \a key in bytes. + * + * The following example computes a HMAC over a series of data blocks + * with a specific key: + * + * \code + * hash.resetHMAC(key, sizeof(key)); + * hash.update(data1, sizeof(data1)); + * hash.update(data2, sizeof(data2)); + * ... + * hash.update(dataN, sizeof(dataN)); + * hash.finalizeHMAC(key, sizeof(key), hmac, sizeof(hmac)); + * \endcode + * + * The same key must be passed to both resetHMAC() and finalizeHMAC(). 
+ * + * \sa finalizeHMAC(), reset() + */ + +/** + * \fn void Hash::finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen) + * \brief Finalizes the HMAC hashing process and returns the hash. + * + * \param key Points to the HMAC key for the hashing process. The contents + * of this array must be identical to the value passed to resetHMAC(). + * \param keyLen Size of the HMAC \a key in bytes. + * \param hash The buffer to return the hash value in. + * \param hashLen The length of the \a hash buffer, normally hashSize(). + * + * \sa resetHMAC(), finalize() + */ + +/** + * \brief Formats a HMAC key into a block. + * + * \param block The block to format the key into. Must be at least + * blockSize() bytes in length. + * \param key Points to the HMAC key for the hashing process. + * \param len Length of the HMAC \a key in bytes. + * \param pad Inner (0x36) or outer (0x5C) padding value to XOR with + * the formatted HMAC key. + * + * This function is intended to help subclasses implement resetHMAC() and + * finalizeHMAC() by directly formatting the HMAC key into the subclass's + * internal block buffer and resetting the hash. + */ +void Hash::formatHMACKey(void *block, const void *key, size_t len, uint8_t pad) +{ + size_t size = blockSize(); + reset(); + if (len <= size) { + memcpy(block, key, len); + } else { + update(key, len); + len = hashSize(); + finalize(block, len); + reset(); + } + uint8_t *b = (uint8_t *)block; + memset(b + len, pad, size - len); + while (len > 0) { + *b++ ^= pad; + --len; + } +} diff --git a/src/Hash.h b/src/Hash.h new file mode 100644 index 0000000..2f7db16 --- /dev/null +++ b/src/Hash.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_HASH_h +#define CRYPTO_HASH_h + +#include +#include + +class Hash +{ +public: + Hash(); + virtual ~Hash(); + + virtual size_t hashSize() const = 0; + virtual size_t blockSize() const = 0; + + virtual void reset() = 0; + virtual void update(const void *data, size_t len) = 0; + virtual void finalize(void *hash, size_t len) = 0; + + virtual void clear() = 0; + + virtual void resetHMAC(const void *key, size_t keyLen) = 0; + virtual void finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen) = 0; + +protected: + void formatHMACKey(void *block, const void *key, size_t len, uint8_t pad); +}; + +#endif diff --git a/src/KeccakCore.cpp b/src/KeccakCore.cpp new file mode 100644 index 0000000..4cc108a --- /dev/null +++ b/src/KeccakCore.cpp @@ -0,0 +1,2013 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "KeccakCore.h" +#include "Crypto.h" +#include "utility/EndianUtil.h" +#include "utility/RotateUtil.h" +#include "utility/ProgMemUtil.h" +#include + +/** + * \class KeccakCore KeccakCore.h + * \brief Keccak core sponge function. + * + * KeccakCore provides the core sponge function for different capacities. + * It is used to implement algorithms such as SHA3 and SHAKE. + * + * References: http://en.wikipedia.org/wiki/SHA-3 + * + * \sa SHA3_256, SHAKE256 + */ + +#if !defined(CRYPTO_LITTLE_ENDIAN) +// All of the Arduino platforms we care about are little-endian. +#error "KeccakCore is not supported on big-endian platforms yet - todo" +#endif + +/** + * \brief Constructs a new Keccak sponge function. + * + * The capacity() will initially be set to 1536, which normally won't be + * of much use to the caller. 
The constructor should be followed by a + * call to setCapacity() to select the capacity of interest. + */ +KeccakCore::KeccakCore() + : _blockSize(8) +{ + memset(state.A, 0, sizeof(state.A)); + state.inputSize = 0; + state.outputSize = 0; +} + +/** + * \brief Destroys this Keccak sponge function after clearing all + * sensitive information. + */ +KeccakCore::~KeccakCore() +{ + clean(state); +} + +/** + * \brief Returns the capacity of the sponge function in bits. + * + * \sa setCapacity(), blockSize() + */ +size_t KeccakCore::capacity() const +{ + return 1600 - ((size_t)_blockSize) * 8; +} + +/** + * \brief Sets the capacity of the Keccak sponge function in bits. + * + * \param capacity The capacity of the Keccak sponge function in bits which + * should be a multiple of 64 and between 64 and 1536. + * + * \note It is possible to create a sponge function with this constructor that + * doesn't strictly conform with the capacity and hash size constraints + * defined in the relevant standards. It is the responsibility of callers + * to only use standard parameter combinations. + * + * \sa capacity(), blockSize() + */ +void KeccakCore::setCapacity(size_t capacity) +{ + _blockSize = (1600 - capacity) / 8; + reset(); +} + +/** + * \fn size_t KeccakCore::blockSize() const + * \brief Returns the input block size for the sponge function in bytes. + * + * The block size is (1600 - capacity()) / 8. + * + * \sa capacity() + */ + +/** + * \brief Resets the Keccak sponge function ready for a new session. + * + * \sa update(), extract() + */ +void KeccakCore::reset() +{ + memset(state.A, 0, sizeof(state.A)); + state.inputSize = 0; + state.outputSize = 0; +} + +/** + * \brief Updates the Keccak sponge function with more input data. + * + * \param data The extra input data to incorporate. + * \param size The size of the new data to incorporate. + * + * This function will invoke the sponge function whenever a full blockSize() + * bytes of input data have been accumulated. 
Call pad() after the last + * block to finalize the input before calling extract(). + * + * \sa pad(), extract(), reset() + */ +void KeccakCore::update(const void *data, size_t size) +{ + // Stop generating output while we incorporate the new data. + state.outputSize = 0; + + // Break the input up into chunks and process each in turn. + const uint8_t *d = (const uint8_t *)data; + while (size > 0) { + uint8_t len = _blockSize - state.inputSize; + if (len > size) + len = size; + uint8_t *Abytes = ((uint8_t *)state.A) + state.inputSize; + for (uint8_t posn = 0; posn < len; ++posn) + Abytes[posn] ^= d[posn]; + state.inputSize += len; + size -= len; + d += len; + if (state.inputSize == _blockSize) { + keccakp(); + state.inputSize = 0; + } + } +} + +/** + * \brief Pads the last block of input data to blockSize(). + * + * \param tag The tag byte to add to the padding to identify SHA3 (0x06), + * SHAKE (0x1F), or the plain pre-standardized version of Keccak (0x01). + * + * The sponge function will be invoked to process the completed padding block. + * + * \sa update(), extract() + */ +void KeccakCore::pad(uint8_t tag) +{ + // Padding for SHA3-NNN variants according to FIPS 202 appends "01", + // then another "1", then many zero bits, followed by a final "1". + // SHAKE appends "1111" first instead of "01". Note that SHA-3 numbers + // bits from the least significant, so appending "01" is equivalent + // to 0x02 for byte-aligned data, not 0x40. + uint8_t size = state.inputSize; + uint64_t *Awords = &(state.A[0][0]); + Awords[size / 8] ^= (((uint64_t)tag) << ((size % 8) * 8)); + Awords[(_blockSize - 1) / 8] ^= 0x8000000000000000ULL; + keccakp(); + state.inputSize = 0; + state.outputSize = 0; +} + +/** + * \brief Extracts data from the Keccak sponge function. + * + * \param data The data buffer to fill with extracted data. + * \param size The number number of bytes of extracted data that are required. 
+ * + * If more than blockSize() bytes are required, the sponge function will + * be invoked to generate additional data. + * + * \sa update(), reset(), encrypt() + */ +void KeccakCore::extract(void *data, size_t size) +{ + // Stop accepting input while we are generating output. + state.inputSize = 0; + + // Copy the output data into the caller's return buffer. + uint8_t *d = (uint8_t *)data; + uint8_t tempSize; + while (size > 0) { + // Generate another output block if the current one has been exhausted. + if (state.outputSize >= _blockSize) { + keccakp(); + state.outputSize = 0; + } + + // How many bytes can we copy this time around? + tempSize = _blockSize - state.outputSize; + if (tempSize > size) + tempSize = size; + + // Copy the partial output data into the caller's return buffer. + memcpy(d, ((uint8_t *)(state.A)) + state.outputSize, tempSize); + state.outputSize += tempSize; + size -= tempSize; + d += tempSize; + } +} + +/** + * \brief Extracts data from the Keccak sponge function and uses it to + * encrypt a buffer. + * + * \param output The output buffer to write to, which may be the same + * buffer as \a input. The \a output buffer must have at least as many + * bytes as the \a input buffer. + * \param input The input buffer to read from. + * \param size The number of bytes to encrypt. + * + * This function extracts data from the sponge function and then XOR's + * it with \a input to generate the \a output. + * + * If more than blockSize() bytes are required, the sponge function will + * be invoked to generate additional data. + * + * \sa update(), reset(), extract() + */ +void KeccakCore::encrypt(void *output, const void *input, size_t size) +{ + // Stop accepting input while we are generating output. + state.inputSize = 0; + + // Copy the output data into the caller's return buffer. 
+ uint8_t *out = (uint8_t *)output; + const uint8_t *in = (const uint8_t *)input; + uint8_t tempSize; + while (size > 0) { + // Generate another output block if the current one has been exhausted. + if (state.outputSize >= _blockSize) { + keccakp(); + state.outputSize = 0; + } + + // How many bytes can we extract this time around? + tempSize = _blockSize - state.outputSize; + if (tempSize > size) + tempSize = size; + + // XOR the partial output data into the caller's return buffer. + const uint8_t *d = ((const uint8_t *)(state.A)) + state.outputSize; + for (uint8_t index = 0; index < tempSize; ++index) + out[index] = in[index] ^ d[index]; + state.outputSize += tempSize; + size -= tempSize; + out += tempSize; + in += tempSize; + } +} + +/** + * \brief Clears all sensitive data from this object. + */ +void KeccakCore::clear() +{ + clean(state); +} + +/** + * \brief Sets a HMAC key for a Keccak-based hash algorithm. + * + * \param key Points to the HMAC key for the hashing process. + * \param len Length of the HMAC \a key in bytes. + * \param pad Inner (0x36) or outer (0x5C) padding value to XOR with + * the formatted HMAC key. + * \param hashSize The size of the output from the hash algorithm. + * + * This function is intended to help classes implement Hash::resetHMAC() and + * Hash::finalizeHMAC() by directly formatting the HMAC key into the + * internal block buffer and resetting the hash. + */ +void KeccakCore::setHMACKey(const void *key, size_t len, uint8_t pad, size_t hashSize) +{ + uint8_t *Abytes = (uint8_t *)state.A; + size_t size = blockSize(); + reset(); + if (len <= size) { + // Because the state has just been reset, state.A is set to + // all-zeroes. We can copy the key directly into the state + // and then XOR the block with the pad value. + memcpy(Abytes, key, len); + } else { + // The key is larger than the block size. Hash it down. + // Afterwards, state.A will contain the first block of data + // to be extracted. 
We truncate it to the first "hashSize" + // bytes and XOR with the padding. + update(key, len); + this->pad(0x06); + memset(Abytes + hashSize, pad, size - hashSize); + memset(Abytes + size, 0, sizeof(state.A) - size); + size = hashSize; + } + while (size > 0) { + *Abytes++ ^= pad; + --size; + } + keccakp(); +} + +/** + * \brief Transform the state with the KECCAK-p sponge function with b = 1600. + */ +void KeccakCore::keccakp() +{ + uint64_t B[5][5]; +#if defined(__AVR__) + // This assembly code was generated by the "genkeccak.c" program. + // Do not modify this code directly. Instead modify "genkeccak.c" + // and then re-generate the code here. + for (uint8_t round = 0; round < 24; ++round) { + __asm__ __volatile__ ( + "push r29\n" + "push r28\n" + "mov r28,r26\n" + "mov r29,r27\n" + + // Step mapping theta. Compute C. + "ldi r20,5\n" + "100:\n" + "ld r8,Z\n" + "ldd r9,Z+1\n" + "ldd r10,Z+2\n" + "ldd r11,Z+3\n" + "ldd r12,Z+4\n" + "ldd r13,Z+5\n" + "ldd r14,Z+6\n" + "ldd r15,Z+7\n" + "ldi r19,4\n" + "101:\n" + "adiw r30,40\n" + "ld __tmp_reg__,Z\n" + "eor r8,__tmp_reg__\n" + "ldd __tmp_reg__,Z+1\n" + "eor r9,__tmp_reg__\n" + "ldd __tmp_reg__,Z+2\n" + "eor r10,__tmp_reg__\n" + "ldd __tmp_reg__,Z+3\n" + "eor r11,__tmp_reg__\n" + "ldd __tmp_reg__,Z+4\n" + "eor r12,__tmp_reg__\n" + "ldd __tmp_reg__,Z+5\n" + "eor r13,__tmp_reg__\n" + "ldd __tmp_reg__,Z+6\n" + "eor r14,__tmp_reg__\n" + "ldd __tmp_reg__,Z+7\n" + "eor r15,__tmp_reg__\n" + "dec r19\n" + "brne 101b\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "subi r30,152\n" + "sbc r31,__zero_reg__\n" + "dec r20\n" + "brne 100b\n" + "sbiw r30,40\n" + "sbiw r26,40\n" + + // Step mapping theta. Compute D and XOR with A. 
+ "ldd r8,Y+8\n" + "ldd r9,Y+9\n" + "ldd r10,Y+10\n" + "ldd r11,Y+11\n" + "ldd r12,Y+12\n" + "ldd r13,Y+13\n" + "ldd r14,Y+14\n" + "ldd r15,Y+15\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "ldd __tmp_reg__,Y+32\n" + "eor r8,__tmp_reg__\n" + "ldd __tmp_reg__,Y+33\n" + "eor r9,__tmp_reg__\n" + "ldd __tmp_reg__,Y+34\n" + "eor r10,__tmp_reg__\n" + "ldd __tmp_reg__,Y+35\n" + "eor r11,__tmp_reg__\n" + "ldd __tmp_reg__,Y+36\n" + "eor r12,__tmp_reg__\n" + "ldd __tmp_reg__,Y+37\n" + "eor r13,__tmp_reg__\n" + "ldd __tmp_reg__,Y+38\n" + "eor r14,__tmp_reg__\n" + "ldd __tmp_reg__,Y+39\n" + "eor r15,__tmp_reg__\n" + "ldi r19,5\n" + "103:\n" + "ld __tmp_reg__,Z\n" + "eor __tmp_reg__,r8\n" + "st Z,__tmp_reg__\n" + "ldd __tmp_reg__,Z+1\n" + "eor __tmp_reg__,r9\n" + "std Z+1,__tmp_reg__\n" + "ldd __tmp_reg__,Z+2\n" + "eor __tmp_reg__,r10\n" + "std Z+2,__tmp_reg__\n" + "ldd __tmp_reg__,Z+3\n" + "eor __tmp_reg__,r11\n" + "std Z+3,__tmp_reg__\n" + "ldd __tmp_reg__,Z+4\n" + "eor __tmp_reg__,r12\n" + "std Z+4,__tmp_reg__\n" + "ldd __tmp_reg__,Z+5\n" + "eor __tmp_reg__,r13\n" + "std Z+5,__tmp_reg__\n" + "ldd __tmp_reg__,Z+6\n" + "eor __tmp_reg__,r14\n" + "std Z+6,__tmp_reg__\n" + "ldd __tmp_reg__,Z+7\n" + "eor __tmp_reg__,r15\n" + "std Z+7,__tmp_reg__\n" + "adiw r30,40\n" + "dec r19\n" + "brne 103b\n" + "subi r30,192\n" + "sbc r31,__zero_reg__\n" + "ldd r8,Y+16\n" + "ldd r9,Y+17\n" + "ldd r10,Y+18\n" + "ldd r11,Y+19\n" + "ldd r12,Y+20\n" + "ldd r13,Y+21\n" + "ldd r14,Y+22\n" + "ldd r15,Y+23\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "ld __tmp_reg__,Y\n" + "eor r8,__tmp_reg__\n" + "ldd __tmp_reg__,Y+1\n" + "eor r9,__tmp_reg__\n" + "ldd __tmp_reg__,Y+2\n" + "eor r10,__tmp_reg__\n" + "ldd __tmp_reg__,Y+3\n" + "eor r11,__tmp_reg__\n" + "ldd __tmp_reg__,Y+4\n" + "eor r12,__tmp_reg__\n" + "ldd 
__tmp_reg__,Y+5\n" + "eor r13,__tmp_reg__\n" + "ldd __tmp_reg__,Y+6\n" + "eor r14,__tmp_reg__\n" + "ldd __tmp_reg__,Y+7\n" + "eor r15,__tmp_reg__\n" + "ldi r19,5\n" + "104:\n" + "ld __tmp_reg__,Z\n" + "eor __tmp_reg__,r8\n" + "st Z,__tmp_reg__\n" + "ldd __tmp_reg__,Z+1\n" + "eor __tmp_reg__,r9\n" + "std Z+1,__tmp_reg__\n" + "ldd __tmp_reg__,Z+2\n" + "eor __tmp_reg__,r10\n" + "std Z+2,__tmp_reg__\n" + "ldd __tmp_reg__,Z+3\n" + "eor __tmp_reg__,r11\n" + "std Z+3,__tmp_reg__\n" + "ldd __tmp_reg__,Z+4\n" + "eor __tmp_reg__,r12\n" + "std Z+4,__tmp_reg__\n" + "ldd __tmp_reg__,Z+5\n" + "eor __tmp_reg__,r13\n" + "std Z+5,__tmp_reg__\n" + "ldd __tmp_reg__,Z+6\n" + "eor __tmp_reg__,r14\n" + "std Z+6,__tmp_reg__\n" + "ldd __tmp_reg__,Z+7\n" + "eor __tmp_reg__,r15\n" + "std Z+7,__tmp_reg__\n" + "adiw r30,40\n" + "dec r19\n" + "brne 104b\n" + "subi r30,192\n" + "sbc r31,__zero_reg__\n" + "ldd r8,Y+24\n" + "ldd r9,Y+25\n" + "ldd r10,Y+26\n" + "ldd r11,Y+27\n" + "ldd r12,Y+28\n" + "ldd r13,Y+29\n" + "ldd r14,Y+30\n" + "ldd r15,Y+31\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "ldd __tmp_reg__,Y+8\n" + "eor r8,__tmp_reg__\n" + "ldd __tmp_reg__,Y+9\n" + "eor r9,__tmp_reg__\n" + "ldd __tmp_reg__,Y+10\n" + "eor r10,__tmp_reg__\n" + "ldd __tmp_reg__,Y+11\n" + "eor r11,__tmp_reg__\n" + "ldd __tmp_reg__,Y+12\n" + "eor r12,__tmp_reg__\n" + "ldd __tmp_reg__,Y+13\n" + "eor r13,__tmp_reg__\n" + "ldd __tmp_reg__,Y+14\n" + "eor r14,__tmp_reg__\n" + "ldd __tmp_reg__,Y+15\n" + "eor r15,__tmp_reg__\n" + "ldi r19,5\n" + "105:\n" + "ld __tmp_reg__,Z\n" + "eor __tmp_reg__,r8\n" + "st Z,__tmp_reg__\n" + "ldd __tmp_reg__,Z+1\n" + "eor __tmp_reg__,r9\n" + "std Z+1,__tmp_reg__\n" + "ldd __tmp_reg__,Z+2\n" + "eor __tmp_reg__,r10\n" + "std Z+2,__tmp_reg__\n" + "ldd __tmp_reg__,Z+3\n" + "eor __tmp_reg__,r11\n" + "std Z+3,__tmp_reg__\n" + "ldd __tmp_reg__,Z+4\n" + "eor __tmp_reg__,r12\n" + "std 
Z+4,__tmp_reg__\n" + "ldd __tmp_reg__,Z+5\n" + "eor __tmp_reg__,r13\n" + "std Z+5,__tmp_reg__\n" + "ldd __tmp_reg__,Z+6\n" + "eor __tmp_reg__,r14\n" + "std Z+6,__tmp_reg__\n" + "ldd __tmp_reg__,Z+7\n" + "eor __tmp_reg__,r15\n" + "std Z+7,__tmp_reg__\n" + "adiw r30,40\n" + "dec r19\n" + "brne 105b\n" + "subi r30,192\n" + "sbc r31,__zero_reg__\n" + "ldd r8,Y+32\n" + "ldd r9,Y+33\n" + "ldd r10,Y+34\n" + "ldd r11,Y+35\n" + "ldd r12,Y+36\n" + "ldd r13,Y+37\n" + "ldd r14,Y+38\n" + "ldd r15,Y+39\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "ldd __tmp_reg__,Y+16\n" + "eor r8,__tmp_reg__\n" + "ldd __tmp_reg__,Y+17\n" + "eor r9,__tmp_reg__\n" + "ldd __tmp_reg__,Y+18\n" + "eor r10,__tmp_reg__\n" + "ldd __tmp_reg__,Y+19\n" + "eor r11,__tmp_reg__\n" + "ldd __tmp_reg__,Y+20\n" + "eor r12,__tmp_reg__\n" + "ldd __tmp_reg__,Y+21\n" + "eor r13,__tmp_reg__\n" + "ldd __tmp_reg__,Y+22\n" + "eor r14,__tmp_reg__\n" + "ldd __tmp_reg__,Y+23\n" + "eor r15,__tmp_reg__\n" + "ldi r19,5\n" + "106:\n" + "ld __tmp_reg__,Z\n" + "eor __tmp_reg__,r8\n" + "st Z,__tmp_reg__\n" + "ldd __tmp_reg__,Z+1\n" + "eor __tmp_reg__,r9\n" + "std Z+1,__tmp_reg__\n" + "ldd __tmp_reg__,Z+2\n" + "eor __tmp_reg__,r10\n" + "std Z+2,__tmp_reg__\n" + "ldd __tmp_reg__,Z+3\n" + "eor __tmp_reg__,r11\n" + "std Z+3,__tmp_reg__\n" + "ldd __tmp_reg__,Z+4\n" + "eor __tmp_reg__,r12\n" + "std Z+4,__tmp_reg__\n" + "ldd __tmp_reg__,Z+5\n" + "eor __tmp_reg__,r13\n" + "std Z+5,__tmp_reg__\n" + "ldd __tmp_reg__,Z+6\n" + "eor __tmp_reg__,r14\n" + "std Z+6,__tmp_reg__\n" + "ldd __tmp_reg__,Z+7\n" + "eor __tmp_reg__,r15\n" + "std Z+7,__tmp_reg__\n" + "adiw r30,40\n" + "dec r19\n" + "brne 106b\n" + "subi r30,192\n" + "sbc r31,__zero_reg__\n" + "ld r8,Y\n" + "ldd r9,Y+1\n" + "ldd r10,Y+2\n" + "ldd r11,Y+3\n" + "ldd r12,Y+4\n" + "ldd r13,Y+5\n" + "ldd r14,Y+6\n" + "ldd r15,Y+7\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" 
+ "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "ldd __tmp_reg__,Y+24\n" + "eor r8,__tmp_reg__\n" + "ldd __tmp_reg__,Y+25\n" + "eor r9,__tmp_reg__\n" + "ldd __tmp_reg__,Y+26\n" + "eor r10,__tmp_reg__\n" + "ldd __tmp_reg__,Y+27\n" + "eor r11,__tmp_reg__\n" + "ldd __tmp_reg__,Y+28\n" + "eor r12,__tmp_reg__\n" + "ldd __tmp_reg__,Y+29\n" + "eor r13,__tmp_reg__\n" + "ldd __tmp_reg__,Y+30\n" + "eor r14,__tmp_reg__\n" + "ldd __tmp_reg__,Y+31\n" + "eor r15,__tmp_reg__\n" + "ldi r19,5\n" + "107:\n" + "ld __tmp_reg__,Z\n" + "eor __tmp_reg__,r8\n" + "st Z,__tmp_reg__\n" + "ldd __tmp_reg__,Z+1\n" + "eor __tmp_reg__,r9\n" + "std Z+1,__tmp_reg__\n" + "ldd __tmp_reg__,Z+2\n" + "eor __tmp_reg__,r10\n" + "std Z+2,__tmp_reg__\n" + "ldd __tmp_reg__,Z+3\n" + "eor __tmp_reg__,r11\n" + "std Z+3,__tmp_reg__\n" + "ldd __tmp_reg__,Z+4\n" + "eor __tmp_reg__,r12\n" + "std Z+4,__tmp_reg__\n" + "ldd __tmp_reg__,Z+5\n" + "eor __tmp_reg__,r13\n" + "std Z+5,__tmp_reg__\n" + "ldd __tmp_reg__,Z+6\n" + "eor __tmp_reg__,r14\n" + "std Z+6,__tmp_reg__\n" + "ldd __tmp_reg__,Z+7\n" + "eor __tmp_reg__,r15\n" + "std Z+7,__tmp_reg__\n" + "adiw r30,40\n" + "dec r19\n" + "brne 107b\n" + "subi r30,232\n" + "sbc r31,__zero_reg__\n" + + // Step mappings rho and pi combined into one step. 
+ + // B[0][0] = A[0][0] + "ld r8,Z\n" + "ldd r9,Z+1\n" + "ldd r10,Z+2\n" + "ldd r11,Z+3\n" + "ldd r12,Z+4\n" + "ldd r13,Z+5\n" + "ldd r14,Z+6\n" + "ldd r15,Z+7\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + + // B[1][0] = leftRotate28_64(A[0][3]) + "adiw r26,32\n" + "ldd r8,Z+24\n" + "ldd r9,Z+25\n" + "ldd r10,Z+26\n" + "ldd r11,Z+27\n" + "ldd r12,Z+28\n" + "ldd r13,Z+29\n" + "ldd r14,Z+30\n" + "ldd r15,Z+31\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + + // B[2][0] = leftRotate1_64(A[0][1]) + "adiw r26,32\n" + "ldd r8,Z+8\n" + "ldd r9,Z+9\n" + "ldd r10,Z+10\n" + "ldd r11,Z+11\n" + "ldd r12,Z+12\n" + "ldd r13,Z+13\n" + "ldd r14,Z+14\n" + "ldd r15,Z+15\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + + // B[3][0] = leftRotate27_64(A[0][4]) + "adiw r26,32\n" + "ldd r8,Z+32\n" + "ldd r9,Z+33\n" + "ldd r10,Z+34\n" + "ldd r11,Z+35\n" + "ldd r12,Z+36\n" + "ldd r13,Z+37\n" + "ldd r14,Z+38\n" + "ldd r15,Z+39\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl 
r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + + // B[4][0] = leftRotate62_64(A[0][2]) + "adiw r26,32\n" + "ldd r8,Z+16\n" + "ldd r9,Z+17\n" + "ldd r10,Z+18\n" + "ldd r11,Z+19\n" + "ldd r12,Z+20\n" + "ldd r13,Z+21\n" + "ldd r14,Z+22\n" + "ldd r15,Z+23\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + + // B[0][1] = leftRotate44_64(A[1][1]) + "subi r26,160\n" + "sbc r27,__zero_reg__\n" + "adiw r30,40\n" + "ldd r8,Z+8\n" + "ldd r9,Z+9\n" + "ldd r10,Z+10\n" + "ldd r11,Z+11\n" + "ldd r12,Z+12\n" + "ldd r13,Z+13\n" + "ldd r14,Z+14\n" + "ldd r15,Z+15\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + + // B[1][1] = leftRotate20_64(A[1][4]) + "adiw r26,32\n" 
+ "ldd r8,Z+32\n" + "ldd r9,Z+33\n" + "ldd r10,Z+34\n" + "ldd r11,Z+35\n" + "ldd r12,Z+36\n" + "ldd r13,Z+37\n" + "ldd r14,Z+38\n" + "ldd r15,Z+39\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + + // B[2][1] = leftRotate6_64(A[1][2]) + "adiw r26,32\n" + "ldd r8,Z+16\n" + "ldd r9,Z+17\n" + "ldd r10,Z+18\n" + "ldd r11,Z+19\n" + "ldd r12,Z+20\n" + "ldd r13,Z+21\n" + "ldd r14,Z+22\n" + "ldd r15,Z+23\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + + // B[3][1] = leftRotate36_64(A[1][0]) + "adiw r26,32\n" + "ld r8,Z\n" + "ldd r9,Z+1\n" + "ldd r10,Z+2\n" + "ldd r11,Z+3\n" + "ldd r12,Z+4\n" + "ldd r13,Z+5\n" + "ldd r14,Z+6\n" + "ldd r15,Z+7\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + 
"rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + + // B[4][1] = leftRotate55_64(A[1][3]) + "adiw r26,32\n" + "ldd r8,Z+24\n" + "ldd r9,Z+25\n" + "ldd r10,Z+26\n" + "ldd r11,Z+27\n" + "ldd r12,Z+28\n" + "ldd r13,Z+29\n" + "ldd r14,Z+30\n" + "ldd r15,Z+31\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + + // B[0][2] = leftRotate43_64(A[2][2]) + "subi r26,160\n" + "sbc r27,__zero_reg__\n" + "adiw r30,40\n" + "ldd r8,Z+16\n" + "ldd r9,Z+17\n" + "ldd r10,Z+18\n" + "ldd r11,Z+19\n" + "ldd r12,Z+20\n" + "ldd r13,Z+21\n" + "ldd r14,Z+22\n" + "ldd r15,Z+23\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + + // B[1][2] = leftRotate3_64(A[2][0]) + "adiw r26,32\n" + "ld r8,Z\n" + "ldd r9,Z+1\n" + "ldd r10,Z+2\n" + "ldd r11,Z+3\n" + "ldd r12,Z+4\n" + "ldd r13,Z+5\n" + "ldd r14,Z+6\n" + "ldd r15,Z+7\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol 
r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + + // B[2][2] = leftRotate25_64(A[2][3]) + "adiw r26,32\n" + "ldd r8,Z+24\n" + "ldd r9,Z+25\n" + "ldd r10,Z+26\n" + "ldd r11,Z+27\n" + "ldd r12,Z+28\n" + "ldd r13,Z+29\n" + "ldd r14,Z+30\n" + "ldd r15,Z+31\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + + // B[3][2] = leftRotate10_64(A[2][1]) + "adiw r26,32\n" + "ldd r8,Z+8\n" + "ldd r9,Z+9\n" + "ldd r10,Z+10\n" + "ldd r11,Z+11\n" + "ldd r12,Z+12\n" + "ldd r13,Z+13\n" + "ldd r14,Z+14\n" + "ldd r15,Z+15\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + + // B[4][2] = leftRotate39_64(A[2][4]) + "adiw r26,32\n" + "ldd r8,Z+32\n" + "ldd r9,Z+33\n" + "ldd r10,Z+34\n" + "ldd r11,Z+35\n" + "ldd r12,Z+36\n" + "ldd r13,Z+37\n" + "ldd r14,Z+38\n" + "ldd r15,Z+39\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + + // B[0][3] = leftRotate21_64(A[3][3]) + "subi r26,160\n" + "sbc r27,__zero_reg__\n" + "adiw r30,40\n" + "ldd 
r8,Z+24\n" + "ldd r9,Z+25\n" + "ldd r10,Z+26\n" + "ldd r11,Z+27\n" + "ldd r12,Z+28\n" + "ldd r13,Z+29\n" + "ldd r14,Z+30\n" + "ldd r15,Z+31\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + + // B[1][3] = leftRotate45_64(A[3][1]) + "adiw r26,32\n" + "ldd r8,Z+8\n" + "ldd r9,Z+9\n" + "ldd r10,Z+10\n" + "ldd r11,Z+11\n" + "ldd r12,Z+12\n" + "ldd r13,Z+13\n" + "ldd r14,Z+14\n" + "ldd r15,Z+15\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + + // B[2][3] = leftRotate8_64(A[3][4]) + "adiw r26,32\n" + "ldd r8,Z+32\n" + "ldd r9,Z+33\n" + "ldd r10,Z+34\n" + "ldd r11,Z+35\n" + "ldd r12,Z+36\n" + "ldd r13,Z+37\n" + "ldd r14,Z+38\n" + "ldd r15,Z+39\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + + // B[3][3] = leftRotate15_64(A[3][2]) + "adiw r26,32\n" + "ldd r8,Z+16\n" + "ldd r9,Z+17\n" + "ldd r10,Z+18\n" + "ldd r11,Z+19\n" + "ldd r12,Z+20\n" + "ldd r13,Z+21\n" + "ldd r14,Z+22\n" + "ldd r15,Z+23\n" + 
"bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + + // B[4][3] = leftRotate41_64(A[3][0]) + "adiw r26,32\n" + "ld r8,Z\n" + "ldd r9,Z+1\n" + "ldd r10,Z+2\n" + "ldd r11,Z+3\n" + "ldd r12,Z+4\n" + "ldd r13,Z+5\n" + "ldd r14,Z+6\n" + "ldd r15,Z+7\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + + // B[0][4] = leftRotate14_64(A[4][4]) + "subi r26,160\n" + "sbc r27,__zero_reg__\n" + "adiw r30,40\n" + "ldd r8,Z+32\n" + "ldd r9,Z+33\n" + "ldd r10,Z+34\n" + "ldd r11,Z+35\n" + "ldd r12,Z+36\n" + "ldd r13,Z+37\n" + "ldd r14,Z+38\n" + "ldd r15,Z+39\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + + // B[1][4] = leftRotate61_64(A[4][2]) + "adiw r26,32\n" + "ldd r8,Z+16\n" + "ldd r9,Z+17\n" + "ldd r10,Z+18\n" + "ldd r11,Z+19\n" + "ldd r12,Z+20\n" + "ldd r13,Z+21\n" + "ldd r14,Z+22\n" + "ldd r15,Z+23\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + "ror r8\n" + "bld r15,7\n" + "bst r8,0\n" + "ror r15\n" + "ror r14\n" + "ror r13\n" + "ror r12\n" + "ror r11\n" + "ror r10\n" + "ror r9\n" + 
"ror r8\n" + "bld r15,7\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + + // B[2][4] = leftRotate18_64(A[4][0]) + "adiw r26,32\n" + "ld r8,Z\n" + "ldd r9,Z+1\n" + "ldd r10,Z+2\n" + "ldd r11,Z+3\n" + "ldd r12,Z+4\n" + "ldd r13,Z+5\n" + "ldd r14,Z+6\n" + "ldd r15,Z+7\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + + // B[3][4] = leftRotate56_64(A[4][3]) + "adiw r26,32\n" + "ldd r8,Z+24\n" + "ldd r9,Z+25\n" + "ldd r10,Z+26\n" + "ldd r11,Z+27\n" + "ldd r12,Z+28\n" + "ldd r13,Z+29\n" + "ldd r14,Z+30\n" + "ldd r15,Z+31\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "st X+,r8\n" + + // B[4][4] = leftRotate2_64(A[4][1]) + "adiw r26,32\n" + "ldd r8,Z+8\n" + "ldd r9,Z+9\n" + "ldd r10,Z+10\n" + "ldd r11,Z+11\n" + "ldd r12,Z+12\n" + "ldd r13,Z+13\n" + "ldd r14,Z+14\n" + "ldd r15,Z+15\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "lsl r8\n" + "rol r9\n" + "rol r10\n" + "rol r11\n" + "rol r12\n" + "rol r13\n" + "rol r14\n" + "rol r15\n" + "adc r8, __zero_reg__\n" + "st X+,r8\n" + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + "subi r26,200\n" + "sbc r27,__zero_reg__\n" + "subi r30,160\n" + "sbc r31,__zero_reg__\n" + + // Step mapping chi. 
+ "ldi r20,5\n" + "50:\n" + "ld r8,Y\n" + "ldd r9,Y+8\n" + "ldd r10,Y+16\n" + "ldd r11,Y+24\n" + "ldd r12,Y+32\n" + "mov r13,r9\n" + "com r13\n" + "and r13,r10\n" + "eor r13,r8\n" + "mov r14,r10\n" + "com r14\n" + "and r14,r11\n" + "eor r14,r9\n" + "mov r15,r11\n" + "com r15\n" + "and r15,r12\n" + "eor r15,r10\n" + "mov r17,r12\n" + "com r17\n" + "and r17,r8\n" + "eor r17,r11\n" + "mov r16,r8\n" + "com r16\n" + "and r16,r9\n" + "eor r16,r12\n" + "st Z,r13\n" + "std Z+8,r14\n" + "std Z+16,r15\n" + "std Z+24,r17\n" + "std Z+32,r16\n" + "ldd r8,Y+1\n" + "ldd r9,Y+9\n" + "ldd r10,Y+17\n" + "ldd r11,Y+25\n" + "ldd r12,Y+33\n" + "mov r13,r9\n" + "com r13\n" + "and r13,r10\n" + "eor r13,r8\n" + "mov r14,r10\n" + "com r14\n" + "and r14,r11\n" + "eor r14,r9\n" + "mov r15,r11\n" + "com r15\n" + "and r15,r12\n" + "eor r15,r10\n" + "mov r17,r12\n" + "com r17\n" + "and r17,r8\n" + "eor r17,r11\n" + "mov r16,r8\n" + "com r16\n" + "and r16,r9\n" + "eor r16,r12\n" + "std Z+1,r13\n" + "std Z+9,r14\n" + "std Z+17,r15\n" + "std Z+25,r17\n" + "std Z+33,r16\n" + "ldd r8,Y+2\n" + "ldd r9,Y+10\n" + "ldd r10,Y+18\n" + "ldd r11,Y+26\n" + "ldd r12,Y+34\n" + "mov r13,r9\n" + "com r13\n" + "and r13,r10\n" + "eor r13,r8\n" + "mov r14,r10\n" + "com r14\n" + "and r14,r11\n" + "eor r14,r9\n" + "mov r15,r11\n" + "com r15\n" + "and r15,r12\n" + "eor r15,r10\n" + "mov r17,r12\n" + "com r17\n" + "and r17,r8\n" + "eor r17,r11\n" + "mov r16,r8\n" + "com r16\n" + "and r16,r9\n" + "eor r16,r12\n" + "std Z+2,r13\n" + "std Z+10,r14\n" + "std Z+18,r15\n" + "std Z+26,r17\n" + "std Z+34,r16\n" + "ldd r8,Y+3\n" + "ldd r9,Y+11\n" + "ldd r10,Y+19\n" + "ldd r11,Y+27\n" + "ldd r12,Y+35\n" + "mov r13,r9\n" + "com r13\n" + "and r13,r10\n" + "eor r13,r8\n" + "mov r14,r10\n" + "com r14\n" + "and r14,r11\n" + "eor r14,r9\n" + "mov r15,r11\n" + "com r15\n" + "and r15,r12\n" + "eor r15,r10\n" + "mov r17,r12\n" + "com r17\n" + "and r17,r8\n" + "eor r17,r11\n" + "mov r16,r8\n" + "com r16\n" + "and r16,r9\n" + "eor 
r16,r12\n" + "std Z+3,r13\n" + "std Z+11,r14\n" + "std Z+19,r15\n" + "std Z+27,r17\n" + "std Z+35,r16\n" + "ldd r8,Y+4\n" + "ldd r9,Y+12\n" + "ldd r10,Y+20\n" + "ldd r11,Y+28\n" + "ldd r12,Y+36\n" + "mov r13,r9\n" + "com r13\n" + "and r13,r10\n" + "eor r13,r8\n" + "mov r14,r10\n" + "com r14\n" + "and r14,r11\n" + "eor r14,r9\n" + "mov r15,r11\n" + "com r15\n" + "and r15,r12\n" + "eor r15,r10\n" + "mov r17,r12\n" + "com r17\n" + "and r17,r8\n" + "eor r17,r11\n" + "mov r16,r8\n" + "com r16\n" + "and r16,r9\n" + "eor r16,r12\n" + "std Z+4,r13\n" + "std Z+12,r14\n" + "std Z+20,r15\n" + "std Z+28,r17\n" + "std Z+36,r16\n" + "ldd r8,Y+5\n" + "ldd r9,Y+13\n" + "ldd r10,Y+21\n" + "ldd r11,Y+29\n" + "ldd r12,Y+37\n" + "mov r13,r9\n" + "com r13\n" + "and r13,r10\n" + "eor r13,r8\n" + "mov r14,r10\n" + "com r14\n" + "and r14,r11\n" + "eor r14,r9\n" + "mov r15,r11\n" + "com r15\n" + "and r15,r12\n" + "eor r15,r10\n" + "mov r17,r12\n" + "com r17\n" + "and r17,r8\n" + "eor r17,r11\n" + "mov r16,r8\n" + "com r16\n" + "and r16,r9\n" + "eor r16,r12\n" + "std Z+5,r13\n" + "std Z+13,r14\n" + "std Z+21,r15\n" + "std Z+29,r17\n" + "std Z+37,r16\n" + "ldd r8,Y+6\n" + "ldd r9,Y+14\n" + "ldd r10,Y+22\n" + "ldd r11,Y+30\n" + "ldd r12,Y+38\n" + "mov r13,r9\n" + "com r13\n" + "and r13,r10\n" + "eor r13,r8\n" + "mov r14,r10\n" + "com r14\n" + "and r14,r11\n" + "eor r14,r9\n" + "mov r15,r11\n" + "com r15\n" + "and r15,r12\n" + "eor r15,r10\n" + "mov r17,r12\n" + "com r17\n" + "and r17,r8\n" + "eor r17,r11\n" + "mov r16,r8\n" + "com r16\n" + "and r16,r9\n" + "eor r16,r12\n" + "std Z+6,r13\n" + "std Z+14,r14\n" + "std Z+22,r15\n" + "std Z+30,r17\n" + "std Z+38,r16\n" + "ldd r8,Y+7\n" + "ldd r9,Y+15\n" + "ldd r10,Y+23\n" + "ldd r11,Y+31\n" + "ldd r12,Y+39\n" + "mov r13,r9\n" + "com r13\n" + "and r13,r10\n" + "eor r13,r8\n" + "mov r14,r10\n" + "com r14\n" + "and r14,r11\n" + "eor r14,r9\n" + "mov r15,r11\n" + "com r15\n" + "and r15,r12\n" + "eor r15,r10\n" + "mov r17,r12\n" + "com r17\n" + "and 
r17,r8\n" + "eor r17,r11\n" + "mov r16,r8\n" + "com r16\n" + "and r16,r9\n" + "eor r16,r12\n" + "std Z+7,r13\n" + "std Z+15,r14\n" + "std Z+23,r15\n" + "std Z+31,r17\n" + "std Z+39,r16\n" + "adiw r30,40\n" + "adiw r28,40\n" + "dec r20\n" + "breq 51f\n" + "rjmp 50b\n" + "51:\n" + "pop r28\n" + "pop r29\n" + + // Done + : : "x"(B), "z"(state.A) + : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "memory" + ); +#else + static const uint8_t addMod5Table[9] PROGMEM = { + 0, 1, 2, 3, 4, 0, 1, 2, 3 + }; + #define addMod5(x, y) (pgm_read_byte(&(addMod5Table[(x) + (y)]))) + uint64_t D; + uint8_t index, index2; + for (uint8_t round = 0; round < 24; ++round) { + // Step mapping theta. The specification mentions two temporary + // arrays of size 5 called C and D. To save a bit of memory, + // we use the first row of B to store C and compute D on the fly. + for (index = 0; index < 5; ++index) { + B[0][index] = state.A[0][index] ^ state.A[1][index] ^ + state.A[2][index] ^ state.A[3][index] ^ + state.A[4][index]; + } + for (index = 0; index < 5; ++index) { + D = B[0][addMod5(index, 4)] ^ + leftRotate1_64(B[0][addMod5(index, 1)]); + for (index2 = 0; index2 < 5; ++index2) + state.A[index2][index] ^= D; + } + + // Step mapping rho and pi combined into a single step. + // Rotate all lanes by a specific offset and rearrange. 
+ B[0][0] = state.A[0][0]; + B[1][0] = leftRotate28_64(state.A[0][3]); + B[2][0] = leftRotate1_64 (state.A[0][1]); + B[3][0] = leftRotate27_64(state.A[0][4]); + B[4][0] = leftRotate62_64(state.A[0][2]); + B[0][1] = leftRotate44_64(state.A[1][1]); + B[1][1] = leftRotate20_64(state.A[1][4]); + B[2][1] = leftRotate6_64 (state.A[1][2]); + B[3][1] = leftRotate36_64(state.A[1][0]); + B[4][1] = leftRotate55_64(state.A[1][3]); + B[0][2] = leftRotate43_64(state.A[2][2]); + B[1][2] = leftRotate3_64 (state.A[2][0]); + B[2][2] = leftRotate25_64(state.A[2][3]); + B[3][2] = leftRotate10_64(state.A[2][1]); + B[4][2] = leftRotate39_64(state.A[2][4]); + B[0][3] = leftRotate21_64(state.A[3][3]); + B[1][3] = leftRotate45_64(state.A[3][1]); + B[2][3] = leftRotate8_64 (state.A[3][4]); + B[3][3] = leftRotate15_64(state.A[3][2]); + B[4][3] = leftRotate41_64(state.A[3][0]); + B[0][4] = leftRotate14_64(state.A[4][4]); + B[1][4] = leftRotate61_64(state.A[4][2]); + B[2][4] = leftRotate18_64(state.A[4][0]); + B[3][4] = leftRotate56_64(state.A[4][3]); + B[4][4] = leftRotate2_64 (state.A[4][1]); + + // Step mapping chi. Combine each lane with two other lanes in its row. + for (index = 0; index < 5; ++index) { + for (index2 = 0; index2 < 5; ++index2) { + state.A[index2][index] = + B[index2][index] ^ + ((~B[index2][addMod5(index, 1)]) & + B[index2][addMod5(index, 2)]); + } + } +#endif + + // Step mapping iota. XOR A[0][0] with the round constant. 
+ static uint64_t const RC[24] PROGMEM = { + 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808AULL, + 0x8000000080008000ULL, 0x000000000000808BULL, 0x0000000080000001ULL, + 0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008AULL, + 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000AULL, + 0x000000008000808BULL, 0x800000000000008BULL, 0x8000000000008089ULL, + 0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL, + 0x000000000000800AULL, 0x800000008000000AULL, 0x8000000080008081ULL, + 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL + }; + state.A[0][0] ^= pgm_read_qword(RC + round); + } +} diff --git a/src/KeccakCore.h b/src/KeccakCore.h new file mode 100644 index 0000000..ca2447e --- /dev/null +++ b/src/KeccakCore.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
/*
 * KeccakCore bundles a 5x5 array of 64-bit lanes with two 8-bit size
 * counters and an 8-bit block size.  Every member function except
 * blockSize() is only declared here and is implemented out of line.
 */
class KeccakCore
{
public:
    KeccakCore();
    ~KeccakCore();

    // Capacity accessors.
    size_t capacity() const;
    void setCapacity(size_t capacity);

    // Reports the stored _blockSize, widened to size_t.
    size_t blockSize() const
    {
        return _blockSize;
    }

    void reset();

    // Input side: feed data in, then pad with a caller-supplied tag byte.
    void update(const void *data, size_t size);
    void pad(uint8_t tag);

    // Output side.
    void extract(void *data, size_t size);
    void encrypt(void *output, const void *input, size_t size);

    void clear();

    void setHMACKey(const void *key, size_t len, uint8_t pad, size_t hashSize);

private:
    // The member order below fixes the object layout; keep it as is.
    struct {
        uint64_t A[5][5];       // 25 lanes of 64 bits each
        uint8_t inputSize;
        uint8_t outputSize;
    } state;
    uint8_t _blockSize;

    void keccakp();
};
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "NoiseSource.h" +#include "RNG.h" + +/** + * \class NoiseSource NoiseSource.h + * \brief Abstract base class for random noise sources. + * + * \sa \link RNGClass RNG\endlink, TransistorNoiseSource + */ + +/** + * \brief Constructs a new random noise source. + */ +NoiseSource::NoiseSource() +{ +} + +/** + * \brief Destroys this random noise source. + */ +NoiseSource::~NoiseSource() +{ +} + +/** + * \fn bool NoiseSource::calibrating() const + * \brief Determine if the noise source is still calibrating itself. + * + * \return Returns true if calibration is in progress; false if the noise + * source is generating valid random data. + * + * Noise sources that require calibration start doing so at system startup + * and then switch over to random data generation once calibration is complete. + * Since no random data is being generated during calibration, the output + * from \link RNGClass::rand() RNG.rand()\endlink may be predictable. + * Use \link RNGClass::available() RNG.available()\endlink to determine + * when sufficient entropy is available to generate good random values. + * + * It is possible that the noise source never exits calibration. This can + * happen if the input voltage is insufficient to trigger noise or if the + * noise source is not connected. Noise sources may also periodically + * recalibrate themselves. + * + * \sa stir() + */ + +/** + * \fn void NoiseSource::stir() + * \brief Stirs entropy from this noise source into the global random + * number pool. + * + * This function should call output() to add the entropy from this noise + * source to the global random number pool. 
+ * + * The noise source should batch up the entropy data, providing between + * 16 and 48 bytes of data each time. If the noise source does not have + * sufficient entropy data at the moment, it should return without stiring + * the current data in. + * + * \sa calibrating(), output() + */ + +/** + * \brief Called when the noise source is added to RNG with + * \link RNGClass::addNoiseSource() RNG.addNoiseSource()\endlink. + * + * This function is intended for noise source initialization tasks that + * must be performed after \link RNGClass::begin() RNG.begin()\endlink + * has been called to initialize the global random number pool. + * For example, if the noise source has a unique identifier or serial + * number then this function can stir it into the pool at startup time. + */ +void NoiseSource::added() +{ + // Nothing to do here. +} + +/** + * \brief Called from subclasses to output noise to the global random + * number pool. + * + * \param data Points to the noise data. + * \param len Number of bytes of noise data. + * \param credit The number of bits of entropy to credit for the data. + * Note that this is bits, not bytes. + * + * The default implementation of this function calls + * \link RNGClass::stir() RNG.stir()\endlink to add the entropy from + * this noise source to the global random number pool. + * + * This function may be overridden by subclasses to capture the raw + * output from the noise source before it is mixed into the pool to + * allow the raw data to be analyzed for randomness. + */ +void NoiseSource::output(const uint8_t *data, size_t len, unsigned int credit) +{ + RNG.stir(data, len, credit); +} diff --git a/src/NoiseSource.h b/src/NoiseSource.h new file mode 100644 index 0000000..7baf243 --- /dev/null +++ b/src/NoiseSource.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
/*
 * Interface implemented by every random noise source.  calibrating() and
 * stir() are pure virtual, so the class cannot be instantiated directly.
 */
class NoiseSource
{
public:
    NoiseSource();
    virtual ~NoiseSource();

    // Implemented by each concrete source.  Keep the declaration order of
    // the virtual functions unchanged.
    virtual bool calibrating() const = 0;
    virtual void stir() = 0;

    // Optional hook; a default definition is provided out of line.
    virtual void added();

protected:
    // Overridable sink for generated noise; credit is counted in bits.
    virtual void output(const uint8_t *data, size_t len, unsigned int credit);
};
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "OMAC.h" +#include "GF128.h" +#include "Crypto.h" +#include + +/** + * \class OMAC OMAC.h + * \brief Implementation of the OMAC message authenticator. + * + * OMAC is the message authentication part of EAX mode. It is provided + * as a separate class for the convenience of applications that need + * message authentication separate from encryption. + * + * References: https://en.wikipedia.org/wiki/EAX_mode, + * http://web.cs.ucdavis.edu/~rogaway/papers/eax.html + * + * \sa EAX + */ + +/** + * \brief Constructs a new OMAC object. + * + * This constructor must be followed by a call to setBlockCipher() + * to specify the block cipher to use. + */ +OMAC::OMAC() + : _blockCipher(0) + , posn(0) +{ +} + +/** + * \brief Destroys this OMAC object. 
+ * + * \sa clear() + */ +OMAC::~OMAC() +{ + clean(b); +} + +/** + * \fn BlockCipher *OMAC::blockCipher() const + * \brief Gets the block cipher that is in use for this OMAC object. + * + * \sa setBlockCipher() + */ + +/** + * \fn void OMAC::setBlockCipher(BlockCipher *cipher) + * \brief Sets the block cipher to use for this OMAC object. + * + * \param cipher The block cipher to use to implement OMAC. + * This object must have a block size of 128 bits (16 bytes). + * + * \sa blockCipher() + */ + +/** + * \brief Initialises the first OMAC hashing context and creates the B value. + * + * \param omac The OMAC hashing context. + * + * This function must be called first before initNext(), update(), or + * finalize() to create the B value from the OMAC algorithm which is + * used to finalize later hashes. It is assumed that setBlockCipher() + * has already been called. + * + * The tag value for the context is implicitly set to zero, which means + * that the context can be used for ordinary hashing as long as the + * data that follows is non-zero in length. Alternatively, initNext() + * can be called to restart the context with a specific tag. + * + * This function must be called again whenever the block cipher or the + * key changes. + * + * \sa initNext(), update(), finalize() + */ +void OMAC::initFirst(uint8_t omac[16]) +{ + // Start the OMAC context. We assume that the data that follows + // will be at least 1 byte in length so that we can encrypt the + // zeroes now to derive the B value. + memset(omac, 0, 16); + _blockCipher->encryptBlock(omac, omac); + posn = 0; + + // Generate the B value from the encrypted block of zeroes. + // We will need this later when finalising the OMAC hashes. + memcpy(b, omac, 16); + GF128::dblEAX(b); +} + +/** + * \brief Initialises or restarts an OMAC hashing context. + * + * \param omac The OMAC hashing context. + * \param tag The tag value indicating which OMAC calculation we are doing. 
+ * + * It is assumed that initFirst() was called previously to create the B + * value for the context. + * + * \sa initFirst(), update(), finalize() + */ +void OMAC::initNext(uint8_t omac[16], uint8_t tag) +{ + memset(omac, 0, 15); + omac[15] = tag; + posn = 16; +} + +/** + * \brief Updates an OMAC hashing context with more data. + * + * \param omac The OMAC hashing context. + * \param data Points to the data to be hashed. + * \param size The number of bytes to be hashed. + * + * \sa initFirst(), initNext(), finalize() + */ +void OMAC::update(uint8_t omac[16], const uint8_t *data, size_t size) +{ + while (size > 0) { + // Encrypt the current block if it is already full. + if (posn == 16) { + _blockCipher->encryptBlock(omac, omac); + posn = 0; + } + + // XOR the incoming data with the current block. + uint8_t len = 16 - posn; + if (len > size) + len = (uint8_t)size; + for (uint8_t index = 0; index < len; ++index) + omac[posn++] ^= data[index]; + + // Move onto the next block. + size -= len; + data += len; + } +} + +/** + * \brief Finalises an OMAC hashing context. + * + * \param omac The OMAC hashing context on entry, the final OMAC value on exit. + * + * \sa initFirst(), initNext(), update() + */ +void OMAC::finalize(uint8_t omac[16]) +{ + // Apply padding if necessary. + if (posn != 16) { + // Need padding: XOR with P = 2 * B. + uint32_t p[4]; + memcpy(p, b, 16); + GF128::dblEAX(p); + omac[posn] ^= 0x80; + for (uint8_t index = 0; index < 16; ++index) + omac[index] ^= ((const uint8_t *)p)[index]; + clean(p); + } else { + // No padding necessary: XOR with B. + for (uint8_t index = 0; index < 16; ++index) + omac[index] ^= ((const uint8_t *)b)[index]; + } + + // Encrypt the hash to get the final OMAC value. + _blockCipher->encryptBlock(omac, omac); +} + +/** + * \brief Clears all security-sensitive state from this object. 
+ */ +void OMAC::clear() +{ + clean(b); +} diff --git a/src/OMAC.h b/src/OMAC.h new file mode 100644 index 0000000..37e0fa7 --- /dev/null +++ b/src/OMAC.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_OMAC_H +#define CRYPTO_OMAC_H + +#include "BlockCipher.h" + +class OMAC +{ +public: + OMAC(); + ~OMAC(); + + BlockCipher *blockCipher() const { return _blockCipher; } + void setBlockCipher(BlockCipher *cipher) { _blockCipher = cipher; } + + void initFirst(uint8_t omac[16]); + void initNext(uint8_t omac[16], uint8_t tag); + void update(uint8_t omac[16], const uint8_t *data, size_t size); + void finalize(uint8_t omac[16]); + + void clear(); + +private: + BlockCipher *_blockCipher; + uint32_t b[4]; + uint8_t posn; +}; + +#endif diff --git a/src/P521.cpp b/src/P521.cpp new file mode 100644 index 0000000..f46df3c --- /dev/null +++ b/src/P521.cpp @@ -0,0 +1,1642 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "P521.h" +#include "Crypto.h" +#include "RNG.h" +#include "SHA512.h" +#include "utility/LimbUtil.h" +#include + +/** + * \class P521 P521.h + * \brief Elliptic curve operations with the NIST P-521 curve. + * + * This class supports both ECDH key exchange and ECDSA signatures. + * + * \note The public functions in this class need a substantial amount of + * stack space to store intermediate results while the curve function is + * being evaluated. About 2k of free stack space is recommended for safety. + * + * References: NIST FIPS 186-4, + * RFC 6090, + * RFC 6979, + * RFC 5903 + * + * \sa Curve25519 + */ + +// Number of limbs that are needed to represent a 521-bit number. +#define NUM_LIMBS_521BIT NUM_LIMBS_BITS(521) + +// Number of limbs that are needed to represent a 1042-bit number. +// To simply things we also require that this be twice the size of +// NUM_LIMB_521BIT which involves a little wastage at the high end +// of one extra limb for 8-bit and 32-bit limbs. There is no +// wastage for 16-bit limbs. +#define NUM_LIMBS_1042BIT (NUM_LIMBS_BITS(521) * 2) + +// The overhead of clean() calls in mul(), etc can add up to a lot of +// processing time. Only do such cleanups if strict mode has been enabled. +#if defined(P521_STRICT_CLEAN) +#define strict_clean(x) clean(x) +#else +#define strict_clean(x) do { ; } while (0) +#endif + +// Expand the partial 9-bit left over limb at the top of a 521-bit number. +#if BIGNUMBER_LIMB_8BIT +#define LIMB_PARTIAL(value) ((uint8_t)(value)), \ + ((uint8_t)((value) >> 8)) +#else +#define LIMB_PARTIAL(value) (value) +#endif + +/** @cond */ + +// The group order "q" value from RFC 4754 and RFC 5903. This is the +// same as the "n" value from Appendix D.1.2.5 of NIST FIPS 186-4. 
+static limb_t const P521_q[NUM_LIMBS_521BIT] PROGMEM = { + LIMB_PAIR(0x91386409, 0xbb6fb71e), LIMB_PAIR(0x899c47ae, 0x3bb5c9b8), + LIMB_PAIR(0xf709a5d0, 0x7fcc0148), LIMB_PAIR(0xbf2f966b, 0x51868783), + LIMB_PAIR(0xfffffffa, 0xffffffff), LIMB_PAIR(0xffffffff, 0xffffffff), + LIMB_PAIR(0xffffffff, 0xffffffff), LIMB_PAIR(0xffffffff, 0xffffffff), + LIMB_PARTIAL(0x1ff) +}; + +// The "b" value from Appendix D.1.2.5 of NIST FIPS 186-4. +static limb_t const P521_b[NUM_LIMBS_521BIT] PROGMEM = { + LIMB_PAIR(0x6b503f00, 0xef451fd4), LIMB_PAIR(0x3d2c34f1, 0x3573df88), + LIMB_PAIR(0x3bb1bf07, 0x1652c0bd), LIMB_PAIR(0xec7e937b, 0x56193951), + LIMB_PAIR(0x8ef109e1, 0xb8b48991), LIMB_PAIR(0x99b315f3, 0xa2da725b), + LIMB_PAIR(0xb68540ee, 0x929a21a0), LIMB_PAIR(0x8e1c9a1f, 0x953eb961), + LIMB_PARTIAL(0x051) +}; + +// The "Gx" value from Appendix D.1.2.5 of NIST FIPS 186-4. +static limb_t const P521_Gx[NUM_LIMBS_521BIT] PROGMEM = { + LIMB_PAIR(0xc2e5bd66, 0xf97e7e31), LIMB_PAIR(0x856a429b, 0x3348b3c1), + LIMB_PAIR(0xa2ffa8de, 0xfe1dc127), LIMB_PAIR(0xefe75928, 0xa14b5e77), + LIMB_PAIR(0x6b4d3dba, 0xf828af60), LIMB_PAIR(0x053fb521, 0x9c648139), + LIMB_PAIR(0x2395b442, 0x9e3ecb66), LIMB_PAIR(0x0404e9cd, 0x858e06b7), + LIMB_PARTIAL(0x0c6) +}; + +// The "Gy" value from Appendix D.1.2.5 of NIST FIPS 186-4. +static limb_t const P521_Gy[NUM_LIMBS_521BIT] PROGMEM = { + LIMB_PAIR(0x9fd16650, 0x88be9476), LIMB_PAIR(0xa272c240, 0x353c7086), + LIMB_PAIR(0x3fad0761, 0xc550b901), LIMB_PAIR(0x5ef42640, 0x97ee7299), + LIMB_PAIR(0x273e662c, 0x17afbd17), LIMB_PAIR(0x579b4468, 0x98f54449), + LIMB_PAIR(0x2c7d1bd9, 0x5c8a5fb4), LIMB_PAIR(0x9a3bc004, 0x39296a78), + LIMB_PARTIAL(0x118) +}; + +/** @endcond */ + +/** + * \brief Evaluates the curve function. + * + * \param result The result of applying the curve function, which consists + * of the x and y values of the result point encoded in big-endian order. + * \param f The scalar value to multiply by \a point to create the \a result. 
+ * This is assumed to be be a 521-bit number in big-endian order. + * \param point The curve point to multiply consisting of the x and y + * values encoded in big-endian order. If \a point is NULL, then the + * generator Gx and Gy values for the curve will be used instead. + * + * \return Returns true if \a f * \a point could be evaluated, or false if + * \a point is not a point on the curve. + * + * This function provides access to the raw curve operation for testing + * purposes. Normally an application would use a higher-level function + * like dh1(), dh2(), sign(), or verify(). + * + * \sa dh1(), sign() + */ +bool P521::eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132]) +{ + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + bool ok; + + // Unpack the curve point from the parameters and validate it. + if (point) { + BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, point, 66); + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, point + 66, 66); + ok = validate(x, y); + } else { + memcpy_P(x, P521_Gx, sizeof(x)); + memcpy_P(y, P521_Gy, sizeof(y)); + ok = true; + } + + // Evaluate the curve function. + evaluate(x, y, f); + + // Pack the answer into the result array. + BigNumberUtil::packBE(result, 66, x, NUM_LIMBS_521BIT); + BigNumberUtil::packBE(result + 66, 66, y, NUM_LIMBS_521BIT); + + // Clean up. + clean(x); + clean(y); + return ok; +} + +/** + * \brief Performs phase 1 of an ECDH key exchange using P-521. + * + * \param k The key value to send to the other party as part of the exchange. + * \param f The generated secret value for this party. This must not be + * transmitted to any party or stored in permanent storage. It only needs + * to be kept in memory until dh2() is called. + * + * The \a f value is generated with \link RNGClass::rand() RNG.rand()\endlink. 
+ * It is the caller's responsibility to ensure that the global random number + * pool has sufficient entropy to generate the 66 bytes of \a f safely + * before calling this function. + * + * The following example demonstrates how to perform a full ECDH + * key exchange using dh1() and dh2(): + * + * \code + * uint8_t f[66]; + * uint8_t k[132]; + * + * // Generate the secret value "f" and the public value "k". + * P521::dh1(k, f); + * + * // Send "k" to the other party. + * ... + * + * // Read the "k" value that the other party sent to us. + * ... + * + * // Generate the shared secret in "f". + * if (!P521::dh2(k, f)) { + * // The received "k" value was invalid - abort the session. + * ... + * } + * + * // The "f" value can now be used to generate session keys for encryption. + * ... + * \endcode + * + * Reference: RFC 6090 + * + * \sa dh2() + */ +void P521::dh1(uint8_t k[132], uint8_t f[66]) +{ + generatePrivateKey(f); + derivePublicKey(k, f); +} + +/** + * \brief Performs phase 2 of an ECDH key exchange using P-521. + * + * \param k The public key value that was received from the other + * party as part of the exchange. + * \param f On entry, this is the secret value for this party that was + * generated by dh1(). On exit, this will be the shared secret. + * + * \return Returns true if the key exchange was successful, or false if + * the \a k value is invalid. + * + * Reference: RFC 6090 + * + * \sa dh1() + */ +bool P521::dh2(const uint8_t k[132], uint8_t f[66]) +{ + // Unpack the (x, y) point from k. + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, k, 66); + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k + 66, 66); + + // Validate the curve point. We keep going to preserve the timing. + bool ok = validate(x, y); + + // Evaluate the curve function. + evaluate(x, y, f); + + // The secret key is the x component of the final value. + BigNumberUtil::packBE(f, 66, x, NUM_LIMBS_521BIT); + + // Clean up. 
+ clean(x); + clean(y); + return ok; +} + +/** + * \brief Signs a message using a specific P-521 private key. + * + * \param signature The signature value. + * \param privateKey The private key to use to sign the message. + * \param message Points to the message to be signed. + * \param len The length of the \a message to be signed. + * \param hash The hash algorithm to use to hash the \a message before signing. + * If \a hash is NULL, then the \a message is assumed to already be a hash + * value from some previous process. + * + * This function generates deterministic ECDSA signatures according to + * RFC 6979. The \a hash function is used to generate the k value for + * the signature. If \a hash is NULL, then SHA512 is used. + * The \a hash object must be capable of HMAC mode. + * + * The length of the hashed message must be less than or equal to 64 + * bytes in size. Longer messages will be truncated to 64 bytes. + * + * References: RFC 6090, + * RFC 6979 + * + * \sa verify(), generatePrivateKey() + */ +void P521::sign(uint8_t signature[132], const uint8_t privateKey[66], + const void *message, size_t len, Hash *hash) +{ + uint8_t hm[66]; + uint8_t k[66]; + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + limb_t t[NUM_LIMBS_521BIT]; + uint64_t count = 0; + + // Format the incoming message, hashing it if necessary. + if (hash) { + // Hash the message. + hash->reset(); + hash->update(message, len); + len = hash->hashSize(); + if (len > 64) + len = 64; + memset(hm, 0, 66 - len); + hash->finalize(hm + 66 - len, len); + } else { + // The message is the hash. + if (len > 64) + len = 64; + memset(hm, 0, 66 - len); + memcpy(hm + 66 - len, message, len); + } + + // Keep generating k values until both r and s are non-zero. + for (;;) { + // Generate the k value deterministically according to RFC 6979. + if (hash) + generateK(k, hm, privateKey, hash, count); + else + generateK(k, hm, privateKey, count); + + // Generate r = kG.x mod q. 
+ memcpy_P(x, P521_Gx, sizeof(x)); + memcpy_P(y, P521_Gy, sizeof(y)); + evaluate(x, y, k); + BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT); + BigNumberUtil::packBE(signature, 66, x, NUM_LIMBS_521BIT); + + // If r is zero, then we need to generate a new k value. + // This is utterly improbable, but let's be safe anyway. + if (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT)) { + ++count; + continue; + } + + // Generate s = (privateKey * r + hm) / k mod q. + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, privateKey, 66); + mulQ(y, y, x); + BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, hm, 66); + BigNumberUtil::add(x, x, y, NUM_LIMBS_521BIT); + BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT); + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k, 66); + recipQ(t, y); + mulQ(x, x, t); + BigNumberUtil::packBE(signature + 66, 66, x, NUM_LIMBS_521BIT); + + // Exit the loop if s is non-zero. + if (!BigNumberUtil::isZero(x, NUM_LIMBS_521BIT)) + break; + + // We need to generate a new k value according to RFC 6979. + // This is utterly improbable, but let's be safe anyway. + ++count; + } + + // Clean up. + clean(hm); + clean(k); + clean(x); + clean(y); + clean(t); +} + +/** + * \brief Verifies a signature using a specific P-521 public key. + * + * \param signature The signature value to be verified. + * \param publicKey The public key to use to verify the signature. + * \param message The message whose signature is to be verified. + * \param len The length of the \a message to be verified. + * \param hash The hash algorithm to use to hash the \a message before + * verification. If \a hash is NULL, then the \a message is assumed to + * already be a hash value from some previous process. + * + * The length of the hashed message must be less than or equal to 64 + * bytes in size. Longer messages will be truncated to 64 bytes. + * + * \return Returns true if the \a signature is valid for \a message; + * or false if the \a publicKey or \a signature is not valid. 
+ * + * \sa sign() + */ +bool P521::verify(const uint8_t signature[132], + const uint8_t publicKey[132], + const void *message, size_t len, Hash *hash) +{ + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + limb_t r[NUM_LIMBS_521BIT]; + limb_t s[NUM_LIMBS_521BIT]; + limb_t u1[NUM_LIMBS_521BIT]; + limb_t u2[NUM_LIMBS_521BIT]; + uint8_t t[66]; + bool ok = false; + + // Because we are operating on public values, we don't need to + // be as strict about constant time. Bail out early if there + // is a problem with the parameters. + + // Unpack the signature. The values must be between 1 and q - 1. + BigNumberUtil::unpackBE(r, NUM_LIMBS_521BIT, signature, 66); + BigNumberUtil::unpackBE(s, NUM_LIMBS_521BIT, signature + 66, 66); + if (BigNumberUtil::isZero(r, NUM_LIMBS_521BIT) || + BigNumberUtil::isZero(s, NUM_LIMBS_521BIT) || + !BigNumberUtil::sub_P(x, r, P521_q, NUM_LIMBS_521BIT) || + !BigNumberUtil::sub_P(x, s, P521_q, NUM_LIMBS_521BIT)) { + goto failed; + } + + // Unpack the public key and check that it is a valid curve point. + BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66); + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66); + if (!validate(x, y)) { + goto failed; + } + + // Hash the message to generate hm, which we store into u1. + if (hash) { + // Hash the message. + hash->reset(); + hash->update(message, len); + len = hash->hashSize(); + if (len > 64) + len = 64; + hash->finalize(u2, len); + BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)u2, len); + } else { + // The message is the hash. + if (len > 64) + len = 64; + BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)message, len); + } + + // Compute u1 = hm * s^-1 mod q and u2 = r * s^-1 mod q. + recipQ(u2, s); + mulQ(u1, u1, u2); + mulQ(u2, r, u2); + + // Compute the curve point R = u2 * publicKey + u1 * G. 
+ BigNumberUtil::packBE(t, 66, u2, NUM_LIMBS_521BIT); + evaluate(x, y, t); + memcpy_P(u2, P521_Gx, sizeof(x)); + memcpy_P(s, P521_Gy, sizeof(y)); + BigNumberUtil::packBE(t, 66, u1, NUM_LIMBS_521BIT); + evaluate(u2, s, t); + addAffine(u2, s, x, y); + + // If R.x = r mod q, then the signature is valid. + BigNumberUtil::reduceQuick_P(u1, u2, P521_q, NUM_LIMBS_521BIT); + ok = secure_compare(u1, r, NUM_LIMBS_521BIT * sizeof(limb_t)); + + // Clean up and exit. +failed: + clean(x); + clean(y); + clean(r); + clean(s); + clean(u1); + clean(u2); + clean(t); + return ok; +} + +/** + * \brief Generates a private key for P-521 signing operations. + * + * \param privateKey The resulting private key. + * + * The private key is generated with \link RNGClass::rand() RNG.rand()\endlink. + * It is the caller's responsibility to ensure that the global random number + * pool has sufficient entropy to generate the 521 bits of the key safely + * before calling this function. + * + * \sa derivePublicKey(), sign() + */ +void P521::generatePrivateKey(uint8_t privateKey[66]) +{ + // Generate a random 521-bit value for the private key. The value + // must be generated uniformly at random between 1 and q - 1 where q + // is the group order (RFC 6090). We use the recommended algorithm + // from Appendix B of RFC 6090: generate a random 521-bit value + // and discard it if it is not within the range 1 to q - 1. + limb_t x[NUM_LIMBS_521BIT]; + do { + RNG.rand((uint8_t *)x, sizeof(x)); +#if BIGNUMBER_LIMB_8BIT + x[NUM_LIMBS_521BIT - 1] &= 0x01; +#else + x[NUM_LIMBS_521BIT - 1] &= 0x1FF; +#endif + BigNumberUtil::packBE(privateKey, 66, x, NUM_LIMBS_521BIT); + } while (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT) || + !BigNumberUtil::sub_P(x, x, P521_q, NUM_LIMBS_521BIT)); + clean(x); +} + +/** + * \brief Derives the public key from a private key for P-521 + * signing operations. + * + * \param publicKey The public key. 
+ * \param privateKey The private key, which is assumed to have been + * created by generatePrivateKey(). + * + * \sa generatePrivateKey(), verify() + */ +void P521::derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66]) +{ + // Evaluate the curve function starting with the generator. + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + memcpy_P(x, P521_Gx, sizeof(x)); + memcpy_P(y, P521_Gy, sizeof(y)); + evaluate(x, y, privateKey); + + // Pack the (x, y) point into the public key. + BigNumberUtil::packBE(publicKey, 66, x, NUM_LIMBS_521BIT); + BigNumberUtil::packBE(publicKey + 66, 66, y, NUM_LIMBS_521BIT); + + // Clean up. + clean(x); + clean(y); +} + +/** + * \brief Validates a private key value to ensure that it is + * between 1 and q - 1. + * + * \param privateKey The private key value to validate. + * \return Returns true if \a privateKey is valid, false if not. + * + * \sa isValidPublicKey() + */ +bool P521::isValidPrivateKey(const uint8_t privateKey[66]) +{ + // The value "q" as a byte array from most to least significant. + static uint8_t const P521_q_bytes[66] PROGMEM = { + 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F, + 0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09, + 0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C, + 0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38, + 0x64, 0x09 + }; + uint8_t zeroTest = 0; + uint8_t posn = 66; + uint16_t borrow = 0; + while (posn > 0) { + --posn; + + // Check for zero. + zeroTest |= privateKey[posn]; + + // Subtract P521_q_bytes from the key. If there is no borrow, + // then the key value was greater than or equal to q. 
+ borrow = ((uint16_t)(privateKey[posn])) - + pgm_read_byte(&(P521_q_bytes[posn])) - + ((borrow >> 8) & 0x01); + } + return zeroTest != 0 && borrow != 0; +} + +/** + * \brief Validates a public key to ensure that it is a valid curve point. + * + * \param publicKey The public key value to validate. + * \return Returns true if \a publicKey is valid, false if not. + * + * \sa isValidPrivateKey() + */ +bool P521::isValidPublicKey(const uint8_t publicKey[132]) +{ + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66); + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66); + bool ok = validate(x, y); + clean(x); + clean(y); + return ok; +} + +/** + * \fn bool P521::isValidCurvePoint(const uint8_t point[132]) + * \brief Validates a point to ensure that it is on the curve. + * + * \param point The point to validate. + * \return Returns true if \a point is valid and on the curve, false if not. + * + * This is a convenience function that calls isValidPublicKey() as the + * two operations are equivalent. + */ + +/** + * \brief Evaluates the curve function by multiplying (x, y) by f. + * + * \param x The X co-ordinate of the curve point. Replaced with the X + * co-ordinate of the result on exit. + * \param y The Y co-ordinate of the curve point. Replaced with the Y + * co-ordinate of the result on exit. + * \param f The 521-bit scalar to multiply (x, y) by, most significant + * bit first. + */ +void P521::evaluate(limb_t *x, limb_t *y, const uint8_t f[66]) +{ + limb_t x1[NUM_LIMBS_521BIT]; + limb_t y1[NUM_LIMBS_521BIT]; + limb_t z1[NUM_LIMBS_521BIT]; + limb_t x2[NUM_LIMBS_521BIT]; + limb_t y2[NUM_LIMBS_521BIT]; + limb_t z2[NUM_LIMBS_521BIT]; + + // We want the input in Jacobian co-ordinates. The point (x, y, z) + // corresponds to the affine point (x / z^2, y / z^3), so if we set z + // to 1 we end up with Jacobian co-ordinates. Remember that z is 1 + // and continue on. 
+ + // Set the answer to the point-at-infinity initially (z = 0). + memset(x1, 0, sizeof(x1)); + memset(y1, 0, sizeof(y1)); + memset(z1, 0, sizeof(z1)); + + // Special handling for the highest bit. We can skip dblPoint()/addPoint() + // and simply conditionally move (x, y, z) into (x1, y1, z1). + uint8_t select = (f[0] & 0x01); + cmove(select, x1, x); + cmove(select, y1, y); + cmove1(select, z1); // z = 1 + + // Iterate over the remaining 520 bits of f from highest to lowest. + uint8_t mask = 0x80; + uint8_t fposn = 1; + for (uint16_t t = 520; t > 0; --t) { + // Double the answer. + dblPoint(x1, y1, z1, x1, y1, z1); + + // Add (x, y, z) to (x1, y1, z1) for the next 1 bit. + // We must always do this to preserve the overall timing. + // The z value is always 1 so we can omit that argument. + addPoint(x2, y2, z2, x1, y1, z1, x, y/*, z*/); + + // If the bit was 1, then move (x2, y2, z2) into (x1, y1, z1). + select = (f[fposn] & mask); + cmove(select, x1, x2); + cmove(select, y1, y2); + cmove(select, z1, z2); + + // Move onto the next bit. + mask >>= 1; + if (!mask) { + ++fposn; + mask = 0x80; + } + } + + // Convert from Jacobian co-ordinates back into affine co-ordinates. + // x = x1 * (z1^2)^-1, y = y1 * (z1^3)^-1. + recip(x2, z1); + square(y2, x2); + mul(x, x1, y2); + mul(y2, y2, x2); + mul(y, y1, y2); + + // Clean up. + clean(x1); + clean(y1); + clean(z1); + clean(x2); + clean(y2); + clean(z2); +} + +/** + * \brief Adds two affine points. + * + * \param x1 The X value for the first point to add, and the result. + * \param y1 The Y value for the first point to add, and the result. + * \param x2 The X value for the second point to add. + * \param y2 The Y value for the second point to add. + * + * The Z values for the two points are assumed to be 1. 
+ */ +void P521::addAffine(limb_t *x1, limb_t *y1, const limb_t *x2, const limb_t *y2) +{ + limb_t xout[NUM_LIMBS_521BIT]; + limb_t yout[NUM_LIMBS_521BIT]; + limb_t zout[NUM_LIMBS_521BIT]; + limb_t z1[NUM_LIMBS_521BIT]; + + // z1 = 1 + z1[0] = 1; + memset(z1 + 1, 0, (NUM_LIMBS_521BIT - 1) * sizeof(limb_t)); + + // Add the two points. + addPoint(xout, yout, zout, x1, y1, z1, x2, y2/*, z2*/); + + // Convert from Jacobian co-ordinates back into affine co-ordinates. + // x1 = xout * (zout^2)^-1, y1 = yout * (zout^3)^-1. + recip(z1, zout); + square(zout, z1); + mul(x1, xout, zout); + mul(zout, zout, z1); + mul(y1, yout, zout); + + // Clean up. + clean(xout); + clean(yout); + clean(zout); + clean(z1); +} + +/** + * \brief Validates that (x, y) is actually a point on the curve. + * + * \param x The X co-ordinate of the point to test. + * \param y The Y co-ordinate of the point to test. + * \return Returns true if (x, y) is on the curve, or false if not. + * + * \sa inRange() + */ +bool P521::validate(const limb_t *x, const limb_t *y) +{ + bool result; + + // If x or y is greater than or equal to 2^521 - 1, then the + // point is definitely not on the curve. Preserve timing by + // delaying the reporting of the result until later. + result = inRange(x); + result &= inRange(y); + + // We need to check that y^2 = x^3 - 3 * x + b mod 2^521 - 1. + limb_t t1[NUM_LIMBS_521BIT]; + limb_t t2[NUM_LIMBS_521BIT]; + square(t1, x); + mul(t1, t1, x); + mulLiteral(t2, x, 3); + sub(t1, t1, t2); + memcpy_P(t2, P521_b, sizeof(t2)); + add(t1, t1, t2); + square(t2, y); + result &= secure_compare(t1, t2, sizeof(t1)); + clean(t1); + clean(t2); + return result; +} + +/** + * \brief Determines if a value is between 0 and 2^521 - 2. + * + * \param x The value to test. + * \return Returns true if \a x is in range, false if not. 
+ * + * \sa validate() + */ +bool P521::inRange(const limb_t *x) +{ + // Do a trial subtraction of 2^521 - 1 from x, which is equivalent + // to adding 1 and subtracting 2^521. We only need the carry. + dlimb_t carry = 1; + limb_t word = 0; + for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry += *x++; + word = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // Determine the carry out from the low 521 bits. +#if BIGNUMBER_LIMB_8BIT + carry = (carry << 7) + (word >> 1); +#else + carry = (carry << (LIMB_BITS - 9)) + (word >> 9); +#endif + + // If the carry is zero, then x was in range. Otherwise it is out + // of range. Check for zero in a way that preserves constant timing. + word = (limb_t)(carry | (carry >> LIMB_BITS)); + word = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - word) >> LIMB_BITS); + return (bool)word; +} + +/** + * \brief Reduces a number modulo 2^521 - 1. + * + * \param result The array that will contain the result when the + * function exits. Must be NUM_LIMBS_521BIT limbs in size. + * \param x The number to be reduced, which must be NUM_LIMBS_1042BIT + * limbs in size and less than square(2^521 - 1). This array can be + * the same as \a result. + */ +void P521::reduce(limb_t *result, const limb_t *x) +{ +#if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT + // According to NIST FIPS 186-4, we add the high 521 bits to the + // low 521 bits and then do a trial subtraction of 2^521 - 1. + // We do both in a single step. Subtracting 2^521 - 1 is equivalent + // to adding 1 and subtracting 2^521. 
+ uint8_t index; + const limb_t *xl = x; + const limb_t *xh = x + NUM_LIMBS_521BIT; + limb_t *rr = result; + dlimb_t carry; + limb_t word = x[NUM_LIMBS_521BIT - 1]; + carry = (word >> 9) + 1; + word &= 0x1FF; + for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) { + carry += *xl++; + carry += ((dlimb_t)(*xh++)) << (LIMB_BITS - 9); + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + carry += word; + carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << (LIMB_BITS - 9); + word = (limb_t)carry; + *rr = word; + + // If the carry out was 1, then mask it off and we have the answer. + // If the carry out was 0, then we need to add 2^521 - 1 back again. + // To preserve the timing we perform a conditional subtract of 1 and + // then mask off the high bits. + carry = ((word >> 9) ^ 0x01) & 0x01; + rr = result; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*rr)) - carry; + *rr++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--rr) &= 0x1FF; +#elif BIGNUMBER_LIMB_8BIT + // Same as above, but for 8-bit limbs. + uint8_t index; + const limb_t *xl = x; + const limb_t *xh = x + NUM_LIMBS_521BIT; + limb_t *rr = result; + dlimb_t carry; + limb_t word = x[NUM_LIMBS_521BIT - 1]; + carry = (word >> 1) + 1; + word &= 0x01; + for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) { + carry += *xl++; + carry += ((dlimb_t)(*xh++)) << 7; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + carry += word; + carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << 1; + word = (limb_t)carry; + *rr = word; + carry = ((word >> 1) ^ 0x01) & 0x01; + rr = result; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*rr)) - carry; + *rr++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--rr) &= 0x01; +#else + #error "Don't know how to reduce values mod 2^521 - 1" +#endif +} + +/** + * \brief Quickly reduces a number modulo 2^521 - 1. 
+ * + * \param x The number to be reduced, which must be NUM_LIMBS_521BIT + * limbs in size and less than or equal to 2 * (2^521 - 2). + * + * The answer is also put into \a x and will consist of NUM_LIMBS_521BIT limbs. + * + * This function is intended for reducing the result of additions where + * the caller knows that \a x is within the described range. A single + * trial subtraction is all that is needed to reduce the number. + */ +void P521::reduceQuick(limb_t *x) +{ + // Perform a trial subtraction of 2^521 - 1 from x. This is + // equivalent to adding 1 and subtracting 2^521 - 1. + uint8_t index; + limb_t *xx = x; + dlimb_t carry = 1; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry += *xx; + *xx++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // If the carry out was 1, then mask it off and we have the answer. + // If the carry out was 0, then we need to add 2^521 - 1 back again. + // To preserve the timing we perform a conditional subtract of 1 and + // then mask off the high bits. +#if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT + carry = ((x[NUM_LIMBS_521BIT - 1] >> 9) ^ 0x01) & 0x01; + xx = x; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*xx)) - carry; + *xx++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--xx) &= 0x1FF; +#elif BIGNUMBER_LIMB_8BIT + carry = ((x[NUM_LIMBS_521BIT - 1] >> 1) ^ 0x01) & 0x01; + xx = x; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*xx)) - carry; + *xx++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--xx) &= 0x01; +#endif +} + +/** + * \brief Multiplies two 521-bit values to produce a 1042-bit result. + * + * \param result The result, which must be NUM_LIMBS_1042BIT limbs in size + * and must not overlap with \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS_521BIT + * limbs in size. 
+ * \param y The second value to multiply, which must be NUM_LIMBS_521BIT + * limbs in size. + * + * \sa mul() + */ +void P521::mulNoReduce(limb_t *result, const limb_t *x, const limb_t *y) +{ + uint8_t i, j; + dlimb_t carry; + limb_t word; + const limb_t *yy; + limb_t *rr; + + // Multiply the lowest word of x by y. + carry = 0; + word = x[0]; + yy = y; + rr = result; + for (i = 0; i < NUM_LIMBS_521BIT; ++i) { + carry += ((dlimb_t)(*yy++)) * word; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + *rr = (limb_t)carry; + + // Multiply and add the remaining words of x by y. + for (i = 1; i < NUM_LIMBS_521BIT; ++i) { + word = x[i]; + carry = 0; + yy = y; + rr = result + i; + for (j = 0; j < NUM_LIMBS_521BIT; ++j) { + carry += ((dlimb_t)(*yy++)) * word; + carry += *rr; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + *rr = (limb_t)carry; + } +} + +/** + * \brief Multiplies two values and then reduces the result modulo 2^521 - 1. + * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size + * and can be the same array as \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS_521BIT limbs + * in size and less than 2^521 - 1. + * \param y The second value to multiply, which must be NUM_LIMBS_521BIT limbs + * in size and less than 2^521 - 1. This can be the same array as \a x. + */ +void P521::mul(limb_t *result, const limb_t *x, const limb_t *y) +{ + limb_t temp[NUM_LIMBS_1042BIT]; + mulNoReduce(temp, x, y); + reduce(result, temp); + strict_clean(temp); + crypto_feed_watchdog(); +} + +/** + * \fn void P521::square(limb_t *result, const limb_t *x) + * \brief Squares a value and then reduces it modulo 2^521 - 1. + * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size and + * can be the same array as \a x. + * \param x The value to square, which must be NUM_LIMBS_521BIT limbs in size + * and less than 2^521 - 1. + */ + +/** + * \brief Multiply a value by a single-limb literal modulo 2^521 - 1. 
+ * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size and + * can be the same array as \a x. + * \param x The first value to multiply, which must be NUM_LIMBS_521BIT limbs + * in size and less than 2^521 - 1. + * \param y The second value to multiply, which must be less than 128. + */ +void P521::mulLiteral(limb_t *result, const limb_t *x, limb_t y) +{ + uint8_t index; + dlimb_t carry = 0; + const limb_t *xx = x; + limb_t *rr = result; + + // Multiply x by the literal and put it into the result array. + // We assume that y is small enough that overflow from the + // highest limb will not occur during this process. + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry += ((dlimb_t)(*xx++)) * y; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // Reduce the value modulo 2^521 - 1. The high half is only a + // single limb, so we can short-cut some of reduce() here. +#if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT + limb_t word = result[NUM_LIMBS_521BIT - 1]; + carry = (word >> 9) + 1; + word &= 0x1FF; + rr = result; + for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) { + carry += *rr; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + carry += word; + word = (limb_t)carry; + *rr = word; + + // If the carry out was 1, then mask it off and we have the answer. + // If the carry out was 0, then we need to add 2^521 - 1 back again. + // To preserve the timing we perform a conditional subtract of 1 and + // then mask off the high bits. + carry = ((word >> 9) ^ 0x01) & 0x01; + rr = result; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*rr)) - carry; + *rr++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--rr) &= 0x1FF; +#elif BIGNUMBER_LIMB_8BIT + // Same as above, but for 8-bit limbs. 
+ limb_t word = result[NUM_LIMBS_521BIT - 1]; + carry = (word >> 1) + 1; + word &= 0x01; + rr = result; + for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) { + carry += *rr; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + carry += word; + word = (limb_t)carry; + *rr = word; + carry = ((word >> 1) ^ 0x01) & 0x01; + rr = result; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*rr)) - carry; + *rr++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--rr) &= 0x01; +#endif +} + +/** + * \brief Adds two values and then reduces the result modulo 2^521 - 1. + * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size + * and can be the same array as \a x or \a y. + * \param x The first value to add, which must be NUM_LIMBS_521BIT + * limbs in size and less than 2^521 - 1. + * \param y The second value to add, which must be NUM_LIMBS_521BIT + * limbs in size and less than 2^521 - 1. + */ +void P521::add(limb_t *result, const limb_t *x, const limb_t *y) +{ + dlimb_t carry = 0; + limb_t *rr = result; + for (uint8_t posn = 0; posn < NUM_LIMBS_521BIT; ++posn) { + carry += *x++; + carry += *y++; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + reduceQuick(result); +} + +/** + * \brief Subtracts two values and then reduces the result modulo 2^521 - 1. + * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size + * and can be the same array as \a x or \a y. + * \param x The value to subtract from, which must be NUM_LIMBS_521BIT + * limbs in size and less than 2^521 - 1. + * \param y The value to subtract from \a x, which must be NUM_LIMBS_521BIT + * limbs in size and less than 2^521 - 1. + */ +void P521::sub(limb_t *result, const limb_t *x, const limb_t *y) +{ + dlimb_t borrow; + uint8_t posn; + limb_t *rr = result; + + // Subtract y from x to generate the intermediate result.
+ borrow = 0; + for (posn = 0; posn < NUM_LIMBS_521BIT; ++posn) { + borrow = ((dlimb_t)(*x++)) - (*y++) - ((borrow >> LIMB_BITS) & 0x01); + *rr++ = (limb_t)borrow; + } + + // If we had a borrow, then the result has gone negative and we + // have to add 2^521 - 1 to the result to make it positive again. + // The top bits of "borrow" will be all 1's if there is a borrow + // or it will be all 0's if there was no borrow. Easiest is to + // conditionally subtract 1 and then mask off the high bits. + rr = result; + borrow = (borrow >> LIMB_BITS) & 1U; + borrow = ((dlimb_t)(*rr)) - borrow; + *rr++ = (limb_t)borrow; + for (posn = 1; posn < NUM_LIMBS_521BIT; ++posn) { + borrow = ((dlimb_t)(*rr)) - ((borrow >> LIMB_BITS) & 0x01); + *rr++ = (limb_t)borrow; + } +#if BIGNUMBER_LIMB_8BIT + *(--rr) &= 0x01; +#else + *(--rr) &= 0x1FF; +#endif +} + +/** + * \brief Doubles a point represented in Jacobian co-ordinates. + * + * \param xout The X value for the result. + * \param yout The Y value for the result. + * \param zout The Z value for the result. + * \param xin The X value for the point to be doubled. + * \param yin The Y value for the point to be doubled. + * \param zin The Z value for the point to be doubled. + * + * The output parameters can be the same as the input parameters + * to double in-place. + * + * Reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b + */ +void P521::dblPoint(limb_t *xout, limb_t *yout, limb_t *zout, + const limb_t *xin, const limb_t *yin, + const limb_t *zin) +{ + limb_t alpha[NUM_LIMBS_521BIT]; + limb_t beta[NUM_LIMBS_521BIT]; + limb_t gamma[NUM_LIMBS_521BIT]; + limb_t delta[NUM_LIMBS_521BIT]; + limb_t tmp[NUM_LIMBS_521BIT]; + + // Double the point. If it is the point at infinity (z = 0), + // then zout will still be zero at the end of this process so + // we don't need any special handling for that case. 
+ square(delta, zin); // delta = z^2 + square(gamma, yin); // gamma = y^2 + mul(beta, xin, gamma); // beta = x * gamma + sub(tmp, xin, delta); // alpha = 3 * (x - delta) * (x + delta) + mulLiteral(alpha, tmp, 3); + add(tmp, xin, delta); + mul(alpha, alpha, tmp); + square(xout, alpha); // xout = alpha^2 - 8 * beta + mulLiteral(tmp, beta, 8); + sub(xout, xout, tmp); + add(zout, yin, zin); // zout = (y + z)^2 - gamma - delta + square(zout, zout); + sub(zout, zout, gamma); + sub(zout, zout, delta); + mulLiteral(yout, beta, 4);// yout = alpha * (4 * beta - xout) - 8 * gamma^2 + sub(yout, yout, xout); + mul(yout, alpha, yout); + square(gamma, gamma); + mulLiteral(gamma, gamma, 8); + sub(yout, yout, gamma); + + // Clean up. + strict_clean(alpha); + strict_clean(beta); + strict_clean(gamma); + strict_clean(delta); + strict_clean(tmp); +} + +/** + * \brief Adds two curve points, one represented in Jacobian co-ordinates, + * and the other represented in affine co-ordinates. + * + * \param xout The X value for the result. + * \param yout The Y value for the result. + * \param zout The Z value for the result. + * \param x1 The X value for the first point to add. + * \param y1 The Y value for the first point to add. + * \param z1 The Z value for the first point to add. + * \param x2 The X value for the second point to add. + * \param y2 The Y value for the second point to add. + * + * The output parameters must not overlap with either of the inputs. + * + * The Z value of the second point is implicitly assumed to be 1. 
+ * + * Reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl + */ +void P521::addPoint(limb_t *xout, limb_t *yout, limb_t *zout, + const limb_t *x1, const limb_t *y1, + const limb_t *z1, const limb_t *x2, + const limb_t *y2) +{ + limb_t z1z1[NUM_LIMBS_521BIT]; + limb_t u2[NUM_LIMBS_521BIT]; + limb_t s2[NUM_LIMBS_521BIT]; + limb_t h[NUM_LIMBS_521BIT]; + limb_t i[NUM_LIMBS_521BIT]; + limb_t j[NUM_LIMBS_521BIT]; + limb_t r[NUM_LIMBS_521BIT]; + limb_t v[NUM_LIMBS_521BIT]; + + // Determine if the first value is the point-at-infinity identity element. + // The second z value is always 1 so it cannot be the point-at-infinity. + limb_t p1IsIdentity = BigNumberUtil::isZero(z1, NUM_LIMBS_521BIT); + + // Multiply the points, assuming that z2 = 1. + square(z1z1, z1); // z1z1 = z1^2 + mul(u2, x2, z1z1); // u2 = x2 * z1z1 + mul(s2, y2, z1); // s2 = y2 * z1 * z1z1 + mul(s2, s2, z1z1); + sub(h, u2, x1); // h = u2 - x1 + mulLiteral(i, h, 2); // i = (2 * h)^2 + square(i, i); + sub(r, s2, y1); // r = 2 * (s2 - y1) + add(r, r, r); + mul(j, h, i); // j = h * i + mul(v, x1, i); // v = x1 * i + square(xout, r); // xout = r^2 - j - 2 * v + sub(xout, xout, j); + sub(xout, xout, v); + sub(xout, xout, v); + sub(yout, v, xout); // yout = r * (v - xout) - 2 * y1 * j + mul(yout, r, yout); + mul(j, y1, j); + sub(yout, yout, j); + sub(yout, yout, j); + mul(zout, z1, h); // zout = 2 * z1 * h + add(zout, zout, zout); + + // Select the answer to return. If (x1, y1, z1) was the identity, + // then the answer is (x2, y2, z2). Otherwise it is (xout, yout, zout). + // Conditionally move the second argument over the output if necessary. + cmove(p1IsIdentity, xout, x2); + cmove(p1IsIdentity, yout, y2); + cmove1(p1IsIdentity, zout); // z2 = 1 + + // Clean up. 
+ strict_clean(z1z1); + strict_clean(u2); + strict_clean(s2); + strict_clean(h); + strict_clean(i); + strict_clean(j); + strict_clean(r); + strict_clean(v); +} + +/** + * \brief Conditionally moves \a y into \a x if a selection value is non-zero. + * + * \param select Non-zero to move \a y into \a x, zero to leave \a x unchanged. + * \param x The destination to move into. + * \param y The value to conditionally move. + * + * The move is performed in a way that it should take the same amount of + * time irrespective of the value of \a select. + * + * \sa cmove1() + */ +void P521::cmove(limb_t select, limb_t *x, const limb_t *y) +{ + uint8_t posn; + limb_t dummy; + limb_t sel; + + // Turn "select" into an all-zeroes or all-ones mask. We don't care + // which bit or bits is set in the original "select" value. + sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS); + --sel; + + // Move y into x based on "select". + for (posn = 0; posn < NUM_LIMBS_521BIT; ++posn) { + dummy = sel & (*x ^ *y++); + *x++ ^= dummy; + } +} + +/** + * \brief Conditionally moves 1 into \a x if a selection value is non-zero. + * + * \param select Non-zero to move 1 into \a x, zero to leave \a x unchanged. + * \param x The destination to move into. + * + * The move is performed in a way that it should take the same amount of + * time irrespective of the value of \a select. + * + * \sa cmove() + */ +void P521::cmove1(limb_t select, limb_t *x) +{ + uint8_t posn; + limb_t dummy; + limb_t sel; + + // Turn "select" into an all-zeroes or all-ones mask. We don't care + // which bit or bits is set in the original "select" value. + sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS); + --sel; + + // Move 1 into x based on "select". + dummy = sel & (*x ^ 1); + *x++ ^= dummy; + for (posn = 1; posn < NUM_LIMBS_521BIT; ++posn) { + dummy = sel & *x; + *x++ ^= dummy; + } +} + +/** + * \brief Computes the reciprocal of a number modulo 2^521 - 1. 
+ * + * \param result The result as a array of NUM_LIMBS_521BIT limbs in size. + * This cannot be the same array as \a x. + * \param x The number to compute the reciprocal for, also NUM_LIMBS_521BIT + * limbs in size. + */ +void P521::recip(limb_t *result, const limb_t *x) +{ + limb_t t1[NUM_LIMBS_521BIT]; + + // The reciprocal is the same as x ^ (p - 2) where p = 2^521 - 1. + // The big-endian hexadecimal expansion of (p - 2) is: + // 01FF FFFFFFFF FFFFFFFF ... FFFFFFFF FFFFFFFD + // + // The naive implementation needs to do 2 multiplications per 1 bit and + // 1 multiplication per 0 bit. We can improve upon this by creating a + // pattern 1111 and then shifting and multiplying to create 11111111, + // and then 1111111111111111, and so on for the top 512 bits. + + // Build a 4-bit pattern 1111 in the result. + square(result, x); + mul(result, result, x); + square(result, result); + mul(result, result, x); + square(result, result); + mul(result, result, x); + + // Shift and multiply by increasing powers of two. This turns + // 1111 into 11111111, and then 1111111111111111, and so on. + for (size_t power = 4; power <= 256; power <<= 1) { + square(t1, result); + for (size_t temp = 1; temp < power; ++temp) + square(t1, t1); + mul(result, result, t1); + } + + // Handle the 9 lowest bits of (p - 2), 111111101, from highest to lowest. + for (uint8_t index = 0; index < 7; ++index) { + square(result, result); + mul(result, result, x); + } + square(result, result); + square(result, result); + mul(result, result, x); + + // Clean up. + clean(t1); +} + +/** + * \brief Reduces a number modulo q. + * + * \param result The result array, which must be NUM_LIMBS_521BIT limbs in size. + * \param r The value to reduce, which must be NUM_LIMBS_1042BIT limbs in size. + * + * It is allowed for \a result to be the same as \a r.
+ */ +void P521::reduceQ(limb_t *result, const limb_t *r) +{ + // Algorithm from: http://en.wikipedia.org/wiki/Barrett_reduction + // + // We assume that r is less than or equal to (q - 1)^2. + // + // We want to compute result = r mod q. Find the smallest k such + // that 2^k > q. In our case, k = 521. Then set m = floor(4^k / q) + // and let r = r - q * floor(m * r / 4^k). This will be the result + // or it will be at most one subtraction of q away from the result. + // + // Note: m is a 522-bit number, which fits in the same number of limbs + // as a 521-bit number assuming that limbs are 8 bits or more in size. + static limb_t const numM[NUM_LIMBS_521BIT] PROGMEM = { + LIMB_PAIR(0x6EC79BF7, 0x449048E1), LIMB_PAIR(0x7663B851, 0xC44A3647), + LIMB_PAIR(0x08F65A2F, 0x8033FEB7), LIMB_PAIR(0x40D06994, 0xAE79787C), + LIMB_PAIR(0x00000005, 0x00000000), LIMB_PAIR(0x00000000, 0x00000000), + LIMB_PAIR(0x00000000, 0x00000000), LIMB_PAIR(0x00000000, 0x00000000), + LIMB_PARTIAL(0x200) + }; + limb_t temp[NUM_LIMBS_1042BIT + NUM_LIMBS_521BIT]; + limb_t temp2[NUM_LIMBS_521BIT]; + + // Multiply r by m. + BigNumberUtil::mul_P(temp, r, NUM_LIMBS_1042BIT, numM, NUM_LIMBS_521BIT); + + // Compute (m * r / 4^521) = (m * r / 2^1042). +#if BIGNUMBER_LIMB_8BIT || BIGNUMBER_LIMB_16BIT + dlimb_t carry = temp[NUM_LIMBS_BITS(1040)] >> 2; + for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry += ((dlimb_t)(temp[NUM_LIMBS_BITS(1040) + index + 1])) << (LIMB_BITS - 2); + temp2[index] = (limb_t)carry; + carry >>= LIMB_BITS; + } +#elif BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT + dlimb_t carry = temp[NUM_LIMBS_BITS(1024)] >> 18; + for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry += ((dlimb_t)(temp[NUM_LIMBS_BITS(1024) + index + 1])) << (LIMB_BITS - 18); + temp2[index] = (limb_t)carry; + carry >>= LIMB_BITS; + } +#endif + + // Multiply (m * r) / 2^1042 by q and subtract it from r. 
+ // We can ignore the high words of the subtraction result + // because they will all turn into zero after the subtraction. + BigNumberUtil::mul_P(temp, temp2, NUM_LIMBS_521BIT, + P521_q, NUM_LIMBS_521BIT); + BigNumberUtil::sub(result, r, temp, NUM_LIMBS_521BIT); + + // Perform a trial subtraction of q from the result to reduce it. + BigNumberUtil::reduceQuick_P(result, result, P521_q, NUM_LIMBS_521BIT); + + // Clean up and exit. + clean(temp); + clean(temp2); +} + +/** + * \brief Multiplies two values and then reduces the result modulo q. + * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size + * and can be the same array as \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS_521BIT limbs + * in size and less than q. + * \param y The second value to multiply, which must be NUM_LIMBS_521BIT limbs + * in size and less than q. This can be the same array as \a x. + */ +void P521::mulQ(limb_t *result, const limb_t *x, const limb_t *y) +{ + limb_t temp[NUM_LIMBS_1042BIT]; + mulNoReduce(temp, x, y); + reduceQ(result, temp); + strict_clean(temp); +} + +/** + * \brief Computes the reciprocal of a number modulo q. + * + * \param result The result as a array of NUM_LIMBS_521BIT limbs in size. + * This cannot be the same array as \a x. + * \param x The number to compute the reciprocal for, also NUM_LIMBS_521BIT + * limbs in size. + */ +void P521::recipQ(limb_t *result, const limb_t *x) +{ + // Bottom 265 bits of q - 2. The top 256 bits are all-1's. + static limb_t const P521_q_m2[] PROGMEM = { + LIMB_PAIR(0x91386407, 0xbb6fb71e), LIMB_PAIR(0x899c47ae, 0x3bb5c9b8), + LIMB_PAIR(0xf709a5d0, 0x7fcc0148), LIMB_PAIR(0xbf2f966b, 0x51868783), + LIMB_PARTIAL(0x1fa) + }; + + // Raise x to the power of q - 2, mod q. We start with the top + // 256 bits which are all-1's, using a similar technique to recip(). 
+ limb_t t1[NUM_LIMBS_521BIT]; + mulQ(result, x, x); + mulQ(result, result, x); + mulQ(result, result, result); + mulQ(result, result, x); + mulQ(result, result, result); + mulQ(result, result, x); + for (size_t power = 4; power <= 128; power <<= 1) { + mulQ(t1, result, result); + for (size_t temp = 1; temp < power; ++temp) + mulQ(t1, t1, t1); + mulQ(result, result, t1); + } + clean(t1); + + // Deal with the bottom 265 bits from highest to lowest. Square for + // each bit and multiply in x whenever there is a 1 bit. The timing + // is based on the publicly-known constant q - 2, not on the value of x. + size_t bit = 265; + while (bit > 0) { + --bit; + mulQ(result, result, result); + if (pgm_read_limb(&(P521_q_m2[bit / LIMB_BITS])) & + (((limb_t)1) << (bit % LIMB_BITS))) { + mulQ(result, result, x); + } + } +} + +/** + * \brief Generates a k value using the algorithm from RFC 6979. + * + * \param k The value to generate. + * \param hm The hashed message formatted ready to be signed. + * \param x The private key to sign with. + * \param hash The hash algorithm to use. + * \param count Iteration counter for generating new values of k when the + * previous one is rejected. + */ +void P521::generateK(uint8_t k[66], const uint8_t hm[66], + const uint8_t x[66], Hash *hash, uint64_t count) +{ + size_t hlen = hash->hashSize(); + uint8_t V[64]; + uint8_t K[64]; + uint8_t marker; + + // If for some reason a hash function was supplied with more than + // 512 bits of output, truncate hash values to the first 512 bits. + // We cannot support more than this yet. + if (hlen > 64) + hlen = 64; + + // RFC 6979, Section 3.2, Step a. Hash the message, reduce modulo q, + // and produce an octet string the same length as q, bits2octets(H(m)). + // We support hashes up to 512 bits and q is a 521-bit number, so "hm" + // is already the bits2octets(H(m)) value that we need. + + // Steps b and c. Set V to all-ones and K to all-zeroes. 
+ memset(V, 0x01, hlen); + memset(K, 0x00, hlen); + + // Step d. K = HMAC_K(V || 0x00 || x || hm). We make a small + // modification here to append the count value if it is non-zero. + // We use this to generate a new k if we have to re-enter this + // function because the previous one was rejected by sign(). + // This is slightly different to RFC 6979 which says that the + // loop in step h below should be continued. That code path is + // difficult to access, so instead modify K and V in steps d and f. + // This alternative construction is compatible with the second + // variant described in section 3.6 of RFC 6979. + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + marker = 0x00; + hash->update(&marker, 1); + hash->update(x, 66); + hash->update(hm, 66); + if (count) + hash->update(&count, sizeof(count)); + hash->finalizeHMAC(K, hlen, K, hlen); + + // Step e. V = HMAC_K(V) + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + hash->finalizeHMAC(K, hlen, V, hlen); + + // Step f. K = HMAC_K(V || 0x01 || x || hm) + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + marker = 0x01; + hash->update(&marker, 1); + hash->update(x, 66); + hash->update(hm, 66); + if (count) + hash->update(&count, sizeof(count)); + hash->finalizeHMAC(K, hlen, K, hlen); + + // Step g. V = HMAC_K(V) + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + hash->finalizeHMAC(K, hlen, V, hlen); + + // Step h. Generate candidate k values until we find what we want. + for (;;) { + // Step h.1 and h.2. Generate a string of 66 bytes in length. + // T = empty + // while (len(T) < 66) + // V = HMAC_K(V) + // T = T || V + size_t posn = 0; + while (posn < 66) { + size_t temp = 66 - posn; + if (temp > hlen) + temp = hlen; + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + hash->finalizeHMAC(K, hlen, V, hlen); + memcpy(k + posn, V, temp); + posn += temp; + } + + // Step h.3. k = bits2int(T) and exit the loop if k is in + // the range 1 to q - 1.
Note: We have to extract the 521 most + // significant bits of T, which means shifting it right by seven + // bits to put it into the correct form. + for (posn = 65; posn > 0; --posn) + k[posn] = (k[posn - 1] << 1) | (k[posn] >> 7); + k[0] >>= 7; + if (isValidPrivateKey(k)) + break; + + // Generate new K and V values and try again. + // K = HMAC_K(V || 0x00) + // V = HMAC_K(V) + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + marker = 0x00; + hash->update(&marker, 1); + hash->finalizeHMAC(K, hlen, K, hlen); + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + hash->finalizeHMAC(K, hlen, V, hlen); + } + + // Clean up. + clean(V); + clean(K); +} + +/** + * \brief Generates a k value using the algorithm from RFC 6979. + * + * \param k The value to generate. + * \param hm The hashed message formatted ready to be signed. + * \param x The private key to sign with. + * \param count Iteration counter for generating new values of k when the + * previous one is rejected. + * + * This override uses SHA512 to generate k values. It is used when + * sign() was not passed an explicit hash object by the application. + */ +void P521::generateK(uint8_t k[66], const uint8_t hm[66], + const uint8_t x[66], uint64_t count) +{ + SHA512 hash; + generateK(k, hm, x, &hash, count); +} diff --git a/src/P521.h b/src/P521.h new file mode 100644 index 0000000..09ac02f --- /dev/null +++ b/src/P521.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_P521_h +#define CRYPTO_P521_h + +#include "BigNumberUtil.h" + +class Hash; + +class P521 +{ +public: + + static bool eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132]); + + static void dh1(uint8_t k[132], uint8_t f[66]); + static bool dh2(const uint8_t k[132], uint8_t f[66]); + + static void sign(uint8_t signature[132], const uint8_t privateKey[66], + const void *message, size_t len, Hash *hash = 0); + static bool verify(const uint8_t signature[132], + const uint8_t publicKey[132], + const void *message, size_t len, Hash *hash = 0); + + static void generatePrivateKey(uint8_t privateKey[66]); + static void derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66]); + + static bool isValidPrivateKey(const uint8_t privateKey[66]); + static bool isValidPublicKey(const uint8_t publicKey[132]); + + static bool isValidCurvePoint(const uint8_t point[132]) + { + return isValidPublicKey(point); + } + +#if defined(TEST_P521_FIELD_OPS) +public: +#else +private: +#endif + static void evaluate(limb_t *x, limb_t *y, const uint8_t f[66]); + + static void addAffine(limb_t *x1, limb_t *y1, + const limb_t *x2, const limb_t *y2); + + static bool validate(const limb_t *x, const limb_t *y); + static bool inRange(const limb_t *x); + + static void reduce(limb_t *result, const limb_t *x); + static void reduceQuick(limb_t *x); + + static void mulNoReduce(limb_t *result, const limb_t *x, const limb_t *y); + + static void mul(limb_t *result, const limb_t *x, const limb_t *y); + static void square(limb_t *result, const limb_t *x) + { + mul(result, x, x); + } + + static void mulLiteral(limb_t *result, const limb_t *x, limb_t y); + + static void add(limb_t *result, const limb_t *x, const limb_t *y); + static void sub(limb_t *result, const limb_t *x, const limb_t *y); + + static void dblPoint(limb_t *xout, limb_t *yout, limb_t *zout, + const limb_t *xin, const limb_t *yin, + const limb_t *zin); + static void addPoint(limb_t *xout, limb_t 
*yout, limb_t *zout, + const limb_t *x1, const limb_t *y1, + const limb_t *z1, const limb_t *x2, + const limb_t *y2); + + static void cmove(limb_t select, limb_t *x, const limb_t *y); + static void cmove1(limb_t select, limb_t *x); + + static void recip(limb_t *result, const limb_t *x); + + static void reduceQ(limb_t *result, const limb_t *r); + static void mulQ(limb_t *result, const limb_t *x, const limb_t *y); + static void recipQ(limb_t *result, const limb_t *x); + + static void generateK(uint8_t k[66], const uint8_t hm[66], + const uint8_t x[66], Hash *hash, uint64_t count); + static void generateK(uint8_t k[66], const uint8_t hm[66], + const uint8_t x[66], uint64_t count); + + // Constructor and destructor are private - cannot instantiate this class. + P521() {} + ~P521() {} +}; + +#endif diff --git a/src/Poly1305.cpp b/src/Poly1305.cpp new file mode 100644 index 0000000..c9e2851 --- /dev/null +++ b/src/Poly1305.cpp @@ -0,0 +1,345 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "Poly1305.h" +#include "Crypto.h" +#include "utility/EndianUtil.h" +#include "utility/LimbUtil.h" +#include + +/** + * \class Poly1305 Poly1305.h + * \brief Poly1305 message authenticator + * + * Poly1305 is a message authenticator designed by Daniel J. Bernstein. + * An arbitrary-length message is broken up into 16-byte chunks and fed + * into a polynomial mod 2^130 - 5 based on the 16-byte + * authentication key. The final polynomial value is then combined with a + * 16-byte nonce to create the authentication token. + * + * The following example demonstrates how to compute an authentication token + * for a message made up of several blocks under a specific key and nonce: + * + * \code + * Poly1305 poly1305; + * uint8_t token[16]; + * poly1305.reset(key); + * poly1305.update(block1, sizeof(block1)); + * poly1305.update(block2, sizeof(block2)); + * ... + * poly1305.update(blockN, sizeof(blockN)); + * poly1305.finalize(nonce, token, sizeof(token)); + * \endcode + * + * In the original Poly1305 specification, the nonce was encrypted with AES + * and a second 16-byte key. Since then, common practice has been for the + * caller to encrypt the nonce which gives the caller more flexibility as + * to how to derive and/or encrypt the nonce. + * + * References: http://en.wikipedia.org/wiki/Poly1305-AES, + * http://cr.yp.to/mac.html + */ + +// Limb array with enough space for 130 bits. +#define NUM_LIMBS_130BIT (NUM_LIMBS_128BIT + 1) + +// Endian helper macros for limbs and arrays of limbs.
+#if BIGNUMBER_LIMB_8BIT +#define lelimbtoh(x) (x) +#define htolelimb(x) (x) +#elif BIGNUMBER_LIMB_16BIT +#define lelimbtoh(x) (le16toh((x))) +#define htolelimb(x) (htole16((x))) +#elif BIGNUMBER_LIMB_32BIT +#define lelimbtoh(x) (le32toh((x))) +#define htolelimb(x) (htole32((x))) +#elif BIGNUMBER_LIMB_64BIT +#define lelimbtoh(x) (le64toh((x))) +#define htolelimb(x) (htole64((x))) +#endif +#if defined(CRYPTO_LITTLE_ENDIAN) +#define littleToHost(r,size) do { ; } while (0) +#else +#define littleToHost(r,size) \ + do { \ + for (uint8_t i = 0; i < (size); ++i) \ + (r)[i] = lelimbtoh((r)[i]); \ + } while (0) +#endif + +/** + * \brief Constructs a new Poly1305 message authenticator. + */ +Poly1305::Poly1305() +{ + state.chunkSize = 0; +} + +/** + * \brief Destroys this Poly1305 message authenticator after clearing all + * sensitive information. + */ +Poly1305::~Poly1305() +{ + clean(state); +} + +/** + * \brief Resets the Poly1305 message authenticator for a new session. + * + * \param key Points to the 16 byte authentication key. + * + * \sa update(), finalize() + */ +void Poly1305::reset(const void *key) +{ + // Copy the key into place and clear the bits we don't need. + uint8_t *r = (uint8_t *)state.r; + memcpy(r, key, 16); + r[3] &= 0x0F; + r[4] &= 0xFC; + r[7] &= 0x0F; + r[8] &= 0xFC; + r[11] &= 0x0F; + r[12] &= 0xFC; + r[15] &= 0x0F; + + // Convert into little-endian if necessary. + littleToHost(state.r, NUM_LIMBS_128BIT); + + // Reset the hashing process. + state.chunkSize = 0; + memset(state.h, 0, sizeof(state.h)); +} + +/** + * \brief Updates the message authenticator with more data. + * + * \param data Data to be hashed. + * \param len Number of bytes of data to be hashed. + * + * If finalize() has already been called, then the behavior of update() will + * be undefined. Call reset() first to start a new authentication process. 
+ * + * \sa pad(), reset(), finalize() + */ +void Poly1305::update(const void *data, size_t len) +{ + // Break the input up into 128-bit chunks and process each in turn. + const uint8_t *d = (const uint8_t *)data; + while (len > 0) { + uint8_t size = 16 - state.chunkSize; + if (size > len) + size = len; + memcpy(((uint8_t *)state.c) + state.chunkSize, d, size); + state.chunkSize += size; + len -= size; + d += size; + if (state.chunkSize == 16) { + littleToHost(state.c, NUM_LIMBS_128BIT); + state.c[NUM_LIMBS_128BIT] = 1; + processChunk(); + state.chunkSize = 0; + } + } +} + +/** + * \brief Finalizes the authentication process and returns the token. + * + * \param nonce Points to the 16-bit nonce to combine with the token. + * \param token The buffer to return the token value in. + * \param len The length of the \a token buffer between 0 and 16. + * + * If \a len is less than 16, then the token value will be truncated to + * the first \a len bytes. If \a len is greater than 16, then the remaining + * bytes will left unchanged. + * + * If finalize() is called again, then the returned \a token value is + * undefined. Call reset() first to start a new authentication process. + * + * \sa reset(), update() + */ +void Poly1305::finalize(const void *nonce, void *token, size_t len) +{ + dlimb_t carry; + uint8_t i; + limb_t t[NUM_LIMBS_256BIT + 1]; + + // Pad and flush the final chunk. + if (state.chunkSize > 0) { + uint8_t *c = (uint8_t *)state.c; + c[state.chunkSize] = 1; + memset(c + state.chunkSize + 1, 0, 16 - state.chunkSize - 1); + littleToHost(state.c, NUM_LIMBS_128BIT); + state.c[NUM_LIMBS_128BIT] = 0; + processChunk(); + } + + // At this point, processChunk() has left h as a partially reduced + // result that is less than (2^130 - 5) * 6. Perform one more + // reduction and a trial subtraction to produce the final result. + + // Multiply the high bits of h by 5 and add them to the 130 low bits. 
+ carry = (dlimb_t)((state.h[NUM_LIMBS_128BIT] >> 2) + + (state.h[NUM_LIMBS_128BIT] & ~((limb_t)3))); + state.h[NUM_LIMBS_128BIT] &= 0x0003; + for (i = 0; i < NUM_LIMBS_128BIT; ++i) { + carry += state.h[i]; + state.h[i] = (limb_t)carry; + carry >>= LIMB_BITS; + } + state.h[i] += (limb_t)carry; + + // Subtract (2^130 - 5) from h by computing t = h + 5 - 2^130. + // The "minus 2^130" step is implicit. + carry = 5; + for (i = 0; i < NUM_LIMBS_130BIT; ++i) { + carry += state.h[i]; + t[i] = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // Borrow occurs if bit 2^130 of the previous t result is zero. + // Carefully turn this into a selection mask so we can select either + // h or t as the final result. We don't care about the highest word + // of the result because we are about to drop it in the next step. + // We have to do it this way to avoid giving away any information + // about the value of h in the instruction timing. + limb_t mask = (~((t[NUM_LIMBS_128BIT] >> 2) & 1)) + 1; + limb_t nmask = ~mask; + for (i = 0; i < NUM_LIMBS_128BIT; ++i) { + state.h[i] = (state.h[i] & nmask) | (t[i] & mask); + } + + // Add the encrypted nonce and format the final hash. + memcpy(state.c, nonce, 16); + littleToHost(state.c, NUM_LIMBS_128BIT); + carry = 0; + for (i = 0; i < NUM_LIMBS_128BIT; ++i) { + carry += state.h[i]; + carry += state.c[i]; + state.h[i] = htolelimb((limb_t)carry); + carry >>= LIMB_BITS; + } + if (len > 16) + len = 16; + memcpy(token, state.h, len); +} + +/** + * \brief Pads the input stream with zero bytes to a multiple of 16. + * + * \sa update() + */ +void Poly1305::pad() +{ + if (state.chunkSize != 0) { + memset(((uint8_t *)state.c) + state.chunkSize, 0, 16 - state.chunkSize); + littleToHost(state.c, NUM_LIMBS_128BIT); + state.c[NUM_LIMBS_128BIT] = 1; + processChunk(); + state.chunkSize = 0; + } +} + +/** + * \brief Clears the authenticator's state, removing all sensitive data. 
+ */ +void Poly1305::clear() +{ + clean(state); +} + +/** + * \brief Processes a single 128-bit chunk of input data. + */ +void Poly1305::processChunk() +{ + limb_t t[NUM_LIMBS_256BIT + 1]; + + // Compute h = ((h + c) * r) mod (2^130 - 5). + + // Start with h += c. We assume that h is less than (2^130 - 5) * 6 + // and that c is less than 2^129, so the result will be less than 2^133. + dlimb_t carry = 0; + uint8_t i, j; + for (i = 0; i < NUM_LIMBS_130BIT; ++i) { + carry += state.h[i]; + carry += state.c[i]; + state.h[i] = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // Multiply h by r. We know that r is less than 2^124 because the + // top 4 bits were AND-ed off by reset(). That makes h * r less + // than 2^257. Which is less than the (2^130 - 6)^2 we want for + // the modulo reduction step that follows. + carry = 0; + limb_t word = state.r[0]; + for (i = 0; i < NUM_LIMBS_130BIT; ++i) { + carry += ((dlimb_t)(state.h[i])) * word; + t[i] = (limb_t)carry; + carry >>= LIMB_BITS; + } + t[NUM_LIMBS_130BIT] = (limb_t)carry; + for (i = 1; i < NUM_LIMBS_128BIT; ++i) { + word = state.r[i]; + carry = 0; + for (j = 0; j < NUM_LIMBS_130BIT; ++j) { + carry += ((dlimb_t)(state.h[j])) * word; + carry += t[i + j]; + t[i + j] = (limb_t)carry; + carry >>= LIMB_BITS; + } + t[i + NUM_LIMBS_130BIT] = (limb_t)carry; + } + + // Reduce h * r modulo (2^130 - 5) by multiplying the high 130 bits by 5 + // and adding them to the low 130 bits. See the explaination in the + // comments for Curve25519::reduce() for a description of how this works. + carry = ((dlimb_t)(t[NUM_LIMBS_128BIT] >> 2)) + + (t[NUM_LIMBS_128BIT] & ~((limb_t)3)); + t[NUM_LIMBS_128BIT] &= 0x0003; + for (i = 0; i < NUM_LIMBS_128BIT; ++i) { + // Shift the next word of t up by (LIMB_BITS - 2) bits and then + // multiply it by 5. Breaking it down, we can add the results + // of shifting up by LIMB_BITS and shifting up by (LIMB_BITS - 2). 
+ // The main wrinkle here is that this can result in an intermediate + // carry that is (LIMB_BITS * 2 + 1) bits in size which doesn't + // fit within a dlimb_t variable. However, we can defer adding + // (word << LIMB_BITS) until after the "carry >>= LIMB_BITS" step + // because it won't affect the low bits of the carry. + word = t[i + NUM_LIMBS_130BIT]; + carry += ((dlimb_t)word) << (LIMB_BITS - 2); + carry += t[i]; + state.h[i] = (limb_t)carry; + carry >>= LIMB_BITS; + carry += word; + } + state.h[i] = (limb_t)(carry + t[NUM_LIMBS_128BIT]); + + // At this point, h is either the answer of reducing modulo (2^130 - 5) + // or it is at most 5 subtractions away from the answer we want. + // Leave it as-is for now with h less than (2^130 - 5) * 6. It is + // still within a range where the next h * r step will not overflow. +} diff --git a/src/Poly1305.h b/src/Poly1305.h new file mode 100644 index 0000000..ae1e453 --- /dev/null +++ b/src/Poly1305.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_POLY1305_h +#define CRYPTO_POLY1305_h + +#include "BigNumberUtil.h" +#include + +class Poly1305 +{ +public: + Poly1305(); + ~Poly1305(); + + void reset(const void *key); + void update(const void *data, size_t len); + void finalize(const void *nonce, void *token, size_t len); + + void pad(); + + void clear(); + +private: + struct { + limb_t h[(16 / sizeof(limb_t)) + 1]; + limb_t c[(16 / sizeof(limb_t)) + 1]; + limb_t r[(16 / sizeof(limb_t))]; + uint8_t chunkSize; + } state; + + void processChunk(); +}; + +#endif diff --git a/src/RNG.cpp b/src/RNG.cpp new file mode 100644 index 0000000..84bbc4e --- /dev/null +++ b/src/RNG.cpp @@ -0,0 +1,1004 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "RNG.h" +#include "NoiseSource.h" +#include "ChaCha.h" +#include "Crypto.h" +#include +#include "utility/ProgMemUtil.h" +#if defined (__arm__) && defined (__SAM3X8E__) +// The Arduino Due does not have any EEPROM natively on the main chip. +// However, it does have a TRNG and flash memory. +#define RNG_DUE_TRNG 1 +#elif defined(__AVR__) +#define RNG_EEPROM 1 // Use EEPROM to save the seed. +#define RNG_WATCHDOG 1 // Harvest entropy from watchdog jitter. +#include +#include +#include +#define RNG_EEPROM_ADDRESS (E2END + 1 - RNGClass::SEED_SIZE) +#elif defined(ESP8266) +// ESP8266 does not have EEPROM but it does have SPI flash memory. +// It also has a TRNG register for generating "true" random numbers. +// For now we use the TRNG but don't save the seed in flash memory. +#define RNG_WORD_TRNG 1 +#define RNG_WORD_TRNG_GET() (ESP8266_DREG(0x20E44)) +#elif defined(ESP32) +// ESP32 has a word-based TRNG and an API for Non-Volatile Storage (NVS). +#define RNG_WORD_TRNG 1 +#define RNG_WORD_TRNG_GET() (esp_random()) +#define RNG_ESP_NVS 1 +#include +#endif +#include + +// Throw a warning if there is no built-in hardware random number source. +// If this happens, then you need to do one of two things: +// 1. Edit RNG.cpp to add your platform's hardware TRNG. +// 2. Provide a proper noise source like TransistorNoiseSource +// in your sketch and then comment out the #warning line below. +#if !defined(RNG_DUE_TRNG) && \ + !defined(RNG_WATCHDOG) && \ + !defined(RNG_WORD_TRNG) +#warning "no hardware random number source detected for this platform" +#endif + +/** + * \class RNGClass RNG.h + * \brief Pseudo random number generator suitable for cryptography. 
+ * + * Random number generators must be seeded properly before they can + * be used or an adversary may be able to predict the random output. + * Seed data may be: + * + * \li Device-specific, for example serial numbers or MAC addresses. + * \li Application-specific, unique to the application. The tag that is + * passed to begin() is an example of an application-specific value. + * \li Noise-based, generated by a hardware random number generator + * that provides unpredictable values from a noise source. + * + * The following example demonstrates how to initialise the random + * number generator: + * + * \code + * #include + * #include + * #include + * #include + * #include + * + * // Noise source to seed the random number generator. + * TransistorNoiseSource noise(A1); + * + * // MAC address for Ethernet communication. + * byte mac_address[] = {0xDE, 0xAD, 0xBE, 0xEF, 0xFE, 0xED}; + * + * void setup() { + * // Initialize the Ethernet shield. + * Ethernet.begin(mac_address); + * + * // Initialize the random number generator with the application tag + * // "MyApp 1.0" and load the previous seed from EEPROM. + * RNG.begin("MyApp 1.0"); + * + * // Stir in the Ethernet MAC address. + * RNG.stir(mac_address, sizeof(mac_address)); + * + * // Add the noise source to the list of sources known to RNG. + * RNG.addNoiseSource(noise); + * + * // ... + * } + * \endcode + * + * The application should regularly call loop() to stir in new data + * from the registered noise sources and to periodically save the seed: + * + * \code + * void loop() { + * // ... + * + * // Perform regular housekeeping on the random number generator. + * RNG.loop(); + * + * // ... + * } + * \endcode + * + * The loop() function will automatically save the random number seed on a + * regular basis to the last SEED_SIZE bytes of EEPROM memory. By default + * the seed is saved every hour but this can be changed using setAutoSaveTime(). 
+ * + * Keep in mind that saving too often may cause the EEPROM to wear out quicker. + * It is wise to limit saving to once an hour or once a day depending + * upon how long you intend to field the device before replacing it. + * For example, an EEPROM rated for 100k erase/write cycles will last about + * 69 days saving once a minute or 11 years saving once an hour. + * + * The application can still elect to call save() at any time if wants. + * For example, if the application can detect power loss or shutdown + * conditions programmatically, then it may make sense to force a save() + * of the seed upon shutdown. + * + * The Arduino Due does not have EEPROM so RNG saves the seed into + * the last page of system flash memory instead. The RNG class will also + * mix in data from the CPU's built-in True Random Number Generator (TRNG). + * Assuming that the CPU's TRNG is trustworthy, this should be sufficient + * to properly seed the random number generator. It is recommended to + * also mix in data from other noise sources just in case the CPU's TRNG + * is not trustworthy. + * + * \sa NoiseSource + */ + +/** + * \brief Global random number generator instance. + * + * \sa RNGClass + */ +RNGClass RNG; + +/** + * \var RNGClass::SEED_SIZE + * \brief Size of a saved random number seed in EEPROM space. + * + * The seed is saved into the last SEED_SIZE bytes of EEPROM memory. + * The address is dependent upon the size of EEPROM fitted in the device. + */ + +// Number of ChaCha hash rounds to use for random number generation. +#define RNG_ROUNDS 20 + +// Force a rekey after this many blocks of random data. +#define RNG_REKEY_BLOCKS 16 + +// Maximum entropy credit that can be contained in the pool. +#define RNG_MAX_CREDITS 384u + +/** @cond */ + +// Imported from Crypto.cpp. +extern uint8_t crypto_crc8(uint8_t tag, const void *data, unsigned size); + +// Tag for 256-bit ChaCha20 keys. This will always appear in the +// first 16 bytes of the block. 
The remaining 48 bytes are the seed. +static const char tagRNG[16] PROGMEM = { + 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', + '2', '-', 'b', 'y', 't', 'e', ' ', 'k' +}; + +// Initialization seed. This is the ChaCha20 output of hashing +// "expand 32-byte k" followed by 48 bytes set to the numbers 1 to 48. +// The ChaCha20 output block is then truncated to the first 48 bytes. +// +// This value is intended to start the RNG in a semi-chaotic state if +// we don't have a previously saved seed in EEPROM. +static const uint8_t initRNG[48] PROGMEM = { + 0xB0, 0x2A, 0xAE, 0x7D, 0xEE, 0xCB, 0xBB, 0xB1, + 0xFC, 0x03, 0x6F, 0xDD, 0xDC, 0x7D, 0x76, 0x67, + 0x0C, 0xE8, 0x1F, 0x0D, 0xA3, 0xA0, 0xAA, 0x1E, + 0xB0, 0xBD, 0x72, 0x6B, 0x2B, 0x4C, 0x8A, 0x7E, + 0x34, 0xFC, 0x37, 0x60, 0xF4, 0x1E, 0x22, 0xA0, + 0x0B, 0xFB, 0x18, 0x84, 0x60, 0xA5, 0x77, 0x72 +}; + +#if defined(RNG_WATCHDOG) + +// Use jitter between the watchdog timer and the main CPU clock to +// harvest some entropy on AVR-based systems. This technique comes from: +// +// https://sites.google.com/site/astudyofentropy/project-definition/timer-jitter-entropy-sources/entropy-library +// +// The watchdog generates entropy very slowly - it can take around 32 seconds +// to generate 256 bits of entropy credit. This is a "better than nothing" +// entropy source but a real noise source is definitely recommended. + +// Helper macros for specific 32-bit shift counts. +#define leftShift3(value) ((value) << 3) +#define leftShift10(value) ((value) << 10) +#define leftShift15(value) ((value) << 15) +#define rightShift6(value) ((value) >> 6) +#define rightShift11(value) ((value) >> 11) + +static uint32_t volatile hash = 0; +static uint8_t volatile outBits = 0; + +// Watchdog interrupt handler. This fires off every 16ms. We collect +// 32 bits and then pass them off onto RNGClass::loop(). +ISR(WDT_vect) +{ + // Read the low byte of Timer 1. 
We assume that the timer was + // initialized by the Arduino startup code for PWM use or that the + // application is free-running Timer 1 for its own purposes. + // Timer 0 is used on systems that don't have a Timer 1. +#if defined(TCNT1L) + unsigned char value = TCNT1L; +#elif defined(TCNT0L) + unsigned char value = TCNT0L; +#else + unsigned char value = TCNT0; +#endif + // Use Jenkin's one-at-a-time hash function to scatter the entropy a bit. + // https://en.wikipedia.org/wiki/Jenkins_hash_function + hash += value; + hash += leftShift10(hash); + hash ^= rightShift6(hash); + ++outBits; +} + +#endif // RNG_WATCHDOG + +/** @endcond */ + +/** + * \brief Constructs a new random number generator instance. + * + * This constructor must be followed by a call to begin() to + * properly initialize the random number generator. + * + * \sa begin() + */ +RNGClass::RNGClass() + : credits(0) + , firstSave(1) + , initialized(0) + , trngPending(0) + , timer(0) + , timeout(3600000UL) // 1 hour in milliseconds + , count(0) + , trngPosn(0) +{ +} + +/** + * \brief Destroys this random number generator instance. + */ +RNGClass::~RNGClass() +{ +#if defined(RNG_DUE_TRNG) + // Disable the TRNG in the Arduino Due. + REG_TRNG_CR = TRNG_CR_KEY(0x524E47); +#endif +#if defined(RNG_WATCHDOG) + // Disable interrupts and reset the watchdog. + cli(); + wdt_reset(); + + // Clear the "reset due to watchdog" flag. + MCUSR &= ~(1 << WDRF); + + // Disable the watchdog. + _WD_CONTROL_REG |= (1 << _WD_CHANGE_BIT) | (1 << WDE); + _WD_CONTROL_REG = 0; + + // Re-enable interrupts. The watchdog should be stopped. + sei(); +#endif + clean(block); + clean(stream); +} + +#if defined(RNG_DUE_TRNG) + +// Find the flash memory of interest. Allow for the possibility +// of other SAM-based Arduino variants in the future. 
+#if defined(IFLASH1_ADDR) +#define RNG_FLASH_ADDR IFLASH1_ADDR +#define RNG_FLASH_SIZE IFLASH1_SIZE +#define RNG_FLASH_PAGE_SIZE IFLASH1_PAGE_SIZE +#define RNG_EFC EFC1 +#elif defined(IFLASH0_ADDR) +#define RNG_FLASH_ADDR IFLASH0_ADDR +#define RNG_FLASH_SIZE IFLASH0_SIZE +#define RNG_FLASH_PAGE_SIZE IFLASH0_PAGE_SIZE +#define RNG_EFC EFC0 +#else +#define RNG_FLASH_ADDR IFLASH_ADDR +#define RNG_FLASH_SIZE IFLASH_SIZE +#define RNG_FLASH_PAGE_SIZE IFLASH_PAGE_SIZE +#define RNG_EFC EFC +#endif + +// Address of the flash page to use for saving the seed on the Due. +// All SAM variants have a page size of 256 bytes or greater so there is +// plenty of room for the 48 byte seed in the last page of flash memory. +#define RNG_SEED_ADDR (RNG_FLASH_ADDR + RNG_FLASH_SIZE - RNG_FLASH_PAGE_SIZE) +#define RNG_SEED_PAGE ((RNG_FLASH_SIZE / RNG_FLASH_PAGE_SIZE) - 1) + +// Stir in the unique identifier for the Arduino Due's CPU. +// This function must be in RAM because programs running out of +// flash memory are not allowed to access the unique identifier. +// Info from: http://forum.arduino.cc/index.php?topic=289190.0 +__attribute__((section(".ramfunc"))) +static void stirUniqueIdentifier(void) +{ + uint32_t id[4]; + + // Start Read Unique Identifier. + RNG_EFC->EEFC_FCR = (0x5A << 24) | EFC_FCMD_STUI; + while ((RNG_EFC->EEFC_FSR & EEFC_FSR_FRDY) != 0) + ; // do nothing until FRDY falls. + + // Read the identifier. + id[0] = *((const uint32_t *)RNG_FLASH_ADDR); + id[1] = *((const uint32_t *)(RNG_FLASH_ADDR + 4)); + id[2] = *((const uint32_t *)(RNG_FLASH_ADDR + 8)); + id[3] = *((const uint32_t *)(RNG_FLASH_ADDR + 12)); + + // Stop Read Unique Identifier. + RNG_EFC->EEFC_FCR = (0x5A << 24) | EFC_FCMD_SPUI; + while ((RNG_EFC->EEFC_FSR & EEFC_FSR_FRDY) == 0) + ; // do nothing until FRDY rises. + + // Stir the unique identifier into the entropy pool. + RNG.stir((uint8_t *)id, sizeof(id)); +} + +// Erases the flash page containing the seed and then writes the new seed. 
+// It is assumed the seed has already been loaded into the latch registers. +__attribute__((section(".ramfunc"))) +static void eraseAndWriteSeed() +{ + // Execute the "Erase and Write Page" command. + RNG_EFC->EEFC_FCR = (0x5A << 24) | (RNG_SEED_PAGE << 8) | EFC_FCMD_EWP; + + // Wait for the FRDY bit to be raised. + while ((RNG_EFC->EEFC_FSR & EEFC_FSR_FRDY) == 0) + ; // do nothing until FRDY rises. +} + +#endif + +/** + * \brief Initializes the random number generator. + * + * \param tag A string that is stirred into the random pool at startup; + * usually this should be a value that is unique to the application and + * version such as "MyApp 1.0" so that different applications do not + * generate the same sequence of values upon first boot. + * + * This function should be followed by calls to addNoiseSource() to + * register the application's noise sources. + * + * \sa addNoiseSource(), stir(), save() + */ +void RNGClass::begin(const char *tag) +{ + // Bail out if we have already done this. + if (initialized) + return; + + // Initialize the ChaCha20 input block from the saved seed. + memcpy_P(block, tagRNG, sizeof(tagRNG)); + memcpy_P(block + 4, initRNG, sizeof(initRNG)); +#if defined(RNG_EEPROM) + int address = RNG_EEPROM_ADDRESS; + eeprom_read_block(stream, (const void *)address, SEED_SIZE); + if (crypto_crc8('S', stream, SEED_SIZE - 1) == + ((const uint8_t *)stream)[SEED_SIZE - 1]) { + // We have a saved seed: XOR it with the initialization block. + // Note: the CRC-8 value is included. No point throwing it away. + for (int posn = 0; posn < 12; ++posn) + block[posn + 4] ^= stream[posn]; + } +#elif defined(RNG_DUE_TRNG) + // Do we have a seed saved in the last page of flash memory on the Due? + if (crypto_crc8('S', ((const uint32_t *)RNG_SEED_ADDR) + 1, SEED_SIZE) + == ((const uint32_t *)RNG_SEED_ADDR)[0]) { + // XOR the saved seed with the initialization block. 
+ for (int posn = 0; posn < 12; ++posn) + block[posn + 4] ^= ((const uint32_t *)RNG_SEED_ADDR)[posn + 1]; + } + + // If the device has just been reprogrammed, there will be no saved seed. + // XOR the initialization block with some output from the CPU's TRNG + // to permute the state in a first boot situation after reprogramming. + pmc_enable_periph_clk(ID_TRNG); + REG_TRNG_CR = TRNG_CR_KEY(0x524E47) | TRNG_CR_ENABLE; + REG_TRNG_IDR = TRNG_IDR_DATRDY; // Disable interrupts - we will poll. + mixTRNG(); +#endif +#if defined(RNG_ESP_NVS) + // Do we have a seed saved in ESP non-volatile storage (NVS)? + nvs_handle handle = 0; + if (nvs_open("rng", NVS_READONLY, &handle) == 0) { + size_t len = 0; + if (nvs_get_blob(handle, "seed", NULL, &len) == 0 && len == SEED_SIZE) { + uint32_t seed[12]; + if (nvs_get_blob(handle, "seed", seed, &len) == 0) { + for (int posn = 0; posn < 12; ++posn) + block[posn + 4] ^= seed[posn]; + } + clean(seed); + } + nvs_close(handle); + } +#endif +#if defined(RNG_WORD_TRNG) + // Mix in some output from a word-based TRNG to initialize the state. + mixTRNG(); +#endif + + // No entropy credits for the saved seed. + credits = 0; + + // Trigger an automatic save once the entropy credits max out. + firstSave = 1; + + // Rekey the random number generator immediately. + rekey(); + + // Stir in the supplied tag data but don't credit any entropy to it. + if (tag) + stir((const uint8_t *)tag, strlen(tag)); + +#if defined(RNG_DUE_TRNG) + // Stir in the unique identifier for the CPU so that different + // devices will give different outputs even without seeding. + stirUniqueIdentifier(); +#elif defined(ESP8266) + // ESP8266's have a 32-bit CPU chip ID and 32-bit flash chip ID + // that we can use as a device unique identifier. + uint32_t ids[2]; + ids[0] = ESP.getChipId(); + ids[1] = ESP.getFlashChipId(); + stir((const uint8_t *)ids, sizeof(ids)); +#elif defined(ESP32) + // ESP32's have a MAC address that can be used as a device identifier. 
+ uint64_t mac = ESP.getEfuseMac(); + stir((const uint8_t *)&mac, sizeof(mac)); +#else + // AVR devices don't have anything like a serial number so it is + // difficult to make every device unique. Use the compilation + // time and date to provide a little randomness across applications + // if not across devices running the same pre-compiled application. + tag = __TIME__ __DATE__; + stir((const uint8_t *)tag, strlen(tag)); +#endif + +#if defined(RNG_WATCHDOG) + // Disable interrupts and reset the watchdog. + cli(); + wdt_reset(); + + // Clear the "reset due to watchdog" flag. + MCUSR &= ~(1 << WDRF); + + // Enable the watchdog with the smallest duration (16ms) + // and interrupt-only mode. + _WD_CONTROL_REG |= (1 << _WD_CHANGE_BIT) | (1 << WDE); + _WD_CONTROL_REG = (1 << WDIE); + + // Re-enable interrupts. The watchdog should be running. + sei(); +#endif + + // Re-save the seed to obliterate the previous value and to ensure + // that if the system is reset without a call to save() that we won't + // accidentally generate the same sequence of random data again. + save(); + + // The RNG has now been initialized. + initialized = 1; +} + +/** + * \brief Adds a noise source to the random number generator. + * + * \param source The noise source to add, which will be polled regularly + * by loop() to accumulate noise-based entropy from the source. + * + * RNG supports a maximum of four noise sources. If the application needs + * more than that then the application must poll the noise sources itself by + * calling NoiseSource::stir() directly. + * + * \sa loop(), begin() + */ +void RNGClass::addNoiseSource(NoiseSource &source) +{ + #define MAX_NOISE_SOURCES (sizeof(noiseSources) / sizeof(noiseSources[0])) + if (count < MAX_NOISE_SOURCES) { + noiseSources[count++] = &source; + source.added(); + } +} + +/** + * \brief Sets the amount of time between automatic seed saves. + * + * \param minutes The number of minutes between automatic seed saves. 
+ * + * The default time between automatic seed saves is 1 hour. + * + * This function is intended to help with EEPROM wear by slowing down how + * often seed data is saved as noise is stirred into the random pool. + * The exact period to use depends upon how long you intend to field + * the device before replacing it. For example, an EEPROM rated for + * 100k erase/write cycles will last about 69 days saving once a minute + * or 11 years saving once an hour. + * + * \sa save(), stir() + */ +void RNGClass::setAutoSaveTime(uint16_t minutes) +{ + if (!minutes) + minutes = 1; // Just in case. + timeout = ((uint32_t)minutes) * 60000U; +} + +/** + * \brief Generates random bytes into a caller-supplied buffer. + * + * \param data Points to the buffer to fill with random bytes. + * \param len Number of bytes to generate. + * + * Calling this function will decrease the amount of entropy in the + * random number pool by \a len * 8 bits. If there isn't enough + * entropy, then this function will still return \a len bytes of + * random data generated from what entropy it does have. + * + * If the application requires a specific amount of entropy before + * generating important values, the available() function can be + * polled to determine when sufficient entropy is available. + * + * \sa available(), stir() + */ +void RNGClass::rand(uint8_t *data, size_t len) +{ + // Make sure that the RNG is initialized in case the application + // forgot to call RNG.begin() at startup time. + if (!initialized) + begin(0); + + // Decrease the amount of entropy in the pool. + if ( (uint16_t)len > (credits / 8)) + credits = 0; + else + credits -= len * 8; + + // If we have pending TRNG data from the loop() function, + // then force a stir on the state. Otherwise mix in some + // fresh data from the TRNG because it is possible that + // the application forgot to call RNG.loop(). 
+ if (trngPending) { + stir(0, 0, 0); + trngPending = 0; + trngPosn = 0; + } else { + mixTRNG(); + } + + // Generate the random data. + uint8_t count = 0; + while (len > 0) { + // Force a rekey if we have generated too many blocks in this request. + if (count >= RNG_REKEY_BLOCKS) { + rekey(); + count = 1; + } else { + ++count; + } + + // Increment the low counter word and generate a new keystream block. + ++(block[12]); + ChaCha::hashCore(stream, block, RNG_ROUNDS); + + // Copy the data to the return buffer. + if (len < 64) { + memcpy(data, stream, len); + break; + } else { + memcpy(data, stream, 64); + data += 64; + len -= 64; + } + } + + // Force a rekey after every request. + rekey(); +} + +/** + * \brief Determine if there is sufficient entropy available for a + * specific request size. + * + * \param len The number of bytes of random data that will be requested + * via a call to rand(). + * \return Returns true if there is at least \a len * 8 bits of entropy + * in the random number pool, or false if not. + * + * This function can be used by the application to wait for sufficient + * entropy to become available from the system's noise sources before + * generating important values. For example: + * + * \code + * bool haveKey = false; + * byte key[32]; + * + * void loop() { + * ... + * + * if (!haveKey && RNG.available(sizeof(key))) { + * RNG.rand(key, sizeof(key)); + * haveKey = true; + * } + * + * ... + * } + * \endcode + * + * If \a len is larger than the maximum number of entropy credits supported + * by the random number pool (384 bits, 48 bytes), then the maximum will be + * used instead. For example, asking if 512 bits (64 bytes) are available + * will return true if in reality only 384 bits are available. If this is a + * problem for the application's security requirements, then large requests + * for random data should be broken up into smaller chunks with the + * application waiting for the entropy pool to refill between chunks. 
+ * + * \sa rand() + */ +bool RNGClass::available(size_t len) const +{ + if (len >= (RNG_MAX_CREDITS / 8)) + return credits >= RNG_MAX_CREDITS; + else + return (uint16_t)len <= (credits / 8); +} + +/** + * \brief Stirs additional entropy data into the random pool. + * + * \param data Points to the additional data to be stirred in. + * \param len Number of bytes to be stirred in. + * \param credit The number of bits of entropy to credit for the + * data that is stirred in. Note that this is bits, not bytes. + * + * The maximum credit allowed is \a len * 8 bits, indicating that + * every bit in the input \a data is good and random. Practical noise + * sources are rarely that good, so \a credit will usually be smaller. + * For example, to credit 2 bits of entropy per byte, the function + * would be called as follows: + * + * \code + * RNG.stir(noise_data, noise_bytes, noise_bytes * 2); + * \endcode + * + * If \a credit is zero, then the \a data will be stirred in but no + * entropy credit is given. This is useful for static values like + * serial numbers and MAC addresses that are different between + * devices but highly predictable. + * + * \sa loop() + */ +void RNGClass::stir(const uint8_t *data, size_t len, unsigned int credit) +{ + // Increase the entropy credit. + if ((credit / 8) >= len && len) + credit = len * 8; + if ((uint16_t)(RNG_MAX_CREDITS - credits) > credit) + credits += credit; + else + credits = RNG_MAX_CREDITS; + + // Process the supplied input data. + if (len > 0) { + // XOR the data with the ChaCha input block in 48 byte + // chunks and rekey the ChaCha cipher for each chunk to mix + // the data in. This should scatter any "true entropy" in + // the input across the entire block. 
+ while (len > 0) { + size_t templen = len; + if (templen > 48) + templen = 48; + uint8_t *output = ((uint8_t *)block) + 16; + len -= templen; + while (templen > 0) { + *output++ ^= *data++; + --templen; + } + rekey(); + } + } else { + // There was no input data, so just force a rekey so we + // get some mixing of the state even without new data. + rekey(); + } + + // Save if this is the first time we have reached max entropy. + // This provides some protection if the system is powered off before + // the first auto-save timeout occurs. + if (firstSave && credits >= RNG_MAX_CREDITS) { + firstSave = 0; + save(); + } +} + +/** + * \brief Saves the random seed to EEPROM. + * + * During system startup, noise sources typically won't have accumulated + * much entropy. But startup is usually the time when the system most + * needs to generate random data for session keys, IV's, and the like. + * + * The purpose of this function is to pass some of the accumulated entropy + * from one session to the next after a loss of power. Thus, once the system + * has been running for a while it will get progressively better at generating + * random values and the accumulated entropy will not be completely lost. + * + * Normally it isn't necessary to call save() directly. The loop() function + * will automatically save the seed on a periodic basis (default of 1 hour). + * + * The seed that is saved is generated in such a way that it cannot be used + * to predict random values that were generated previously or subsequently + * in the current session. So a compromise of the EEPROM contents of a + * captured device should not result in compromise of random values + * that have already been generated. However, if power is lost and the + * system restarted, then there will be a short period of time where the + * random state will be predictable from the seed. For this reason it is + * very important to stir() in new noise data at startup. 
+ * + * \sa loop(), stir() + */ +void RNGClass::save() +{ + // Generate random data from the current state and save + // that as the seed. Then force a rekey. + ++(block[12]); + ChaCha::hashCore(stream, block, RNG_ROUNDS); +#if defined(RNG_EEPROM) + // We shorten the seed from 48 bytes to 47 to leave room for + // the CRC-8 value. We do this to align the data on an 8-byte + // boundary in EERPOM. + int address = RNG_EEPROM_ADDRESS; + eeprom_write_block(stream, (void *)address, SEED_SIZE - 1); + eeprom_write_byte((uint8_t *)(address + SEED_SIZE - 1), + crypto_crc8('S', stream, SEED_SIZE - 1)); +#elif defined(RNG_DUE_TRNG) + unsigned posn; + ((uint32_t *)(RNG_SEED_ADDR))[0] = crypto_crc8('S', stream, SEED_SIZE); + for (posn = 0; posn < 12; ++posn) + ((uint32_t *)(RNG_SEED_ADDR))[posn + 1] = stream[posn]; + for (posn = 13; posn < (RNG_FLASH_PAGE_SIZE / 4); ++posn) + ((uint32_t *)(RNG_SEED_ADDR))[posn + 13] = 0xFFFFFFFF; + eraseAndWriteSeed(); +#elif defined(RNG_ESP_NVS) + // Save the seed into ESP non-volatile storage (NVS). + nvs_handle handle = 0; + if (nvs_open("rng", NVS_READWRITE, &handle) == 0) { + nvs_erase_all(handle); + nvs_set_blob(handle, "seed", stream, SEED_SIZE); + nvs_commit(handle); + nvs_close(handle); + } +#endif + rekey(); + timer = millis(); +} + +/** + * \brief Run periodic housekeeping tasks on the random number generator. + * + * This function must be called on a regular basis from the application's + * main "loop()" function. + */ +void RNGClass::loop() +{ + // Stir in the entropy from all registered noise sources. + for (uint8_t posn = 0; posn < count; ++posn) + noiseSources[posn]->stir(); + +#if defined(RNG_DUE_TRNG) + // If there is data available from the Arudino Due's TRNG, then XOR + // it with the state block and increase the entropy credit. We don't + // call stir() yet because that will seriously slow down the system + // given how fast the TRNG is. 
Instead we save up the XOR'ed TRNG + // data until the next rand() call and then hash it to generate the + // desired output. + // + // The CPU documentation claims that the TRNG output is very good so + // this should only make the pool more and more random as time goes on. + // However there is a risk that the CPU manufacturer was pressured by + // government or intelligence agencies to insert a back door that + // generates predictable output. Or the manufacturer was overly + // optimistic about their TRNG design and it is actually flawed in a + // way they don't realise. + // + // If you are concerned about such threats, then make sure to mix in + // data from other noise sources. By hashing together the TRNG with + // the other noise data, rand() should produce unpredictable data even + // if one of the sources is actually predictable. + if ((REG_TRNG_ISR & TRNG_ISR_DATRDY) != 0) { + block[4 + trngPosn] ^= REG_TRNG_ODATA; + if (++trngPosn >= 12) + trngPosn = 0; + if (credits < RNG_MAX_CREDITS) { + // Credit 1 bit of entropy for the word. The TRNG should be + // better than this but it is so fast that we want to collect + // up more data before passing it to the application. + ++credits; + } + trngPending = 1; + } +#elif defined(RNG_WORD_TRNG) + // Read a word from the TRNG and XOR it into the state. + block[4 + trngPosn] ^= RNG_WORD_TRNG_GET(); + if (++trngPosn >= 12) + trngPosn = 0; + if (credits < RNG_MAX_CREDITS) { + // Credit 1 bit of entropy for the word. The TRNG should be + // better than this but it is so fast that we want to collect + // up more data before passing it to the application. + ++credits; + } + trngPending = 1; +#elif defined(RNG_WATCHDOG) + // Read the 32 bit buffer from the WDT interrupt. + cli(); + if (outBits >= 32) { + uint32_t value = hash; + hash = 0; + outBits = 0; + sei(); + + // Final steps of the Jenkin's one-at-a-time hash function. 
+ // https://en.wikipedia.org/wiki/Jenkins_hash_function + value += leftShift3(value); + value ^= rightShift11(value); + value += leftShift15(value); + + // Credit 1 bit of entropy for each byte of input. It can take + // between 30 and 40 seconds to accumulate 256 bits of credit. + credits += 4; + if (credits > RNG_MAX_CREDITS) + credits = RNG_MAX_CREDITS; + + // XOR the word with the state. Stir once we accumulate 48 bytes, + // which happens about once every 6.4 seconds. + block[4 + trngPosn] ^= value; + if (++trngPosn >= 12) { + trngPosn = 0; + trngPending = 0; + stir(0, 0, 0); + } else { + trngPending = 1; + } + } else { + sei(); + } +#endif + + // Save the seed if the auto-save timer has expired. + if ((millis() - timer) >= timeout) + save(); +} + +/** + * \brief Destroys the data in the random number pool and the saved seed + * in EEPROM. + * + * This function attempts to throw away any data that could theoretically be + * used to predict previous and future outputs of the random number generator + * if the device is captured, sold, or otherwise compromised. + * + * After this function is called, begin() must be called again to + * re-initialize the random number generator. + * + * \note The rand() and save() functions take some care to manage the + * random number pool in a way that makes prediction of past outputs from a + * captured state very difficult. Future outputs may be predictable if + * noise or other high-entropy data is not mixed in with stir() on a + * regular basis. 
+ * + * \sa begin() + */ +void RNGClass::destroy() +{ + clean(block); + clean(stream); +#if defined(RNG_EEPROM) + int address = RNG_EEPROM_ADDRESS; + for (int posn = 0; posn < SEED_SIZE; ++posn) + eeprom_write_byte((uint8_t *)(address + posn), 0xFF); +#elif defined(RNG_DUE_TRNG) + for (unsigned posn = 0; posn < (RNG_FLASH_PAGE_SIZE / 4); ++posn) + ((uint32_t *)(RNG_SEED_ADDR))[posn] = 0xFFFFFFFF; + eraseAndWriteSeed(); +#elif defined(RNG_ESP_NVS) + nvs_handle handle = 0; + if (nvs_open("rng", NVS_READWRITE, &handle) == 0) { + nvs_erase_all(handle); + nvs_commit(handle); + nvs_close(handle); + } +#endif + initialized = 0; +} + +/** + * \brief Rekeys the random number generator. + */ +void RNGClass::rekey() +{ + // Rekey the cipher for the next request by generating a new block. + // This is intended to make it difficult to wind the random number + // backwards if the state is captured later. The first 16 bytes of + // "block" remain set to "tagRNG". + ++(block[12]); + ChaCha::hashCore(stream, block, RNG_ROUNDS); + memcpy(block + 4, stream, 48); + + // Permute the high word of the counter using the system microsecond + // counter to introduce a little bit of non-stir randomness for each + // request. Note: If random data is requested on a predictable schedule + // then this may not help very much. It is still necessary to stir in + // high quality entropy data on a regular basis using stir(). + block[13] ^= micros(); +} + +/** + * \brief Mix in fresh data from the TRNG when rand() is called. + */ +void RNGClass::mixTRNG() +{ +#if defined(RNG_DUE_TRNG) + // Mix in 12 words from the Due's TRNG. + for (int posn = 0; posn < 12; ++posn) { + // According to the documentation the TRNG should produce a new + // 32-bit random value every 84 clock cycles. If it still hasn't + // produced a value after 200 iterations, then assume that the + // TRNG is not producing output and stop. 
+ int counter; + for (counter = 0; counter < 200; ++counter) { + if ((REG_TRNG_ISR & TRNG_ISR_DATRDY) != 0) + break; + } + if (counter >= 200) + break; + block[posn + 4] ^= REG_TRNG_ODATA; + } +#elif defined(RNG_WORD_TRNG) + // Read 12 words from the TRNG and XOR them into the state. + for (uint8_t index = 4; index < 16; ++index) + block[index] ^= RNG_WORD_TRNG_GET(); +#elif defined(RNG_WATCHDOG) + // Read the pending 32 bit buffer from the WDT interrupt and mix it in. + cli(); + if (outBits >= 32) { + uint32_t value = hash; + hash = 0; + outBits = 0; + sei(); + + // Final steps of the Jenkin's one-at-a-time hash function. + // https://en.wikipedia.org/wiki/Jenkins_hash_function + value += leftShift3(value); + value ^= rightShift11(value); + value += leftShift15(value); + + // XOR the word with the state. + block[4] ^= value; + } else { + sei(); + } +#endif +} diff --git a/src/RNG.h b/src/RNG.h new file mode 100644 index 0000000..307c058 --- /dev/null +++ b/src/RNG.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_RNG_h +#define CRYPTO_RNG_h + +#include +#include + +class NoiseSource; + +class RNGClass +{ +public: + RNGClass(); + ~RNGClass(); + + void begin(const char *tag); + void addNoiseSource(NoiseSource &source); + + void setAutoSaveTime(uint16_t minutes); + + void rand(uint8_t *data, size_t len); + bool available(size_t len) const; + + void stir(const uint8_t *data, size_t len, unsigned int credit = 0); + + void save(); + + void loop(); + + void destroy(); + + static const int SEED_SIZE = 48; + +private: + uint32_t block[16]; + uint32_t stream[16]; + uint16_t credits : 13; + uint16_t firstSave : 1; + uint16_t initialized : 1; + uint16_t trngPending : 1; + unsigned long timer; + unsigned long timeout; + NoiseSource *noiseSources[4]; + uint8_t count; + uint8_t trngPosn; + + void rekey(); + void mixTRNG(); +}; + +extern RNGClass RNG; + +#endif diff --git a/src/RWeatherCrypto.h b/src/RWeatherCrypto.h new file mode 100644 index 0000000..e69de29 diff --git a/src/SHA256.cpp b/src/SHA256.cpp new file mode 100644 index 0000000..098b2e7 --- /dev/null +++ b/src/SHA256.cpp @@ -0,0 +1,258 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "SHA256.h" +#include "Crypto.h" +#include "utility/RotateUtil.h" +#include "utility/EndianUtil.h" +#include "utility/ProgMemUtil.h" +#include + +/** + * \class SHA256 SHA256.h + * \brief SHA-256 hash algorithm. + * + * Reference: http://en.wikipedia.org/wiki/SHA-2 + * + * \sa SHA512, SHA3_256, BLAKE2s + */ + +/** + * \brief Constructs a SHA-256 hash object. + */ +SHA256::SHA256() +{ + reset(); +} + +/** + * \brief Destroys this SHA-256 hash object after clearing + * sensitive information. 
+ */ +SHA256::~SHA256() +{ + clean(state); +} + +size_t SHA256::hashSize() const +{ + return 32; +} + +size_t SHA256::blockSize() const +{ + return 64; +} + +void SHA256::reset() +{ + state.h[0] = 0x6a09e667; + state.h[1] = 0xbb67ae85; + state.h[2] = 0x3c6ef372; + state.h[3] = 0xa54ff53a, + state.h[4] = 0x510e527f; + state.h[5] = 0x9b05688c; + state.h[6] = 0x1f83d9ab; + state.h[7] = 0x5be0cd19; + state.chunkSize = 0; + state.length = 0; +} + +void SHA256::update(const void *data, size_t len) +{ + // Update the total length (in bits, not bytes). + state.length += ((uint64_t)len) << 3; + + // Break the input up into 512-bit chunks and process each in turn. + const uint8_t *d = (const uint8_t *)data; + while (len > 0) { + uint8_t size = 64 - state.chunkSize; + if (size > len) + size = len; + memcpy(((uint8_t *)state.w) + state.chunkSize, d, size); + state.chunkSize += size; + len -= size; + d += size; + if (state.chunkSize == 64) { + processChunk(); + state.chunkSize = 0; + } + } +} + +void SHA256::finalize(void *hash, size_t len) +{ + // Pad the last chunk. We may need two padding chunks if there + // isn't enough room in the first for the padding and length. + uint8_t *wbytes = (uint8_t *)state.w; + if (state.chunkSize <= (64 - 9)) { + wbytes[state.chunkSize] = 0x80; + memset(wbytes + state.chunkSize + 1, 0x00, 64 - 8 - (state.chunkSize + 1)); + state.w[14] = htobe32((uint32_t)(state.length >> 32)); + state.w[15] = htobe32((uint32_t)state.length); + processChunk(); + } else { + wbytes[state.chunkSize] = 0x80; + memset(wbytes + state.chunkSize + 1, 0x00, 64 - (state.chunkSize + 1)); + processChunk(); + memset(wbytes, 0x00, 64 - 8); + state.w[14] = htobe32((uint32_t)(state.length >> 32)); + state.w[15] = htobe32((uint32_t)state.length); + processChunk(); + } + + // Convert the result into big endian and return it. + for (uint8_t posn = 0; posn < 8; ++posn) + state.w[posn] = htobe32(state.h[posn]); + + // Copy the hash to the caller's return buffer. 
+ if (len > 32) + len = 32; + memcpy(hash, state.w, len); +} + +void SHA256::clear() +{ + clean(state); + reset(); +} + +void SHA256::resetHMAC(const void *key, size_t keyLen) +{ + formatHMACKey(state.w, key, keyLen, 0x36); + state.length += 64 * 8; + processChunk(); +} + +void SHA256::finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen) +{ + uint8_t temp[32]; + finalize(temp, sizeof(temp)); + formatHMACKey(state.w, key, keyLen, 0x5C); + state.length += 64 * 8; + processChunk(); + update(temp, sizeof(temp)); + finalize(hash, hashLen); + clean(temp); +} + +/** + * \brief Processes a single 512-bit chunk with the core SHA-256 algorithm. + * + * Reference: http://en.wikipedia.org/wiki/SHA-2 + */ +void SHA256::processChunk() +{ + // Round constants for SHA-256. + static uint32_t const k[64] PROGMEM = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + }; + + // Convert the first 16 words from big endian to host byte order. + uint8_t index; + for (index = 0; index < 16; ++index) + state.w[index] = be32toh(state.w[index]); + + // Initialise working variables to the current hash value. 
+ uint32_t a = state.h[0]; + uint32_t b = state.h[1]; + uint32_t c = state.h[2]; + uint32_t d = state.h[3]; + uint32_t e = state.h[4]; + uint32_t f = state.h[5]; + uint32_t g = state.h[6]; + uint32_t h = state.h[7]; + + // Perform the first 16 rounds of the compression function main loop. + uint32_t temp1, temp2; + for (index = 0; index < 16; ++index) { + temp1 = h + pgm_read_dword(k + index) + state.w[index] + + (rightRotate6(e) ^ rightRotate11(e) ^ rightRotate25(e)) + + ((e & f) ^ ((~e) & g)); + temp2 = (rightRotate2(a) ^ rightRotate13(a) ^ rightRotate22(a)) + + ((a & b) ^ (a & c) ^ (b & c)); + h = g; + g = f; + f = e; + e = d + temp1; + d = c; + c = b; + b = a; + a = temp1 + temp2; + } + + // Perform the 48 remaining rounds. We expand the first 16 words to + // 64 in-place in the "w" array. This saves 192 bytes of memory + // that would have otherwise need to be allocated to the "w" array. + for (; index < 64; ++index) { + // Expand the next word. + temp1 = state.w[(index - 15) & 0x0F]; + temp2 = state.w[(index - 2) & 0x0F]; + temp1 = state.w[index & 0x0F] = + state.w[(index - 16) & 0x0F] + state.w[(index - 7) & 0x0F] + + (rightRotate7(temp1) ^ rightRotate18(temp1) ^ (temp1 >> 3)) + + (rightRotate17(temp2) ^ rightRotate19(temp2) ^ (temp2 >> 10)); + + // Perform the round. + temp1 = h + pgm_read_dword(k + index) + temp1 + + (rightRotate6(e) ^ rightRotate11(e) ^ rightRotate25(e)) + + ((e & f) ^ ((~e) & g)); + temp2 = (rightRotate2(a) ^ rightRotate13(a) ^ rightRotate22(a)) + + ((a & b) ^ (a & c) ^ (b & c)); + h = g; + g = f; + f = e; + e = d + temp1; + d = c; + c = b; + b = a; + a = temp1 + temp2; + } + + // Add the compressed chunk to the current hash value. + state.h[0] += a; + state.h[1] += b; + state.h[2] += c; + state.h[3] += d; + state.h[4] += e; + state.h[5] += f; + state.h[6] += g; + state.h[7] += h; + + // Attempt to clean up the stack. 
+ a = b = c = d = e = f = g = h = temp1 = temp2 = 0; +} diff --git a/src/SHA256.h b/src/SHA256.h new file mode 100644 index 0000000..246ea58 --- /dev/null +++ b/src/SHA256.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_SHA256_h +#define CRYPTO_SHA256_h + +#include "Hash.h" + +class SHA256 : public Hash +{ +public: + SHA256(); + virtual ~SHA256(); + + size_t hashSize() const; + size_t blockSize() const; + + void reset(); + void update(const void *data, size_t len); + void finalize(void *hash, size_t len); + + void clear(); + + void resetHMAC(const void *key, size_t keyLen); + void finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen); + +private: + struct { + uint32_t h[8]; + uint32_t w[16]; + uint64_t length; + uint8_t chunkSize; + } state; + + void processChunk(); +}; + +#endif diff --git a/src/SHA3.cpp b/src/SHA3.cpp new file mode 100644 index 0000000..74353c4 --- /dev/null +++ b/src/SHA3.cpp @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "SHA3.h" +#include "Crypto.h" + +/** + * \class SHA3_256 SHA3.h + * \brief SHA3-256 hash algorithm. + * + * Reference: http://en.wikipedia.org/wiki/SHA-3 + * + * \sa SHA3_512 + */ + +/** + * \brief Constructs a new SHA3-256 hash object. + */ +SHA3_256::SHA3_256() +{ + core.setCapacity(512); +} + +/** + * \brief Destroys this hash object after clearing sensitive information. + */ +SHA3_256::~SHA3_256() +{ + // The destructor for the KeccakCore object will do most of the work. +} + +size_t SHA3_256::hashSize() const +{ + return 32; +} + +size_t SHA3_256::blockSize() const +{ + return core.blockSize(); +} + +void SHA3_256::reset() +{ + core.reset(); +} + +void SHA3_256::update(const void *data, size_t len) +{ + core.update(data, len); +} + +void SHA3_256::finalize(void *hash, size_t len) +{ + // Pad the final block and then extract the hash value. + core.pad(0x06); + core.extract(hash, len); +} + +void SHA3_256::clear() +{ + core.clear(); +} + +void SHA3_256::resetHMAC(const void *key, size_t keyLen) +{ + core.setHMACKey(key, keyLen, 0x36, 32); +} + +void SHA3_256::finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen) +{ + uint8_t temp[32]; + finalize(temp, sizeof(temp)); + core.setHMACKey(key, keyLen, 0x5C, 32); + core.update(temp, sizeof(temp)); + finalize(hash, hashLen); + clean(temp); +} + +/** + * \class SHA3_512 SHA3.h + * \brief SHA3-512 hash algorithm. + * + * Reference: http://en.wikipedia.org/wiki/SHA-3 + * + * \sa SHA3_256 + */ + +/** + * \brief Constructs a new SHA3-512 hash object. + */ +SHA3_512::SHA3_512() +{ + core.setCapacity(1024); +} + +/** + * \brief Destroys this hash object after clearing sensitive information. + */ +SHA3_512::~SHA3_512() +{ + // The destructor for the KeccakCore object will do most of the work. 
+} + +size_t SHA3_512::hashSize() const +{ + return 64; +} + +size_t SHA3_512::blockSize() const +{ + return core.blockSize(); +} + +void SHA3_512::reset() +{ + core.reset(); +} + +void SHA3_512::update(const void *data, size_t len) +{ + core.update(data, len); +} + +void SHA3_512::finalize(void *hash, size_t len) +{ + // Pad the final block and then extract the hash value. + core.pad(0x06); + core.extract(hash, len); +} + +void SHA3_512::clear() +{ + core.clear(); +} + +void SHA3_512::resetHMAC(const void *key, size_t keyLen) +{ + core.setHMACKey(key, keyLen, 0x36, 64); +} + +void SHA3_512::finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen) +{ + uint8_t temp[64]; + finalize(temp, sizeof(temp)); + core.setHMACKey(key, keyLen, 0x5C, 64); + core.update(temp, sizeof(temp)); + finalize(hash, hashLen); + clean(temp); +} diff --git a/src/SHA3.h b/src/SHA3.h new file mode 100644 index 0000000..76bb031 --- /dev/null +++ b/src/SHA3.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_SHA3_h +#define CRYPTO_SHA3_h + +#include "KeccakCore.h" +#include "Hash.h" + +class SHA3_256 : public Hash +{ +public: + SHA3_256(); + virtual ~SHA3_256(); + + size_t hashSize() const; + size_t blockSize() const; + + void reset(); + void update(const void *data, size_t len); + void finalize(void *hash, size_t len); + + void clear(); + + void resetHMAC(const void *key, size_t keyLen); + void finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen); + +private: + KeccakCore core; +}; + +class SHA3_512 : public Hash +{ +public: + SHA3_512(); + virtual ~SHA3_512(); + + size_t hashSize() const; + size_t blockSize() const; + + void reset(); + void update(const void *data, size_t len); + void finalize(void *hash, size_t len); + + void clear(); + + void resetHMAC(const void *key, size_t keyLen); + void finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen); + +private: + KeccakCore core; +}; + +#endif diff --git a/src/SHA512.cpp b/src/SHA512.cpp new file mode 100644 index 0000000..71cf019 --- /dev/null +++ b/src/SHA512.cpp @@ -0,0 +1,274 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "SHA512.h" +#include "Crypto.h" +#include "utility/RotateUtil.h" +#include "utility/EndianUtil.h" +#include "utility/ProgMemUtil.h" +#include + +/** + * \class SHA512 SHA512.h + * \brief SHA-512 hash algorithm. + * + * Reference: http://en.wikipedia.org/wiki/SHA-2 + * + * \sa SHA256, SHA3_512, BLAKE2b + */ + +/** + * \brief Constructs a SHA-512 hash object. + */ +SHA512::SHA512() +{ + reset(); +} + +/** + * \brief Destroys this SHA-512 hash object after clearing + * sensitive information. 
+ */ +SHA512::~SHA512() +{ + clean(state); +} + +size_t SHA512::hashSize() const +{ + return 64; +} + +size_t SHA512::blockSize() const +{ + return 128; +} + +void SHA512::reset() +{ + static uint64_t const hashStart[8] PROGMEM = { + 0x6A09E667F3BCC908ULL, 0xBB67AE8584CAA73BULL, 0x3C6EF372FE94F82BULL, + 0xA54FF53A5F1D36F1ULL, 0x510E527FADE682D1ULL, 0x9B05688C2B3E6C1FULL, + 0x1F83D9ABFB41BD6BULL, 0x5BE0CD19137E2179ULL + }; + memcpy_P(state.h, hashStart, sizeof(hashStart)); + state.chunkSize = 0; + state.lengthLow = 0; + state.lengthHigh = 0; +} + +void SHA512::update(const void *data, size_t len) +{ + // Update the total length in bits, not bytes. + uint64_t temp = state.lengthLow; + state.lengthLow += (((uint64_t)len) << 3); + state.lengthHigh += (((uint64_t)len) >> 61); + if (state.lengthLow < temp) + ++state.lengthHigh; + + // Break the input up into 1024-bit chunks and process each in turn. + const uint8_t *d = (const uint8_t *)data; + while (len > 0) { + uint8_t size = 128 - state.chunkSize; + if (size > len) + size = len; + memcpy(((uint8_t *)state.w) + state.chunkSize, d, size); + state.chunkSize += size; + len -= size; + d += size; + if (state.chunkSize == 128) { + processChunk(); + state.chunkSize = 0; + } + } +} + +void SHA512::finalize(void *hash, size_t len) +{ + // Pad the last chunk. We may need two padding chunks if there + // isn't enough room in the first for the padding and length. 
+ uint8_t *wbytes = (uint8_t *)state.w; + if (state.chunkSize <= (128 - 17)) { + wbytes[state.chunkSize] = 0x80; + memset(wbytes + state.chunkSize + 1, 0x00, 128 - 16 - (state.chunkSize + 1)); + state.w[14] = htobe64(state.lengthHigh); + state.w[15] = htobe64(state.lengthLow); + processChunk(); + } else { + wbytes[state.chunkSize] = 0x80; + memset(wbytes + state.chunkSize + 1, 0x00, 128 - (state.chunkSize + 1)); + processChunk(); + memset(wbytes, 0x00, 128 - 16); + state.w[14] = htobe64(state.lengthHigh); + state.w[15] = htobe64(state.lengthLow); + processChunk(); + } + + // Convert the result into big endian and return it. + for (uint8_t posn = 0; posn < 8; ++posn) + state.w[posn] = htobe64(state.h[posn]); + + // Copy the hash to the caller's return buffer. + if (len > 64) + len = 64; + memcpy(hash, state.w, len); +} + +void SHA512::clear() +{ + clean(state); + reset(); +} + +void SHA512::resetHMAC(const void *key, size_t keyLen) +{ + formatHMACKey(state.w, key, keyLen, 0x36); + state.lengthLow += 128 * 8; + processChunk(); +} + +void SHA512::finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen) +{ + uint8_t temp[64]; + finalize(temp, sizeof(temp)); + formatHMACKey(state.w, key, keyLen, 0x5C); + state.lengthLow += 128 * 8; + processChunk(); + update(temp, sizeof(temp)); + finalize(hash, hashLen); + clean(temp); +} + +/** + * \brief Processes a single 1024-bit chunk with the core SHA-512 algorithm. + * + * Reference: http://en.wikipedia.org/wiki/SHA-2 + */ +void SHA512::processChunk() +{ + // Round constants for SHA-512. 
+ static uint64_t const k[80] PROGMEM = { + 0x428A2F98D728AE22ULL, 0x7137449123EF65CDULL, 0xB5C0FBCFEC4D3B2FULL, + 0xE9B5DBA58189DBBCULL, 0x3956C25BF348B538ULL, 0x59F111F1B605D019ULL, + 0x923F82A4AF194F9BULL, 0xAB1C5ED5DA6D8118ULL, 0xD807AA98A3030242ULL, + 0x12835B0145706FBEULL, 0x243185BE4EE4B28CULL, 0x550C7DC3D5FFB4E2ULL, + 0x72BE5D74F27B896FULL, 0x80DEB1FE3B1696B1ULL, 0x9BDC06A725C71235ULL, + 0xC19BF174CF692694ULL, 0xE49B69C19EF14AD2ULL, 0xEFBE4786384F25E3ULL, + 0x0FC19DC68B8CD5B5ULL, 0x240CA1CC77AC9C65ULL, 0x2DE92C6F592B0275ULL, + 0x4A7484AA6EA6E483ULL, 0x5CB0A9DCBD41FBD4ULL, 0x76F988DA831153B5ULL, + 0x983E5152EE66DFABULL, 0xA831C66D2DB43210ULL, 0xB00327C898FB213FULL, + 0xBF597FC7BEEF0EE4ULL, 0xC6E00BF33DA88FC2ULL, 0xD5A79147930AA725ULL, + 0x06CA6351E003826FULL, 0x142929670A0E6E70ULL, 0x27B70A8546D22FFCULL, + 0x2E1B21385C26C926ULL, 0x4D2C6DFC5AC42AEDULL, 0x53380D139D95B3DFULL, + 0x650A73548BAF63DEULL, 0x766A0ABB3C77B2A8ULL, 0x81C2C92E47EDAEE6ULL, + 0x92722C851482353BULL, 0xA2BFE8A14CF10364ULL, 0xA81A664BBC423001ULL, + 0xC24B8B70D0F89791ULL, 0xC76C51A30654BE30ULL, 0xD192E819D6EF5218ULL, + 0xD69906245565A910ULL, 0xF40E35855771202AULL, 0x106AA07032BBD1B8ULL, + 0x19A4C116B8D2D0C8ULL, 0x1E376C085141AB53ULL, 0x2748774CDF8EEB99ULL, + 0x34B0BCB5E19B48A8ULL, 0x391C0CB3C5C95A63ULL, 0x4ED8AA4AE3418ACBULL, + 0x5B9CCA4F7763E373ULL, 0x682E6FF3D6B2B8A3ULL, 0x748F82EE5DEFB2FCULL, + 0x78A5636F43172F60ULL, 0x84C87814A1F0AB72ULL, 0x8CC702081A6439ECULL, + 0x90BEFFFA23631E28ULL, 0xA4506CEBDE82BDE9ULL, 0xBEF9A3F7B2C67915ULL, + 0xC67178F2E372532BULL, 0xCA273ECEEA26619CULL, 0xD186B8C721C0C207ULL, + 0xEADA7DD6CDE0EB1EULL, 0xF57D4F7FEE6ED178ULL, 0x06F067AA72176FBAULL, + 0x0A637DC5A2C898A6ULL, 0x113F9804BEF90DAEULL, 0x1B710B35131C471BULL, + 0x28DB77F523047D84ULL, 0x32CAAB7B40C72493ULL, 0x3C9EBE0A15C9BEBCULL, + 0x431D67C49C100D4CULL, 0x4CC5D4BECB3E42B6ULL, 0x597F299CFC657E2AULL, + 0x5FCB6FAB3AD6FAECULL, 0x6C44198C4A475817ULL + }; + + // Convert the first 16 words from big endian to host 
byte order. + uint8_t index; + for (index = 0; index < 16; ++index) + state.w[index] = be64toh(state.w[index]); + + // Initialise working variables to the current hash value. + uint64_t a = state.h[0]; + uint64_t b = state.h[1]; + uint64_t c = state.h[2]; + uint64_t d = state.h[3]; + uint64_t e = state.h[4]; + uint64_t f = state.h[5]; + uint64_t g = state.h[6]; + uint64_t h = state.h[7]; + + // Perform the first 16 rounds of the compression function main loop. + uint64_t temp1, temp2; + for (index = 0; index < 16; ++index) { + temp1 = h + pgm_read_qword(k + index) + state.w[index] + + (rightRotate14_64(e) ^ rightRotate18_64(e) ^ + rightRotate41_64(e)) + ((e & f) ^ ((~e) & g)); + temp2 = (rightRotate28_64(a) ^ rightRotate34_64(a) ^ + rightRotate39_64(a)) + ((a & b) ^ (a & c) ^ (b & c)); + h = g; + g = f; + f = e; + e = d + temp1; + d = c; + c = b; + b = a; + a = temp1 + temp2; + } + + // Perform the 64 remaining rounds. We expand the first 16 words to + // 80 in-place in the "w" array. This saves 512 bytes of memory + // that would have otherwise need to be allocated to the "w" array. + for (; index < 80; ++index) { + // Expand the next word. + temp1 = state.w[(index - 15) & 0x0F]; + temp2 = state.w[(index - 2) & 0x0F]; + temp1 = state.w[index & 0x0F] = + state.w[(index - 16) & 0x0F] + state.w[(index - 7) & 0x0F] + + (rightRotate1_64(temp1) ^ rightRotate8_64(temp1) ^ + (temp1 >> 7)) + + (rightRotate19_64(temp2) ^ rightRotate61_64(temp2) ^ + (temp2 >> 6)); + + // Perform the round. + temp1 = h + pgm_read_qword(k + index) + temp1 + + (rightRotate14_64(e) ^ rightRotate18_64(e) ^ + rightRotate41_64(e)) + ((e & f) ^ ((~e) & g)); + temp2 = (rightRotate28_64(a) ^ rightRotate34_64(a) ^ + rightRotate39_64(a)) + ((a & b) ^ (a & c) ^ (b & c)); + h = g; + g = f; + f = e; + e = d + temp1; + d = c; + c = b; + b = a; + a = temp1 + temp2; + } + + // Add the compressed chunk to the current hash value. 
+ state.h[0] += a; + state.h[1] += b; + state.h[2] += c; + state.h[3] += d; + state.h[4] += e; + state.h[5] += f; + state.h[6] += g; + state.h[7] += h; + + // Attempt to clean up the stack. + a = b = c = d = e = f = g = h = temp1 = temp2 = 0; +} diff --git a/src/SHA512.h b/src/SHA512.h new file mode 100644 index 0000000..d1a069f --- /dev/null +++ b/src/SHA512.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_SHA512_h +#define CRYPTO_SHA512_h + +#include "Hash.h" + +class Ed25519; + +class SHA512 : public Hash +{ +public: + SHA512(); + virtual ~SHA512(); + + size_t hashSize() const; + size_t blockSize() const; + + void reset(); + void update(const void *data, size_t len); + void finalize(void *hash, size_t len); + + void clear(); + + void resetHMAC(const void *key, size_t keyLen); + void finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen); + +private: + struct { + uint64_t h[8]; + uint64_t w[16]; + uint64_t lengthLow; + uint64_t lengthHigh; + uint8_t chunkSize; + } state; + + void processChunk(); + + friend class Ed25519; +}; + +#endif diff --git a/src/SHAKE.cpp b/src/SHAKE.cpp new file mode 100644 index 0000000..b813675 --- /dev/null +++ b/src/SHAKE.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "SHAKE.h" + +/** + * \class SHAKE SHAKE.h + * \brief Abstract base class for the SHAKE Extendable-Output Functions (XOFs). + * + * Reference: http://en.wikipedia.org/wiki/SHA-3 + * + * \sa SHAKE256, SHAKE128, SHA3_256 + */ + +/** + * \brief Constructs a SHAKE object. + * + * \param capacity The capacity of the Keccak sponge function in bits which + * should be a multiple of 64 and between 64 and 1536. + */ +SHAKE::SHAKE(size_t capacity) + : finalized(false) +{ + core.setCapacity(capacity); +} + +/** + * \brief Destroys this SHAKE object after clearing all sensitive information. + */ +SHAKE::~SHAKE() +{ +} + +size_t SHAKE::blockSize() const +{ + return core.blockSize(); +} + +void SHAKE::reset() +{ + core.reset(); + finalized = false; +} + +void SHAKE::update(const void *data, size_t len) +{ + if (finalized) + reset(); + core.update(data, len); +} + +void SHAKE::extend(uint8_t *data, size_t len) +{ + if (!finalized) { + core.pad(0x1F); + finalized = true; + } + core.extract(data, len); +} + +void SHAKE::encrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + if (!finalized) { + core.pad(0x1F); + finalized = true; + } + core.encrypt(output, input, len); +} + +void SHAKE::clear() +{ + core.clear(); + finalized = false; +} + +/** + * \class SHAKE128 SHAKE.h + * \brief SHAKE Extendable-Output Function (XOF) with 128-bit security. + * + * Reference: http://en.wikipedia.org/wiki/SHA-3 + * + * \sa SHAKE256, SHAKE, SHA3_256 + */ + +/** + * \fn SHAKE128::SHAKE128() + * \brief Constructs a SHAKE object with 128-bit security. + */ + +/** + * \brief Destroys this SHAKE128 object after clearing all sensitive + * information. + */ +SHAKE128::~SHAKE128() +{ +} + +/** + * \class SHAKE256 SHAKE.h + * \brief SHAKE Extendable-Output Function (XOF) with 256-bit security. + * + * Reference: http://en.wikipedia.org/wiki/SHA-3 + * + * \sa SHAKE128, SHAKE, SHA3_256 + */ + +/** + * \fn SHAKE256::SHAKE256() + * \brief Constructs a SHAKE object with 256-bit security. 
+ */ + +/** + * \brief Destroys this SHAKE256 object after clearing all sensitive + * information. + */ +SHAKE256::~SHAKE256() +{ +} diff --git a/src/SHAKE.h b/src/SHAKE.h new file mode 100644 index 0000000..83826e6 --- /dev/null +++ b/src/SHAKE.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_SHAKE_h +#define CRYPTO_SHAKE_h + +#include "XOF.h" +#include "KeccakCore.h" + +class SHAKE : public XOF +{ +public: + virtual ~SHAKE(); + + size_t blockSize() const; + + void reset(); + void update(const void *data, size_t len); + + void extend(uint8_t *data, size_t len); + void encrypt(uint8_t *output, const uint8_t *input, size_t len); + + void clear(); + +protected: + SHAKE(size_t capacity); + +private: + KeccakCore core; + bool finalized; +}; + +class SHAKE128 : public SHAKE +{ +public: + SHAKE128() : SHAKE(256) {} + virtual ~SHAKE128(); +}; + +class SHAKE256 : public SHAKE +{ +public: + SHAKE256() : SHAKE(512) {} + virtual ~SHAKE256(); +}; + +#endif diff --git a/src/XOF.cpp b/src/XOF.cpp new file mode 100644 index 0000000..2462a81 --- /dev/null +++ b/src/XOF.cpp @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "XOF.h" + +/** + * \class XOF XOF.h + * \brief Abstract base class for Extendable-Output Functions (XOFs). + * + * Extendable-Output Functions, or XOFs, are a new class of cryptographic + * primitive that was defined by NIST during the SHA-3 standardization + * process. Essentially an XOF is a hash algorithm that has an + * arbitrary-length output instead of a fixed-length digest. + * + * XOFs can be used for a variety of cryptographic tasks: + * + * \li Mask generation functions for RSA OAEP style padding. + * \li Key derivation functions for expanding key seed material into + * arbitrary amounts of keying material for a secure session. + * \li Stream ciphers based on a key and IV. + * + * To use an XOF, it is first reset() and then data is added via multiple + * calls to update(): + * + * \code + * SHAKE256 xof; + * xof.reset(); + * xof.update(data1, sizeof(data1)); + * xof.update(data2, sizeof(data2)); + * ... + * \endcode + * + * Once all input data has been added, the XOF switches into extend mode + * to generate the arbitrary-length output data: + * + * \code + * xof.extend(output1, sizeof(output1)); + * xof.extend(output2, sizeof(output2)); + * ... + * \endcode + * + * Mask generation and key derivation is achieved as follows, where the + * key is unique for each invocation: + * + * \code + * SHAKE256 xof; + * xof.reset(); + * xof.update(key, sizeof(key)); + * xof.extend(output, sizeof(output)); + * \endcode + * + * Stream ciphers can be constructed as follows, using the special + * encrypt() function that XOR's the output of extend() with the + * input plaintext to generate the output ciphertext (or alternatively + * XOR's the output of extend() with the ciphertext to recover the + * plaintext): + * + * \code + * SHAKE256 xof; + * xof.reset(); + * xof.update(key, sizeof(key)); + * xof.update(iv, sizeof(iv)); + * xof.encrypt(output1, input1, sizeof(input1)); + * xof.encrypt(output2, input2, sizeof(input2)); + * ... 
+ * \endcode + * + * If the key is reused, then the IV must be different for each session + * or the encryption scheme can be easily broken. It is better to + * generate a new key and IV combination for every session. + * + * It may also be a good idea to include some tag information with the input + * data to distinguish different uses of the XOF. For example: + * + * \code + * SHAKE256 xof; + * xof.reset(); + * xof.update(key, sizeof(key)); + * xof.update(iv, sizeof(iv)); + * xof.update("MyCrypt", 7); + * xof.encrypt(output, input, sizeof(input)); + * \endcode + * + * If the same key and IV was used with a different package, then it would + * not generate the same output as "MyCrypt". + * + * NIST warns that XOFs should not be used in place of hash functions. + * This is because of related outputs: if the same input is provided to + * an XOF with different output lengths, then the shorter output will + * be a prefix of the larger. This breaks the expected collision-resistance + * of regular hash functions. There is typically no need to use an XOF + * for hashing because NIST has already defined SHA3_256 and SHA3_512 + * for that purpose. + * + * Reference: http://en.wikipedia.org/wiki/SHA-3 + * + * \sa SHAKE256, SHAKE128, SHA3_256 + */ + +/** + * \brief Constructs a new XOF object. + */ +XOF::XOF() +{ +} + +/** + * \brief Destroys this XOF object. + * + * \note Subclasses are responsible for clearing any sensitive data + * that remains in the XOF object when it is destroyed. + * + * \sa clear() + */ +XOF::~XOF() +{ +} + +/** + * \fn size_t XOF::blockSize() const + * \brief Size of the internal block used by the XOF algorithm, in bytes. + * + * \sa update() + */ + +/** + * \fn void XOF::reset() + * \brief Resets the XOF ready for a new session. + * + * \sa update(), extend(), encrypt() + */ + +/** + * \fn void XOF::update(const void *data, size_t len) + * \brief Updates the XOF with more data. + * + * \param data Data to be hashed. 
+ * \param len Number of bytes of data to be added to the XOF. + * + * If extend() or encrypt() has already been called, then the behavior of + * update() will be undefined. Call reset() first to start a new session. + * + * \sa reset(), extend(), encrypt() + */ + +/** + * \fn void XOF::extend(uint8_t *data, size_t len) + * \brief Generates extendable output from this XOF. + * + * \param data The data buffer to be filled. + * \param len The number of bytes to write to \a data. + * + * \sa reset(), update(), encrypt() + */ + +/** + * \fn void XOF::encrypt(uint8_t *output, const uint8_t *input, size_t len) + * \brief Encrypts an input buffer with extendable output from this XOF. + * + * \param output The output buffer to write to, which may be the same + * buffer as \a input. The \a output buffer must have at least as many + * bytes as the \a input buffer. + * \param input The input buffer to read from. + * \param len The number of bytes to encrypt. + * + * This function is a convenience that generates data with extend() and + * then XOR's it with the contents of \a input to generate the \a output. + * This function can also be used to decrypt. + * + * The encrypt() function can be called multiple times with different + * regions of the plaintext data. + * + * \sa reset(), update(), extend(), decrypt() + */ + +/** + * \fn void XOF::decrypt(uint8_t *output, const uint8_t *input, size_t len) + * \brief Decrypts an input buffer with extendable output from this XOF. + * + * \param output The output buffer to write to, which may be the same + * buffer as \a input. The \a output buffer must have at least as many + * bytes as the \a input buffer. + * \param input The input buffer to read from. + * \param len The number of bytes to decrypt. + * + * This is a convenience function that merely calls encrypt().
+ * + * \sa reset(), update(), extend(), encrypt() + */ + +/** + * \fn void XOF::clear() + * \brief Clears the hash state, removing all sensitive data, and then + * resets the XOF ready for a new session. + * + * \sa reset() + */ diff --git a/src/XOF.h b/src/XOF.h new file mode 100644 index 0000000..fc72856 --- /dev/null +++ b/src/XOF.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_XOF_h +#define CRYPTO_XOF_h + +#include <inttypes.h> +#include <stddef.h> + +class XOF +{ +public: + XOF(); + virtual ~XOF(); + + virtual size_t blockSize() const = 0; + + virtual void reset() = 0; + virtual void update(const void *data, size_t len) = 0; + + virtual void extend(uint8_t *data, size_t len) = 0; + virtual void encrypt(uint8_t *output, const uint8_t *input, size_t len) = 0; + + inline void decrypt(uint8_t *output, const uint8_t *input, size_t len) + { + encrypt(output, input, len); // decryption applies the same transform as encryption + } + + virtual void clear() = 0; +}; + +#endif diff --git a/src/XTS.cpp b/src/XTS.cpp new file mode 100644 index 0000000..87c4044 --- /dev/null +++ b/src/XTS.cpp @@ -0,0 +1,437 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE.
+ */ + +#include "XTS.h" +#include "Crypto.h" +#include "GF128.h" +#include <string.h> + +/** + * \class XTSCommon XTS.h + * \brief Concrete base class to assist with implementing XTS mode for + * 128-bit block ciphers. + * + * References: IEEE Std. 1619-2007, NIST SP 800-38E, <a href="http://web.cs.ucdavis.edu/~rogaway/papers/offsets.pdf">XEX</a>. + * + * \sa XTS, XTSSingleKey + */ + +/** + * \brief Constructs an XTS object with a default sector size of 512 bytes. + */ +XTSCommon::XTSCommon() + : sectSize(512) +{ +} + +/** + * \brief Clears all sensitive information and destroys this object. + */ +XTSCommon::~XTSCommon() +{ + clean(twk); +} + +/** + * \brief Gets the size of the key for XTS mode. + * + * The key size for XTS mode is twice the size of the underlying + * block cipher key size. + * + * \sa setKey(), tweakSize() + */ +size_t XTSCommon::keySize() const +{ + return blockCipher1->keySize() * 2; +} + +/** + * \brief Gets the maximum supported size for the tweak. + * + * This function returns 16, which indicates that any tweak up to 16 bytes + * in size can be specified via setTweak(). + */ +size_t XTSCommon::tweakSize() const +{ + return 16; +} + +/** + * \fn size_t XTSCommon::sectorSize() const + * \brief Gets the size of sectors encrypted or decrypted by this class. + * + * The default value is 512 bytes. + * + * \sa setSectorSize() + */ + +/** + * \brief Sets the size of sectors encrypted or decrypted by this class. + * + * \param size The sector size in bytes, which must be greater than or + * equal to 16. + * + * \return Returns false if \a size is less than 16. + * + * \sa sectorSize(), encryptSector() + */ +bool XTSCommon::setSectorSize(size_t size) +{ + if (size < 16) // a sector must hold at least one 16-byte cipher block + return false; + sectSize = size; + return true; +} + +/** + * \brief Sets the key to use for XTS mode. + * + * \param key Points to the key. + * \param len The size of the key in bytes which must be twice the + * size of the underlying block cipher's key size.
+ * + * \return Returns true if the key was set or false if \a len was incorrect. + * + * This function should be followed by a call to setTweak() to specify + * the sector-specific tweak. + * + * \sa keySize(), setTweak(), encryptSector() + */ +bool XTSCommon::setKey(const uint8_t *key, size_t len) +{ + if (!blockCipher1->setKey(key, len / 2)) + return false; + return blockCipher2->setKey(key + len / 2, len - (len / 2)); +} + +/** + * \brief Sets the tweak value for the current sector to encrypt or decrypt. + * + * \param tweak Points to the tweak. + * \param len The length of the tweak which must be less than or equal to 16. + * + * \return Returns true if the tweak was set or false if \a len was incorrect. + * + * If \a len is less than 16, then the \a tweak will be zero-padded to + * 16 bytes. + * + * The \a tweak is encrypted with the second half of the XTS key to generate + * the actual tweak value for the sector. + * + * \sa tweakSize(), setKey(), encryptSector() + */ +bool XTSCommon::setTweak(const uint8_t *tweak, size_t len) +{ + if (len > 16) + return false; + memcpy(twk, tweak, len); + memset(((uint8_t *)twk) + len, 0, 16 - len); + blockCipher2->encryptBlock((uint8_t *)twk, (uint8_t *)twk); + return true; +} + +#define xorTweak(output, input, tweak) \ + do { \ + for (uint8_t i = 0; i < 16; ++i) \ + (output)[i] = (input)[i] ^ ((const uint8_t *)(tweak))[i]; \ + } while (0) + +/** + * \brief Encrypts an entire sector of data. + * + * \param output The output buffer to write the ciphertext to, which can + * be the same as \a input. + * \param input The input buffer to read the plaintext from. + * + * The \a input and \a output buffers must be at least sectorSize() + * bytes in length. 
+ * + * \sa decryptSector(), setKey(), setTweak() + */ +void XTSCommon::encryptSector(uint8_t *output, const uint8_t *input) +{ + size_t sectLast = sectSize & ~15; + size_t posn = 0; + uint32_t t[4]; + memcpy(t, twk, sizeof(t)); + while (posn < sectLast) { + // Process all complete 16-byte blocks. + xorTweak(output, input, t); + blockCipher1->encryptBlock(output, output); + xorTweak(output, output, t); + GF128::dblXTS(t); + input += 16; + output += 16; + posn += 16; + } + if (posn < sectSize) { + // Perform ciphertext stealing on the final partial block. + uint8_t leftOver = sectSize - posn; + output -= 16; + while (leftOver > 0) { + // Swap the left-over bytes in the last two blocks. + --leftOver; + uint8_t temp = input[leftOver]; + output[leftOver + 16] = output[leftOver]; + output[leftOver] = temp; + } + xorTweak(output, output, t); + blockCipher1->encryptBlock(output, output); + xorTweak(output, output, t); + } +} + +/** + * \brief Decrypts an entire sector of data. + * + * \param output The output buffer to write the plaintext to, which can + * be the same as \a input. + * \param input The input buffer to read the ciphertext from. + * + * The \a input and \a output buffers must be at least sectorSize() + * bytes in length. + * + * \sa encryptSector(), setKey(), setTweak() + */ +void XTSCommon::decryptSector(uint8_t *output, const uint8_t *input) +{ + size_t sectLast = sectSize & ~15; + size_t posn = 0; + uint32_t t[4]; + memcpy(t, twk, sizeof(t)); + if (sectLast != sectSize) + sectLast -= 16; + while (posn < sectLast) { + // Process all complete 16-byte blocks. + xorTweak(output, input, t); + blockCipher1->decryptBlock(output, output); + xorTweak(output, output, t); + GF128::dblXTS(t); + input += 16; + output += 16; + posn += 16; + } + if (posn < sectSize) { + // Perform ciphertext stealing on the final two blocks. 
+ uint8_t leftOver = sectSize - 16 - posn; + uint32_t u[4]; + + // Decrypt the second-last block of ciphertext to recover + // the last partial block of plaintext. We need to use + // dblXTS(t) as the tweak for this block. Save the current + // tweak in "u" for use later. + memcpy(u, t, sizeof(t)); + GF128::dblXTS(t); + xorTweak(output, input, t); + blockCipher1->decryptBlock(output, output); + xorTweak(output, output, t); + + // Swap the left-over bytes in the last two blocks. + while (leftOver > 0) { + --leftOver; + uint8_t temp = input[leftOver + 16]; + output[leftOver + 16] = output[leftOver]; + output[leftOver] = temp; + } + + // Decrypt the second-last block using the second-last tweak. + xorTweak(output, output, u); + blockCipher1->decryptBlock(output, output); + xorTweak(output, output, u); + } +} + +/** + * \brief Clears all security-sensitive state from this XTS object. + */ +void XTSCommon::clear() +{ + clean(twk); + blockCipher1->clear(); + blockCipher2->clear(); +} + +/** + * \fn void XTSCommon::setBlockCiphers(BlockCipher *cipher1, BlockCipher *cipher2) + * \brief Sets the two block ciphers to use for XTS mode. + * + * \param cipher1 Points to the first block cipher object, which must be + * capable of both encryption and decryption. + * \param cipher2 Points to the second block cipher object, which must be + * capable of encryption but does not need to be capable of decryption. + * + * Both block ciphers must have a 128-bit block size. + */ + +/** + * \class XTSSingleKeyCommon XTS.h + * \brief Concrete base class to assist with implementing single-key XTS + * mode for 128-bit block ciphers. + * + * References: IEEE Std. 1619-2007, NIST SP 800-38E, <a href="http://web.cs.ucdavis.edu/~rogaway/papers/offsets.pdf">XEX</a>. + * + * \sa XTSSingleKey, XTSCommon + */ + +/** + * \fn XTSSingleKeyCommon::XTSSingleKeyCommon() + * \brief Constructs an XTS object with a default sector size of 512 bytes.
+ */ + +/** + * \brief Clears all sensitive information and destroys this object. + */ +XTSSingleKeyCommon::~XTSSingleKeyCommon() +{ +} + +/** + * \brief Gets the size of the key for single-key XTS mode. + * + * The key size for single-key XTS mode is the same as the key size + * for the underlying block cipher. + * + * \sa setKey(), tweakSize() + */ +size_t XTSSingleKeyCommon::keySize() const +{ + return blockCipher1->keySize(); +} + +/** + * \brief Sets the key to use for single-key XTS mode. + * + * \param key Points to the key. + * \param len The size of the key in bytes which must be the same as the + * key size of the underlying block cipher. + * + * \return Returns true if the key was set or false if \a len was incorrect. + * + * This function should be followed by a call to setTweak() to specify + * the sector-specific tweak. + * + * \sa keySize(), setTweak(), encryptSector() + */ +bool XTSSingleKeyCommon::setKey(const uint8_t *key, size_t len) +{ + return blockCipher1->setKey(key, len); +} + +/** + * \class XTS XTS.h + * \brief Implementation of the XTS mode for 128-bit block ciphers. + * + * XTS mode implements the XEX tweakable block cipher mode with ciphertext + * stealing for data that isn't a multiple of the 128-bit block size. + * + * XTS was designed for use in disk encryption where a large number of + * equal-sized "sectors" need to be encrypted in a way that information + * from one sector cannot be used to decrypt the other sectors. The mode + * combines the key with a sector-specific "tweak" which is usually + * based on the sector number. + * + * Some Arduino systems have SD cards, but typically embedded systems + * do not have disk drives. However, XTS can still be useful on + * Arduino systems with lots of EEPROM or flash memory. If the application + * needs to store critical security parameters like private keys then + * XTS can be used to encrypt non-volatile memory to protect the parameters.
+ * + * The following example encrypts a sector using XTS mode: + * + * \code + * XTS<AES256> xts; + * xts.setSectorSize(520); + * xts.setKey(key, 64); // Twice the AES256 key size. + * xts.setTweak(sectorNumber, sizeof(sectorNumber)); + * xts.encryptSector(output, input); + * \endcode + * + * XTS keys are twice the size of the underlying block cipher's key + * (AES256 in the above example). The XTS key is divided into two halves. + * The first half is used to encrypt the plaintext and the second half + * is used to encrypt the sector-specific tweak. The same key can be + * used for both, in which case XTS is equivalent to the original + * XEX design upon which XTS was based. The companion XTSSingleKey class + * can be used for single-key scenarios. + * + * The template parameter must be a concrete subclass of BlockCipher + * indicating the specific block cipher to use. The example above uses + * AES256 as the underlying cipher. + * + * It is also possible to specify two different block ciphers, as long as + * they have the same key size. Because the second half of the key is only + * used to encrypt tweaks and never decrypt, a reduced block cipher + * implementation like SpeckTiny that only supports encryption can be + * used for the second block cipher: + * + * \code + * XTS<AES256, SpeckTiny> xts; + * \endcode + * + * This might save some memory that would otherwise be needed for the + * decryption key schedule of the second block cipher. XTSSingleKey provides + * another method to save memory. + * + * References: IEEE Std. 1619-2007, NIST SP 800-38E, <a href="http://web.cs.ucdavis.edu/~rogaway/papers/offsets.pdf">XEX</a>. + * + * \sa XTSSingleKey, XTSCommon + */ + +/** + * \fn XTS::XTS() + * \brief Constructs an object for encrypting sectors in XTS mode. + * + * This constructor should be followed by a call to setSectorSize(). + * The default sector size is 512 bytes. + */ + +/** + * \fn XTS::~XTS() + * \brief Clears all sensitive information and destroys this object.
+ */ + +/** + * \class XTSSingleKey XTS.h + * \brief Implementation of the single-key XTS mode for 128-bit block ciphers. + * + * XTS mode normally uses two keys to encrypt plaintext and the + * sector-specific tweak values. This class uses the same key for + * both purposes, which can help save memory. + * + * References: IEEE Std. 1619-2007, NIST SP 800-38E, <a href="http://web.cs.ucdavis.edu/~rogaway/papers/offsets.pdf">XEX</a>. + * + * \sa XTS, XTSSingleKeyCommon + */ + +/** + * \fn XTSSingleKey::XTSSingleKey() + * \brief Constructs an object for encrypting sectors in XTS mode + * with a single key instead of two split keys. + * + * This constructor should be followed by a call to setSectorSize(). + * The default sector size is 512 bytes. + */ + +/** + * \fn XTSSingleKey::~XTSSingleKey() + * \brief Clears all sensitive information and destroys this object. + */ diff --git a/src/XTS.h b/src/XTS.h new file mode 100644 index 0000000..8b745ad --- /dev/null +++ b/src/XTS.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_XTS_h +#define CRYPTO_XTS_h + +#include "BlockCipher.h" + +class XTSSingleKeyCommon; + +class XTSCommon +{ +public: + virtual ~XTSCommon(); + + virtual size_t keySize() const; + size_t tweakSize() const; + + size_t sectorSize() const { return sectSize; } + bool setSectorSize(size_t size); + + virtual bool setKey(const uint8_t *key, size_t len); + bool setTweak(const uint8_t *tweak, size_t len); + + void encryptSector(uint8_t *output, const uint8_t *input); + void decryptSector(uint8_t *output, const uint8_t *input); + + void clear(); + +protected: + XTSCommon(); + void setBlockCiphers(BlockCipher *cipher1, BlockCipher *cipher2) + { + blockCipher1 = cipher1; + blockCipher2 = cipher2; + } + +private: + BlockCipher *blockCipher1; + BlockCipher *blockCipher2; + uint32_t twk[4]; + size_t sectSize; + + friend class XTSSingleKeyCommon; +}; + +class XTSSingleKeyCommon : public XTSCommon +{ +public: + virtual ~XTSSingleKeyCommon(); + + size_t keySize() const; + bool setKey(const uint8_t *key, size_t len); + +protected: + XTSSingleKeyCommon() : XTSCommon() {} +}; + +template +class XTS : public XTSCommon +{ +public: + XTS() { setBlockCiphers(&cipher1, &cipher2); } + ~XTS() {} + +private: + T1 cipher1; + T2 cipher2; +}; + +template +class XTSSingleKey : public XTSSingleKeyCommon +{ +public: + XTSSingleKey() { setBlockCiphers(&cipher, &cipher); } + ~XTSSingleKey() {} + +private: + T cipher; +}; + +#endif diff --git a/src/keywords.txt b/src/keywords.txt new file mode 100644 index 0000000..068542f --- /dev/null +++ b/src/keywords.txt @@ -0,0 +1,71 @@ +AES128 KEYWORD1 +AES192 KEYWORD1 +AES256 KEYWORD1 +AESTiny128 KEYWORD1 +AESTiny256 KEYWORD1 +AESSmall128 KEYWORD1 +AESSmall256 KEYWORD1 
+ChaCha KEYWORD1 +ChaChaPoly KEYWORD1 + +BLAKE2b KEYWORD1 +BLAKE2s KEYWORD1 +SHA256 KEYWORD1 +SHA512 KEYWORD1 +SHA3_256 KEYWORD1 +SHA3_512 KEYWORD1 +KeccakCore KEYWORD1 +Poly1305 KEYWORD1 +GHASH KEYWORD1 +OMAC KEYWORD1 +GF128 KEYWORD1 + +SHAKE128 KEYWORD1 +SHAKE256 KEYWORD1 + +Curve25519 KEYWORD1 +Ed25519 KEYWORD1 + +CBC KEYWORD1 +CFB KEYWORD1 +CTR KEYWORD1 +OFB KEYWORD1 +GCM KEYWORD1 +EAX KEYWORD1 + +RNG KEYWORD1 + +keySize KEYWORD2 +ivSize KEYWORD2 +tagSize KEYWORD2 +setKey KEYWORD2 +setIV KEYWORD2 +encrypt KEYWORD2 +decrypt KEYWORD2 +clear KEYWORD2 +addAuthData KEYWORD2 + +hashSize KEYWORD2 +blockSize KEYWORD2 +reset KEYWORD2 +update KEYWORD2 +finalize KEYWORD2 + +begin KEYWORD2 +setAutoSaveTime KEYWORD2 +rand KEYWORD2 +available KEYWORD2 +stir KEYWORD2 +save KEYWORD2 +loop KEYWORD2 +destroy KEYWORD2 +calibrating KEYWORD2 + +eval KEYWORD2 +dh1 KEYWORD2 +dh2 KEYWORD2 + +sign KEYWORD2 +verify KEYWORD2 +generatePrivateKey KEYWORD2 +derivePublicKey KEYWORD2 diff --git a/src/library.json b/src/library.json new file mode 100644 index 0000000..d6343bc --- /dev/null +++ b/src/library.json @@ -0,0 +1,22 @@ +{ + "name": "Crypto", + "version": "0.2.0", + "keywords": "AES128,AES192,AES256,Speck,CTR,CFB,CBC,OFB,EAX,GCM,XTS,ChaCha,ChaChaPoly,EAX,GCM,SHA256,SHA512,SHA3_256,SHA3_512,BLAKE2s,BLAKE2b,SHAKE128,SHAKE256,Poly1305,GHASH,OMAC,Curve25519,Ed25519,P521,RNG,NOISE", + "description": "Arduino CryptoLibs - All cryptographic algorithms have been optimized for 8-bit Arduino platforms like the Uno", + "authors": + { + "name": "Rhys Weatherley", + "email": "rhys.weatherley@gmail.com", + "url": "https://rweather.github.io/arduinolibs/crypto.html" + }, + "export": { + "include": "libraries/Crypto" + }, + "repository": + { + "type": "git", + "url": "https://github.com/rweather/arduinolibs.git" + }, + "frameworks": "arduino", + "platforms": "*" +} diff --git a/src/utility/EndianUtil.h b/src/utility/EndianUtil.h new file mode 100644 index 0000000..b4e31aa --- 
/dev/null +++ b/src/utility/EndianUtil.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_ENDIANUTIL_H +#define CRYPTO_ENDIANUTIL_H + +#include + +#if !defined(HOST_BUILD) + +// CPU is assumed to be little endian. Edit this file if you +// need to port this library to a big endian CPU. 
+ +#define CRYPTO_LITTLE_ENDIAN 1 + +#define htole16(x) (x) +#define le16toh(x) (x) +#define htobe16(x) \ + (__extension__ ({ \ + uint16_t _temp = (x); \ + ((_temp >> 8) & 0x00FF) | \ + ((_temp << 8) & 0xFF00); \ + })) +#define be16toh(x) (htobe16((x))) + +#define htole32(x) (x) +#define le32toh(x) (x) +#define htobe32(x) \ + (__extension__ ({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | \ + ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | \ + ((_temp << 24) & 0xFF000000); \ + })) +#define be32toh(x) (htobe32((x))) + +#define htole64(x) (x) +#define le64toh(x) (x) +#define htobe64(x) \ + (__extension__ ({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define be64toh(x) (htobe64((x))) + +#else // HOST_BUILD + +#include +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define CRYPTO_LITTLE_ENDIAN 1 +#endif + +#endif // HOST_BUILD + +#endif diff --git a/src/utility/LimbUtil.h b/src/utility/LimbUtil.h new file mode 100644 index 0000000..2451f43 --- /dev/null +++ b/src/utility/LimbUtil.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_LIMBUTIL_H +#define CRYPTO_LIMBUTIL_H + +#include "ProgMemUtil.h" + +// Number of limbs in a big number value of various sizes. +#define NUM_LIMBS_BITS(n) \ + (((n) + sizeof(limb_t) * 8 - 1) / (8 * sizeof(limb_t))) +#define NUM_LIMBS_128BIT NUM_LIMBS_BITS(128) +#define NUM_LIMBS_256BIT NUM_LIMBS_BITS(256) +#define NUM_LIMBS_512BIT NUM_LIMBS_BITS(512) + +// The number of bits in a limb. +#define LIMB_BITS (8 * sizeof(limb_t)) + +// Read a limb-sized quantity from program memory. +#if BIGNUMBER_LIMB_8BIT +#define pgm_read_limb(x) (pgm_read_byte((x))) +#elif BIGNUMBER_LIMB_16BIT +#define pgm_read_limb(x) (pgm_read_word((x))) +#elif BIGNUMBER_LIMB_32BIT +#define pgm_read_limb(x) (pgm_read_dword((x))) +#elif BIGNUMBER_LIMB_64BIT +#define pgm_read_limb(x) (pgm_read_qword((x))) +#endif + +// Expand a 32-bit value into a set of limbs depending upon the limb size. +// This is used when initializing constant big number values in the code. +// For 64-bit system compatibility it is necessary to use LIMB_PAIR(x, y). 
+#if BIGNUMBER_LIMB_8BIT +#define LIMB(value) ((uint8_t)(value)), \ + ((uint8_t)((value) >> 8)), \ + ((uint8_t)((value) >> 16)), \ + ((uint8_t)((value) >> 24)) +#define LIMB_PAIR(x,y) LIMB((x)), LIMB((y)) +#elif BIGNUMBER_LIMB_16BIT +#define LIMB(value) ((uint16_t)(value)), \ + ((uint16_t)(((uint32_t)(value)) >> 16)) +#define LIMB_PAIR(x,y) LIMB((x)), LIMB((y)) +#elif BIGNUMBER_LIMB_32BIT +#define LIMB(value) (value) +#define LIMB_PAIR(x,y) LIMB((x)), LIMB((y)) +#elif BIGNUMBER_LIMB_64BIT +#define LIMB(value) (value) +#define LIMB_PAIR(x,y) ((((uint64_t)(y)) << 32) | ((uint64_t)(x))) +#endif + +#endif diff --git a/src/utility/ProgMemUtil.h b/src/utility/ProgMemUtil.h new file mode 100644 index 0000000..023154b --- /dev/null +++ b/src/utility/ProgMemUtil.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_PROGMEMUTIL_H +#define CRYPTO_PROGMEMUTIL_H + +#if defined(__AVR__) +#include +#define pgm_read_qword(x) \ + (__extension__ ({ \ + const uint32_t *_temp = (const uint32_t *)(x); \ + ((uint64_t)pgm_read_dword(_temp)) | \ + (((uint64_t)pgm_read_dword(_temp + 1)) << 32); \ + })) +#elif defined(ESP8266) || defined(ESP32) +#include +#define pgm_read_qword(x) \ + (__extension__ ({ \ + const uint32_t *_temp = (const uint32_t *)(x); \ + ((uint64_t)pgm_read_dword(_temp)) | \ + (((uint64_t)pgm_read_dword(_temp + 1)) << 32); \ + })) +#else +#include +#define PROGMEM +#ifndef pgm_read_byte +# define pgm_read_byte(x) (*(x)) +#endif +#ifndef pgm_read_word +# define pgm_read_word(x) (*(x)) +#endif +#ifndef pgm_read_dword +# define pgm_read_dword(x) (*(x)) +#endif +#ifndef pgm_read_qword +# define pgm_read_qword(x) (*(x)) +#endif +#ifndef memcpy_P +# define memcpy_P(d,s,l) memcpy((d), (s), (l)) +#endif +#endif + +#endif diff --git a/src/utility/RotateUtil.h b/src/utility/RotateUtil.h new file mode 100644 index 0000000..fcabc82 --- /dev/null +++ b/src/utility/RotateUtil.h @@ -0,0 +1,696 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_ROTATEUTIL_H +#define CRYPTO_ROTATEUTIL_H + +#include + +// Rotation functions that are optimised for best performance on AVR. +// The most efficient rotations are where the number of bits is 1 or a +// multiple of 8, so we compose the efficient rotations to produce all +// other rotation counts of interest. + +#if defined(__AVR__) +#define CRYPTO_ROTATE32_COMPOSED 1 +#define CRYPTO_ROTATE64_COMPOSED 0 +#else +#define CRYPTO_ROTATE32_COMPOSED 0 +#define CRYPTO_ROTATE64_COMPOSED 0 +#endif + +#if CRYPTO_ROTATE32_COMPOSED + +// Rotation macros for 32-bit arguments. + +// Generic left rotate - best performance when "bits" is 1 or a multiple of 8. +#define leftRotate(a, bits) \ + (__extension__ ({ \ + uint32_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (32 - (bits))); \ + })) + +// Generic right rotate - best performance when "bits" is 1 or a multiple of 8. +#define rightRotate(a, bits) \ + (__extension__ ({ \ + uint32_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (32 - (bits))); \ + })) + +// Left rotate by 1. +#define leftRotate1(a) (leftRotate((a), 1)) + +// Left rotate by 2. +#define leftRotate2(a) (leftRotate(leftRotate((a), 1), 1)) + +// Left rotate by 3. +#define leftRotate3(a) (leftRotate(leftRotate(leftRotate((a), 1), 1), 1)) + +// Left rotate by 4. +#define leftRotate4(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 1), 1), 1), 1)) + +// Left rotate by 5: Rotate left by 8, then right by 3. +#define leftRotate5(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 8), 1), 1), 1)) + +// Left rotate by 6: Rotate left by 8, then right by 2. 
+#define leftRotate6(a) (rightRotate(rightRotate(leftRotate((a), 8), 1), 1)) + +// Left rotate by 7: Rotate left by 8, then right by 1. +#define leftRotate7(a) (rightRotate(leftRotate((a), 8), 1)) + +// Left rotate by 8. +#define leftRotate8(a) (leftRotate((a), 8)) + +// Left rotate by 9: Rotate left by 8, then left by 1. +#define leftRotate9(a) (leftRotate(leftRotate((a), 8), 1)) + +// Left rotate by 10: Rotate left by 8, then left by 2. +#define leftRotate10(a) (leftRotate(leftRotate(leftRotate((a), 8), 1), 1)) + +// Left rotate by 11: Rotate left by 8, then left by 3. +#define leftRotate11(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 8), 1), 1), 1)) + +// Left rotate by 12: Rotate left by 16, then right by 4. +#define leftRotate12(a) (rightRotate(rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +// Left rotate by 13: Rotate left by 16, then right by 3. +#define leftRotate13(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1)) + +// Left rotate by 14: Rotate left by 16, then right by 2. +#define leftRotate14(a) (rightRotate(rightRotate(leftRotate((a), 16), 1), 1)) + +// Left rotate by 15: Rotate left by 16, then right by 1. +#define leftRotate15(a) (rightRotate(leftRotate((a), 16), 1)) + +// Left rotate by 16. +#define leftRotate16(a) (leftRotate((a), 16)) + +// Left rotate by 17: Rotate left by 16, then left by 1. +#define leftRotate17(a) (leftRotate(leftRotate((a), 16), 1)) + +// Left rotate by 18: Rotate left by 16, then left by 2. +#define leftRotate18(a) (leftRotate(leftRotate(leftRotate((a), 16), 1), 1)) + +// Left rotate by 19: Rotate left by 16, then left by 3. +#define leftRotate19(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1)) + +// Left rotate by 20: Rotate left by 16, then left by 4. +#define leftRotate20(a) (leftRotate(leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +// Left rotate by 21: Rotate left by 24, then right by 3. 
+#define leftRotate21(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 24), 1), 1), 1)) + +// Left rotate by 22: Rotate left by 24, then right by 2. +#define leftRotate22(a) (rightRotate(rightRotate(leftRotate((a), 24), 1), 1)) + +// Left rotate by 23: Rotate left by 24, then right by 1. +#define leftRotate23(a) (rightRotate(leftRotate((a), 24), 1)) + +// Left rotate by 24. +#define leftRotate24(a) (leftRotate((a), 24)) + +// Left rotate by 25: Rotate left by 24, then left by 1. +#define leftRotate25(a) (leftRotate(leftRotate((a), 24), 1)) + +// Left rotate by 26: Rotate left by 24, then left by 2. +#define leftRotate26(a) (leftRotate(leftRotate(leftRotate((a), 24), 1), 1)) + +// Left rotate by 27: Rotate left by 24, then left by 3. +#define leftRotate27(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 24), 1), 1), 1)) + +// Left rotate by 28: Rotate right by 4. +#define leftRotate28(a) (rightRotate(rightRotate(rightRotate(rightRotate((a), 1), 1), 1), 1)) + +// Left rotate by 29: Rotate right by 3. +#define leftRotate29(a) (rightRotate(rightRotate(rightRotate((a), 1), 1), 1)) + +// Left rotate by 30: Rotate right by 2. +#define leftRotate30(a) (rightRotate(rightRotate((a), 1), 1)) + +// Left rotate by 31: Rotate right by 1. +#define leftRotate31(a) (rightRotate((a), 1)) + +// Define the 32-bit right rotations in terms of left rotations. 
+#define rightRotate1(a) (leftRotate31((a))) +#define rightRotate2(a) (leftRotate30((a))) +#define rightRotate3(a) (leftRotate29((a))) +#define rightRotate4(a) (leftRotate28((a))) +#define rightRotate5(a) (leftRotate27((a))) +#define rightRotate6(a) (leftRotate26((a))) +#define rightRotate7(a) (leftRotate25((a))) +#define rightRotate8(a) (leftRotate24((a))) +#define rightRotate9(a) (leftRotate23((a))) +#define rightRotate10(a) (leftRotate22((a))) +#define rightRotate11(a) (leftRotate21((a))) +#define rightRotate12(a) (leftRotate20((a))) +#define rightRotate13(a) (leftRotate19((a))) +#define rightRotate14(a) (leftRotate18((a))) +#define rightRotate15(a) (leftRotate17((a))) +#define rightRotate16(a) (leftRotate16((a))) +#define rightRotate17(a) (leftRotate15((a))) +#define rightRotate18(a) (leftRotate14((a))) +#define rightRotate19(a) (leftRotate13((a))) +#define rightRotate20(a) (leftRotate12((a))) +#define rightRotate21(a) (leftRotate11((a))) +#define rightRotate22(a) (leftRotate10((a))) +#define rightRotate23(a) (leftRotate9((a))) +#define rightRotate24(a) (leftRotate8((a))) +#define rightRotate25(a) (leftRotate7((a))) +#define rightRotate26(a) (leftRotate6((a))) +#define rightRotate27(a) (leftRotate5((a))) +#define rightRotate28(a) (leftRotate4((a))) +#define rightRotate29(a) (leftRotate3((a))) +#define rightRotate30(a) (leftRotate2((a))) +#define rightRotate31(a) (leftRotate1((a))) + +#else // !CRYPTO_ROTATE32_COMPOSED + +// Generic rotation functions. All bit shifts are considered to have +// similar performance. Usually true of 32-bit and higher platforms. + +// Rotation macros for 32-bit arguments. + +// Generic left rotate. +#define leftRotate(a, bits) \ + (__extension__ ({ \ + uint32_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (32 - (bits))); \ + })) + +// Generic right rotate. 
+#define rightRotate(a, bits) \ + (__extension__ ({ \ + uint32_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (32 - (bits))); \ + })) + +// Left rotate by a specific number of bits. +#define leftRotate1(a) (leftRotate((a), 1)) +#define leftRotate2(a) (leftRotate((a), 2)) +#define leftRotate3(a) (leftRotate((a), 3)) +#define leftRotate4(a) (leftRotate((a), 4)) +#define leftRotate5(a) (leftRotate((a), 5)) +#define leftRotate6(a) (leftRotate((a), 6)) +#define leftRotate7(a) (leftRotate((a), 7)) +#define leftRotate8(a) (leftRotate((a), 8)) +#define leftRotate9(a) (leftRotate((a), 9)) +#define leftRotate10(a) (leftRotate((a), 10)) +#define leftRotate11(a) (leftRotate((a), 11)) +#define leftRotate12(a) (leftRotate((a), 12)) +#define leftRotate13(a) (leftRotate((a), 13)) +#define leftRotate14(a) (leftRotate((a), 14)) +#define leftRotate15(a) (leftRotate((a), 15)) +#define leftRotate16(a) (leftRotate((a), 16)) +#define leftRotate17(a) (leftRotate((a), 17)) +#define leftRotate18(a) (leftRotate((a), 18)) +#define leftRotate19(a) (leftRotate((a), 19)) +#define leftRotate20(a) (leftRotate((a), 20)) +#define leftRotate21(a) (leftRotate((a), 21)) +#define leftRotate22(a) (leftRotate((a), 22)) +#define leftRotate23(a) (leftRotate((a), 23)) +#define leftRotate24(a) (leftRotate((a), 24)) +#define leftRotate25(a) (leftRotate((a), 25)) +#define leftRotate26(a) (leftRotate((a), 26)) +#define leftRotate27(a) (leftRotate((a), 27)) +#define leftRotate28(a) (leftRotate((a), 28)) +#define leftRotate29(a) (leftRotate((a), 29)) +#define leftRotate30(a) (leftRotate((a), 30)) +#define leftRotate31(a) (leftRotate((a), 31)) + +// Right rotate by a specific number of bits. 
+#define rightRotate1(a) (rightRotate((a), 1)) +#define rightRotate2(a) (rightRotate((a), 2)) +#define rightRotate3(a) (rightRotate((a), 3)) +#define rightRotate4(a) (rightRotate((a), 4)) +#define rightRotate5(a) (rightRotate((a), 5)) +#define rightRotate6(a) (rightRotate((a), 6)) +#define rightRotate7(a) (rightRotate((a), 7)) +#define rightRotate8(a) (rightRotate((a), 8)) +#define rightRotate9(a) (rightRotate((a), 9)) +#define rightRotate10(a) (rightRotate((a), 10)) +#define rightRotate11(a) (rightRotate((a), 11)) +#define rightRotate12(a) (rightRotate((a), 12)) +#define rightRotate13(a) (rightRotate((a), 13)) +#define rightRotate14(a) (rightRotate((a), 14)) +#define rightRotate15(a) (rightRotate((a), 15)) +#define rightRotate16(a) (rightRotate((a), 16)) +#define rightRotate17(a) (rightRotate((a), 17)) +#define rightRotate18(a) (rightRotate((a), 18)) +#define rightRotate19(a) (rightRotate((a), 19)) +#define rightRotate20(a) (rightRotate((a), 20)) +#define rightRotate21(a) (rightRotate((a), 21)) +#define rightRotate22(a) (rightRotate((a), 22)) +#define rightRotate23(a) (rightRotate((a), 23)) +#define rightRotate24(a) (rightRotate((a), 24)) +#define rightRotate25(a) (rightRotate((a), 25)) +#define rightRotate26(a) (rightRotate((a), 26)) +#define rightRotate27(a) (rightRotate((a), 27)) +#define rightRotate28(a) (rightRotate((a), 28)) +#define rightRotate29(a) (rightRotate((a), 29)) +#define rightRotate30(a) (rightRotate((a), 30)) +#define rightRotate31(a) (rightRotate((a), 31)) + +#endif // !CRYPTO_ROTATE32_COMPOSED + +#if CRYPTO_ROTATE64_COMPOSED + +// Rotation macros for 64-bit arguments. + +// Generic left rotate - best performance when "bits" is 1 or a multiple of 8. +#define leftRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (64 - (bits))); \ + })) + +// Generic right rotate - best performance when "bits" is 1 or a multiple of 8. 
+#define rightRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (64 - (bits))); \ + })) + +// Left rotate by 1. +#define leftRotate1_64(a) (leftRotate_64((a), 1)) + +// Left rotate by 2. +#define leftRotate2_64(a) (leftRotate_64(leftRotate_64((a), 1), 1)) + +// Left rotate by 3. +#define leftRotate3_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 1), 1), 1)) + +// Left rotate by 4. +#define leftRotate4_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 1), 1), 1), 1)) + +// Left rotate by 5: Rotate left by 8, then right by 3. +#define leftRotate5_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 8), 1), 1), 1)) + +// Left rotate by 6: Rotate left by 8, then right by 2. +#define leftRotate6_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 8), 1), 1)) + +// Left rotate by 7: Rotate left by 8, then right by 1. +#define leftRotate7_64(a) (rightRotate_64(leftRotate_64((a), 8), 1)) + +// Left rotate by 8. +#define leftRotate8_64(a) (leftRotate_64((a), 8)) + +// Left rotate by 9: Rotate left by 8, then left by 1. +#define leftRotate9_64(a) (leftRotate_64(leftRotate_64((a), 8), 1)) + +// Left rotate by 10: Rotate left by 8, then left by 2. +#define leftRotate10_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 8), 1), 1)) + +// Left rotate by 11: Rotate left by 8, then left by 3. +#define leftRotate11_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 8), 1), 1), 1)) + +// Left rotate by 12: Rotate left by 16, then right by 4. +#define leftRotate12_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 16), 1), 1), 1), 1)) + +// Left rotate by 13: Rotate left by 16, then right by 3. +#define leftRotate13_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 16), 1), 1), 1)) + +// Left rotate by 14: Rotate left by 16, then right by 2. 
+#define leftRotate14_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 16), 1), 1)) + +// Left rotate by 15: Rotate left by 16, then right by 1. +#define leftRotate15_64(a) (rightRotate_64(leftRotate_64((a), 16), 1)) + +// Left rotate by 16. +#define leftRotate16_64(a) (leftRotate_64((a), 16)) + +// Left rotate by 17: Rotate left by 16, then left by 1. +#define leftRotate17_64(a) (leftRotate_64(leftRotate_64((a), 16), 1)) + +// Left rotate by 18: Rotate left by 16, then left by 2. +#define leftRotate18_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 16), 1), 1)) + +// Left rotate by 19: Rotate left by 16, then left by 3. +#define leftRotate19_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 16), 1), 1), 1)) + +// Left rotate by 20: Rotate left by 16, then left by 4. +#define leftRotate20_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 16), 1), 1), 1), 1)) + +// Left rotate by 21: Rotate left by 24, then right by 3. +#define leftRotate21_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 24), 1), 1), 1)) + +// Left rotate by 22: Rotate left by 24, then right by 2. +#define leftRotate22_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 24), 1), 1)) + +// Left rotate by 23: Rotate left by 24, then right by 1. +#define leftRotate23_64(a) (rightRotate_64(leftRotate_64((a), 24), 1)) + +// Left rotate by 24. +#define leftRotate24_64(a) (leftRotate_64((a), 24)) + +// Left rotate by 25: Rotate left by 24, then left by 1. +#define leftRotate25_64(a) (leftRotate_64(leftRotate_64((a), 24), 1)) + +// Left rotate by 26: Rotate left by 24, then left by 2. +#define leftRotate26_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 24), 1), 1)) + +// Left rotate by 27: Rotate left by 24, then left by 3. +#define leftRotate27_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 24), 1), 1), 1)) + +// Left rotate by 28: Rotate left by 24, then left by 4. 
+#define leftRotate28_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 24), 1), 1), 1), 1)) + +// Left rotate by 29: Rotate left by 32, then right by 3. +#define leftRotate29_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 32), 1), 1), 1)) + +// Left rotate by 30: Rotate left by 32, then right by 2. +#define leftRotate30_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 32), 1), 1)) + +// Left rotate by 31: Rotate left by 32, then right by 1. +#define leftRotate31_64(a) (rightRotate_64(leftRotate_64((a), 32), 1)) + +// Left rotate by 32. +#define leftRotate32_64(a) (leftRotate_64((a), 32)) + +// Left rotate by 33: Rotate left by 32, then left by 1. +#define leftRotate33_64(a) (leftRotate_64(leftRotate_64((a), 32), 1)) + +// Left rotate by 34: Rotate left by 32, then left by 2. +#define leftRotate34_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 32), 1), 1)) + +// Left rotate by 35: Rotate left by 32, then left by 3. +#define leftRotate35_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 32), 1), 1), 1)) + +// Left rotate by 36: Rotate left by 32, then left by 4. +#define leftRotate36_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 32), 1), 1), 1), 1)) + +// Left rotate by 37: Rotate left by 40, then right by 3. +#define leftRotate37_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 40), 1), 1), 1)) + +// Left rotate by 38: Rotate left by 40, then right by 2. +#define leftRotate38_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 40), 1), 1)) + +// Left rotate by 39: Rotate left by 40, then right by 1. +#define leftRotate39_64(a) (rightRotate_64(leftRotate_64((a), 40), 1)) + +// Left rotate by 40. +#define leftRotate40_64(a) (leftRotate_64((a), 40)) + +// Left rotate by 41: Rotate left by 40, then left by 1. 
+#define leftRotate41_64(a) (leftRotate_64(leftRotate_64((a), 40), 1)) + +// Left rotate by 42: Rotate left by 40, then left by 2. +#define leftRotate42_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 40), 1), 1)) + +// Left rotate by 43: Rotate left by 40, then left by 3. +#define leftRotate43_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 40), 1), 1), 1)) + +// Left rotate by 44: Rotate left by 40, then left by 4. +#define leftRotate44_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 40), 1), 1), 1), 1)) + +// Left rotate by 45: Rotate left by 48, then right by 3. +#define leftRotate45_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 48), 1), 1), 1)) + +// Left rotate by 46: Rotate left by 48, then right by 2. +#define leftRotate46_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 48), 1), 1)) + +// Left rotate by 47: Rotate left by 48, then right by 1. +#define leftRotate47_64(a) (rightRotate_64(leftRotate_64((a), 48), 1)) + +// Left rotate by 48. +#define leftRotate48_64(a) (leftRotate_64((a), 48)) + +// Left rotate by 49: Rotate left by 48, then left by 1. +#define leftRotate49_64(a) (leftRotate_64(leftRotate_64((a), 48), 1)) + +// Left rotate by 50: Rotate left by 48, then left by 2. +#define leftRotate50_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 48), 1), 1)) + +// Left rotate by 51: Rotate left by 48, then left by 3. +#define leftRotate51_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 48), 1), 1), 1)) + +// Left rotate by 52: Rotate left by 48, then left by 4. +#define leftRotate52_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 48), 1), 1), 1), 1)) + +// Left rotate by 53: Rotate left by 56, then right by 3. +#define leftRotate53_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 56), 1), 1), 1)) + +// Left rotate by 54: Rotate left by 56, then right by 2. 
+#define leftRotate54_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 56), 1), 1)) + +// Left rotate by 55: Rotate left by 56, then right by 1. +#define leftRotate55_64(a) (rightRotate_64(leftRotate_64((a), 56), 1)) + +// Left rotate by 56. +#define leftRotate56_64(a) (leftRotate_64((a), 56)) + +// Left rotate by 57: Rotate left by 56, then left by 1. +#define leftRotate57_64(a) (leftRotate_64(leftRotate_64((a), 56), 1)) + +// Left rotate by 58: Rotate left by 56, then left by 2. +#define leftRotate58_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 56), 1), 1)) + +// Left rotate by 59: Rotate left by 56, then left by 3. +#define leftRotate59_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 56), 1), 1), 1)) + +// Left rotate by 60: Rotate left by 56, then left by 4. +#define leftRotate60_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 56), 1), 1), 1), 1)) + +// Left rotate by 61: Rotate right by 3. +#define leftRotate61_64(a) (rightRotate_64(rightRotate_64(rightRotate_64((a), 1), 1), 1)) + +// Left rotate by 62: Rotate right by 2. +#define leftRotate62_64(a) (rightRotate_64(rightRotate_64((a), 1), 1)) + +// Left rotate by 63: Rotate right by 1. +#define leftRotate63_64(a) (rightRotate_64((a), 1)) + +// Define the 64-bit right rotations in terms of left rotations. 
+#define rightRotate1_64(a) (leftRotate63_64((a))) +#define rightRotate2_64(a) (leftRotate62_64((a))) +#define rightRotate3_64(a) (leftRotate61_64((a))) +#define rightRotate4_64(a) (leftRotate60_64((a))) +#define rightRotate5_64(a) (leftRotate59_64((a))) +#define rightRotate6_64(a) (leftRotate58_64((a))) +#define rightRotate7_64(a) (leftRotate57_64((a))) +#define rightRotate8_64(a) (leftRotate56_64((a))) +#define rightRotate9_64(a) (leftRotate55_64((a))) +#define rightRotate10_64(a) (leftRotate54_64((a))) +#define rightRotate11_64(a) (leftRotate53_64((a))) +#define rightRotate12_64(a) (leftRotate52_64((a))) +#define rightRotate13_64(a) (leftRotate51_64((a))) +#define rightRotate14_64(a) (leftRotate50_64((a))) +#define rightRotate15_64(a) (leftRotate49_64((a))) +#define rightRotate16_64(a) (leftRotate48_64((a))) +#define rightRotate17_64(a) (leftRotate47_64((a))) +#define rightRotate18_64(a) (leftRotate46_64((a))) +#define rightRotate19_64(a) (leftRotate45_64((a))) +#define rightRotate20_64(a) (leftRotate44_64((a))) +#define rightRotate21_64(a) (leftRotate43_64((a))) +#define rightRotate22_64(a) (leftRotate42_64((a))) +#define rightRotate23_64(a) (leftRotate41_64((a))) +#define rightRotate24_64(a) (leftRotate40_64((a))) +#define rightRotate25_64(a) (leftRotate39_64((a))) +#define rightRotate26_64(a) (leftRotate38_64((a))) +#define rightRotate27_64(a) (leftRotate37_64((a))) +#define rightRotate28_64(a) (leftRotate36_64((a))) +#define rightRotate29_64(a) (leftRotate35_64((a))) +#define rightRotate30_64(a) (leftRotate34_64((a))) +#define rightRotate31_64(a) (leftRotate33_64((a))) +#define rightRotate32_64(a) (leftRotate32_64((a))) +#define rightRotate33_64(a) (leftRotate31_64((a))) +#define rightRotate34_64(a) (leftRotate30_64((a))) +#define rightRotate35_64(a) (leftRotate29_64((a))) +#define rightRotate36_64(a) (leftRotate28_64((a))) +#define rightRotate37_64(a) (leftRotate27_64((a))) +#define rightRotate38_64(a) (leftRotate26_64((a))) +#define rightRotate39_64(a) 
(leftRotate25_64((a))) +#define rightRotate40_64(a) (leftRotate24_64((a))) +#define rightRotate41_64(a) (leftRotate23_64((a))) +#define rightRotate42_64(a) (leftRotate22_64((a))) +#define rightRotate43_64(a) (leftRotate21_64((a))) +#define rightRotate44_64(a) (leftRotate20_64((a))) +#define rightRotate45_64(a) (leftRotate19_64((a))) +#define rightRotate46_64(a) (leftRotate18_64((a))) +#define rightRotate47_64(a) (leftRotate17_64((a))) +#define rightRotate48_64(a) (leftRotate16_64((a))) +#define rightRotate49_64(a) (leftRotate15_64((a))) +#define rightRotate50_64(a) (leftRotate14_64((a))) +#define rightRotate51_64(a) (leftRotate13_64((a))) +#define rightRotate52_64(a) (leftRotate12_64((a))) +#define rightRotate53_64(a) (leftRotate11_64((a))) +#define rightRotate54_64(a) (leftRotate10_64((a))) +#define rightRotate55_64(a) (leftRotate9_64((a))) +#define rightRotate56_64(a) (leftRotate8_64((a))) +#define rightRotate57_64(a) (leftRotate7_64((a))) +#define rightRotate58_64(a) (leftRotate6_64((a))) +#define rightRotate59_64(a) (leftRotate5_64((a))) +#define rightRotate60_64(a) (leftRotate4_64((a))) +#define rightRotate61_64(a) (leftRotate3_64((a))) +#define rightRotate62_64(a) (leftRotate2_64((a))) +#define rightRotate63_64(a) (leftRotate1_64((a))) + +#else // !CRYPTO_ROTATE64_COMPOSED + +// Rotation macros for 64-bit arguments. + +// Generic left rotate. +#define leftRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (64 - (bits))); \ + })) + +// Generic right rotate. +#define rightRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (64 - (bits))); \ + })) + +// Left rotate by a specific number of bits. 
+#define leftRotate1_64(a) (leftRotate_64((a), 1)) +#define leftRotate2_64(a) (leftRotate_64((a), 2)) +#define leftRotate3_64(a) (leftRotate_64((a), 3)) +#define leftRotate4_64(a) (leftRotate_64((a), 4)) +#define leftRotate5_64(a) (leftRotate_64((a), 5)) +#define leftRotate6_64(a) (leftRotate_64((a), 6)) +#define leftRotate7_64(a) (leftRotate_64((a), 7)) +#define leftRotate8_64(a) (leftRotate_64((a), 8)) +#define leftRotate9_64(a) (leftRotate_64((a), 9)) +#define leftRotate10_64(a) (leftRotate_64((a), 10)) +#define leftRotate11_64(a) (leftRotate_64((a), 11)) +#define leftRotate12_64(a) (leftRotate_64((a), 12)) +#define leftRotate13_64(a) (leftRotate_64((a), 13)) +#define leftRotate14_64(a) (leftRotate_64((a), 14)) +#define leftRotate15_64(a) (leftRotate_64((a), 15)) +#define leftRotate16_64(a) (leftRotate_64((a), 16)) +#define leftRotate17_64(a) (leftRotate_64((a), 17)) +#define leftRotate18_64(a) (leftRotate_64((a), 18)) +#define leftRotate19_64(a) (leftRotate_64((a), 19)) +#define leftRotate20_64(a) (leftRotate_64((a), 20)) +#define leftRotate21_64(a) (leftRotate_64((a), 21)) +#define leftRotate22_64(a) (leftRotate_64((a), 22)) +#define leftRotate23_64(a) (leftRotate_64((a), 23)) +#define leftRotate24_64(a) (leftRotate_64((a), 24)) +#define leftRotate25_64(a) (leftRotate_64((a), 25)) +#define leftRotate26_64(a) (leftRotate_64((a), 26)) +#define leftRotate27_64(a) (leftRotate_64((a), 27)) +#define leftRotate28_64(a) (leftRotate_64((a), 28)) +#define leftRotate29_64(a) (leftRotate_64((a), 29)) +#define leftRotate30_64(a) (leftRotate_64((a), 30)) +#define leftRotate31_64(a) (leftRotate_64((a), 31)) +#define leftRotate32_64(a) (leftRotate_64((a), 32)) +#define leftRotate33_64(a) (leftRotate_64((a), 33)) +#define leftRotate34_64(a) (leftRotate_64((a), 34)) +#define leftRotate35_64(a) (leftRotate_64((a), 35)) +#define leftRotate36_64(a) (leftRotate_64((a), 36)) +#define leftRotate37_64(a) (leftRotate_64((a), 37)) +#define leftRotate38_64(a) (leftRotate_64((a), 38)) 
+#define leftRotate39_64(a) (leftRotate_64((a), 39)) +#define leftRotate40_64(a) (leftRotate_64((a), 40)) +#define leftRotate41_64(a) (leftRotate_64((a), 41)) +#define leftRotate42_64(a) (leftRotate_64((a), 42)) +#define leftRotate43_64(a) (leftRotate_64((a), 43)) +#define leftRotate44_64(a) (leftRotate_64((a), 44)) +#define leftRotate45_64(a) (leftRotate_64((a), 45)) +#define leftRotate46_64(a) (leftRotate_64((a), 46)) +#define leftRotate47_64(a) (leftRotate_64((a), 47)) +#define leftRotate48_64(a) (leftRotate_64((a), 48)) +#define leftRotate49_64(a) (leftRotate_64((a), 49)) +#define leftRotate50_64(a) (leftRotate_64((a), 50)) +#define leftRotate51_64(a) (leftRotate_64((a), 51)) +#define leftRotate52_64(a) (leftRotate_64((a), 52)) +#define leftRotate53_64(a) (leftRotate_64((a), 53)) +#define leftRotate54_64(a) (leftRotate_64((a), 54)) +#define leftRotate55_64(a) (leftRotate_64((a), 55)) +#define leftRotate56_64(a) (leftRotate_64((a), 56)) +#define leftRotate57_64(a) (leftRotate_64((a), 57)) +#define leftRotate58_64(a) (leftRotate_64((a), 58)) +#define leftRotate59_64(a) (leftRotate_64((a), 59)) +#define leftRotate60_64(a) (leftRotate_64((a), 60)) +#define leftRotate61_64(a) (leftRotate_64((a), 61)) +#define leftRotate62_64(a) (leftRotate_64((a), 62)) +#define leftRotate63_64(a) (leftRotate_64((a), 63)) + +// Right rotate by a specific number of bits. 
+#define rightRotate1_64(a) (rightRotate_64((a), 1)) +#define rightRotate2_64(a) (rightRotate_64((a), 2)) +#define rightRotate3_64(a) (rightRotate_64((a), 3)) +#define rightRotate4_64(a) (rightRotate_64((a), 4)) +#define rightRotate5_64(a) (rightRotate_64((a), 5)) +#define rightRotate6_64(a) (rightRotate_64((a), 6)) +#define rightRotate7_64(a) (rightRotate_64((a), 7)) +#define rightRotate8_64(a) (rightRotate_64((a), 8)) +#define rightRotate9_64(a) (rightRotate_64((a), 9)) +#define rightRotate10_64(a) (rightRotate_64((a), 10)) +#define rightRotate11_64(a) (rightRotate_64((a), 11)) +#define rightRotate12_64(a) (rightRotate_64((a), 12)) +#define rightRotate13_64(a) (rightRotate_64((a), 13)) +#define rightRotate14_64(a) (rightRotate_64((a), 14)) +#define rightRotate15_64(a) (rightRotate_64((a), 15)) +#define rightRotate16_64(a) (rightRotate_64((a), 16)) +#define rightRotate17_64(a) (rightRotate_64((a), 17)) +#define rightRotate18_64(a) (rightRotate_64((a), 18)) +#define rightRotate19_64(a) (rightRotate_64((a), 19)) +#define rightRotate20_64(a) (rightRotate_64((a), 20)) +#define rightRotate21_64(a) (rightRotate_64((a), 21)) +#define rightRotate22_64(a) (rightRotate_64((a), 22)) +#define rightRotate23_64(a) (rightRotate_64((a), 23)) +#define rightRotate24_64(a) (rightRotate_64((a), 24)) +#define rightRotate25_64(a) (rightRotate_64((a), 25)) +#define rightRotate26_64(a) (rightRotate_64((a), 26)) +#define rightRotate27_64(a) (rightRotate_64((a), 27)) +#define rightRotate28_64(a) (rightRotate_64((a), 28)) +#define rightRotate29_64(a) (rightRotate_64((a), 29)) +#define rightRotate30_64(a) (rightRotate_64((a), 30)) +#define rightRotate31_64(a) (rightRotate_64((a), 31)) +#define rightRotate32_64(a) (rightRotate_64((a), 32)) +#define rightRotate33_64(a) (rightRotate_64((a), 33)) +#define rightRotate34_64(a) (rightRotate_64((a), 34)) +#define rightRotate35_64(a) (rightRotate_64((a), 35)) +#define rightRotate36_64(a) (rightRotate_64((a), 36)) +#define rightRotate37_64(a) 
(rightRotate_64((a), 37)) +#define rightRotate38_64(a) (rightRotate_64((a), 38)) +#define rightRotate39_64(a) (rightRotate_64((a), 39)) +#define rightRotate40_64(a) (rightRotate_64((a), 40)) +#define rightRotate41_64(a) (rightRotate_64((a), 41)) +#define rightRotate42_64(a) (rightRotate_64((a), 42)) +#define rightRotate43_64(a) (rightRotate_64((a), 43)) +#define rightRotate44_64(a) (rightRotate_64((a), 44)) +#define rightRotate45_64(a) (rightRotate_64((a), 45)) +#define rightRotate46_64(a) (rightRotate_64((a), 46)) +#define rightRotate47_64(a) (rightRotate_64((a), 47)) +#define rightRotate48_64(a) (rightRotate_64((a), 48)) +#define rightRotate49_64(a) (rightRotate_64((a), 49)) +#define rightRotate50_64(a) (rightRotate_64((a), 50)) +#define rightRotate51_64(a) (rightRotate_64((a), 51)) +#define rightRotate52_64(a) (rightRotate_64((a), 52)) +#define rightRotate53_64(a) (rightRotate_64((a), 53)) +#define rightRotate54_64(a) (rightRotate_64((a), 54)) +#define rightRotate55_64(a) (rightRotate_64((a), 55)) +#define rightRotate56_64(a) (rightRotate_64((a), 56)) +#define rightRotate57_64(a) (rightRotate_64((a), 57)) +#define rightRotate58_64(a) (rightRotate_64((a), 58)) +#define rightRotate59_64(a) (rightRotate_64((a), 59)) +#define rightRotate60_64(a) (rightRotate_64((a), 60)) +#define rightRotate61_64(a) (rightRotate_64((a), 61)) +#define rightRotate62_64(a) (rightRotate_64((a), 62)) +#define rightRotate63_64(a) (rightRotate_64((a), 63)) + +#endif // !CRYPTO_ROTATE64_COMPOSED + +#endif diff --git a/utility/EndianUtil.h b/utility/EndianUtil.h new file mode 100644 index 0000000..b4e31aa --- /dev/null +++ b/utility/EndianUtil.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_ENDIANUTIL_H +#define CRYPTO_ENDIANUTIL_H + +#include <inttypes.h> + +#if !defined(HOST_BUILD) + +// CPU is assumed to be little endian. Edit this file if you +// need to port this library to a big endian CPU. 
+ +#define CRYPTO_LITTLE_ENDIAN 1 + +#define htole16(x) (x) +#define le16toh(x) (x) +#define htobe16(x) \ + (__extension__ ({ \ + uint16_t _temp = (x); \ + ((_temp >> 8) & 0x00FF) | \ + ((_temp << 8) & 0xFF00); \ + })) +#define be16toh(x) (htobe16((x))) // a byte swap is its own inverse + +#define htole32(x) (x) +#define le32toh(x) (x) +#define htobe32(x) \ + (__extension__ ({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | \ + ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | \ + ((_temp << 24) & 0xFF000000); \ + })) +#define be32toh(x) (htobe32((x))) // a byte swap is its own inverse + +#define htole64(x) (x) +#define le64toh(x) (x) +#define htobe64(x) \ + (__extension__ ({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define be64toh(x) (htobe64((x))) // a byte swap is its own inverse + +#else // HOST_BUILD: byte order (and presumably the hto*/*toh conversions) come from the host's <endian.h> + +#include <endian.h> +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define CRYPTO_LITTLE_ENDIAN 1 +#endif + +#endif // HOST_BUILD + +#endif diff --git a/utility/LimbUtil.h b/utility/LimbUtil.h new file mode 100644 index 0000000..2451f43 --- /dev/null +++ b/utility/LimbUtil.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_LIMBUTIL_H +#define CRYPTO_LIMBUTIL_H + +#include "ProgMemUtil.h" + +// Number of limbs in a big number value of various sizes. +#define NUM_LIMBS_BITS(n) \ + (((n) + sizeof(limb_t) * 8 - 1) / (8 * sizeof(limb_t))) +#define NUM_LIMBS_128BIT NUM_LIMBS_BITS(128) +#define NUM_LIMBS_256BIT NUM_LIMBS_BITS(256) +#define NUM_LIMBS_512BIT NUM_LIMBS_BITS(512) + +// The number of bits in a limb. +#define LIMB_BITS (8 * sizeof(limb_t)) + +// Read a limb-sized quantity from program memory. +#if BIGNUMBER_LIMB_8BIT +#define pgm_read_limb(x) (pgm_read_byte((x))) +#elif BIGNUMBER_LIMB_16BIT +#define pgm_read_limb(x) (pgm_read_word((x))) +#elif BIGNUMBER_LIMB_32BIT +#define pgm_read_limb(x) (pgm_read_dword((x))) +#elif BIGNUMBER_LIMB_64BIT +#define pgm_read_limb(x) (pgm_read_qword((x))) +#endif + +// Expand a 32-bit value into a set of limbs depending upon the limb size. +// This is used when initializing constant big number values in the code. +// For 64-bit system compatibility it is necessary to use LIMB_PAIR(x, y). 
+#if BIGNUMBER_LIMB_8BIT +#define LIMB(value) ((uint8_t)(value)), \ + ((uint8_t)((value) >> 8)), \ + ((uint8_t)((value) >> 16)), \ + ((uint8_t)((value) >> 24)) +#define LIMB_PAIR(x,y) LIMB((x)), LIMB((y)) +#elif BIGNUMBER_LIMB_16BIT +#define LIMB(value) ((uint16_t)(value)), \ + ((uint16_t)(((uint32_t)(value)) >> 16)) +#define LIMB_PAIR(x,y) LIMB((x)), LIMB((y)) +#elif BIGNUMBER_LIMB_32BIT +#define LIMB(value) (value) +#define LIMB_PAIR(x,y) LIMB((x)), LIMB((y)) +#elif BIGNUMBER_LIMB_64BIT +#define LIMB(value) (value) +#define LIMB_PAIR(x,y) ((((uint64_t)(y)) << 32) | ((uint64_t)(x))) +#endif + +#endif diff --git a/utility/ProgMemUtil.h b/utility/ProgMemUtil.h new file mode 100644 index 0000000..023154b --- /dev/null +++ b/utility/ProgMemUtil.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CRYPTO_PROGMEMUTIL_H +#define CRYPTO_PROGMEMUTIL_H + +#if defined(__AVR__) +#include <avr/pgmspace.h> +#define pgm_read_qword(x) \ + (__extension__ ({ \ + const uint32_t *_temp = (const uint32_t *)(x); \ + ((uint64_t)pgm_read_dword(_temp)) | \ + (((uint64_t)pgm_read_dword(_temp + 1)) << 32); \ + })) +#elif defined(ESP8266) || defined(ESP32) +#include <pgmspace.h> +#define pgm_read_qword(x) \ + (__extension__ ({ \ + const uint32_t *_temp = (const uint32_t *)(x); \ + ((uint64_t)pgm_read_dword(_temp)) | \ + (((uint64_t)pgm_read_dword(_temp + 1)) << 32); \ + })) +#else // other platforms: plain dereferences and memcpy stand in for the pgm_* accessors +#include <string.h> +#define PROGMEM +#ifndef pgm_read_byte +# define pgm_read_byte(x) (*(x)) +#endif +#ifndef pgm_read_word +# define pgm_read_word(x) (*(x)) +#endif +#ifndef pgm_read_dword +# define pgm_read_dword(x) (*(x)) +#endif +#ifndef pgm_read_qword +# define pgm_read_qword(x) (*(x)) +#endif +#ifndef memcpy_P +# define memcpy_P(d,s,l) memcpy((d), (s), (l)) +#endif +#endif + +#endif diff --git a/utility/RotateUtil.h b/utility/RotateUtil.h new file mode 100644 index 0000000..fcabc82 --- /dev/null +++ b/utility/RotateUtil.h @@ -0,0 +1,696 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_ROTATEUTIL_H +#define CRYPTO_ROTATEUTIL_H + +#include + +// Rotation functions that are optimised for best performance on AVR. +// The most efficient rotations are where the number of bits is 1 or a +// multiple of 8, so we compose the efficient rotations to produce all +// other rotation counts of interest. + +#if defined(__AVR__) +#define CRYPTO_ROTATE32_COMPOSED 1 +#define CRYPTO_ROTATE64_COMPOSED 0 +#else +#define CRYPTO_ROTATE32_COMPOSED 0 +#define CRYPTO_ROTATE64_COMPOSED 0 +#endif + +#if CRYPTO_ROTATE32_COMPOSED + +// Rotation macros for 32-bit arguments. + +// Generic left rotate - best performance when "bits" is 1 or a multiple of 8. +#define leftRotate(a, bits) \ + (__extension__ ({ \ + uint32_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (32 - (bits))); \ + })) + +// Generic right rotate - best performance when "bits" is 1 or a multiple of 8. +#define rightRotate(a, bits) \ + (__extension__ ({ \ + uint32_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (32 - (bits))); \ + })) + +// Left rotate by 1. +#define leftRotate1(a) (leftRotate((a), 1)) + +// Left rotate by 2. +#define leftRotate2(a) (leftRotate(leftRotate((a), 1), 1)) + +// Left rotate by 3. +#define leftRotate3(a) (leftRotate(leftRotate(leftRotate((a), 1), 1), 1)) + +// Left rotate by 4. +#define leftRotate4(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 1), 1), 1), 1)) + +// Left rotate by 5: Rotate left by 8, then right by 3. +#define leftRotate5(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 8), 1), 1), 1)) + +// Left rotate by 6: Rotate left by 8, then right by 2. 
+#define leftRotate6(a) (rightRotate(rightRotate(leftRotate((a), 8), 1), 1)) + +// Left rotate by 7: Rotate left by 8, then right by 1. +#define leftRotate7(a) (rightRotate(leftRotate((a), 8), 1)) + +// Left rotate by 8. +#define leftRotate8(a) (leftRotate((a), 8)) + +// Left rotate by 9: Rotate left by 8, then left by 1. +#define leftRotate9(a) (leftRotate(leftRotate((a), 8), 1)) + +// Left rotate by 10: Rotate left by 8, then left by 2. +#define leftRotate10(a) (leftRotate(leftRotate(leftRotate((a), 8), 1), 1)) + +// Left rotate by 11: Rotate left by 8, then left by 3. +#define leftRotate11(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 8), 1), 1), 1)) + +// Left rotate by 12: Rotate left by 16, then right by 4. +#define leftRotate12(a) (rightRotate(rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +// Left rotate by 13: Rotate left by 16, then right by 3. +#define leftRotate13(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1)) + +// Left rotate by 14: Rotate left by 16, then right by 2. +#define leftRotate14(a) (rightRotate(rightRotate(leftRotate((a), 16), 1), 1)) + +// Left rotate by 15: Rotate left by 16, then right by 1. +#define leftRotate15(a) (rightRotate(leftRotate((a), 16), 1)) + +// Left rotate by 16. +#define leftRotate16(a) (leftRotate((a), 16)) + +// Left rotate by 17: Rotate left by 16, then left by 1. +#define leftRotate17(a) (leftRotate(leftRotate((a), 16), 1)) + +// Left rotate by 18: Rotate left by 16, then left by 2. +#define leftRotate18(a) (leftRotate(leftRotate(leftRotate((a), 16), 1), 1)) + +// Left rotate by 19: Rotate left by 16, then left by 3. +#define leftRotate19(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1)) + +// Left rotate by 20: Rotate left by 16, then left by 4. +#define leftRotate20(a) (leftRotate(leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1), 1)) + +// Left rotate by 21: Rotate left by 24, then right by 3. 
+#define leftRotate21(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 24), 1), 1), 1)) + +// Left rotate by 22: Rotate left by 24, then right by 2. +#define leftRotate22(a) (rightRotate(rightRotate(leftRotate((a), 24), 1), 1)) + +// Left rotate by 23: Rotate left by 24, then right by 1. +#define leftRotate23(a) (rightRotate(leftRotate((a), 24), 1)) + +// Left rotate by 24. +#define leftRotate24(a) (leftRotate((a), 24)) + +// Left rotate by 25: Rotate left by 24, then left by 1. +#define leftRotate25(a) (leftRotate(leftRotate((a), 24), 1)) + +// Left rotate by 26: Rotate left by 24, then left by 2. +#define leftRotate26(a) (leftRotate(leftRotate(leftRotate((a), 24), 1), 1)) + +// Left rotate by 27: Rotate left by 24, then left by 3. +#define leftRotate27(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 24), 1), 1), 1)) + +// Left rotate by 28: Rotate right by 4. +#define leftRotate28(a) (rightRotate(rightRotate(rightRotate(rightRotate((a), 1), 1), 1), 1)) + +// Left rotate by 29: Rotate right by 3. +#define leftRotate29(a) (rightRotate(rightRotate(rightRotate((a), 1), 1), 1)) + +// Left rotate by 30: Rotate right by 2. +#define leftRotate30(a) (rightRotate(rightRotate((a), 1), 1)) + +// Left rotate by 31: Rotate right by 1. +#define leftRotate31(a) (rightRotate((a), 1)) + +// Define the 32-bit right rotations in terms of left rotations. 
+#define rightRotate1(a) (leftRotate31((a))) +#define rightRotate2(a) (leftRotate30((a))) +#define rightRotate3(a) (leftRotate29((a))) +#define rightRotate4(a) (leftRotate28((a))) +#define rightRotate5(a) (leftRotate27((a))) +#define rightRotate6(a) (leftRotate26((a))) +#define rightRotate7(a) (leftRotate25((a))) +#define rightRotate8(a) (leftRotate24((a))) +#define rightRotate9(a) (leftRotate23((a))) +#define rightRotate10(a) (leftRotate22((a))) +#define rightRotate11(a) (leftRotate21((a))) +#define rightRotate12(a) (leftRotate20((a))) +#define rightRotate13(a) (leftRotate19((a))) +#define rightRotate14(a) (leftRotate18((a))) +#define rightRotate15(a) (leftRotate17((a))) +#define rightRotate16(a) (leftRotate16((a))) +#define rightRotate17(a) (leftRotate15((a))) +#define rightRotate18(a) (leftRotate14((a))) +#define rightRotate19(a) (leftRotate13((a))) +#define rightRotate20(a) (leftRotate12((a))) +#define rightRotate21(a) (leftRotate11((a))) +#define rightRotate22(a) (leftRotate10((a))) +#define rightRotate23(a) (leftRotate9((a))) +#define rightRotate24(a) (leftRotate8((a))) +#define rightRotate25(a) (leftRotate7((a))) +#define rightRotate26(a) (leftRotate6((a))) +#define rightRotate27(a) (leftRotate5((a))) +#define rightRotate28(a) (leftRotate4((a))) +#define rightRotate29(a) (leftRotate3((a))) +#define rightRotate30(a) (leftRotate2((a))) +#define rightRotate31(a) (leftRotate1((a))) + +#else // !CRYPTO_ROTATE32_COMPOSED + +// Generic rotation functions. All bit shifts are considered to have +// similar performance. Usually true of 32-bit and higher platforms. + +// Rotation macros for 32-bit arguments. + +// Generic left rotate. +#define leftRotate(a, bits) \ + (__extension__ ({ \ + uint32_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (32 - (bits))); \ + })) + +// Generic right rotate. 
+#define rightRotate(a, bits) \ + (__extension__ ({ \ + uint32_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (32 - (bits))); \ + })) + +// Left rotate by a specific number of bits. +#define leftRotate1(a) (leftRotate((a), 1)) +#define leftRotate2(a) (leftRotate((a), 2)) +#define leftRotate3(a) (leftRotate((a), 3)) +#define leftRotate4(a) (leftRotate((a), 4)) +#define leftRotate5(a) (leftRotate((a), 5)) +#define leftRotate6(a) (leftRotate((a), 6)) +#define leftRotate7(a) (leftRotate((a), 7)) +#define leftRotate8(a) (leftRotate((a), 8)) +#define leftRotate9(a) (leftRotate((a), 9)) +#define leftRotate10(a) (leftRotate((a), 10)) +#define leftRotate11(a) (leftRotate((a), 11)) +#define leftRotate12(a) (leftRotate((a), 12)) +#define leftRotate13(a) (leftRotate((a), 13)) +#define leftRotate14(a) (leftRotate((a), 14)) +#define leftRotate15(a) (leftRotate((a), 15)) +#define leftRotate16(a) (leftRotate((a), 16)) +#define leftRotate17(a) (leftRotate((a), 17)) +#define leftRotate18(a) (leftRotate((a), 18)) +#define leftRotate19(a) (leftRotate((a), 19)) +#define leftRotate20(a) (leftRotate((a), 20)) +#define leftRotate21(a) (leftRotate((a), 21)) +#define leftRotate22(a) (leftRotate((a), 22)) +#define leftRotate23(a) (leftRotate((a), 23)) +#define leftRotate24(a) (leftRotate((a), 24)) +#define leftRotate25(a) (leftRotate((a), 25)) +#define leftRotate26(a) (leftRotate((a), 26)) +#define leftRotate27(a) (leftRotate((a), 27)) +#define leftRotate28(a) (leftRotate((a), 28)) +#define leftRotate29(a) (leftRotate((a), 29)) +#define leftRotate30(a) (leftRotate((a), 30)) +#define leftRotate31(a) (leftRotate((a), 31)) + +// Right rotate by a specific number of bits. 
+#define rightRotate1(a) (rightRotate((a), 1)) +#define rightRotate2(a) (rightRotate((a), 2)) +#define rightRotate3(a) (rightRotate((a), 3)) +#define rightRotate4(a) (rightRotate((a), 4)) +#define rightRotate5(a) (rightRotate((a), 5)) +#define rightRotate6(a) (rightRotate((a), 6)) +#define rightRotate7(a) (rightRotate((a), 7)) +#define rightRotate8(a) (rightRotate((a), 8)) +#define rightRotate9(a) (rightRotate((a), 9)) +#define rightRotate10(a) (rightRotate((a), 10)) +#define rightRotate11(a) (rightRotate((a), 11)) +#define rightRotate12(a) (rightRotate((a), 12)) +#define rightRotate13(a) (rightRotate((a), 13)) +#define rightRotate14(a) (rightRotate((a), 14)) +#define rightRotate15(a) (rightRotate((a), 15)) +#define rightRotate16(a) (rightRotate((a), 16)) +#define rightRotate17(a) (rightRotate((a), 17)) +#define rightRotate18(a) (rightRotate((a), 18)) +#define rightRotate19(a) (rightRotate((a), 19)) +#define rightRotate20(a) (rightRotate((a), 20)) +#define rightRotate21(a) (rightRotate((a), 21)) +#define rightRotate22(a) (rightRotate((a), 22)) +#define rightRotate23(a) (rightRotate((a), 23)) +#define rightRotate24(a) (rightRotate((a), 24)) +#define rightRotate25(a) (rightRotate((a), 25)) +#define rightRotate26(a) (rightRotate((a), 26)) +#define rightRotate27(a) (rightRotate((a), 27)) +#define rightRotate28(a) (rightRotate((a), 28)) +#define rightRotate29(a) (rightRotate((a), 29)) +#define rightRotate30(a) (rightRotate((a), 30)) +#define rightRotate31(a) (rightRotate((a), 31)) + +#endif // !CRYPTO_ROTATE32_COMPOSED + +#if CRYPTO_ROTATE64_COMPOSED + +// Rotation macros for 64-bit arguments. + +// Generic left rotate - best performance when "bits" is 1 or a multiple of 8. +#define leftRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (64 - (bits))); \ + })) + +// Generic right rotate - best performance when "bits" is 1 or a multiple of 8. 
+#define rightRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (64 - (bits))); \ + })) + +// Left rotate by 1. +#define leftRotate1_64(a) (leftRotate_64((a), 1)) + +// Left rotate by 2. +#define leftRotate2_64(a) (leftRotate_64(leftRotate_64((a), 1), 1)) + +// Left rotate by 3. +#define leftRotate3_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 1), 1), 1)) + +// Left rotate by 4. +#define leftRotate4_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 1), 1), 1), 1)) + +// Left rotate by 5: Rotate left by 8, then right by 3. +#define leftRotate5_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 8), 1), 1), 1)) + +// Left rotate by 6: Rotate left by 8, then right by 2. +#define leftRotate6_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 8), 1), 1)) + +// Left rotate by 7: Rotate left by 8, then right by 1. +#define leftRotate7_64(a) (rightRotate_64(leftRotate_64((a), 8), 1)) + +// Left rotate by 8. +#define leftRotate8_64(a) (leftRotate_64((a), 8)) + +// Left rotate by 9: Rotate left by 8, then left by 1. +#define leftRotate9_64(a) (leftRotate_64(leftRotate_64((a), 8), 1)) + +// Left rotate by 10: Rotate left by 8, then left by 2. +#define leftRotate10_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 8), 1), 1)) + +// Left rotate by 11: Rotate left by 8, then left by 3. +#define leftRotate11_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 8), 1), 1), 1)) + +// Left rotate by 12: Rotate left by 16, then right by 4. +#define leftRotate12_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 16), 1), 1), 1), 1)) + +// Left rotate by 13: Rotate left by 16, then right by 3. +#define leftRotate13_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 16), 1), 1), 1)) + +// Left rotate by 14: Rotate left by 16, then right by 2.
+#define leftRotate14_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 16), 1), 1)) + +// Left rotate by 15: Rotate left by 16, then right by 1. +#define leftRotate15_64(a) (rightRotate_64(leftRotate_64((a), 16), 1)) + +// Left rotate by 16. +#define leftRotate16_64(a) (leftRotate_64((a), 16)) + +// Left rotate by 17: Rotate left by 16, then left by 1. +#define leftRotate17_64(a) (leftRotate_64(leftRotate_64((a), 16), 1)) + +// Left rotate by 18: Rotate left by 16, then left by 2. +#define leftRotate18_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 16), 1), 1)) + +// Left rotate by 19: Rotate left by 16, then left by 3. +#define leftRotate19_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 16), 1), 1), 1)) + +// Left rotate by 20: Rotate left by 16, then left by 4. +#define leftRotate20_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 16), 1), 1), 1), 1)) + +// Left rotate by 21: Rotate left by 24, then right by 3. +#define leftRotate21_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 24), 1), 1), 1)) + +// Left rotate by 22: Rotate left by 24, then right by 2. +#define leftRotate22_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 24), 1), 1)) + +// Left rotate by 23: Rotate left by 24, then right by 1. +#define leftRotate23_64(a) (rightRotate_64(leftRotate_64((a), 24), 1)) + +// Left rotate by 24. +#define leftRotate24_64(a) (leftRotate_64((a), 24)) + +// Left rotate by 25: Rotate left by 24, then left by 1. +#define leftRotate25_64(a) (leftRotate_64(leftRotate_64((a), 24), 1)) + +// Left rotate by 26: Rotate left by 24, then left by 2. +#define leftRotate26_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 24), 1), 1)) + +// Left rotate by 27: Rotate left by 24, then left by 3. +#define leftRotate27_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 24), 1), 1), 1)) + +// Left rotate by 28: Rotate left by 24, then left by 4.
+#define leftRotate28_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 24), 1), 1), 1), 1)) + +// Left rotate by 29: Rotate left by 32, then right by 3. +#define leftRotate29_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 32), 1), 1), 1)) + +// Left rotate by 30: Rotate left by 32, then right by 2. +#define leftRotate30_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 32), 1), 1)) + +// Left rotate by 31: Rotate left by 32, then right by 1. +#define leftRotate31_64(a) (rightRotate_64(leftRotate_64((a), 32), 1)) + +// Left rotate by 32. +#define leftRotate32_64(a) (leftRotate_64((a), 32)) + +// Left rotate by 33: Rotate left by 32, then left by 1. +#define leftRotate33_64(a) (leftRotate_64(leftRotate_64((a), 32), 1)) + +// Left rotate by 34: Rotate left by 32, then left by 2. +#define leftRotate34_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 32), 1), 1)) + +// Left rotate by 35: Rotate left by 32, then left by 3. +#define leftRotate35_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 32), 1), 1), 1)) + +// Left rotate by 36: Rotate left by 32, then left by 4. +#define leftRotate36_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 32), 1), 1), 1), 1)) + +// Left rotate by 37: Rotate left by 40, then right by 3. +#define leftRotate37_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 40), 1), 1), 1)) + +// Left rotate by 38: Rotate left by 40, then right by 2. +#define leftRotate38_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 40), 1), 1)) + +// Left rotate by 39: Rotate left by 40, then right by 1. +#define leftRotate39_64(a) (rightRotate_64(leftRotate_64((a), 40), 1)) + +// Left rotate by 40. +#define leftRotate40_64(a) (leftRotate_64((a), 40)) + +// Left rotate by 41: Rotate left by 40, then left by 1.
+#define leftRotate41_64(a) (leftRotate_64(leftRotate_64((a), 40), 1)) + +// Left rotate by 42: Rotate left by 40, then left by 2. +#define leftRotate42_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 40), 1), 1)) + +// Left rotate by 43: Rotate left by 40, then left by 3. +#define leftRotate43_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 40), 1), 1), 1)) + +// Left rotate by 44: Rotate left by 40, then left by 4. +#define leftRotate44_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 40), 1), 1), 1), 1)) + +// Left rotate by 45: Rotate left by 48, then right by 3. +#define leftRotate45_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 48), 1), 1), 1)) + +// Left rotate by 46: Rotate left by 48, then right by 2. +#define leftRotate46_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 48), 1), 1)) + +// Left rotate by 47: Rotate left by 48, then right by 1. +#define leftRotate47_64(a) (rightRotate_64(leftRotate_64((a), 48), 1)) + +// Left rotate by 48. +#define leftRotate48_64(a) (leftRotate_64((a), 48)) + +// Left rotate by 49: Rotate left by 48, then left by 1. +#define leftRotate49_64(a) (leftRotate_64(leftRotate_64((a), 48), 1)) + +// Left rotate by 50: Rotate left by 48, then left by 2. +#define leftRotate50_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 48), 1), 1)) + +// Left rotate by 51: Rotate left by 48, then left by 3. +#define leftRotate51_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 48), 1), 1), 1)) + +// Left rotate by 52: Rotate left by 48, then left by 4. +#define leftRotate52_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 48), 1), 1), 1), 1)) + +// Left rotate by 53: Rotate left by 56, then right by 3. +#define leftRotate53_64(a) (rightRotate_64(rightRotate_64(rightRotate_64(leftRotate_64((a), 56), 1), 1), 1)) + +// Left rotate by 54: Rotate left by 56, then right by 2.
+#define leftRotate54_64(a) (rightRotate_64(rightRotate_64(leftRotate_64((a), 56), 1), 1)) + +// Left rotate by 55: Rotate left by 56, then right by 1. +#define leftRotate55_64(a) (rightRotate_64(leftRotate_64((a), 56), 1)) + +// Left rotate by 56. +#define leftRotate56_64(a) (leftRotate_64((a), 56)) + +// Left rotate by 57: Rotate left by 56, then left by 1. +#define leftRotate57_64(a) (leftRotate_64(leftRotate_64((a), 56), 1)) + +// Left rotate by 58: Rotate left by 56, then left by 2. +#define leftRotate58_64(a) (leftRotate_64(leftRotate_64(leftRotate_64((a), 56), 1), 1)) + +// Left rotate by 59: Rotate left by 56, then left by 3. +#define leftRotate59_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 56), 1), 1), 1)) + +// Left rotate by 60: Rotate left by 56, then left by 4. +#define leftRotate60_64(a) (leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64(leftRotate_64((a), 56), 1), 1), 1), 1)) + +// Left rotate by 61: Rotate right by 3. +#define leftRotate61_64(a) (rightRotate_64(rightRotate_64(rightRotate_64((a), 1), 1), 1)) + +// Left rotate by 62: Rotate right by 2. +#define leftRotate62_64(a) (rightRotate_64(rightRotate_64((a), 1), 1)) + +// Left rotate by 63: Rotate right by 1. +#define leftRotate63_64(a) (rightRotate_64((a), 1)) + +// Define the 64-bit right rotations in terms of left rotations.
+#define rightRotate1_64(a) (leftRotate63_64((a))) +#define rightRotate2_64(a) (leftRotate62_64((a))) +#define rightRotate3_64(a) (leftRotate61_64((a))) +#define rightRotate4_64(a) (leftRotate60_64((a))) +#define rightRotate5_64(a) (leftRotate59_64((a))) +#define rightRotate6_64(a) (leftRotate58_64((a))) +#define rightRotate7_64(a) (leftRotate57_64((a))) +#define rightRotate8_64(a) (leftRotate56_64((a))) +#define rightRotate9_64(a) (leftRotate55_64((a))) +#define rightRotate10_64(a) (leftRotate54_64((a))) +#define rightRotate11_64(a) (leftRotate53_64((a))) +#define rightRotate12_64(a) (leftRotate52_64((a))) +#define rightRotate13_64(a) (leftRotate51_64((a))) +#define rightRotate14_64(a) (leftRotate50_64((a))) +#define rightRotate15_64(a) (leftRotate49_64((a))) +#define rightRotate16_64(a) (leftRotate48_64((a))) +#define rightRotate17_64(a) (leftRotate47_64((a))) +#define rightRotate18_64(a) (leftRotate46_64((a))) +#define rightRotate19_64(a) (leftRotate45_64((a))) +#define rightRotate20_64(a) (leftRotate44_64((a))) +#define rightRotate21_64(a) (leftRotate43_64((a))) +#define rightRotate22_64(a) (leftRotate42_64((a))) +#define rightRotate23_64(a) (leftRotate41_64((a))) +#define rightRotate24_64(a) (leftRotate40_64((a))) +#define rightRotate25_64(a) (leftRotate39_64((a))) +#define rightRotate26_64(a) (leftRotate38_64((a))) +#define rightRotate27_64(a) (leftRotate37_64((a))) +#define rightRotate28_64(a) (leftRotate36_64((a))) +#define rightRotate29_64(a) (leftRotate35_64((a))) +#define rightRotate30_64(a) (leftRotate34_64((a))) +#define rightRotate31_64(a) (leftRotate33_64((a))) +#define rightRotate32_64(a) (leftRotate32_64((a))) +#define rightRotate33_64(a) (leftRotate31_64((a))) +#define rightRotate34_64(a) (leftRotate30_64((a))) +#define rightRotate35_64(a) (leftRotate29_64((a))) +#define rightRotate36_64(a) (leftRotate28_64((a))) +#define rightRotate37_64(a) (leftRotate27_64((a))) +#define rightRotate38_64(a) (leftRotate26_64((a))) +#define rightRotate39_64(a)
(leftRotate25_64((a))) +#define rightRotate40_64(a) (leftRotate24_64((a))) +#define rightRotate41_64(a) (leftRotate23_64((a))) +#define rightRotate42_64(a) (leftRotate22_64((a))) +#define rightRotate43_64(a) (leftRotate21_64((a))) +#define rightRotate44_64(a) (leftRotate20_64((a))) +#define rightRotate45_64(a) (leftRotate19_64((a))) +#define rightRotate46_64(a) (leftRotate18_64((a))) +#define rightRotate47_64(a) (leftRotate17_64((a))) +#define rightRotate48_64(a) (leftRotate16_64((a))) +#define rightRotate49_64(a) (leftRotate15_64((a))) +#define rightRotate50_64(a) (leftRotate14_64((a))) +#define rightRotate51_64(a) (leftRotate13_64((a))) +#define rightRotate52_64(a) (leftRotate12_64((a))) +#define rightRotate53_64(a) (leftRotate11_64((a))) +#define rightRotate54_64(a) (leftRotate10_64((a))) +#define rightRotate55_64(a) (leftRotate9_64((a))) +#define rightRotate56_64(a) (leftRotate8_64((a))) +#define rightRotate57_64(a) (leftRotate7_64((a))) +#define rightRotate58_64(a) (leftRotate6_64((a))) +#define rightRotate59_64(a) (leftRotate5_64((a))) +#define rightRotate60_64(a) (leftRotate4_64((a))) +#define rightRotate61_64(a) (leftRotate3_64((a))) +#define rightRotate62_64(a) (leftRotate2_64((a))) +#define rightRotate63_64(a) (leftRotate1_64((a))) + +#else // !CRYPTO_ROTATE64_COMPOSED + +// Rotation macros for 64-bit arguments. + +// Generic left rotate. +// "bits" must be in the range 1..63: a count of 0 would turn the right shift into a full 64-bit shift, which is undefined behavior. +// The argument is copied into _temp first, so "a" is evaluated only once. +#define leftRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp << (bits)) | (_temp >> (64 - (bits))); \ + })) + +// Generic right rotate. +// "bits" must be in the range 1..63: a count of 0 would turn the left shift into a full 64-bit shift, which is undefined behavior. +#define rightRotate_64(a, bits) \ + (__extension__ ({ \ + uint64_t _temp = (a); \ + (_temp >> (bits)) | (_temp << (64 - (bits))); \ + })) + +// Left rotate by a specific number of bits.
+#define leftRotate1_64(a) (leftRotate_64((a), 1)) +#define leftRotate2_64(a) (leftRotate_64((a), 2)) +#define leftRotate3_64(a) (leftRotate_64((a), 3)) +#define leftRotate4_64(a) (leftRotate_64((a), 4)) +#define leftRotate5_64(a) (leftRotate_64((a), 5)) +#define leftRotate6_64(a) (leftRotate_64((a), 6)) +#define leftRotate7_64(a) (leftRotate_64((a), 7)) +#define leftRotate8_64(a) (leftRotate_64((a), 8)) +#define leftRotate9_64(a) (leftRotate_64((a), 9)) +#define leftRotate10_64(a) (leftRotate_64((a), 10)) +#define leftRotate11_64(a) (leftRotate_64((a), 11)) +#define leftRotate12_64(a) (leftRotate_64((a), 12)) +#define leftRotate13_64(a) (leftRotate_64((a), 13)) +#define leftRotate14_64(a) (leftRotate_64((a), 14)) +#define leftRotate15_64(a) (leftRotate_64((a), 15)) +#define leftRotate16_64(a) (leftRotate_64((a), 16)) +#define leftRotate17_64(a) (leftRotate_64((a), 17)) +#define leftRotate18_64(a) (leftRotate_64((a), 18)) +#define leftRotate19_64(a) (leftRotate_64((a), 19)) +#define leftRotate20_64(a) (leftRotate_64((a), 20)) +#define leftRotate21_64(a) (leftRotate_64((a), 21)) +#define leftRotate22_64(a) (leftRotate_64((a), 22)) +#define leftRotate23_64(a) (leftRotate_64((a), 23)) +#define leftRotate24_64(a) (leftRotate_64((a), 24)) +#define leftRotate25_64(a) (leftRotate_64((a), 25)) +#define leftRotate26_64(a) (leftRotate_64((a), 26)) +#define leftRotate27_64(a) (leftRotate_64((a), 27)) +#define leftRotate28_64(a) (leftRotate_64((a), 28)) +#define leftRotate29_64(a) (leftRotate_64((a), 29)) +#define leftRotate30_64(a) (leftRotate_64((a), 30)) +#define leftRotate31_64(a) (leftRotate_64((a), 31)) +#define leftRotate32_64(a) (leftRotate_64((a), 32)) +#define leftRotate33_64(a) (leftRotate_64((a), 33)) +#define leftRotate34_64(a) (leftRotate_64((a), 34)) +#define leftRotate35_64(a) (leftRotate_64((a), 35)) +#define leftRotate36_64(a) (leftRotate_64((a), 36)) +#define leftRotate37_64(a) (leftRotate_64((a), 37)) +#define leftRotate38_64(a) (leftRotate_64((a), 38))
+#define leftRotate39_64(a) (leftRotate_64((a), 39)) +#define leftRotate40_64(a) (leftRotate_64((a), 40)) +#define leftRotate41_64(a) (leftRotate_64((a), 41)) +#define leftRotate42_64(a) (leftRotate_64((a), 42)) +#define leftRotate43_64(a) (leftRotate_64((a), 43)) +#define leftRotate44_64(a) (leftRotate_64((a), 44)) +#define leftRotate45_64(a) (leftRotate_64((a), 45)) +#define leftRotate46_64(a) (leftRotate_64((a), 46)) +#define leftRotate47_64(a) (leftRotate_64((a), 47)) +#define leftRotate48_64(a) (leftRotate_64((a), 48)) +#define leftRotate49_64(a) (leftRotate_64((a), 49)) +#define leftRotate50_64(a) (leftRotate_64((a), 50)) +#define leftRotate51_64(a) (leftRotate_64((a), 51)) +#define leftRotate52_64(a) (leftRotate_64((a), 52)) +#define leftRotate53_64(a) (leftRotate_64((a), 53)) +#define leftRotate54_64(a) (leftRotate_64((a), 54)) +#define leftRotate55_64(a) (leftRotate_64((a), 55)) +#define leftRotate56_64(a) (leftRotate_64((a), 56)) +#define leftRotate57_64(a) (leftRotate_64((a), 57)) +#define leftRotate58_64(a) (leftRotate_64((a), 58)) +#define leftRotate59_64(a) (leftRotate_64((a), 59)) +#define leftRotate60_64(a) (leftRotate_64((a), 60)) +#define leftRotate61_64(a) (leftRotate_64((a), 61)) +#define leftRotate62_64(a) (leftRotate_64((a), 62)) +#define leftRotate63_64(a) (leftRotate_64((a), 63)) + +// Right rotate by a specific number of bits. +// Each rightRotateN_64(a) forwards to rightRotate_64(a, N) with a fixed count N = 1..63.
+#define rightRotate1_64(a) (rightRotate_64((a), 1)) +#define rightRotate2_64(a) (rightRotate_64((a), 2)) +#define rightRotate3_64(a) (rightRotate_64((a), 3)) +#define rightRotate4_64(a) (rightRotate_64((a), 4)) +#define rightRotate5_64(a) (rightRotate_64((a), 5)) +#define rightRotate6_64(a) (rightRotate_64((a), 6)) +#define rightRotate7_64(a) (rightRotate_64((a), 7)) +#define rightRotate8_64(a) (rightRotate_64((a), 8)) +#define rightRotate9_64(a) (rightRotate_64((a), 9)) +#define rightRotate10_64(a) (rightRotate_64((a), 10)) +#define rightRotate11_64(a) (rightRotate_64((a), 11)) +#define rightRotate12_64(a) (rightRotate_64((a), 12)) +#define rightRotate13_64(a) (rightRotate_64((a), 13)) +#define rightRotate14_64(a) (rightRotate_64((a), 14)) +#define rightRotate15_64(a) (rightRotate_64((a), 15)) +#define rightRotate16_64(a) (rightRotate_64((a), 16)) +#define rightRotate17_64(a) (rightRotate_64((a), 17)) +#define rightRotate18_64(a) (rightRotate_64((a), 18)) +#define rightRotate19_64(a) (rightRotate_64((a), 19)) +#define rightRotate20_64(a) (rightRotate_64((a), 20)) +#define rightRotate21_64(a) (rightRotate_64((a), 21)) +#define rightRotate22_64(a) (rightRotate_64((a), 22)) +#define rightRotate23_64(a) (rightRotate_64((a), 23)) +#define rightRotate24_64(a) (rightRotate_64((a), 24)) +#define rightRotate25_64(a) (rightRotate_64((a), 25)) +#define rightRotate26_64(a) (rightRotate_64((a), 26)) +#define rightRotate27_64(a) (rightRotate_64((a), 27)) +#define rightRotate28_64(a) (rightRotate_64((a), 28)) +#define rightRotate29_64(a) (rightRotate_64((a), 29)) +#define rightRotate30_64(a) (rightRotate_64((a), 30)) +#define rightRotate31_64(a) (rightRotate_64((a), 31)) +#define rightRotate32_64(a) (rightRotate_64((a), 32)) +#define rightRotate33_64(a) (rightRotate_64((a), 33)) +#define rightRotate34_64(a) (rightRotate_64((a), 34)) +#define rightRotate35_64(a) (rightRotate_64((a), 35)) +#define rightRotate36_64(a) (rightRotate_64((a), 36)) +#define rightRotate37_64(a)
(rightRotate_64((a), 37)) +#define rightRotate38_64(a) (rightRotate_64((a), 38)) +#define rightRotate39_64(a) (rightRotate_64((a), 39)) +#define rightRotate40_64(a) (rightRotate_64((a), 40)) +#define rightRotate41_64(a) (rightRotate_64((a), 41)) +#define rightRotate42_64(a) (rightRotate_64((a), 42)) +#define rightRotate43_64(a) (rightRotate_64((a), 43)) +#define rightRotate44_64(a) (rightRotate_64((a), 44)) +#define rightRotate45_64(a) (rightRotate_64((a), 45)) +#define rightRotate46_64(a) (rightRotate_64((a), 46)) +#define rightRotate47_64(a) (rightRotate_64((a), 47)) +#define rightRotate48_64(a) (rightRotate_64((a), 48)) +#define rightRotate49_64(a) (rightRotate_64((a), 49)) +#define rightRotate50_64(a) (rightRotate_64((a), 50)) +#define rightRotate51_64(a) (rightRotate_64((a), 51)) +#define rightRotate52_64(a) (rightRotate_64((a), 52)) +#define rightRotate53_64(a) (rightRotate_64((a), 53)) +#define rightRotate54_64(a) (rightRotate_64((a), 54)) +#define rightRotate55_64(a) (rightRotate_64((a), 55)) +#define rightRotate56_64(a) (rightRotate_64((a), 56)) +#define rightRotate57_64(a) (rightRotate_64((a), 57)) +#define rightRotate58_64(a) (rightRotate_64((a), 58)) +#define rightRotate59_64(a) (rightRotate_64((a), 59)) +#define rightRotate60_64(a) (rightRotate_64((a), 60)) +#define rightRotate61_64(a) (rightRotate_64((a), 61)) +#define rightRotate62_64(a) (rightRotate_64((a), 62)) +#define rightRotate63_64(a) (rightRotate_64((a), 63)) + +#endif // !CRYPTO_ROTATE64_COMPOSED + +#endif // NOTE(review): closes a conditional opened before this section - presumably the header's include guard; confirm