/*****************************************************************************/
/*
                                  cereal.c

A cerealized string is a serialized form of any binary data.  The idea is to
simply encode binary into 7-bit ASCII suitable for inclusion in a FORM hidden
INPUT item's value field (thereby avoiding any issues with UTF-8 encodings,
etc).  It does this by directly using any character between '(' (40) and '}'
(125).  This range also excludes the '&' (38), which is a reserved character
in HTML (the entity introducer).  Other characters are encoded using a simple
scheme.  The cerealizer also Run-Length Encodes (RLE) to compress redundant
data.

  %xx  an 8 bit character in hexadecimal (look familiar?)
  !c   a character shifted down by 128 (from 8 into 7 bit)
  #xx  an 8 bit RLE count in hexadecimal (0 to 255)
  $c   a character from '(' to '}' representing an RLE count from 0 to 85
  ~    represents a space

Examples:

  %09     is a TAB
  %21     is an exclamation point
  $<~     is 20 spaces ('<' is character 60, and 60 - 40 = 20)
  #FF%00  is 255 nulls

The state string is preceded by a single character 40..125 ('('..'}')
representing the cerealizer version (0..85), then a fixed, eight character
hexadecimal string representing the length of the uncerealized data, then
another fixed, eight character hexadecimal string representing the 32 bit hash
of the data.  These are used as basic integrity checks on the supplied
serialized state and its reconstituted data structure.


COPYRIGHT
---------
Copyright (C) 2005-2024 Mark G.Daniel
This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it under the
conditions of the GNU GENERAL PUBLIC LICENSE, version 3, or any later version.

VERSION HISTORY --------------- 27-JUL-2006 MGD bugfix; CerealDataOut() perhaps those ambits were not the the issue, set a minimum buffer space of 256 (to catch possible zero-length data streams that still need a hash) 20-JUL-2006 MGD bugfix; CerealDataOut() increase ambit from 3x to 5x 28-MAR-2006 MGD bugfix; CerealDataOut() allow ambit 3x buffer space 01-FEB-2005 MGD initial */ /*****************************************************************************/ #ifdef SOYMAIL_VMS_V7 #undef _VMS_V6_SOURCE #define _VMS_V6_SOURCE #undef __VMS_VER #define __VMS_VER 70000000 #undef __CRTL_VER #define __CRTL_VER 70000000 #endif #pragma nomember_alignment /* standard C header files */ #include #include #include #include #include #include /* VMS related header files */ #include /* application header file */ #include "cgilib.h" #include #define FI_LI __FILE__, __LINE__ /* a character to indicate the cerealising version (0..n) */ #define CEREAL_VERSION_CHAR (char)(40+0) #define XX 255 static char CerealIndexHex [] = { XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,XX,XX, XX,XX,XX,XX, XX,10,11,12, 13,14,15,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,10,11,12, 13,14,15,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, }; #define HEXCHAR(c) (CerealIndexHex[(unsigned char)(c & 0x7f)]) /* external storage */ extern int Debug; int ErrorExit (int, char*, int); 
/*****************************************************************************/ /* Serialize the supplied data returning a pointer to an allocated string. */ char* CerealDataOut ( void *DataPtr, int DataLength ) { static char HexChars [] = "0123456789ABCDEF"; int cnt, StringLength, StringSize; unsigned int hash32; unsigned char ch; char *cptr, *sptr, *zptr, *sdzptr, *StringPtr; /*********/ /* begin */ /*********/ if (Debug) fprintf (stdout, "CerealDataOut() %d\n", DataLength); StringSize = DataLength * 5; /* should be close to worst-case */ if (StringSize < 256) StringSize = 256; StringPtr = sptr = CgiLibVeeMemCalloc (StringSize); if (!sptr) ErrorExit (vaxc$errno, FI_LI); /* allow thirty-two bytes for maximum encoding requirements */ zptr = StringPtr + StringSize - 32; hash32 = CerealElfHash ((char*)DataPtr, DataLength); sptr += sprintf (sptr, "%c%08X%08X", CEREAL_VERSION_CHAR, DataLength, hash32); cptr = (char*)DataPtr; sdzptr = cptr + DataLength; while (cptr < sdzptr) { if (sptr >= zptr) { /* hmmm, string's a bit bigger than originally expected */ StringLength = sptr - StringPtr; StringSize += StringSize / 2; StringPtr = CgiLibVeeMemRealloc (StringPtr, StringSize); if (!StringPtr) ErrorExit (vaxc$errno, FI_LI); sptr = StringPtr + StringLength; zptr = StringPtr + StringSize - 16; } /* scan ahead looking for sequences that can be RLE */ ch = (unsigned char)*cptr++; for (cnt = 1; cptr < sdzptr && (unsigned char)*cptr == ch && cnt < 255; cptr++) cnt++; if (cnt > 125-40) { *sptr++ = '#'; *sptr++ = HexChars[((unsigned char)(cnt & 0xf0) >> 4)]; *sptr++ = HexChars[(unsigned char)(cnt & 0x0f)]; cnt = 1; } else if (ch == 32 || (ch >= 40 && ch <= 125)) { if (cnt > 3) { *sptr++ = '$'; *sptr++ = cnt + 40; cnt = 1; } } else if (ch >= 128+40 && ch <= 128+125) { if (cnt > 2) { *sptr++ = '$'; *sptr++ = cnt + 40; cnt = 1; } } else if (cnt > 1) { *sptr++ = '$'; *sptr++ = cnt + 40; cnt = 1; } if (ch == 32) { /* space character */ while (cnt--) *sptr++ = '~'; } else if (ch >= 40 && 
ch <= 125) { /* unencoded character */ while (cnt--) *sptr++ = ch; } else if (ch >= 128+40 && ch <= 128+125) { /* shift 8 to 7 bit encoded character */ ch -= 128; while (cnt--) { *sptr++ = '!'; *sptr++ = ch; } } else { /* hexadecimal encoded character */ while (cnt--) { *sptr++ = '%'; *sptr++ = HexChars[((unsigned char)(ch & 0xf0) >> 4)]; *sptr++ = HexChars[(unsigned char)(ch & 0x0f)]; } } } if (sptr >= zptr) ErrorExit (SS$_BUGCHECK, FI_LI); *sptr++ = '\0'; StringLength = sptr - StringPtr; sptr = CgiLibVeeMemCalloc (StringLength); if (!sptr) ErrorExit (vaxc$errno, FI_LI); strcpy (sptr, StringPtr); CgiLibVeeMemFree (StringPtr); if (Debug) fprintf (stdout, "%d->%d\n", DataLength, StringLength-9); return (sptr); } /*****************************************************************************/ /* Un-serialize the supplied string returning the pointed to data. If 'DataPtr' is NULL the function just calculates and returns the length of the uncerialized data (so that the calling function can allocate one dynamically). 
*/ int CerealDataIn ( char *StringPtr, void *DataPtr, int DataSize ) { int cnt, dlen, hash32a, hash32b; unsigned int hash32; unsigned char ch, hexch; char *cptr, *sptr, *zptr; /*********/ /* begin */ /*********/ if (Debug) fprintf (stdout, "CerealDataIn() |%s|\n", StringPtr); cptr = StringPtr; if (*cptr++ != CEREAL_VERSION_CHAR) { if (Debug) fprintf (stdout, "VERSION\n"); if (DataPtr) memset (DataPtr, 0, DataSize); return (0); } /* get data length of cerealized string */ while (*cptr && cptr < StringPtr+9) cptr++; if (!*cptr) return (0); ch = (unsigned char)*cptr; *cptr = '\0'; dlen = strtol (StringPtr+1, NULL, 16); *cptr = (char)ch; if (!DataPtr) return (dlen); if (dlen > DataSize) { if (Debug) fprintf (stdout, "DATASIZE\n"); memset (DataPtr, 0, DataSize); return (0); } /* get the 32 bit hash of the uncerealized data */ while (*cptr && cptr < StringPtr+17) cptr++; if (!*cptr) return (0); ch = (unsigned char)*cptr; *cptr = '\0'; hash32a = strtol (StringPtr+9, NULL, 16); *cptr = (char)ch; cnt = 1; zptr = (sptr = (char*)DataPtr) + dlen; while (ch = (unsigned char)*cptr++) { if (ch >= 40 && ch <= 125) { /* unencoded character */ while (cnt-- && sptr < zptr) *sptr++ = ch; cnt = 1; continue; } if (ch == '~') { /* space */ while (cnt-- && sptr < zptr) *sptr++ = ' '; cnt = 1; continue; } if (ch == '!') { /* 8 to 7 bit shifted character */ ch = (unsigned char)*cptr++; if (ch < 40 && ch > 125) break; ch += 128; while (cnt-- && sptr < zptr) *sptr++ = ch; cnt = 1; continue; } if (ch == '%') { /* hexdecimal encoded character */ hexch = (unsigned char)*cptr++; if (HEXCHAR(hexch) == XX) break; ch = (unsigned char)HEXCHAR(hexch) << 4; hexch = (unsigned char)*cptr++; if (HEXCHAR(hexch) == XX) break; ch |= (unsigned char)HEXCHAR(hexch); while (cnt-- && sptr < zptr) *sptr++ = ch; cnt = 1; continue; } if (ch == '$') { /* character encoded count */ cnt = (unsigned char)*cptr++; if (cnt < 40 || cnt > 125) break; cnt -= 40; continue; } if (ch == '#') { /* hexadecimal encoded count */ 
hexch = (unsigned char)*cptr++; if (HEXCHAR(hexch) == XX) break; cnt = (unsigned char)HEXCHAR(hexch) << 4; hexch = (unsigned char)*cptr++; if (HEXCHAR(hexch) == XX) break; cnt |= HEXCHAR(hexch); continue; } } if (Debug) fprintf (stdout, "%d %d\n", DataSize, sptr-(char*)DataPtr); if (sptr < zptr) { if (Debug) fprintf (stdout, "UNDERFLOW\n"); memset (DataPtr, 0, DataSize); return (0); } if (sptr > zptr) { if (Debug) fprintf (stdout, "OVERFLOW\n"); memset (DataPtr, 0, DataSize); return (0); } hash32b = CerealElfHash ((char*)DataPtr, dlen); if (Debug) fprintf (stdout, "%08.08x %08.08x %d->%d\n", hash32a, hash32b, cptr-StringPtr-18, dlen); if (hash32a == hash32b) return (dlen); memset (DataPtr, 0, dlen); return (0); } /*****************************************************************************/ /* Hashing function (reinitializes with each call). Based on code by Arash Partow. */ unsigned int CerealElfHash ( char *str, int len ) { unsigned int x, i, hash; /*********/ /* begin */ /*********/ hash = 0; for (i = 0; i < len; str++, i++) { hash = (hash << 4) + (*str); if ((x = hash & 0xF0000000L) != 0) { hash ^= (x >> 24); hash &= ~x; } } return (hash & 0x7FFFFFFF); } /*****************************************************************************/