-
- // #include "../builtins/libunicode.h"
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Debug helpers: print the source text of an expression followed by its value.
 *
 *   ps(x) - x is a NUL-terminated string
 *   pi(x) - x is an int
 *   pp(x) - x is an object pointer (cast to void *, as %p requires)
 *
 * Each expansion intentionally ends with ';' because existing call sites
 * invoke these macros without a trailing semicolon.  Do not use them as the
 * sole body of an unbraced if/else.
 */
#define ps(x) printf("%s = %s\n", #x, (x));
#define pi(x) printf("%s = %d\n", #x, (x));
#define pp(x) printf("%s = %p\n", #x, (void *)(x));
- #include <assert.h>
-
-
/*
 * Return bit `position` of `byte` as 0 or 1.
 *
 * Position 0 is the least significant bit and CHAR_BIT - 1 (7 when bytes are
 * 8 bits wide) is the most significant.  A position outside that range yields
 * 0 rather than performing a shift by a negative or oversized count, which
 * would be undefined behavior.
 */
int getBit(unsigned char byte, int position) // position in range 0-7
{
    if (position < 0 || position >= CHAR_BIT) {
        return 0;
    }
    return (byte >> position) & 0x1;
}
-
/*
 * Cursor over a byte buffer that is consumed one bit at a time.
 * Created by bopen(), advanced by bread(), released by bclose().
 */
typedef struct bstate {
    unsigned char *data; /* buffer being read; bopen() stores the pointer as given */
    size_t index;        /* offset in data of the byte currently being read */
    size_t bitcount;     /* number of bits already taken from the current byte */
} bstate;
-
- bstate * bopen(unsigned char * data) {
- bstate * n = malloc(sizeof(bstate));
- n->data = data;
- n->index = 0;
- n->bitcount = 0;
- return n;
- }
-
- void bclose(bstate * n) {
- n->data = NULL;
- n->index = 0;
- n->bitcount = 0;
- free(n);
- n = NULL;
- }
-
- unsigned char * bread(bstate * n, int len, int * ii) {
- unsigned char * bits = malloc(len);
- int was_set = 0;
-
- for (int i = 0; i < len; i++)
- {
- bits[i]=!getBit(n->data[n->index], *ii)?'0':'1';
- (*ii)--;
- n->bitcount++;
- if (n->bitcount>7) {
- n->index++;
- n->bitcount = 0;
- was_set = 1;
- *ii = 7;
- }
- }
- bits[len] = 0;
- was_set = 0;
- return bits;
- }
-
/*
 * Count how many times `x` can be halved (integer division, truncating
 * toward zero) before it reaches zero, not counting the final halving.
 * For a positive x this is floor(log2(x)); dts(0) and dts(1) are both 0.
 */
int dts(int x)
{
    int halvings = 0;

    for (x /= 2; x != 0; x /= 2) {
        halvings++;
    }
    return halvings;
}
-
- // should this use a bstate?
/*
 * Pack a string of '0'/'1' characters into bytes, most significant bit first.
 *
 * encoding - NUL-terminated text; every character equal to '1' sets a bit,
 *            any other character leaves the bit clear.
 *
 * Returns a heap-allocated buffer holding ceil(strlen(encoding) / CHAR_BIT)
 * packed bytes followed by one zero byte, to be released with free().  When
 * the bit count is not a multiple of CHAR_BIT a warning is printed and the
 * unused low bits of the last byte are left as zero.  Returns NULL if
 * `encoding` is NULL or the allocation fails.
 */
unsigned char *bwrite(unsigned char *encoding)
{
    if (encoding == NULL) {
        return NULL;
    }

    // assume host byte size is CHAR_BIT
    const size_t nbits = strlen((const char *)encoding);
    if (nbits % CHAR_BIT != 0) {
        printf("warning: bit length is not evenly divisible against a byte, the last byte may be corrupted\n");
    }

    /* Round up so a trailing partial byte has storage, plus one terminator. */
    const size_t len = (nbits + CHAR_BIT - 1) / CHAR_BIT;
    unsigned char *s = calloc(len + 1, 1);
    if (s == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < nbits; i++) {
        if (encoding[i] == '1') {
            /* Bit i lands in byte i / CHAR_BIT, counted down from the top bit. */
            s[i / CHAR_BIT] |= (unsigned char)(1u << ((CHAR_BIT - 1) - (i % CHAR_BIT)));
        }
    }
    return s;
}
-
-
/*
 * Shape of a UTF-8 sequence as classified from its first byte.
 */
struct unicode {
    int bytes;          /* length of the encoded sequence in bytes */
    int codepoint_bits; /* number of bits available for the code point */
};

/* File-scope instance that detect() writes its result into. */
struct unicode unicode;
-
- void detect(char * string) {
- ps(string);
- bstate * c = bopen(string);
- int i = 7;
- char * bits = NULL;
- bits = bread(c, 8, &i);
- if (bits[0] == '0') {
- unicode.bytes = 1;
- unicode.codepoint_bits = 7;
- }
- else
- if (bits[0] == '1') {
- if (bits[1] == '1') {
- if (bits[2] == '0') {
- unicode.bytes = 2;
- unicode.codepoint_bits = 11;
- }
- else
- if (bits[2] == '1') {
- if (bits[3] == '0') {
- unicode.bytes = 3;
- unicode.codepoint_bits = 16;
- }
- else
- if (bits[3] == '1') {
- if (bits[4] == '0') {
- unicode.bytes = 4;
- unicode.codepoint_bits = 21;
- }
- }
- }
- }
- }
- pi(unicode.bytes)
- pi(unicode.codepoint_bits)
- ps(bits); free(bits); bits = NULL;
- bits = bread(c, 8, &i);
- ps(bits); free(bits); bits = NULL;
- bits = bread(c, 8, &i);
- ps(bits); free(bits); bits = NULL;
- bclose(c);
- }
-
/*
 * Run detect() on a one-byte and two three-byte UTF-8 samples.
 * The non-ASCII literals rely on this source file being compiled as UTF-8.
 */
int main(void)
{
    detect("a");
    detect("あ");
    detect("€");
    return 0;
}
-