spacepaste

  1.  
  2. // #include "../builtins/libunicode.h"
  #include <limits.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  /*
   * Debug-print helpers.  Each one prints "<expression text> = <value>"
   * followed by a newline on stdout.
   *
   *   ps(x)  x is a NUL-terminated string
   *   pi(x)  x is an int
   *   pp(x)  x is an object pointer
   *
   * The expansions intentionally end in a semicolon: existing call sites
   * invoke these macros without writing one.  Do not use them as the sole
   * body of an unbraced if/else.
   */
  #define ps(x) printf("%s = %s\n", #x, (const char *)(x));
  #define pi(x) printf("%s = %d\n", #x, (x));
  /* %p requires a void pointer argument, hence the cast. */
  #define pp(x) printf("%s = %p\n", #x, (void *)(x));
  8. #include <assert.h>
  /**
   * Extract a single bit from a byte.
   *
   * @param byte      value to inspect
   * @param position  bit index; 0 is the least significant bit and
   *                  CHAR_BIT - 1 the most significant one
   * @return 1 if the selected bit is set, 0 if it is clear or if position
   *         lies outside the byte
   */
  int getBit(unsigned char byte, int position) // position in range 0-7
  {
      /* A negative or oversized shift count is undefined behaviour. */
      if (position < 0 || position >= CHAR_BIT) {
          return 0;
      }
      return (byte >> position) & 0x1;
  }
  /*
   * Read cursor over a byte buffer that is consumed one bit at a time.
   */
  typedef struct bstate {
      unsigned char *data; /* buffer being read; supplied by the caller of bopen() */
      size_t index;        /* offset of the byte currently being consumed */
      size_t bitcount;     /* number of bits already taken from that byte */
  } bstate;
  19. bstate * bopen(unsigned char * data) {
  20. bstate * n = malloc(sizeof(bstate));
  21. n->data = data;
  22. n->index = 0;
  23. n->bitcount = 0;
  24. return n;
  25. }
  26. void bclose(bstate * n) {
  27. n->data = NULL;
  28. n->index = 0;
  29. n->bitcount = 0;
  30. free(n);
  31. n = NULL;
  32. }
  33. unsigned char * bread(bstate * n, int len, int * ii) {
  34. unsigned char * bits = malloc(len);
  35. int was_set = 0;
  36. for (int i = 0; i < len; i++)
  37. {
  38. bits[i]=!getBit(n->data[n->index], *ii)?'0':'1';
  39. (*ii)--;
  40. n->bitcount++;
  41. if (n->bitcount>7) {
  42. n->index++;
  43. n->bitcount = 0;
  44. was_set = 1;
  45. *ii = 7;
  46. }
  47. }
  48. bits[len] = 0;
  49. was_set = 0;
  50. return bits;
  51. }
  /**
   * Count how many times x can be halved (integer division, truncating
   * towards zero) before it reaches zero, not counting the final step.
   * For a positive power of two this is its base-2 logarithm.
   *
   * @param x  value to halve
   * @return number of halvings that left a non-zero value
   */
  int dts(int x) {
      int halvings = 0;
      for (x /= 2; x != 0; x /= 2) {
          halvings++;
      }
      return halvings;
  }
  57. // should this use a bstate?
  /**
   * Pack a text string of '0'/'1' characters into bytes, most significant
   * bit first.
   *
   * Any character other than '1' is treated as a zero bit.  When the number
   * of characters is not a multiple of CHAR_BIT a warning is printed on
   * stdout and the final byte is padded with zero bits on the right.
   *
   * @param encoding  NUL-terminated string of bit characters
   * @return malloc'd buffer holding the packed bytes followed by one zero
   *         byte; the caller must free() it.  NULL if encoding is NULL or
   *         the allocation fails.
   */
  unsigned char * bwrite(unsigned char * encoding) {
      if (encoding == NULL) {
          return NULL;
      }

      // assume host byte size is CHAR_BIT
      const size_t nbits = strlen((const char *)encoding);
      if (nbits % CHAR_BIT != 0) {
          printf("warning: bit length is not evenly divisible against a byte, the last byte may be corrupted\n");
      }

      /* Round up so a trailing partial byte has somewhere to live. */
      const size_t nbytes = (nbits + CHAR_BIT - 1) / CHAR_BIT;

      /* One extra byte for the terminating zero. */
      unsigned char *s = malloc(nbytes + 1);
      if (s == NULL) {
          return NULL;
      }
      memset(s, 0, nbytes + 1);

      for (size_t i = 0; i < nbits; i++) {
          if (encoding[i] == '1') {
              /* Bit 0 of the text maps to the top bit of its byte. */
              const unsigned shift = (unsigned)((CHAR_BIT - 1) - (i % CHAR_BIT));
              s[i / CHAR_BIT] |= (unsigned char)(1u << shift);
          }
      }
      return s;
  }
  /*
   * Description of an encoded character as filled in by detect().
   */
  struct unicode {
      int bytes;          /* length of the encoded sequence in bytes */
      int codepoint_bits; /* number of payload bits the sequence can carry */
  };

  /* Single file-scope instance holding the most recent result. */
  struct unicode unicode;
  77. void detect(char * string) {
  78. ps(string);
  79. bstate * c = bopen(string);
  80. int i = 7;
  81. char * bits = NULL;
  82. bits = bread(c, 8, &i);
  83. if (bits[0] == '0') {
  84. unicode.bytes = 1;
  85. unicode.codepoint_bits = 7;
  86. }
  87. else
  88. if (bits[0] == '1') {
  89. if (bits[1] == '1') {
  90. if (bits[2] == '0') {
  91. unicode.bytes = 2;
  92. unicode.codepoint_bits = 11;
  93. }
  94. else
  95. if (bits[2] == '1') {
  96. if (bits[3] == '0') {
  97. unicode.bytes = 3;
  98. unicode.codepoint_bits = 16;
  99. }
  100. else
  101. if (bits[3] == '1') {
  102. if (bits[4] == '0') {
  103. unicode.bytes = 4;
  104. unicode.codepoint_bits = 21;
  105. }
  106. }
  107. }
  108. }
  109. }
  110. pi(unicode.bytes)
  111. pi(unicode.codepoint_bits)
  112. ps(bits); free(bits); bits = NULL;
  113. bits = bread(c, 8, &i);
  114. ps(bits); free(bits); bits = NULL;
  115. bits = bread(c, 8, &i);
  116. ps(bits); free(bits); bits = NULL;
  117. bclose(c);
  118. }
  /**
   * Program entry point: runs detect() on three sample strings.
   *
   * NOTE(review): the two non-ASCII literals are expected to be multi-byte
   * sequences, which assumes this file is saved and compiled as UTF-8.
   *
   * @return 0
   */
  int main(void) {
      detect("a");
      detect("あ");
      detect("€");
      return 0;
  }
  159.