ofxMsdfgen/libs/msdf-atlas-gen/include/msdf-atlas-gen/charset-parser.cpp

250 lines
8.4 KiB
C++

#include "Charset.h"
#include <climits>
#include <cstdio>
#include <string>
#include "utf8.h"
namespace msdf_atlas {
// Maps the character that follows a backslash to the character it stands for.
// The letters n, r, s and t are accepted in either case and yield newline,
// carriage return, space and tab; '0' yields the NUL character. Every other
// character (including the backslash and both quote characters) is returned
// unchanged.
static char escapedChar(char c) {
    if (c == '0')
        return '\0';
    if (c == 'n' || c == 'N')
        return '\n';
    if (c == 'r' || c == 'R')
        return '\r';
    if (c == 's' || c == 'S')
        return ' ';
    if (c == 't' || c == 'T')
        return '\t';
    // Backslash, quotes and anything unrecognized stand for themselves.
    return c;
}
// Reads characters from f for as long as they are ASCII letters, digits or
// underscores, appending each one to str (str is not cleared first).
// Returns the first value obtained from fgetc that is not such a character;
// this is whatever fgetc returned, so it is negative at end of file.
static int readWord(std::string &str, FILE *f) {
    for (;;) {
        const int ch = fgetc(f);
        const bool isUpper = ch >= 'A' && ch <= 'Z';
        const bool isLower = ch >= 'a' && ch <= 'z';
        const bool isDigit = ch >= '0' && ch <= '9';
        if (!(isUpper || isLower || isDigit || ch == '_'))
            return ch;
        str.push_back(static_cast<char>(ch));
    }
}
// Reads the remainder of a quoted literal from f, appending its contents to
// str (str is not cleared first). The opening quote must already have been
// consumed by the caller.
// A backslash causes the following character to be translated by escapedChar,
// so an escaped terminator does not end the literal.
// Returns true once an unescaped terminator has been consumed, or false if
// the input ends first.
static bool readString(std::string &str, FILE *f, char terminator) {
    for (int ch = fgetc(f); ch >= 0; ch = fgetc(f)) {
        if (ch == terminator)
            return true;
        if (ch != '\\') {
            str.push_back(static_cast<char>(ch));
            continue;
        }
        // Backslash: the next character is taken as an escape code.
        ch = fgetc(f);
        if (ch < 0)
            return false;
        str.push_back(escapedChar(static_cast<char>(ch)));
    }
    return false;
}
// Parses str as a non-negative integer literal and stores the value in i.
// A leading "0x" or "0X" selects hexadecimal (digits 0-9, a-f, A-F);
// otherwise the literal is decimal. The entire string must consist of digits
// of the selected base and must contain at least one digit.
// Returns false if the literal is empty (including a bare "0x"), contains an
// invalid character, or does not fit in an int. On failure the value left in
// i is unspecified.
static bool parseInt(int &i, const char *str) {
    i = 0;
    int base = 10;
    if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) { // hex
        base = 16;
        str += 2;
    }
    if (!*str) // no digits at all
        return false;
    for (; *str; ++str) {
        int digit;
        if (*str >= '0' && *str <= '9')
            digit = *str-'0';
        else if (base == 16 && *str >= 'A' && *str <= 'F')
            digit = *str-'A'+10;
        else if (base == 16 && *str >= 'a' && *str <= 'f')
            digit = *str-'a'+10;
        else
            return false;
        // Reject the digit if i*base+digit would exceed INT_MAX; checking
        // first avoids signed overflow, which is undefined behaviour.
        if (i > (INT_MAX-digit)/base)
            return false;
        i = i*base+digit;
    }
    return true;
}
// Resolves relPath against the directory part of basePath.
// relPath is returned unchanged when it starts with '/' or has ':' as its
// second character (treated as an absolute path), or when basePath contains
// no '/' or '\\' separator. Otherwise the result is basePath up to and
// including its last separator, followed by relPath.
static std::string combinePath(const char *basePath, const char *relPath) {
    const bool rooted = relPath[0] == '/';
    const bool hasDrive = relPath[0] != '\0' && relPath[1] == ':';
    if (rooted || hasDrive)
        return std::string(relPath);
    const std::string base(basePath);
    const std::string::size_type separator = base.find_last_of("/\\");
    if (separator == std::string::npos)
        return std::string(relPath);
    return base.substr(0, separator+1)+relPath;
}
// Parses the charset specification in the file `filename` and adds every code
// point it lists to this charset via add().
//
// Accepted items, separated by whitespace, ',' or ';':
//   - a number (decimal, or hexadecimal with a 0x prefix): a single code point
//   - 'x': a single UTF-8 encoded character
//   - "xyz": every character of a UTF-8 encoded string
//   - [a, b]: the inclusive range from a to b, where each endpoint is a number
//     or a single-character literal
//   - @include "path": parses another file, with path resolved relative to
//     the directory of `filename`
// A UTF-8 byte order mark is accepted at the very beginning of the file.
//
// If disableCharLiterals is true, any '...' or "..." literal is a syntax
// error; the flag is forwarded to included files.
//
// Returns true if the whole file (and every included file) was parsed
// successfully. Returns false if the file cannot be opened, contains a syntax
// error, or ends in the middle of a range. Code points added before the error
// was encountered remain in the charset.
bool Charset::load(const char *filename, bool disableCharLiterals) {
    if (FILE *f = fopen(filename, "rb")) {
        enum {
            CLEAR, // ready for the next item
            TIGHT, // an item has just ended; whitespace or a separator must follow
            RANGE_BRACKET, // '[' read, expecting the first endpoint
            RANGE_START, // first endpoint read, expecting ',' or ';'
            RANGE_SEPARATOR, // separator read, expecting the second endpoint
            RANGE_END // second endpoint read, expecting ']'
        } state = CLEAR;
        std::string buffer;
        std::vector<unicode_t> unicodeBuffer;
        unicode_t rangeStart = 0;
        // c holds the next unprocessed byte; start is true only for the first byte of the file
        for (int c = fgetc(f), start = true; c >= 0; start = false) {
            switch (c) {
                case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': // number
                    if (!(state == CLEAR || state == RANGE_BRACKET || state == RANGE_SEPARATOR))
                        goto FAIL;
                    buffer.push_back((char) c);
                    c = readWord(buffer, f);
                    {
                        int cp;
                        if (!parseInt(cp, buffer.c_str()))
                            goto FAIL;
                        switch (state) {
                            case CLEAR:
                                if (cp >= 0)
                                    add((unicode_t) cp);
                                state = TIGHT;
                                break;
                            case RANGE_BRACKET:
                                rangeStart = (unicode_t) cp;
                                state = RANGE_START;
                                break;
                            case RANGE_SEPARATOR:
                                for (unicode_t u = rangeStart; (int) u <= cp; ++u)
                                    add(u);
                                state = RANGE_END;
                                break;
                            default:;
                        }
                    }
                    buffer.clear();
                    continue; // next character already read
                case '\'': // single UTF-8 character
                    if (!(state == CLEAR || state == RANGE_BRACKET || state == RANGE_SEPARATOR) || disableCharLiterals)
                        goto FAIL;
                    if (!readString(buffer, f, '\''))
                        goto FAIL;
                    utf8Decode(unicodeBuffer, buffer.c_str());
                    if (unicodeBuffer.size() == 1) {
                        switch (state) {
                            case CLEAR:
                                if (unicodeBuffer[0] > 0)
                                    add(unicodeBuffer[0]);
                                state = TIGHT;
                                break;
                            case RANGE_BRACKET:
                                rangeStart = unicodeBuffer[0];
                                state = RANGE_START;
                                break;
                            case RANGE_SEPARATOR:
                                for (unicode_t u = rangeStart; u <= unicodeBuffer[0]; ++u)
                                    add(u);
                                state = RANGE_END;
                                break;
                            default:;
                        }
                    } else
                        goto FAIL;
                    unicodeBuffer.clear();
                    buffer.clear();
                    break;
                case '"': // string of UTF-8 characters
                    if (state != CLEAR || disableCharLiterals)
                        goto FAIL;
                    if (!readString(buffer, f, '"'))
                        goto FAIL;
                    utf8Decode(unicodeBuffer, buffer.c_str());
                    for (unicode_t cp : unicodeBuffer)
                        add(cp);
                    unicodeBuffer.clear();
                    buffer.clear();
                    state = TIGHT;
                    break;
                case '[': // character range start
                    if (state != CLEAR)
                        goto FAIL;
                    state = RANGE_BRACKET;
                    break;
                case ']': // character range end
                    if (state == RANGE_END)
                        state = TIGHT;
                    else
                        goto FAIL;
                    break;
                case '@': // annotation
                    if (state != CLEAR)
                        goto FAIL;
                    c = readWord(buffer, f);
                    if (buffer == "include") {
                        while (c == ' ' || c == '\t' || c == '\n' || c == '\r')
                            c = fgetc(f);
                        if (c != '"')
                            goto FAIL;
                        buffer.clear();
                        if (!readString(buffer, f, '"'))
                            goto FAIL;
                        // The included file is parsed under the same literal policy as this
                        // one, and a failure to load it fails this file as well.
                        if (!load(combinePath(filename, buffer.c_str()).c_str(), disableCharLiterals))
                            goto FAIL;
                        state = TIGHT;
                    } else
                        goto FAIL;
                    buffer.clear();
                    break;
                case ',': case ';': // separator
                    if (!(state == CLEAR || state == TIGHT)) {
                        if (state == RANGE_START)
                            state = RANGE_SEPARATOR;
                        else
                            goto FAIL;
                    } // else treat as whitespace
                    // fallthrough
                case ' ': case '\n': case '\r': case '\t': // whitespace
                    if (state == TIGHT)
                        state = CLEAR;
                    break;
                case 0xef: // UTF-8 byte order mark
                    if (start) {
                        if (!(fgetc(f) == 0xbb && fgetc(f) == 0xbf))
                            goto FAIL;
                        break;
                    }
                    // fallthrough - 0xef anywhere else is an unexpected character
                default: // unexpected character
                    goto FAIL;
            }
            c = fgetc(f);
        }
        fclose(f);
        // The file must not end inside an unfinished range
        return state == CLEAR || state == TIGHT;
    FAIL:
        fclose(f);
        return false;
    }
    return false;
}
}