在我制作工具的多个情景下都会产生某些名字无法使用的问题,例如虚幻的ABC插件不支持中文名,蓝图的变量经常存在空格与特殊字符无法绑定到其他语言等。
为了解决这个问题,我做了一个可以将名字字符限定在a-z A-Z 0-9 _之内的编解码方案。
对中文、空格、特殊字符等非可见ASCII字符进行转义,编程语言不支持数字开头的标识符变量,所以也会对开头的数字进行转义。
一个非数字开头,含有大小写数字以及下划线的名字将不会被编码所改变。如 Name_A016。
该编码规范将使用_h作为转义符开头,_结尾,中间的内容是转义结果。以连字符-为例,-的编码结果为_h2D_,其中_h为转义符,2D为-的ASCII编码(十六进制),最后的_代表转义结束。注意单独的下划线属于合法字符,正常情况下不会被编码。
如果原字符串中出现了用于转义的_h,那么这两个字符会被整体转义为_h5F_(5F为下划线的ASCII编码,紧随其后的h被一并吸收),解码时_h5F_会被还原为_h。
如数字开头的字符串,则会在开头添加_h01_,如3DWidget将会被编码为_h01_3DWidget。
转义符用下划线来代表结束也是为了支持中文字符串,因为字符串的长度是不确定的。
如“技能3”,其中的“技能”将会被编码为_hu8062FD80_,在这个编码里_h和往常一样作为转义符出现,而接下来出现的u代表着后面是按小端字节序写出的unicode编码(大端字节序则用U表示),8062 FD80则分别是“技”(U+6280)和“能”(U+80FD)二字按小端字节序写出的UTF-16编码,所以编码结果为_hu8062FD80_3
这里使用UE的代码来实现。
#include "NameEncode.h"

#include <ctype.h>

// ---------------------------------------------------------------------------
// Identifier-safe name encoding.
//
// Encoded names contain only [A-Za-z0-9_] and never start with a digit.
//
//   _hXX_          one escaped ASCII character, XX = two hex digits
//   _h5F_          the literal two-character sequence "_h" (so that "_h" in
//                  the input cannot be mistaken for an escape introducer)
//   _h01_          marker prepended when the input starts with a digit; the
//                  digit itself follows unescaped
//   _huXXXX..._    run of 16-bit code units, each written as two bytes in
//   _hUXXXX..._    little-endian ('u') or big-endian ('U') order
//
// Every escape is terminated by a single '_'.
// ---------------------------------------------------------------------------

#define ESCAPE_SYMBOL_FIRST TEXT('_')
#define ESCAPE_SYMBOL_SECOND TEXT('h')
#define ESCAPE_SYMBOL TEXT("_h")
#define ESCAPE_SYMBOL_LEN 2
#define ESCAPE_END_SYMBOL TEXT('_')
#define ESCAPE_HEAD_NUMBER 0x01
#define ESCAPE_HEAD_NUMBER_SYMBOL TEXT("01")
#define ESCAPE_UNICODE_IDENT_LE TEXT('u')
#define ESCAPE_UNICODE_IDENT_BE TEXT('U')
#define ESCAPE_UNICODE_IDENT (IsLittleEndian() ? ESCAPE_UNICODE_IDENT_LE : ESCAPE_UNICODE_IDENT_BE)

/** True for '0'..'9'. Written by hand because ::isdigit is undefined for wide values outside the unsigned char range. */
static inline bool IsAsciiDigit(int C)
{
	return C >= '0' && C <= '9';
}

/** True for 'A'..'Z' and 'a'..'z', independent of the C locale. */
static inline bool IsAsciiLetter(int C)
{
	return (C >= 'A' && C <= 'Z') || (C >= 'a' && C <= 'z');
}

/** True when C is a 7-bit character that is not allowed verbatim in an encoded name. */
static inline bool ShouldEscape(int C)
{
	if (C > 0x7F)
	{
		return false; // wide characters are handled by the unicode branch
	}
	return !(IsAsciiLetter(C) || IsAsciiDigit(C) || C == '_');
}

/** Reports whether the host stores the low byte of a 16-bit value first. */
static inline bool IsLittleEndian()
{
	static const bool bLowByteFirst = []
	{
		const unsigned short Probe = 1;
		return *reinterpret_cast<const unsigned char*>(&Probe) == 1;
	}();
	return bLowByteFirst;
}

static const char HexList[] = "0123456789ABCDEF";

/** Writes Value as two upper-case hex digits into Out[0..1]. */
static inline void ByteToHex(uint8_t Value, TCHAR* Out)
{
	Out[0] = HexList[Value / 16];
	Out[1] = HexList[Value % 16];
}

/** Returns 0..15 for a hex digit (either case), or -1 when Digit is not a hex digit. */
static inline int HexDigitValue(TCHAR Digit)
{
	if (Digit >= TEXT('0') && Digit <= TEXT('9'))
	{
		return Digit - TEXT('0');
	}
	if (Digit >= TEXT('A') && Digit <= TEXT('F'))
	{
		return Digit - TEXT('A') + 10;
	}
	if (Digit >= TEXT('a') && Digit <= TEXT('f'))
	{
		return Digit - TEXT('a') + 10;
	}
	return -1;
}

/**
 * Parses the two hex digits at Str[Index] and Str[Index + 1].
 * @return false when either position is out of range or is not a hex digit; Out is untouched in that case.
 */
static bool TryParseHexByte(const FString& Str, int Index, uint8& Out)
{
	if (Index < 0 || Index + 1 >= Str.Len())
	{
		return false;
	}
	const int High = HexDigitValue(Str[Index]);
	const int Low = HexDigitValue(Str[Index + 1]);
	if (High < 0 || Low < 0)
	{
		return false;
	}
	Out = static_cast<uint8>(High * 16 + Low);
	return true;
}

/**
 * Encodes Name so that the result only contains [A-Za-z0-9_] and does not start with a digit.
 *
 * @param Name               Source name. Must not contain the character 0x01, which is reserved
 *                           for the leading-digit marker.
 * @param bEncodeWideString  When true, characters above 0x7F are written as "_hu....._" / "_hU....._"
 *                           runs; when false they are copied through unchanged.
 * @return The encoded name, or an empty string (after logging an error) when Name contains 0x01.
 */
FString FNameEncode::Encode(const FString& Name, bool bEncodeWideString)
{
	const int Len = Name.Len();
	const int LastIndex = Len - 1;

	FString Ret;
	Ret.Reserve(Len + 4);

	// True while a unicode run has been opened and not yet terminated.
	bool bInUnicodeRun = false;

	for (int i = 0; i < Len; ++i)
	{
		const TCHAR C = Name[i];
		if (C == ESCAPE_HEAD_NUMBER)
		{
			UE_LOG(LogTemp, Error, TEXT("string contains illegal characters: %s"), *Name);
			return {};
		}

		const bool bEscapeSymbol = C == ESCAPE_SYMBOL_FIRST && i != LastIndex && Name[i + 1] == ESCAPE_SYMBOL_SECOND;
		const bool bDigitHead = i == 0 && IsAsciiDigit(C);

		if (bDigitHead)
		{
			// Identifiers may not start with a digit: prepend the marker and keep the digit as is.
			Ret.AppendChars(ESCAPE_SYMBOL, ESCAPE_SYMBOL_LEN);
			Ret.AppendChars(ESCAPE_HEAD_NUMBER_SYMBOL, 2);
			Ret.AppendChar(ESCAPE_END_SYMBOL);
			Ret.AppendChar(C);
		}
		else if (bEscapeSymbol || ShouldEscape(C))
		{
			TCHAR Hex[2];
			ByteToHex(static_cast<uint8_t>(C), Hex);

			Ret.AppendChars(ESCAPE_SYMBOL, ESCAPE_SYMBOL_LEN);
			Ret.AppendChars(Hex, 2);
			Ret.AppendChar(ESCAPE_END_SYMBOL);

			if (bEscapeSymbol)
			{
				++i; // the 'h' of a literal "_h" is folded into the "_h5F_" escape
			}
		}
		else if (bEncodeWideString && C > 0x7F)
		{
			if (!bInUnicodeRun)
			{
				Ret.AppendChars(ESCAPE_SYMBOL, ESCAPE_SYMBOL_LEN);
				Ret.AppendChar(ESCAPE_UNICODE_IDENT); // records the byte order used below
			}

			// Emit the code unit in host byte order, matching the identifier written above.
			const unsigned int Unit = static_cast<unsigned short>(C);
			const uint8_t LowByte = static_cast<uint8_t>(Unit & 0xFF);
			const uint8_t HighByte = static_cast<uint8_t>((Unit >> 8) & 0xFF);

			TCHAR Hex[4];
			if (IsLittleEndian())
			{
				ByteToHex(LowByte, Hex);
				ByteToHex(HighByte, Hex + 2);
			}
			else
			{
				ByteToHex(HighByte, Hex);
				ByteToHex(LowByte, Hex + 2);
			}
			Ret.AppendChars(Hex, 4);

			// Keep the run open while the next character is also wide; otherwise terminate it.
			bInUnicodeRun = i != LastIndex && Name[i + 1] > 0x7F;
			if (!bInUnicodeRun)
			{
				Ret.AppendChar(ESCAPE_END_SYMBOL);
			}
		}
		else
		{
			// a-z A-Z 0-9 _ (and wide characters when bEncodeWideString is false)
			Ret.AppendChar(C);
		}
	}
	return Ret;
}

/**
 * Reverses Encode().
 *
 * A "_h" that is not followed by at least one more character is copied through verbatim.
 * An escape that is the last thing in the string may omit its terminating '_'.
 *
 * @param Name  A string produced by Encode().
 * @return The decoded name, or an empty string (after logging an error) when an escape sequence
 *         is truncated, contains non-hex digits, or is not terminated by '_'.
 */
FString FNameEncode::Decode(const FString& Name)
{
	const int Len = Name.Len();

	FString Ret;
	Ret.Reserve(Len);

	int i = 0;
	while (i < Len)
	{
		const TCHAR C = Name[i];
		const bool bEscape = C == ESCAPE_SYMBOL_FIRST && i + ESCAPE_SYMBOL_LEN < Len && Name[i + 1] == ESCAPE_SYMBOL_SECOND;
		if (!bEscape)
		{
			Ret.AppendChar(C);
			++i;
			continue;
		}

		const int PayloadStart = i + ESCAPE_SYMBOL_LEN;
		const TCHAR Ident = Name[PayloadStart];

		if (Ident == ESCAPE_UNICODE_IDENT_LE || Ident == ESCAPE_UNICODE_IDENT_BE)
		{
			const bool bLowByteFirst = Ident == ESCAPE_UNICODE_IDENT_LE;
			int Cursor = PayloadStart + 1;
			do
			{
				uint8 First = 0;
				uint8 Second = 0;
				if (!TryParseHexByte(Name, Cursor, First) || !TryParseHexByte(Name, Cursor + 2, Second))
				{
					UE_LOG(LogTemp, Error, TEXT("decode failed. %s"), *Name);
					return {};
				}

				// Rebuild the code unit arithmetically so the result is correct on any host byte order.
				const unsigned int Unit = bLowByteFirst
					? (static_cast<unsigned int>(First) | (static_cast<unsigned int>(Second) << 8))
					: ((static_cast<unsigned int>(First) << 8) | static_cast<unsigned int>(Second));
				Ret.AppendChar(static_cast<TCHAR>(static_cast<unsigned short>(Unit)));
				Cursor += 4;
			}
			while (Cursor < Len && Name[Cursor] != ESCAPE_END_SYMBOL);

			i = Cursor + 1; // step over the terminator (or past the end if it was omitted)
			continue;
		}

		uint8 Value = 0;
		if (!TryParseHexByte(Name, PayloadStart, Value))
		{
			UE_LOG(LogTemp, Error, TEXT("decode failed. %s"), *Name);
			return {};
		}

		const int EndIndex = PayloadStart + 2;
		if (EndIndex < Len && Name[EndIndex] != ESCAPE_END_SYMBOL)
		{
			UE_LOG(LogTemp, Error, TEXT("decode failed. %s"), *Name);
			return {};
		}

		if (i == 0 && Value == ESCAPE_HEAD_NUMBER)
		{
			// Leading-digit marker: carries no character of its own.
		}
		else if (Value == ESCAPE_SYMBOL_FIRST)
		{
			// "_h5F_" stands for the literal pair "_h".
			Ret.AppendChars(ESCAPE_SYMBOL, ESCAPE_SYMBOL_LEN);
		}
		else
		{
			Ret.AppendChar(static_cast<TCHAR>(Value));
		}
		i = EndIndex + 1;
	}
	return Ret;
}