// Copyright 2016 The CC Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package cc // import "modernc.org/cc" import ( "encoding/binary" "go/token" "sort" "strings" "time" "modernc.org/golex/lex" "modernc.org/mathutil" "modernc.org/xc" ) const ( intBits = mathutil.IntBits bitShift = intBits>>6 + 5 bitMask = intBits - 1 scINITIAL = 0 // Start condition (shared value). ) const ( // Character class is an 8 bit encoding of an Unicode rune for the // golex generated FSM. // // Every ASCII rune is its own class. DO NOT change any of the // existing values. Adding new classes is OK. ccEOF = iota + 0x80 _ // ccError ccOther // Any other rune. ccUCNDigit // [0], Annex D, Universal character names for identifiers - digits. ccUCNNonDigit // [0], Annex D, Universal character names for identifiers - non digits. ) const ( tsVoid = iota // 0: "void" tsChar // 1: "char" tsShort // 2: "short" tsInt // 3: "int" tsLong // 4: "long" tsFloat // 5: "float" tsDouble // 6: "double" tsSigned // 7: "signed" tsUnsigned // 8: "unsigned" tsBool // 9: "_Bool" tsComplex // 10: "_Complex" tsStructSpecifier // 11: StructOrUnionSpecifier: struct tsUnionSpecifier // 12: StructOrUnionSpecifier: union tsEnumSpecifier // 13: EnumSpecifier tsTypedefName // 14: TYPEDEFNAME tsTypeof // 15: "typeof" tsUintptr // 16: Pseudo type ) const ( tsBits = 5 // Values [0, 16] tsMask = 1<"), ASM: dict.SID("asm"), AUTO: dict.SID("auto"), BOOL: dict.SID("_Bool"), BREAK: dict.SID("break"), CASE: dict.SID("case"), CHAR: dict.SID("char"), COMPLEX: dict.SID("_Complex"), CONST: dict.SID("const"), CONTINUE: dict.SID("continue"), DDD: dict.SID("..."), DEC: dict.SID("--"), DEFAULT: dict.SID("default"), DIVASSIGN: dict.SID("/="), DO: dict.SID("do"), DOUBLE: dict.SID("double"), ELSE: dict.SID("else"), ENUM: dict.SID("enum"), EQ: dict.SID("=="), EXTERN: dict.SID("extern"), FLOAT: dict.SID("float"), FOR: dict.SID("for"), GEQ: dict.SID(">="), 
GOTO: dict.SID("goto"), IF: dict.SID("if"), INC: dict.SID("++"), INLINE: dict.SID("inline"), INT: dict.SID("int"), LEQ: dict.SID("<="), LONG: dict.SID("long"), LSH: dict.SID("<<"), LSHASSIGN: dict.SID("<<="), MODASSIGN: dict.SID("%="), MULASSIGN: dict.SID("*="), NEQ: dict.SID("!="), ORASSIGN: dict.SID("|="), OROR: dict.SID("||"), PPPASTE: dict.SID("##"), REGISTER: dict.SID("register"), RESTRICT: dict.SID("restrict"), RETURN: dict.SID("return"), RSH: dict.SID(">>"), RSHASSIGN: dict.SID(">>="), SHORT: dict.SID("short"), SIGNED: dict.SID("signed"), SIZEOF: dict.SID("sizeof"), STATIC: dict.SID("static"), STATIC_ASSERT: dict.SID("_Static_assert"), STRUCT: dict.SID("struct"), SUBASSIGN: dict.SID("-="), SWITCH: dict.SID("switch"), TYPEDEF: dict.SID("typedef"), TYPEOF: dict.SID("typeof"), UNION: dict.SID("union"), UNSIGNED: dict.SID("unsigned"), VOID: dict.SID("void"), VOLATILE: dict.SID("volatile"), WHILE: dict.SID("while"), XORASSIGN: dict.SID("^="), } id0 = dict.SID("0") id1 = dict.SID("1") idAlignof = dict.SID("_Alignof") idAlignofAlt = dict.SID("__alignof__") idAsm = dict.SID("asm") idAsmAlt = dict.SID("__asm__") idBuiltinClasifyType = dict.SID("__builtin_classify_type") idBuiltinConstantP = dict.SID("__builtin_constant_p") idBuiltinTypesCompatible = dict.SID("__builtin_types_compatible__") // Implements __builtin_types_compatible_p idChar = dict.SID("char") idConst = dict.SID("const") idDate = dict.SID("__DATE__") idDefined = dict.SID("defined") idEmptyString = dict.SID(`""`) idFile = dict.SID("__FILE__") idID = dict.SID("ID") idInlineAlt = dict.SID("__inline__") idL = dict.SID("L") idLine = dict.SID("__LINE__") idMagicFunc = dict.SID("__func__") idNoreturn = dict.SID("_Noreturn") idPopMacro = dict.SID("pop_macro") idPragma = dict.SID("_Pragma") idPushMacro = dict.SID("push_macro") idRestrictAlt = dict.SID("__restrict__") idSTDC = dict.SID("__STDC__") idSTDCHosted = dict.SID("__STDC_HOSTED__") idSTDCMBMightNeqWc = dict.SID("__STDC_MB_MIGHT_NEQ_WC__") idSTDCVersion = 
dict.SID("__STDC_VERSION__") idSignedAlt = dict.SID("__signed__") idSpace = dict.SID(" ") idStatic = dict.SID("static") idStaticAssert = dict.SID("_Static_assert") idTDate = dict.SID(tuTime.Format("Jan _2 2006")) // The date of translation of the preprocessing translation unit. idTTime = dict.SID(tuTime.Format("15:04:05")) // The time of translation of the preprocessing translation unit. idTime = dict.SID("__TIME__") idTypeof = dict.SID("typeof") idTypeofAlt = dict.SID("__typeof__") idVAARGS = dict.SID("__VA_ARGS__") idVolatileAlt = dict.SID("__volatile__") tuTime = time.Now() tokHasVal = map[rune]bool{ CHARCONST: true, FLOATCONST: true, IDENTIFIER: true, IDENTIFIER_LPAREN: true, INTCONST: true, LONGCHARCONST: true, LONGSTRINGLITERAL: true, PPHEADER_NAME: true, PPNUMBER: true, PPOTHER: true, STRINGLITERAL: true, TYPEDEFNAME: true, } // Valid combinations of TypeSpecifier.Case ([0], 6.7.2, 2) tsValid = map[int]Kind{ tsEncode(tsBool): Bool, // _Bool tsEncode(tsChar): Char, // char tsEncode(tsComplex): DoubleComplex, // _Complex tsEncode(tsDouble): Double, // double tsEncode(tsDouble, tsComplex): DoubleComplex, // double _Complex tsEncode(tsEnumSpecifier): Enum, // enum specifier tsEncode(tsFloat): Float, // float tsEncode(tsFloat, tsComplex): FloatComplex, // float _Complex tsEncode(tsInt): Int, // int tsEncode(tsLong): Long, // long tsEncode(tsLong, tsDouble): LongDouble, // long double tsEncode(tsLong, tsDouble, tsComplex): LongDoubleComplex, // long double _Complex tsEncode(tsLong, tsInt): Long, // long int tsEncode(tsLong, tsLong): LongLong, // long long tsEncode(tsLong, tsLong, tsInt): LongLong, // long long int tsEncode(tsShort): Short, // short tsEncode(tsShort, tsInt): Short, // short int tsEncode(tsSigned): Int, // signed tsEncode(tsSigned, tsChar): SChar, // signed char tsEncode(tsSigned, tsInt): Int, // signed int tsEncode(tsSigned, tsLong): Long, // signed long tsEncode(tsSigned, tsLong, tsInt): Long, // signed long int tsEncode(tsSigned, tsLong, tsLong): 
LongLong, // signed long long tsEncode(tsSigned, tsLong, tsLong, tsInt): LongLong, // signed long long int tsEncode(tsSigned, tsShort): Short, // signed short tsEncode(tsSigned, tsShort, tsInt): Short, // signed short int tsEncode(tsStructSpecifier): Struct, // struct tsEncode(tsTypedefName): TypedefName, // typedef name tsEncode(tsTypeof): typeof, // typeof name tsEncode(tsUintptr): UintPtr, // Pseudo type. tsEncode(tsUnionSpecifier): Union, // union tsEncode(tsUnsigned): UInt, // unsigned tsEncode(tsUnsigned, tsChar): UChar, // unsigned char tsEncode(tsUnsigned, tsInt): UInt, // unsigned int tsEncode(tsUnsigned, tsLong): ULong, // unsigned long tsEncode(tsUnsigned, tsLong, tsInt): ULong, // unsigned long int tsEncode(tsUnsigned, tsLong, tsLong): ULongLong, // unsigned long long tsEncode(tsUnsigned, tsLong, tsLong, tsInt): ULongLong, // unsigned long long int tsEncode(tsUnsigned, tsShort): UShort, // unsigned short tsEncode(tsUnsigned, tsShort, tsInt): UShort, // unsigned short int tsEncode(tsVoid): Void, // void } ) func isUCNDigit(r rune) bool { return int(r) < len(ucnDigits)<>bitShift]&(1<>bitShift]&(1<>= 1 // Remove value is valid bit. for n != 0 { r = append(r, n&tsMask) n >>= tsBits } return r } func (l *lexer) encodeToken(tok xc.Token) { n := binary.PutUvarint(l.encBuf1[:], uint64(tok.Rune)) pos := tok.Pos() n += binary.PutUvarint(l.encBuf1[n:], uint64(pos-l.encPos)) l.encPos = pos if tokHasVal[tok.Rune] { n += binary.PutUvarint(l.encBuf1[n:], uint64(tok.Val)) } l.encBuf = append(l.encBuf, l.encBuf1[:n]...) 
}

// decodeToken reads one token from the front of *p and advances *p past the
// bytes it consumed.
//
// The fields are read in this order, each as a uvarint: the token rune, the
// position delta relative to *pos and, only when tokHasVal is true for the
// rune, the token value. This is the same order in which encodeToken writes
// them. *pos is updated to the absolute position of the decoded token.
func decodeToken(p *[]byte, pos *token.Pos) xc.Token {
	b := *p
	r, n := binary.Uvarint(b)
	b = b[n:]
	d, n := binary.Uvarint(b)
	b = b[n:]
	// Positions are stored as deltas; rebuild the absolute position.
	np := *pos + token.Pos(d)
	*pos = np
	c := lex.NewChar(np, rune(r))
	var v uint64
	if tokHasVal[c.Rune] {
		v, n = binary.Uvarint(b)
		b = b[n:]
	}
	*p = b
	return xc.Token{Char: c, Val: int(v)}
}

// decodeTokens decodes all tokens found in the bytes returned by
// dict.S(int(id)) and returns them appended to r[:0], so the backing array of
// r is reused. Tokens whose rune is ' ' are dropped unless withSpaces is true.
// Position deltas are accumulated starting from the zero token.Pos.
func decodeTokens(id PPTokenList, r []xc.Token, withSpaces bool) []xc.Token {
	b := dict.S(int(id))
	var pos token.Pos
	r = r[:0]
	for len(b) != 0 {
		tok := decodeToken(&b, &pos)
		if tok.Rune == ' ' && !withSpaces {
			continue
		}

		r = append(r, tok)
	}
	return r
}

// tokVal returns an integer value for t. The candidates are tried in this
// order: 0 when t.Rune is 0; t.Val when it is non-zero; int(t.Rune)+1 when the
// rune is below 0x80; the tokConstVals entry for the rune. It panics with
// "internal error" when none of them applies.
func tokVal(t xc.Token) int {
	r := t.Rune
	if r == 0 {
		return 0
	}

	if v := t.Val; v != 0 {
		return v
	}

	if r != 0 && r < 0x80 {
		return int(r) + 1
	}

	if i, ok := tokConstVals[r]; ok {
		return i
	}

	panic("internal error")
}

// TokSrc returns t in its source form.
//
// Runes present in tokConstVals yield the dict string registered for them,
// runes for which tokHasVal is true yield t.S(), and any other rune is
// converted to a string directly.
func TokSrc(t xc.Token) string {
	if x, ok := tokConstVals[t.Rune]; ok {
		return string(dict.S(x))
	}

	if tokHasVal[t.Rune] {
		return string(t.S())
	}

	return string(t.Rune)
}

// universal-character-name \\u{hex-quad}|\\U{hex-quad}{hex-quad}
//
// decodeUCN decodes the universal character name at the start of runes and
// returns the rune together with the number of input runes it occupies: 6 for
// the \u form and 10 for the \U form. It panics with "internal error" when
// runes does not start with a backslash followed by 'u' or 'U'. The hex digits
// themselves are not validated.
func decodeUCN(runes []rune) (rune, int) {
	if runes[0] != '\\' {
		panic("internal error")
	}

	runes = runes[1:]
	switch runes[0] {
	case 'u':
		return rune(decodeHexQuad(runes[1:])), 6
	case 'U':
		// The first quad supplies the upper 16 bits, the second the lower 16.
		return rune(decodeHexQuad(runes[1:])<<16 | decodeHexQuad(runes[5:])), 10
	default:
		panic("internal error")
	}
}

// hex-quad {hexadecimal-digit}{hexadecimal-digit}{hexadecimal-digit}{hexadecimal-digit}
//
// decodeHexQuad returns the value of the first four runes of runes read as a
// big-endian sequence of hexadecimal digits. Slicing runes[:4] panics when
// fewer than four runes are available.
func decodeHexQuad(runes []rune) int {
	n := 0
	for _, r := range runes[:4] {
		n = n<<4 | decodeHex(r)
	}
	return n
}

// decodeHex returns the value of the hexadecimal digit r. Runes outside
// '0'..'9' are handled as letters: bit 0x20 is cleared, which folds ASCII
// lower case onto upper case, and 'A' maps to 10. No validation is performed,
// so the result for a rune that is not a hexadecimal digit is meaningless.
func decodeHex(r rune) int {
	switch {
	case r >= '0' && r <= '9':
		return int(r) - '0'
	default:
		x := int(r) &^ 0x20
		return x - 'A' + 10
	}
}

// escape-sequence {simple-sequence}|{octal-escape-sequence}|{hexadecimal-escape-sequence}|{universal-character-name}
// simple-sequence \\['\x22?\\abfnrtv]
// octal-escape-sequence \\{octal-digit}{octal-digit}?{octal-digit}?
// hexadecimal-escape-sequence \\x{hexadecimal-digit}+ func decodeEscapeSequence(runes []rune) (rune, int) { if runes[0] != '\\' { panic("internal error") } r := runes[1] switch r { case '\'', '"', '?', '\\': return r, 2 case 'a': return 7, 2 case 'b': return 8, 2 case 'f': return 12, 2 case 'n': return 10, 2 case 'r': return 13, 2 case 't': return 9, 2 case 'v': return 11, 2 case 'x': v, n := 0, 2 loop2: for _, r := range runes[2:] { switch { case r >= '0' && r <= '9', r >= 'a' && r <= 'f', r >= 'A' && r <= 'F': v = v<<4 | decodeHex(r) n++ default: break loop2 } } return -rune(v & 0xff), n case 'u', 'U': return decodeUCN(runes) } if r < '0' || r > '7' { panic("internal error") } v, n := 0, 1 loop: for _, r := range runes[1:] { switch { case r >= '0' && r <= '7': v = v<<3 | (int(r) - '0') n++ default: break loop } } return -rune(v), n }