Here's a list of mildly interesting things about the C language that I learned mostly by consuming Clang's ASTs. Although surprises are getting sparser, I might continue to update this document over time.
There are many more mildly interesting features of C++, but the language is literally known for being weird, whereas C is usually considered smaller and simpler, so this is (almost) only about C.
1. Combined type and variable/field declaration, inside a struct scope [https://godbolt.org/g/Rh94Go]
struct foo {
   struct bar {
       int x;
   } baz;
};
void frob() {
   struct bar b; // <-- defined in body of `struct foo`
}
2. Compound literals are lvalues [https://godbolt.org/g/Zup5ZB]
struct foo {
    int bar;
};
void baz() {
    // compound literal:
    // https://en.cppreference.com/w/c/language/compound_literal
    (struct foo){};
    // these are actually lvalues
    ((struct foo){}).bar = 4;
    &(struct foo){};
}
3. Switch cases anywhere [https://godbolt.org/g/fSeL18]
void foo(int p, char* complicated) {
    switch (p) {
    case 0:
        if (complicated[0] == 'a') {
            if (complicated[1] == 'b') {
    case 1:
                complicated[2] = 'c';
            }
        }
        break;
    }
}
(also see: Duff's Device)
4. Flexible array members [https://godbolt.org/g/HCjfzX]
struct flex {
    int count;
    int elems[]; // <-- flexible array member
};
// this lays out the object exactly as expected
struct flex f = {
    .count = 3,
    .elems = {32, 31, 30}
};
_Static_assert(sizeof(struct flex) == sizeof(int), "");
// sizeof(f) does not include the size of statically-declared elements
_Static_assert(sizeof(f) == sizeof(struct flex), "");
// this only builds because .elems is not initialized:
struct flex g[2];
5. {0} as a universal initializer [https://godbolt.org/g/MPKkXv]
typedef int empty_array_t[0];
typedef struct {} empty_struct_t;
typedef int array_t[10];
typedef struct { int f; } struct_t;
typedef float vector_t __attribute__((ext_vector_type(4)));
// {} can initialize structs and arrays and vectors, but not scalars:
empty_array_t ea = {};
empty_struct_t es = {};
array_t a = {};
struct_t s = {};
vector_t v = {};
void* p = {}; // <-- error
int i = {}; // <-- error
// {0} can initialize any data type, including empty arrays/structs.
empty_array_t eaa = {0};
empty_struct_t ess = {0};
array_t aa = {0};
struct_t bb = {0};
vector_t cc = {0};
void* dd = {0}; // <-- happy!
int ee = {0}; // <-- happy!
6. Function typedefs [https://godbolt.org/g/5ctrLv]
typedef void (*function_pointer_t)(int); // <-- this creates a function pointer type
typedef void function_t(int); // <-- this creates a function type
// function_pointer_t == function_t*
function_t my_func; // <-- this declares "void my_func(int)"
void bar() {
    my_func(42);
}
7. Array pointers [https://godbolt.org/g/N85dvv]
typedef int array_t[10]; // array typedef
typedef array_t* array_ptr_t; // array pointer typedef
// same as:
// typedef int (*array_ptr_t)[10];
void foo(array_ptr_t array_ptr) {
    int x = (*array_ptr)[1];
}
void bar() {
    int arr_10[10];
    foo(&arr_10); // <-- yep
    
    int arr_11[11];
    foo(&arr_11); // <-- nope
}
8. Modifiers to array sizes in parameter definitions [https://godbolt.org/z/FnwYUs]
void foo(int arr[static const restrict volatile 10]) {
    // static: the array contains at least 10 elements
    // const, volatile and restrict qualify the pointer that the array
    // parameter is adjusted to (i.e. `arr` itself), not its elements.
}
(corrected by Reddit user /u/romv1)
9. Flat initializer lists [https://godbolt.org/g/RmwnoG]
struct foo {
    int x, y;
};
struct lots_of_inits {
    struct foo z[2];
    int w[3];
};
// this is probably more typical
struct lots_of_inits init = {
    {{1, 2}, {3, 4}}, {5, 6, 7}
};
// but braces for inner elements are optional
struct lots_of_inits flat_init = {
    1, 2, 3, 4, 5, 6, 7
};
10. What’s an lvalue, anyway [https://godbolt.org/g/5echfM]
struct bitfield {
    unsigned x: 3;
};
void foo() {
    int a[2];
    int i;
    const int j;
    struct bitfield bf;
    // these are all lvalues
    a; // DeclRefExpr <col:5> 'int [2]' lvalue Var 0x556800650150 'a' 'int [2]'
    i; // DeclRefExpr <col:5> 'int' lvalue Var 0x56289851bf20 'i' 'int'
    j; // DeclRefExpr <col:5> 'const int' lvalue Var 0x555fc6694ff0 'j' 'const int'
    bf.x; // MemberExpr <col:5, col:8> 'unsigned int' lvalue bitfield .x 0x55dab002de28
    // this is not an lvalue
    foo; // DeclRefExpr <col:6> 'void ()' Function 0x563cb79da098 'foo' 'void ()'
    // ... but you can't assign to all of them
    // a = (int [2]){1, 2};
    i = 4;
    // j = 4;
    bf.x = 4;
    // ... and you can't take all of their addresses
    &a;
    &i;
    &j;
    // &bf.x;
    &foo; // but you can take the address of a function, which is not an lvalue
    // so, an lvalue is a value that:
    // - can have its address taken...
    //  - unless it is a bitfield (still an lvalue)
    //  - unless it is a function (not an lvalue)
    // - can be assigned to...
    //  - unless it is an array (still an lvalue)
    //  - unless it is a constant (still an lvalue)
}
11. Void globals [https://godbolt.org/z/C52Wn2]
// You can declare extern globals to incomplete types,
// including `void`.
extern void foo;
12. Alignment implications of bitfields [https://godbolt.org/z/KmB4CB]
struct foo {
    char a;
    long b: 16;
    char c;
};
// `struct foo` has the alignment of its most-aligned member:
// `long b` has an alignment of 8...
int alignof_foo = _Alignof(struct foo);
// ...but `long b: 16` is a bitfield, and is aligned on a char
// boundary.
int offsetof_c = __builtin_offsetof(struct foo, c);
13. static variables are scope-local [https://godbolt.org/z/hdcLYW]
int foo() {
    int* a;
    int* b;
    {
        static int foo;
        a = &foo;
    }
    {
        static int foo;
        b = &foo;
    }
    // this always returns false: two static variables with the same name
    // but declared in different scopes refer to different storage.
    return a == b;
}
14. Typedef goes anywhere [https://godbolt.org/z/vZmgha]
// `typedef` is just another declaration specifier, so it may appear at any
// position among the other specifiers, not only at the front.
short typedef signed s16;   // same as: typedef signed short s16;
unsigned int typedef u32;   // same as: typedef unsigned int u32;
// same as: typedef const struct foo { int bar; } baz;
// (the member declaration needs its terminating `;` for this to compile)
struct foo { int bar; } const typedef baz;
s16 a;
u32 b;
baz c;
15. Indexing into an integer [https://godbolt.org/z/IBA5Gr]
int foo(int* ptr, int index) {
    // When indexing, the pointer and integer parts
    // of the subscript expression are interchangeable.
    return ptr[index] + index[ptr];
    // It works this way, according to the standard (§6.5.2.1:2),
    // because A[B] is the same as *(A + B), and addition
    // is commutative.
}
16. The type of enums vs. the type of enumerators [https://godbolt.org/z/Mhsn1n7nd]
In C, enumerators (values declared in enums) have integer type rather than the type of their enclosing enum. For instance:
enum foo { bar, baz, frob };
enum foo is a valid type to use that can store the value of bar, baz and frob.
However, the type of bar, baz and frob is an implementation-defined integer type! On
many implementations, bar has type int and enum foo has the underlying type
unsigned. This means that a check as simple as this one:
enum foo f = bar;
f < baz;
involves a comparison of integers with different signedness.
Further, the type of each enumerator is not guaranteed to be the same. In this example:
enum foo { bar, baz = 0x80000000 };
The type of bar can be int and the type of baz can be unsigned.
1. The power of UB [https://godbolt.org/g/H6mBFT]
extern void this_is_not_directly_called_by_main();
static void (*side_effects)() = 0;
void bar() {
    side_effects = this_is_not_directly_called_by_main;
}
int main() {
    side_effects();
}
compiles to:
bar:                                    # @bar
        ret
main:                                   # @main
        push    rax
        xor     eax, eax
        call    this_is_not_directly_called_by_main
        xor     eax, eax
        pop     rcx
        ret
main directly calls this_is_not_directly_called_by_main in this implementation. This happens because:
- LLVM sees that side_effects has only two possible values: NULL (the initial value) or this_is_not_directly_called_by_main (if bar is called)
- LLVM sees that side_effects is called, and it is UB to call a null pointer
- UB is impossible, so LLVM assumes that bar will have executed by the time main runs rather than face the consequences
- Under this assumption, side_effects is always this_is_not_directly_called_by_main.
2. A constant-expression macro that tells you if an expression is an integer constant [https://godbolt.org/g/a41gmx]
// ICE_P(x) evaluates to 1 when `x` is an integer constant expression and to 0
// otherwise; the result is itself a constant expression.
// - If `x` is an integer constant expression, `(void*)((x) * 0l)` is a null
//   pointer constant, so the conditional expression has type `int*` and the
//   sizeof operand has type `int`.
// - Otherwise that operand is an ordinary `void*`, the conditional expression
//   has type `void*`, and `sizeof(*(void*)...)` is 1 (a GNU extension accepted
//   by GCC and Clang), which differs from sizeof(int) on typical targets.
#define ICE_P(x) (sizeof(int) == sizeof(*(1 ? ((void*)((x) * 0l)) : (int*)1)))
int is_a_constant = ICE_P(4);
int is_not_a_constant = ICE_P(is_a_constant);
From Martin Uecker, on the Linux kernel ML. __builtin_constant_p does the same thing on Clang and GCC.
3. Labels inside expression statements in really weird places [https://godbolt.org/g/k9wDRf]
You can make some pretty weird stuff in C, but for a real disaster, you need C++.
class foo {
    int x;
public:
    foo();
};
foo::foo() : x(({ a: 4; })) {
    goto a;
}
Needless to say, statement expressions are not standard C++ (or standard C), but if your compiler has them, chances are that you can use them in really interesting ways.
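We can also forsake the ugly-looking return 0 at the end of main: since C99, reaching the closing brace of main is equivalent to return 0. (Declaring main with type void is also accepted by many implementations, but it is not portable.) :)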