Skip to content

Instantly share code, notes, and snippets.

@lileiigithub
Forked from milhidaka/convert_float32.c
Created September 6, 2021 10:30
Show Gist options
  • Select an option

  • Save lileiigithub/32b96fae908d8367e4235df2050e8553 to your computer and use it in GitHub Desktop.

Select an option

Save lileiigithub/32b96fae908d8367e4235df2050e8553 to your computer and use it in GitHub Desktop.

Revisions

  1. @milhidaka milhidaka created this gist Mar 13, 2019.
    81 changes: 81 additions & 0 deletions convert_float32.c
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,81 @@
    #include <stdio.h>
    #include <stdint.h>
    #include <assert.h>

    #define DATA_SIZE 2052

    /*
     * Convert one IEEE 754 half-precision value (given as its raw 16-bit
     * pattern) into the equivalent single-precision float.
     *
     * Bit layout, MSB -> LSB:
     *   float16 = 1 bit sign, 5 bit exponent (bias 15),  10 bit fraction
     *   float32 = 1 bit sign, 8 bit exponent (bias 127), 23 bit fraction
     *
     *   normal exponent (1 to 0x1e): value = 2**(exponent-15) * (1.fraction)
     *   denormalized exponent (0):   value = 2**-14 * (0.fraction)
     *
     * float16_value: raw half-precision bit pattern.
     * Returns the float whose bit pattern is the widened encoding; signed
     * zeros, subnormals, infinities and NaNs (fraction bits shifted up by
     * 13) are all carried over.
     */
    float decode(uint16_t float16_value)
    {
        uint32_t sign = (uint32_t)float16_value >> 15;
        uint32_t exponent = ((uint32_t)float16_value >> 10) & 0x1Fu;
        uint32_t fraction = (uint32_t)float16_value & 0x3FFu;
        uint32_t float32_value;

        if (exponent == 0)
        {
            if (fraction == 0)
            {
                /* Signed zero: only the sign bit survives. */
                float32_value = sign << 31;
            }
            else
            {
                /*
                 * A half-precision subnormal is an ordinary (normal) number
                 * in float32, e.g. 2**-14 * 0.0101 => 2**-16 * 1.0100.
                 * Start from the biased float32 exponent for 2**-14 and
                 * shift the fraction left until the implicit leading one
                 * reaches bit 10, lowering the exponent once per shift.
                 */
                exponent = 127 - 14;
                while ((fraction & (1u << 10)) == 0)
                {
                    exponent--;
                    fraction <<= 1;
                }
                /* Drop the now-implicit leading one. */
                fraction &= 0x3FFu;
                float32_value = (sign << 31) | (exponent << 23) | (fraction << 13);
            }
        }
        else if (exponent == 0x1F)
        {
            /* Inf or NaN: all-ones exponent, fraction bits widened as-is. */
            float32_value = (sign << 31) | (0xFFu << 23) | (fraction << 13);
        }
        else
        {
            /* Ordinary number: re-bias the exponent from 15 to 127. */
            float32_value = (sign << 31) | ((exponent + (127 - 15)) << 23) | (fraction << 13);
        }

        /*
         * Reinterpret the bits through a union. Dereferencing a casted
         * pointer (*(float *)&float32_value) violates the strict-aliasing
         * rule and is undefined behaviour; reading a different union member
         * is the sanctioned way to type-pun in C.
         */
        union
        {
            uint32_t bits;
            float value;
        } pun;
        pun.bits = float32_value;
        return pun.value;
    }

    /*
     * Read DATA_SIZE raw uint16_t half-precision values from "float16.bin",
     * widen each one with decode(), and write the resulting floats to
     * "float32_decoded.bin". Both files are accessed in binary mode and the
     * values are read and written in the host's in-memory representation.
     *
     * Returns 0 on success and 1 on any I/O failure. Failures are checked
     * explicitly rather than with assert(), because assert() is compiled
     * out under NDEBUG and would then let a missing or short input file
     * pass silently.
     */
    int main(void)
    {
        uint16_t float16_data[DATA_SIZE];
        float float32_data[DATA_SIZE];

        FILE *fr = fopen("float16.bin", "rb");
        if (fr == NULL)
        {
            perror("float16.bin");
            return 1;
        }

        size_t loaded = fread(float16_data, sizeof(uint16_t), DATA_SIZE, fr);
        fclose(fr);
        if (loaded != DATA_SIZE)
        {
            fprintf(stderr, "float16.bin: expected %zu values, read %zu\n",
                    (size_t)DATA_SIZE, loaded);
            return 1;
        }

        for (int i = 0; i < DATA_SIZE; i++)
        {
            float32_data[i] = decode(float16_data[i]);
        }

        FILE *fw = fopen("float32_decoded.bin", "wb");
        if (fw == NULL)
        {
            perror("float32_decoded.bin");
            return 1;
        }

        size_t saved = fwrite(float32_data, sizeof(float), DATA_SIZE, fw);
        if (saved != DATA_SIZE)
        {
            fprintf(stderr, "float32_decoded.bin: expected to write %zu values, wrote %zu\n",
                    (size_t)DATA_SIZE, saved);
            fclose(fw);
            return 1;
        }

        /* fclose flushes buffered data, so a failure here means lost output. */
        if (fclose(fw) != 0)
        {
            perror("float32_decoded.bin");
            return 1;
        }

        return 0;
    }
    12 changes: 12 additions & 0 deletions numpy_make_data.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,12 @@
    import numpy as np

    # Generate the float16 test fixture and its float32 reference.
    # Seeding the global generator keeps both output files reproducible.
    np.random.seed(1)

    # Two batches of 1024 normally distributed samples: one with a small
    # standard deviation and one with a large one, each rounded to float16.
    small_values = np.random.normal(loc=0.0, scale=1e-2, size=1024)
    data_small = small_values.astype(np.float16)
    large_values = np.random.normal(loc=0.0, scale=1e2, size=1024)
    data_large = large_values.astype(np.float16)

    # Special values: +0, -0, +inf, -inf.
    data_special = np.array([0.0, -0.0, np.inf, -np.inf], dtype=np.float16)

    data = np.concatenate([data_small, data_large, data_special])

    # Raw float16 input for the decoder under test.
    data.tofile("float16.bin")

    # Reference output: the same values widened to float32 by numpy.
    reference = data.astype(np.float32)
    reference.tofile("float32.bin")