Taken from libzip's _zip_guess_encoding():
zip_encoding_type_t
  117 _zip_guess_encoding(zip_string_t *str, zip_encoding_type_t expected_encoding)
  118 {
  119     zip_encoding_type_t enc;
  120     const zip_uint8_t *name;
  121     zip_uint32_t i, j, ulen;
  122 
  123     if (str == NULL)
  124     return ZIP_ENCODING_ASCII;
  125 
  126     name = str->raw;
  127 
  128     if (str->encoding != ZIP_ENCODING_UNKNOWN)
  129     enc = str->encoding;
  130     else {
  131     enc = ZIP_ENCODING_ASCII;
  132     for (i=0; i<str->length; i++) {
  133         if ((name[i] > 31 && name[i] < 128) || name[i] == '\r' || name[i] == '\n' || name[i] == '\t')
  134         continue;
  135 
  136         enc = ZIP_ENCODING_UTF8_GUESSED;
  137         if ((name[i] & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_MATCH)
  138         ulen = 1;
  139         else if ((name[i] & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_MATCH)
  140         ulen = 2;
  141         else if ((name[i] & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_MATCH)
  142         ulen = 3;
  143         else {
  144         enc = ZIP_ENCODING_CP437;
  145         break;
  146         }
  147 
  148         if (i + ulen >= str->length) {
  149         enc = ZIP_ENCODING_CP437;
  150         break;
  151         }
  152 
  153         for (j=1; j<=ulen; j++) {
  154         if ((name[i+j] & UTF_8_CONTINUE_MASK) != UTF_8_CONTINUE_MATCH) {
  155             enc = ZIP_ENCODING_CP437;
  156             goto done;
  157         }
  158         }
  159         i += ulen;
  160     }
  161     }
  162 
  163 done:
  164     str->encoding = enc;
  165 
  166     if (expected_encoding != ZIP_ENCODING_UNKNOWN) {
  167     if (expected_encoding == ZIP_ENCODING_UTF8_KNOWN && enc == ZIP_ENCODING_UTF8_GUESSED)
  168         str->encoding = enc = ZIP_ENCODING_UTF8_KNOWN;
  169 
  170     if (expected_encoding != enc && enc != ZIP_ENCODING_ASCII)
  171         return ZIP_ENCODING_ERROR;
  172     }
  173     
  174     return enc;
  175 }
  176