/* %% By 2011 Jimmy Ruska (JimmyR.com), % Licensed under the Apache License, Version 2.0 (the "License"); you may not % use this file except in compliance with the License. You may obtain a copy of % the License at % % http://www.apache.org/licenses/LICENSE-2.0 % % Unless required by applicable law or agreed to in writing, software % distributed under the License is distributed on an "AS IS" BASIS, WITHOUT % WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the % License for the specific language governing permissions and limitations under % the License. alias nif="gcc -O3 -Wall -fPIC -shared -o crawler_utils.erl.so crawler_utils.erl.c -I /usr/local/lib/erlang/erts-5.8.4/include" nif && cp crawler_utils.erl.so ~/e/crawler/c_src/ cd ~/c/; gcc -O3 -fno-optimize-sibling-calls -Wall -fPIC -shared -o crawler_utils.erl.so crawler_utils.erl.c parse_url.c -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include -lglib-2.0 -I /usr/local/lib/erlang/erts-5.8.4/include && cp ~/c/crawler_utils.erl.c ~/e/crawler/c_src/ && cp ~/c/parse_url.c ~/e/crawler/c_src/ && cp ~/c/parse_url.h ~/e/crawler/c_src/ maybe something to capitalize first char The functions htoi and url_decode are based on php source under this license. +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ | Copyright (c) 1997-2011 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | | available through the world-wide-web at the following url: | | http://www.php.net/license/3_01.txt | | If you did not receive a copy of the PHP license and are unable to | | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ | Author: Jim Winstead | +----------------------------------------------------------------------+ */ #include "erl_nif.h" #include "stdio.h" #include #include "parse_url.h" //#include void reverse_binary_unsafe(unsigned char* bin, size_t len); void binary_to_hex1(unsigned char* bin,unsigned char* dest,const size_t len); int hex_to_binary1(unsigned char* from,unsigned char* to, size_t len); char hex_char_to_int(const char n); char int_to_hex_char(const char n); static unsigned int htoi(unsigned char *s); int url_decode_unsafe1(unsigned char *str, int len); static int load(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info); static int reload(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info); static int upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM load_info); static void unload(ErlNifEnv* env, void* priv); static int load(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info) { return 0; } static int reload(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info) { return 0; } static int upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM load_info) { return 0; } static void unload(ErlNifEnv* env, void* priv) { return; } static ERL_NIF_TERM reverse_binary_unsafe1(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary block; enif_inspect_iolist_as_binary(env, argv[0], &block); reverse_binary_unsafe(block.data,block.size-1); return enif_make_binary(env, &block); } void reverse_binary_unsafe(unsigned char *bin, size_t len){ unsigned int i; char swap; for (i=0; i96 && n<103) return n-97+10; else if (n>64 && n<71) return n-65+10; else if (n>47 && n<58) return n-48; else return 48; } ///////////// static ERL_NIF_TERM binary_to_hex(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary block; size_t ret_size; ErlNifBinary ret; enif_inspect_iolist_as_binary(env, argv[0], &block); if(block.size==0) return enif_make_binary(env, &block); ret_size = block.size * 2; if ( !enif_alloc_binary(ret_size, &ret) ) return enif_make_atom(env, "error"); binary_to_hex1(block.data,ret.data,block.size); return enif_make_binary(env, &ret); } void binary_to_hex1(unsigned char* bin,unsigned char* dest,const size_t len){ unsigned int i,n; unsigned char x; //fprintf(stderr,"Len: %i --",(int)len); for(i=n=0;i>4,x & 15); dest[n++]=int_to_hex_char(x >> 4); dest[n++]=int_to_hex_char(x & 15); } } char int_to_hex_char(const char n){ if (n<10) return n+'0'; else if (n>9) return n+'A'-10; else return '0'; } static ERL_NIF_TERM binary_to_lower_unsafe(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary block; unsigned int i; unsigned char x; enif_inspect_iolist_as_binary(env, argv[0], &block); for (i=0;i'A'-1 && x<'Z'+1) block.data[i]=x+32; } return enif_make_binary(env, &block); } static ERL_NIF_TERM binary_to_upper_unsafe(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary block; unsigned int i; unsigned char x; enif_inspect_iolist_as_binary(env, argv[0], &block); for (i=0;i'a'-1 && x<'z'+1) block.data[i]=x-32; } return enif_make_binary(env, &block); } /////// /* static ERL_NIF_TERM binary_string_to_integer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) */ /* { */ /* ErlNifBinary block; */ /* unsigned total; */ /* unsigned char *p, *stop; */ /* enif_inspect_binary(env, argv[0], &block); */ /* p=&block.data[0]; */ /* for (total=0,stop=p+block.size;p '0'-1 && *p < '9'+1) || (*p > 'a'-1 && *p < 'z'+1) || (*p > 'A'-1 && *p < 'Z'+1) || *p=='-' || *p=='_' || *p=='.' || *p=='~'){ ret.data[i++]=*p; } else{ ret.data[i++]='%'; ret.data[i++]=int_to_hex_char(*p >> 4); ret.data[i++]=int_to_hex_char(*p & 15); } } if (!enif_realloc_binary(&ret, i) ) return enif_make_atom(env, "error"); return enif_make_binary(env, &ret); } static ERL_NIF_TERM binary_trim(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary block; unsigned int start=0, end; enif_inspect_iolist_as_binary(env, argv[0], &block); if (block.size==0) return enif_make_binary(env, &block); // int enif_is_list(ErlNifEnv* env, ERL_NIF_TERM term) // ERL_NIF_TERM enif_make_binary(ErlNifEnv* env, ErlNifBinary* bin) // int enif_alloc_binary(size_t size, ErlNifBinary* bin) // int enif_inspect_iolist_as_binary(ErlNifEnv* env, ERL_NIF_TERM term, ErlNifBinary* bin) // ERL_NIF_TERM enif_make_sub_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, size_t pos, size_t size) end=block.size-1; //fprintf(stderr,"start %i end %i,",start,end); while (isspace(block.data[start])){ start++; // scanned all the way to the end if (start>end){ if (!enif_realloc_binary(&block, 0)) return enif_make_atom(env, "error"); return enif_make_binary(env, &block); } } while (isspace(block.data[end])>0 && end>0) end--; end=block.size-1-end; if (start==0 && end==0) return enif_make_binary(env, &block); return enif_make_sub_binary(env, enif_make_binary(env,&block), start, block.size-start-end); } //url decode static ERL_NIF_TERM url_decode_unsafe(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary block; unsigned int new_size; enif_inspect_iolist_as_binary(env, argv[0], &block); new_size=url_decode_unsafe1(block.data,block.size); if (!enif_realloc_binary(&block, new_size) ) return enif_make_atom(env, "error"); return enif_make_binary(env, &block); } int url_decode_unsafe1(unsigned char *str, int len) { unsigned char *dest = str; unsigned char *data = str; while (len--) { if (*data == '+') *dest = ' '; else if (*data == '%' && len >= 2 && isdigit((int) *(data + 1)) && isdigit((int) *(data + 2))) { *dest = (unsigned char) htoi(data + 1); data += 2; len -= 2; } else *dest = *data; data++; dest++; } return dest - str; } static unsigned int htoi(unsigned char *s) { //hex to integer unsigned int value; int c; c = ((unsigned char *)s)[0]; if (isupper(c)) c = tolower(c); value = (c > '0'-1 && c < '9'+1 ? c - '0' : c - 'a' + 10) * 16; c = ((unsigned char *)s)[1]; if (isupper(c)) c = tolower(c); value += c > '0'-1 && c < '9'+1 ? c - '0' : c - 'a' + 10; return (value); } // end url decode static ErlNifFunc nif_funcs[] = { {"h2b_unsafe", 1, hex_to_binary_unsafe} ,{"h2b", 1, hex_to_binary} ,{"b2h", 1, binary_to_hex} ,{"trim", 1, binary_trim} ,{"reverse_bin_unsafe", 1, reverse_binary_unsafe1} ,{"lower_unsafe", 1, binary_to_lower_unsafe} ,{"upper_unsafe", 1, binary_to_upper_unsafe} ,{"url_encode", 1, url_encode} ,{"url_decode_unsafe", 1, url_decode_unsafe} ,{"parse_url", 1, parse_url} //,{"bs2i", 1, binary_string_to_integer} }; ERL_NIF_INIT(crawler_utils, nif_funcs, load, reload, upgrade, unload)