Last active
July 6, 2016 11:05
-
-
Save ashim888/797cae73c921e282a56db0dc477455e1 to your computer and use it in GitHub Desktop.
Revisions
-
ashim888 revised this gist
Jul 6, 2016. 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -23,7 +23,7 @@ def safety_check(domain): language = language_prediction[0] # TRANSLATE LANGUAGE translator = Translator('<Your Client ID>', '<Your Client Secret>') if language!='en': print 'Another language Found: '+ language overall_text= translator.translate(overall_text, "en") -
ashim888 revised this gist
Jul 6, 2016. 1 changed file with 0 additions and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -9,7 +9,6 @@ porn_list=["anal","hentai","anus","arse","butt","arsehole","ass","fcuk","fuck","naked","xvideos","porn", "sex", "porno", "free porn", "porn tube", "porn videos", "streaming porn","Free porn", "sex videos","pussy","Porn hub", "xxx" "porn", "sex" ] def safety_check(domain): tokenizer = RegexpTokenizer(r'\w+') client = MongoClient('mongodb://192.168.1.10:27017/',27017) db = client.cutestat_v3 -
ashim888 created this gist
Jul 6, 2016. There are no files selected for viewing.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Flag domains whose stored title/meta text contains adult keywords.

For each domain the script looks up a document in the ``WebInfo``
collection, detects the language of its title/description/tags text,
translates non-English text to English, and reports whether any token
matches an entry of ``porn_list``.
"""
from pymongo import MongoClient
import pprint
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from microsofttranslator import Translator
import langid

# Connection string of the MongoDB instance holding the crawled site data.
MONGO_URI = 'mongodb://192.168.1.10:27017/'

# Credentials for the translation service. Fill these in locally; real
# secrets must never be committed alongside the script.
TRANSLATOR_CLIENT_ID = '<Your Client ID>'
TRANSLATOR_CLIENT_SECRET = '<Your Client Secret>'

# Keywords that mark a page as unsafe. Matching is done per lower-cased
# word token, so the multi-word and capitalised entries never match on
# their own; they are harmless because their individual words are listed.
porn_list = [
    "anal", "hentai", "anus", "arse", "butt", "arsehole", "ass", "fcuk",
    "fuck", "naked", "xvideos", "porn", "sex", "porno", "free porn",
    "porn tube", "porn videos", "streaming porn", "Free porn", "sex videos",
    "pussy", "Porn hub", "xxx", "porn", "sex",
]

# ``unicode`` on Python 2, ``str`` on Python 3.
_TEXT_TYPE = type(u"")


def _as_text(value):
    """Return *value* as text without ASCII-only coercion.

    ``None`` becomes an empty string and lists/tuples are space-joined, so
    a missing field or a list of tags does not inject junk tokens.
    NOTE(review): the field types stored in WebInfo are not visible here;
    this handles the plausible ones -- confirm against the real schema.
    """
    if value is None:
        return u""
    if isinstance(value, (list, tuple)):
        return u" ".join(_as_text(item) for item in value)
    if isinstance(value, bytes):
        return value.decode("utf-8", "replace")
    if isinstance(value, _TEXT_TYPE):
        return value
    return _TEXT_TYPE(value)


def _record_text(record):
    """Join the title, meta description and meta tags of *record*."""
    fields = ("title", "metaDescription", "metaTags")
    # Space-separate the fields so the last word of one field is not fused
    # with the first word of the next.
    return u" ".join(_as_text(record.get(name)) for name in fields)


def _keyword_hits(text):
    """Return the set of ``porn_list`` keywords that occur as tokens in *text*."""
    stop = set(stopwords.words('english'))
    words = RegexpTokenizer(r'\w+').tokenize(text)
    # Lower-case before the stop-word test so "The" is dropped like "the".
    tokens = set(word.lower() for word in words) - stop
    return tokens & set(keyword.lower() for keyword in porn_list)


def safety_check(domain):
    """Print whether the stored text for *domain* contains adult keywords.

    Prints ``<domain> SAFETY CHECK FAIL`` with the number of matched
    keywords, ``<domain> SAFETY CHECK PASS``, or
    ``<domain> Not Found In Database``. Errors raised while looking up or
    processing the record are printed rather than raised, so one bad
    domain does not stop a batch run.

    :param domain: value of the ``domain`` field to look up in ``WebInfo``.
    :returns: None; the result is reported on stdout only.
    """
    client = MongoClient(MONGO_URI)
    try:
        record = client.cutestat_v3.WebInfo.find_one({"domain": domain})
        if record is None:
            print(domain + ' Not Found In Database' + '\n')
            return

        text = _record_text(record)

        # langid.classify returns a (language code, score) pair.
        language = langid.classify(text)[0]
        if language != 'en':
            print('Another language Found: ' + language)
            translator = Translator(TRANSLATOR_CLIENT_ID,
                                    TRANSLATOR_CLIENT_SECRET)
            text = translator.translate(text, "en")

        matches = _keyword_hits(text)
        if matches:
            print(domain + " SAFETY CHECK FAIL")
            print("Total Abusive Keywords Found: %d" % len(matches))
        else:
            print(domain + " SAFETY CHECK PASS")
        print('\n')
    except Exception as exc:
        # Best-effort batch script: report the problem and carry on.
        print(exc)
    finally:
        # Each call opens its own client; release its sockets when done.
        client.close()


safety_check("www.befuck.com")
safety_check("baidu.com")
safety_check("gioia.it")
safety_check("partyporn.co.il")
safety_check("x-nxx.co.il")
safety_check("jw.org")
safety_check("xhamster.com")
safety_check("www.xnxx.com")
safety_check("ratopati.com")