Last active
April 21, 2022 10:19
-
-
Save dvm-shlee/7d2fa751b0b7af8c90e2e15a38d64c4c to your computer and use it in GitHub Desktop.
Revisions
-
SungHo Lee revised this gist
Nov 25, 2016 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -152,12 +152,12 @@ def translatekoen(text): """ base_link = "http://translate.google.com/m?hl=%s&sl=%s&q=%s" if (sys.version_info[0] < 3): to_translate = urllib.quote_plus(text.encode('utf8')) link = base_link % ('en', 'ko', to_translate) request = Request(link, headers=agent) page = urlopen(request).read() else: to_translate = urllib.parse.quote(text.encode('utf8')) link = base_link % ('en', 'ko', to_translate) request = urllib.request.Request(link, headers=agent) page = urllib.request.urlopen(request).read().decode("utf-8") -
SungHo Lee revised this gist
Nov 25, 2016 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -152,12 +152,12 @@ def translatekoen(text): """ base_link = "http://translate.google.com/m?hl=%s&sl=%s&q=%s" if (sys.version_info[0] < 3): to_translate = urllib.quote_plus(text.decode('utf8')) link = base_link % ('en', 'ko', to_translate) request = Request(link, headers=agent) page = urlopen(request).read() else: to_translate = urllib.parse.quote(text.decode('utf8')) link = base_link % ('en', 'ko', to_translate) request = urllib.request.Request(link, headers=agent) page = urllib.request.urlopen(request).read().decode("utf-8") -
SungHo Lee revised this gist
Nov 23, 2016 . 1 changed file with 0 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -170,7 +170,6 @@ def translatekoen(text): def koen(line, cell): translated_en = translatekoen(cell) corrected_en = correct_en(translated_en) print('Translated (Korean to English): ' + translated_en) print("") print("Grammar checked (English) : " + corrected_en) -
SungHo Lee revised this gist
Nov 23, 2016 . No changes.There are no files selected for viewing
-
SungHo Lee revised this gist
Nov 23, 2016 . 1 changed file with 1 addition and 37 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -146,40 +146,6 @@ def correct_en(text): fixed_gap += to_index-from_index-len(suggest) return(fixed_text) def translatekoen(text): """Returns the translation using google translate source: https://github.com/mouuff/mtranslate @@ -202,9 +168,7 @@ def translatekoen(text): return(result[0]) def koen(line, cell): translated_en = translatekoen(cell) corrected_en = correct_en(translated_en) print("") print('Translated (Korean to English): ' + translated_en) -
SungHo Lee revised this gist
Nov 22, 2016 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -209,7 +209,7 @@ def koen(line, cell): print("") print('Translated (Korean to English): ' + translated_en) print("") print("Grammar checked (English) : " + corrected_en) def load_ipython_extension(shell): shell.register_magic_function(koen, 'cell') -
SungHo Lee created this gist
Nov 22, 2016 .There are no files selected for viewing
"""IPython ``%%koen`` cell magic.

Pipeline: Korean spell check (Pusan National University speller) ->
Korean-to-English translation (Google Translate mobile page) ->
English grammar check (Ginger).

The module runs on both Python 2 and Python 3. ``requests`` and ``bs4`` are
optional at import time and are only required by ``correct_ko``.
"""
from __future__ import absolute_import

import json
import re
import sys
from contextlib import closing

_PY2 = sys.version_info[0] < 3

if _PY2:
    from HTMLParser import HTMLParser
    from urllib import quote_plus, urlencode
    from urllib2 import HTTPError, Request, URLError, urlopen
    from urlparse import urlunparse

    _unescape = HTMLParser().unescape
else:
    from html import unescape as _unescape
    from urllib.error import HTTPError, URLError
    from urllib.parse import quote_plus, urlencode, urlunparse
    from urllib.request import Request, urlopen

try:
    import requests
    from bs4 import BeautifulSoup as Soup
except ImportError:  # only correct_ko() needs these; fail there, not at import
    requests = None
    Soup = None

# The text (unicode) type of the running interpreter.
_TEXT_TYPE = type(u"")

# Browser-like User-Agent sent with every outgoing request.
# NOTE(review): the original literal used backslash line continuations; any
# whitespace that followed them could not be recovered -- confirm if it matters.
agent = {'User-Agent': (
    "Mozilla/4.0 ("
    "compatible;"
    "MSIE 6.0;"
    "Windows NT 5.1;"
    "SV1;"
    ".NET CLR 1.1.4322;"
    ".NET CLR 2.0.50727;"
    ".NET CLR 3.0.04506.30"
    ")"
)}


def _to_text(value, encoding="utf-8"):
    """Return ``value`` as text, decoding it first when it is a byte string."""
    if isinstance(value, bytes):
        return value.decode(encoding)
    return value


def _to_bytes(value, encoding="utf-8"):
    """Return ``value`` as bytes, encoding it first when it is text."""
    if isinstance(value, _TEXT_TYPE):
        return value.encode(encoding)
    return value


class ColoredText:
    """Colored text class

    source: https://github.com/zoncoen/python-ginger
    """
    colors = ['black', 'red', 'green', 'orange', 'blue', 'magenta', 'cyan', 'white']
    # name -> (foreground code, background code)
    color_dict = {name: (i + 30, i + 40) for i, name in enumerate(colors)}

    @classmethod
    def colorize(cls, text, color=None, bgcolor=None):
        """Colorize text

        @param cls Class
        @param text Text
        @param color Text color
        @param bgcolor Background color
        @return tuple ``(colored_text, gap)`` where ``gap`` is the number of
                escape-sequence characters added around ``text``. An unknown
                color name prints a message and returns ``(text, 0)``.
        """
        fg = None
        bg = None
        if color is not None:
            try:
                fg = cls.color_dict[color][0]
            except KeyError:
                print("Invalid text color: %s" % color)
                return (text, 0)
        if bgcolor is not None:
            try:
                bg = cls.color_dict[bgcolor][1]
            except KeyError:
                print("Invalid background color: %s" % bgcolor)
                return (text, 0)

        s_open = ''
        if fg is not None:
            s_open += '\033[%dm' % fg
        if bg is not None:
            s_open += '\033[%dm' % bg
        # Emit the reset sequence exactly when some color was applied.
        s_close = '\033[0m' if s_open else ''
        return ('%s%s%s' % (s_open, text, s_close), len(s_open) + len(s_close))


def get_ginger_url(text):
    """Get URL for checking grammar using Ginger.

    source: https://github.com/zoncoen/python-ginger

    @param text English text
    @return URL
    """
    API_KEY = "6ae0c3a0-afdc-4532-a810-82ded0054236"
    scheme = "http"
    netloc = "services.gingersoftware.com"
    path = "/Ginger/correct/json/GingerTheText"
    params = ""
    # The text is passed as UTF-8 bytes so that non-ASCII input is quoted
    # correctly on Python 2 as well as Python 3.
    query = urlencode([
        ("lang", "US"),
        ("clientVersion", "2.0"),
        ("apiKey", API_KEY),
        ("text", _to_bytes(text)),
    ])
    fragment = ""
    return urlunparse((scheme, netloc, path, params, query, fragment))


def get_ginger_result(text):
    """Get a result of checking grammar.

    source: https://github.com/zoncoen/python-ginger

    @param text English text
    @return result of grammar check by Ginger (decoded JSON), or None when
            the request fails or the response is not valid JSON; in that case
            a message describing the problem is printed.
    """
    url = get_ginger_url(text)
    try:
        with closing(urlopen(url)) as response:
            payload = response.read()
    except HTTPError as e:  # must precede URLError (HTTPError subclasses it)
        print("HTTP Error: %s" % e.code)
        return None
    except URLError as e:
        print("URL Error: %s" % e.reason)
        return None
    except IOError as e:
        print("I/O error (%s): %s" % (e.errno, e.strerror))
        return None

    try:
        return json.loads(payload.decode('utf-8'))
    except ValueError:
        print("Value Error: Invalid server response.")
        return None


def _apply_suggestions(text, corrections):
    """Replace each flagged span of ``text`` with its first suggestion, in green."""
    fixed_text = text
    # Offset between positions in the original text and in fixed_text; grows
    # as replacements of a different length (plus color codes) are spliced in.
    shift = 0
    for result in corrections:
        suggestions = result["Suggestions"]
        if not suggestions:
            continue
        start = result["From"]
        end = result["To"] + 1  # "To" is used as an inclusive index
        colored_suggest = ColoredText.colorize(suggestions[0]["Text"], 'green')[0]
        fixed_text = (fixed_text[:start + shift]
                      + colored_suggest
                      + fixed_text[end + shift:])
        shift += len(colored_suggest) - (end - start)
    return fixed_text


def correct_en(text):
    """Grammar-check English text with Ginger.

    @param text English text (at most 600 characters are checked)
    @return ``text`` with each corrected span replaced by its green-colored
            suggestion; ``text`` followed by " - Good English! :)" when Ginger
            reports nothing; ``text`` unchanged when it is too long or the
            service could not be queried.
    """
    text = _to_text(text)
    if len(text) > 600:
        print("You can't check more than 600 characters at a time.")
        return text

    results = get_ginger_result(text)
    if not isinstance(results, dict) or "LightGingerTheTextResult" not in results:
        # Request failed or the payload has an unexpected shape.
        return text

    corrections = results["LightGingerTheTextResult"]
    # Correct grammar
    if not corrections:
        return text + " - Good English! :)"
    # Incorrect grammar
    return _apply_suggestions(text, corrections)


def correct_ko(text):
    """Pusan University's Korean Grammar and Spell checker

    @param text Korean text (at most 300 characters are checked)
    @return tuple ``(colored, plain)`` of text strings: the corrected text with
            replacements highlighted in green, and the corrected text without
            color codes. Both are the input text when it is too long or the
            speller returns no checked text.
    @raise ImportError when ``requests`` or ``bs4`` is not installed
    """
    text = _to_text(text)
    if len(text) > 300:
        print("You can't check more than 300 characters at a time.")
        return (text, text)
    if requests is None or Soup is None:
        raise ImportError("correct_ko requires the 'requests' and 'bs4' packages")

    data = {'text1': text}
    response = requests.post(
        'http://speller.cs.pusan.ac.kr/PnuSpellerISAPI_201602/lib/check.asp',
        data=data, headers=agent)
    results = Soup(response.text, 'lxml')

    underline = results.find('div', attrs={'id': 'bufUnderline'})
    fulltext = underline.text if underline is not None else u''
    if not fulltext:
        return (text, text)

    errs = [cell.text for cell in results.find_all('td', attrs={'class': 'tdErrWord'})]
    replaces = []
    for cell in results.find_all('td', attrs={'class': 'tdReplace'}):
        contents = cell.contents
        # Only the first child of the cell is used as the replacement; None
        # keeps the list aligned with errs when a cell is empty.
        replaces.append(_TEXT_TYPE(contents[0]) if contents else None)

    fulltext_c = fulltext
    # NOTE(review): each error word is located by its first occurrence, so a
    # word that also appears earlier in the text would be replaced there.
    for err, rep in zip(errs, replaces):
        if not err or rep is None:
            continue
        idx = fulltext.find(err)
        idx_c = fulltext_c.find(err)
        if idx < 0 or idx_c < 0:
            continue  # reported word not present; leave the text untouched
        fulltext = fulltext[:idx] + rep + fulltext[idx + len(err):]
        fulltext_c = (fulltext_c[:idx_c]
                      + ColoredText.colorize(rep, 'green')[0]
                      + fulltext_c[idx_c + len(err):])
    return (fulltext_c, fulltext)


def translatekoen(text):
    """Returns the translation using google translate

    source: https://github.com/mouuff/mtranslate

    @param text Korean text
    @return English translation with HTML entities unescaped, or "" when no
            translation could be found in the returned page
    """
    base_link = "http://translate.google.com/m?hl=%s&sl=%s&q=%s"
    to_translate = quote_plus(_to_bytes(text))
    link = base_link % ('en', 'ko', to_translate)
    request = Request(link, headers=agent)
    with closing(urlopen(request)) as response:
        page = response.read().decode("utf-8")

    # NOTE(review): scrapes the element with class "t0"; confirm the page
    # still uses this markup.
    expr = r'class="t0">(.*?)<'
    result = re.findall(expr, page)
    if not result:
        return ""
    return _unescape(result[0])


def koen(line, cell):
    """Cell magic body: spell check, translate, then grammar check ``cell``.

    @param line rest of the ``%%koen`` line (unused)
    @param cell Korean text from the cell body
    """
    corrected_ko_colored, corrected_ko = correct_ko(cell)
    print(u'Spell checked (Korean) : ' + _to_text(corrected_ko_colored))

    translated_en = translatekoen(corrected_ko)
    corrected_en = correct_en(translated_en)
    print("")
    print('Translated (Korean to English): ' + translated_en)
    print("")
    print("Grammar checked (English) : " + corrected_en)


def load_ipython_extension(shell):
    """Register ``koen`` as a cell magic on the given IPython shell."""
    shell.register_magic_function(koen, 'cell')