Skip to content

Instantly share code, notes, and snippets.

@dvm-shlee
Last active April 21, 2022 10:19
Show Gist options
  • Save dvm-shlee/7d2fa751b0b7af8c90e2e15a38d64c4c to your computer and use it in GitHub Desktop.
Save dvm-shlee/7d2fa751b0b7af8c90e2e15a38d64c4c to your computer and use it in GitHub Desktop.

Revisions

  1. SungHo Lee revised this gist Nov 25, 2016. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions pykoen.py
    Original file line number Diff line number Diff line change
    @@ -152,12 +152,12 @@ def translatekoen(text):
    """
    base_link = "http://translate.google.com/m?hl=%s&sl=%s&q=%s"
    if (sys.version_info[0] < 3):
    to_translate = urllib.quote_plus(text.decode('utf8'))
    to_translate = urllib.quote_plus(text.encode('utf8'))
    link = base_link % ('en', 'ko', to_translate)
    request = Request(link, headers=agent)
    page = urlopen(request).read()
    else:
    to_translate = urllib.parse.quote(text.decode('utf8'))
    to_translate = urllib.parse.quote(text.encode('utf8'))
    link = base_link % ('en', 'ko', to_translate)
    request = urllib.request.Request(link, headers=agent)
    page = urllib.request.urlopen(request).read().decode("utf-8")
  2. SungHo Lee revised this gist Nov 25, 2016. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions pykoen.py
    Original file line number Diff line number Diff line change
    @@ -152,12 +152,12 @@ def translatekoen(text):
    """
    base_link = "http://translate.google.com/m?hl=%s&sl=%s&q=%s"
    if (sys.version_info[0] < 3):
    to_translate = urllib.quote_plus(text)
    to_translate = urllib.quote_plus(text.decode('utf8'))
    link = base_link % ('en', 'ko', to_translate)
    request = Request(link, headers=agent)
    page = urlopen(request).read()
    else:
    to_translate = urllib.parse.quote(text)
    to_translate = urllib.parse.quote(text.decode('utf8'))
    link = base_link % ('en', 'ko', to_translate)
    request = urllib.request.Request(link, headers=agent)
    page = urllib.request.urlopen(request).read().decode("utf-8")
  3. SungHo Lee revised this gist Nov 23, 2016. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion pykoen.py
    Original file line number Diff line number Diff line change
    @@ -170,7 +170,6 @@ def translatekoen(text):
    def koen(line, cell):
    translated_en = translatekoen(cell)
    corrected_en = correct_en(translated_en)
    print("")
    print('Translated (Korean to English): ' + translated_en)
    print("")
    print("Grammar checked (English) : " + corrected_en)
  4. SungHo Lee revised this gist Nov 23, 2016. No changes.
  5. SungHo Lee revised this gist Nov 23, 2016. 1 changed file with 1 addition and 37 deletions.
    38 changes: 1 addition & 37 deletions pykoen.py
    Original file line number Diff line number Diff line change
    @@ -146,40 +146,6 @@ def correct_en(text):
    fixed_gap += to_index-from_index-len(suggest)
    return(fixed_text)

    def correct_ko(text):
    """Pusan University's Korean Grammar and Spell checker
    @param text Korean text
    @return result of spell check by Spell checker from Pusan University
    """
    if len(text) > 300:
    print("You can't check more than 300 characters at a time.")
    data = {'text1':text}
    response = requests.post('http://speller.cs.pusan.ac.kr/PnuSpellerISAPI_201602/lib/check.asp', data=data, headers=agent)
    results = Soup(response.text, 'lxml')
    fulltext = results.findAll('div', attrs={'id':'bufUnderline'})[0].text.encode('utf8')
    errs = [err.text.encode('utf8') for err in results.findAll('td', attrs={'class':'tdErrWord'})]
    replaces_raw = [rep.contents for rep in results.findAll('td', attrs={'class':'tdReplace'})]
    replaces = []
    for rep in replaces_raw:
    if type(rep) != list:
    replaces.append(rep.encode('utf8'))
    else:
    replaces.append(rep[0].encode('utf8'))
    corrtext = {'colored':list(), 'output':list()}
    if fulltext == '':
    return(text.encode('utf8'), text.encode('utf8'))
    else:
    fulltext_c = fulltext[:]
    for i, err in enumerate(errs):
    idx = fulltext.find(err)
    idx_c = fulltext_c.find(err)
    fulltext = fulltext[:idx] + replaces[i] + fulltext[idx+len(err):]
    fulltext_c = "{}{}{}".format(fulltext_c[:idx_c],
    ColoredText.colorize(replaces[i],
    'green')[0],
    fulltext_c[idx_c+len(err):])
    return(fulltext_c, fulltext)

    def translatekoen(text):
    """Returns the translation using google translate
    source: https://github.com/mouuff/mtranslate
    @@ -202,9 +168,7 @@ def translatekoen(text):
    return(result[0])

    def koen(line, cell):
    corrected_ko_colored, corrected_ko = correct_ko(cell)
    print(u'Spell checked (Korean) : ' + corrected_ko_colored.decode('utf8'))
    translated_en = translatekoen(corrected_ko)
    translated_en = translatekoen(cell)
    corrected_en = correct_en(translated_en)
    print("")
    print('Translated (Korean to English): ' + translated_en)
  6. SungHo Lee revised this gist Nov 22, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion pykoen.py
    Original file line number Diff line number Diff line change
    @@ -209,7 +209,7 @@ def koen(line, cell):
    print("")
    print('Translated (Korean to English): ' + translated_en)
    print("")
    print("Grammar checked (Englich) : " + corrected_en)
    print("Grammar checked (English) : " + corrected_en)

    def load_ipython_extension(shell):
    shell.register_magic_function(koen, 'cell')
  7. SungHo Lee created this gist Nov 22, 2016.
    215 changes: 215 additions & 0 deletions pykoen.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,215 @@
    from __future__ import absolute_import
    import json
    import sys
    import re
    import requests
    import urllib
    import urlparse
    from urllib2 import HTTPError
    from urllib2 import URLError
    from urllib2 import urlopen
    from urllib2 import Request
    from bs4 import BeautifulSoup as Soup

    # HTTP headers passed as `headers=` to the outgoing requests made by this
    # module (the spell-checker POST and the translation request).  The
    # User-Agent value identifies the client as an MSIE 6.0 browser.
    # NOTE: the backslash continuations below are *inside* the string literal,
    # so any leading whitespace on the continued lines becomes part of the
    # header value.
    agent = {'User-Agent':
    "Mozilla/4.0 (\
    compatible;\
    MSIE 6.0;\
    Windows NT 5.1;\
    SV1;\
    .NET CLR 1.1.4322;\
    .NET CLR 2.0.50727;\
    .NET CLR 3.0.04506.30\
    )"}

    class ColoredText:
        """Wrap text in ANSI colour escape sequences.

        source: https://github.com/zoncoen/python-ginger
        """
        colors = ['black', 'red', 'green', 'orange', 'blue', 'magenta', 'cyan', 'white']
        # colour name -> (foreground code, background code), i.e. 30-37 / 40-47.
        color_dict = {name: (index + 30, index + 40)
                      for index, name in enumerate(colors)}

        @classmethod
        def colorize(cls, text, color=None, bgcolor=None):
            """Colorize text
            @param cls Class
            @param text Text
            @param color Text color (a name from `colors`) or None
            @param bgcolor Background color (a name from `colors`) or None
            @return tuple (colored text, number of escape characters added);
                    (text, 0) when no color is requested or a name is invalid
            """
            codes = []
            if color is not None:
                try:
                    codes.append(cls.color_dict[color][0])
                except KeyError:
                    print("Invalid text color: %s" % (color,))
                    return (text, 0)

            if bgcolor is not None:
                try:
                    codes.append(cls.color_dict[bgcolor][1])
                except KeyError:
                    print("Invalid background color: %s" % (bgcolor,))
                    return (text, 0)

            # Nothing to apply: return the text untouched instead of appending
            # a stray reset sequence.
            if not codes:
                return (text, 0)

            s_open = ''.join('\033[%dm' % code for code in codes)
            # Always reset after an opening sequence -- including the
            # background-only case -- so the colour does not leak into
            # whatever is printed next.
            s_close = '\033[0m'
            return ('%s%s%s' % (s_open, text, s_close), len(s_open) + len(s_close))

    def get_ginger_url(text):
        """Get URL for checking grammar using Ginger.
        source: https://github.com/zoncoen/python-ginger
        @param text English text (unicode or UTF-8 encoded byte string)
        @return URL (string) of the Ginger service with `text` in its query
        """
        # Python 3 moved these helpers into urllib.parse; fall back to the
        # Python 2 locations so the function works on either interpreter.
        try:
            from urllib.parse import urlencode, urlunparse
        except ImportError:
            from urllib import urlencode
            from urlparse import urlunparse

        API_KEY = "6ae0c3a0-afdc-4532-a810-82ded0054236"

        # Percent-encode from UTF-8 bytes: Python 2's urlencode() calls str()
        # on every value and raises UnicodeEncodeError for non-ASCII unicode.
        if isinstance(text, type(u"")):
            text = text.encode("utf-8")

        scheme = "http"
        netloc = "services.gingersoftware.com"
        path = "/Ginger/correct/json/GingerTheText"
        params = ""
        query = urlencode([
            ("lang", "US"),
            ("clientVersion", "2.0"),
            ("apiKey", API_KEY),
            ("text", text),
        ])
        fragment = ""

        return urlunparse((scheme, netloc, path, params, query, fragment))

    def get_ginger_result(text):
        """Get a result of checking grammar.
        source: https://github.com/zoncoen/python-ginger
        @param text English text
        @return result of grammar check by Ginger (the decoded JSON response)
        @raise HTTPError, URLError, IOError when the request fails
        @raise ValueError when the response is not valid UTF-8 encoded JSON

        A diagnostic line is printed before the exception is re-raised, so the
        caller sees the real failure instead of a NameError on a variable that
        was never assigned.
        """
        from contextlib import closing
        # Resolve the HTTP helpers locally so the function works on both
        # Python 3 (urllib.request / urllib.error) and Python 2 (urllib2).
        try:
            from urllib.request import urlopen
            from urllib.error import HTTPError, URLError
        except ImportError:
            from urllib2 import urlopen, HTTPError, URLError

        url = get_ginger_url(text)

        try:
            # closing() releases the connection even if read() fails.
            with closing(urlopen(url)) as response:
                raw = response.read()
        except HTTPError as e:
            print("HTTP Error: %s" % (e.code,))
            raise
        except URLError as e:
            print("URL Error: %s" % (e.reason,))
            raise
        except IOError as e:
            print("I/O error (%s): %s" % (e.errno, e.strerror))
            raise

        try:
            # UnicodeDecodeError is a ValueError, so a badly encoded body is
            # reported the same way as malformed JSON.
            return json.loads(raw.decode('utf-8'))
        except ValueError:
            print("Value Error: Invalid server response.")
            raise

    def correct_en(text):
        """Grammar-check English text with Ginger and apply its suggestions.
        @param text English text
        @return `text` followed by " - Good English! :)" when Ginger reports
                nothing; otherwise `text` with each flagged span replaced by
                Ginger's first suggestion, colored green
        """
        if len(text) > 600:
            print("You can't check more than 600 characters at a time.")
        findings = get_ginger_result(text)["LightGingerTheTextResult"]

        # Nothing flagged by the checker
        if not findings:
            return text + " - Good English! :)"

        corrected = text
        # Running difference between an offset in the original text and the
        # same position in `corrected` (escape codes added, plus the length
        # change caused by each replacement so far).
        shift = 0
        for finding in findings:
            candidates = finding["Suggestions"]
            if not candidates:
                continue
            start = finding["From"]
            stop = finding["To"] + 1  # "To" is inclusive
            replacement = candidates[0]["Text"]

            painted, escape_len = ColoredText.colorize(replacement, 'green')
            corrected = corrected[:start + shift] + painted + corrected[stop + shift:]
            shift += escape_len + len(replacement) - (stop - start)
        return corrected

    def correct_ko(text):
        """Pusan University's Korean Grammar and Spell checker
        @param text Korean text
        @return tuple (colored, plain) of UTF-8 encoded strings: the checked
                text with each replacement colored green, and the same text
                without color codes. When the checker returns no text, both
                items are the UTF-8 encoded input.
        """
        if len(text) > 300:
            print("You can't check more than 300 characters at a time.")
        data = {'text1': text}
        response = requests.post('http://speller.cs.pusan.ac.kr/PnuSpellerISAPI_201602/lib/check.asp', data=data, headers=agent)
        results = Soup(response.text, 'lxml')

        # A missing result <div> is treated like an empty one rather than
        # raising IndexError.
        underlined = results.findAll('div', attrs={'id': 'bufUnderline'})
        fulltext = underlined[0].text.encode('utf8') if underlined else b''
        if not fulltext:
            return (text.encode('utf8'), text.encode('utf8'))

        errs = [err.text.encode('utf8')
                for err in results.findAll('td', attrs={'class': 'tdErrWord'})]
        replaces = []
        for cell in results.findAll('td', attrs={'class': 'tdReplace'}):
            # `.contents` is always a list of child nodes; only the first one
            # is used (presumably the first suggestion -- confirm against the
            # page markup).  An empty cell means there is nothing to apply.
            children = cell.contents
            replaces.append(children[0].encode('utf8') if children else None)

        fulltext_c = fulltext
        # zip() stops at the shorter list, so a missing replacement cell
        # cannot cause an IndexError.
        for err, rep in zip(errs, replaces):
            if rep is None or not err:
                continue
            idx = fulltext.find(err)
            idx_c = fulltext_c.find(err)
            # find() returns -1 when the word is absent; slicing with -1 would
            # silently corrupt the text, so leave it unchanged instead.
            if idx < 0 or idx_c < 0:
                continue
            fulltext = fulltext[:idx] + rep + fulltext[idx + len(err):]
            fulltext_c = "{}{}{}".format(fulltext_c[:idx_c],
                                         ColoredText.colorize(rep, 'green')[0],
                                         fulltext_c[idx_c + len(err):])
        return (fulltext_c, fulltext)

    def translatekoen(text):
        """Returns the translation using google translate
        source: https://github.com/mouuff/mtranslate
        @param text Korean text (unicode or UTF-8 encoded byte string)
        @return the first translated string found in the response page, or ""
                when the page contains no match
        """
        from contextlib import closing
        # Import the URL helpers for the running interpreter here, so the
        # function does not depend on `import urllib` having loaded the
        # urllib.parse / urllib.request submodules on Python 3.
        if sys.version_info[0] < 3:
            from urllib import quote_plus as quote_text
            from urllib2 import Request as make_request
            from urllib2 import urlopen as open_url
        else:
            from urllib.parse import quote as quote_text
            from urllib.request import Request as make_request
            from urllib.request import urlopen as open_url

        base_link = "http://translate.google.com/m?hl=%s&sl=%s&q=%s"
        # Quote from UTF-8 bytes: Python 2's quote_plus() raises KeyError on
        # unicode text containing non-ASCII (i.e. Korean) characters.
        if isinstance(text, type(u"")):
            text = text.encode('utf8')
        link = base_link % ('en', 'ko', quote_text(text))

        # closing() releases the connection even if read() fails.
        with closing(open_url(make_request(link, headers=agent))) as response:
            page = response.read()
        if sys.version_info[0] >= 3:
            page = page.decode("utf-8")

        match = re.search(r'class="t0">(.*?)<', page)
        if match is None:
            return ""
        return match.group(1)

    def koen(line, cell):
        """Cell magic: spell-check Korean, translate to English, grammar-check.

        Prints the spell-checked Korean text, its English translation, and the
        grammar-checked English text.
        @param line text given on the magic's own line (not used)
        @param cell cell body containing the Korean text
        """
        corrected_ko_colored, corrected_ko = correct_ko(cell)
        print(u'Spell checked (Korean) : ' + corrected_ko_colored.decode('utf8'))
        translated_en = translatekoen(corrected_ko)
        corrected_en = correct_en(translated_en)
        print("")
        print('Translated (Korean to English): ' + translated_en)
        print("")
        # Label previously read "Englich" (typo).
        print("Grammar checked (English) : " + corrected_en)

    def load_ipython_extension(shell):
        """Register `koen` on `shell` as a cell magic.
        @param shell object providing `register_magic_function`
        """
        shell.register_magic_function(koen, magic_kind='cell')