Skip to content

Instantly share code, notes, and snippets.

@edrabc
Forked from vladimirgamalyan/mccmnc.py
Last active August 29, 2015 14:24
Show Gist options
  • Select an option

  • Save edrabc/5665e0043317ea7842f9 to your computer and use it in GitHub Desktop.

Select an option

Save edrabc/5665e0043317ea7842f9 to your computer and use it in GitHub Desktop.

Revisions

  1. edrabc revised this gist Jul 1, 2015. 3 changed files with 16 additions and 6 deletions.
    4 changes: 4 additions & 0 deletions mcc-mnc-wiki
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,4 @@
    ```sh
    pip install -r requirements.txt
    python mccmnc.py
    ```
    15 changes: 9 additions & 6 deletions mccmnc.py
    Original file line number Diff line number Diff line change
    @@ -7,7 +7,7 @@
    import re


    def add_operator(mcc, mnc, brand, operator, country, country_code, db):
    def add_operator(mcc, mnc, brand, operator, status, country, country_code, db):
    assert re.match('^\d{3}$', mcc)
    assert re.match('^\d{2,3}$', mnc)
    if mcc not in db:
    @@ -17,7 +17,8 @@ def add_operator(mcc, mnc, brand, operator, country, country_code, db):
    'brand': brand,
    'operator': operator,
    'country': country,
    'countryCode': country_code
    'countryCode': country_code,
    'status': status
    }


    @@ -28,23 +29,25 @@ def scan_table(table, country, country_code, db):
    assert hdr[1].text == u'MNC'
    assert hdr[2].text == u'Brand'
    assert hdr[3].text == u'Operator'
    assert hdr[4].text == u'Status'
    for row in rows:
    td = row.find_all('td')
    mcc = td[0].text
    mnc = td[1].text
    brand = td[2].text.replace('[citation needed]', '')
    operator = td[3].text.replace('[citation needed]', '')
    status = td[4].text
    if mcc and mnc and '?' not in mnc:
    if '-' in mnc:
    # TODO: mnc range
    pass
    else:
    add_operator(mcc, mnc, brand, operator, country, country_code, db)
    add_operator(mcc, mnc, brand, operator, status, country, country_code, db)


    def contains_headline(tag):
    return tag.find(class_='mw-headline') is not None


    def main():
    db = {}
    @@ -57,7 +60,7 @@ def main():
    country = tab_title.pop(0)
    country_code = ''.join(tab_title)
    scan_table(table, country, country_code, db)

    with open('mccmnc.json', 'w') as f:
    json.dump(db, f, indent=4, sort_keys=True)

    3 changes: 3 additions & 0 deletions requirements.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,3 @@
    beautifulsoup4==4.3.2
    lxml==3.4.4
    requests==2.7.0
  2. @vladimirgamalyan vladimirgamalyan created this gist Jun 14, 2015.
    66 changes: 66 additions & 0 deletions mccmnc.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,66 @@
    #!/usr/bin/env python
    # -*- coding: utf-8 -*-

    import requests
    from bs4 import BeautifulSoup
    import json
    import re


    def add_operator(mcc, mnc, brand, operator, country, country_code, db):
        """Record one operator in ``db`` under ``db[mcc][mnc]``.

        ``mcc`` and ``mnc`` are stripped of surrounding whitespace and then
        validated: ``mcc`` must be exactly three digits and ``mnc`` two or
        three digits.  If an entry for the same (mcc, mnc) pair already
        exists it is overwritten.

        Args:
            mcc: mobile country code as a string of digits.
            mnc: mobile network code as a string of digits.
            brand: value stored under the ``'brand'`` key.
            operator: value stored under the ``'operator'`` key.
            country: value stored under the ``'country'`` key.
            country_code: value stored under the ``'countryCode'`` key.
            db: dictionary that is updated in place.

        Raises:
            AssertionError: if ``mcc`` or ``mnc`` is malformed.
        """
        # Scraped cell text may carry surrounding whitespace; normalise it
        # before validating so it can never end up inside a dictionary key.
        mcc = mcc.strip()
        mnc = mnc.strip()
        # Raw strings avoid the invalid "\d" string escape, and \Z (unlike $)
        # does not accept a trailing newline after the digits.
        assert re.match(r'^\d{3}\Z', mcc), 'malformed MCC: %r' % (mcc,)
        assert re.match(r'^\d{2,3}\Z', mnc), 'malformed MNC: %r' % (mnc,)
        db.setdefault(mcc, {})[mnc] = {
            'brand': brand,
            'operator': operator,
            'country': country,
            'countryCode': country_code,
        }


    def scan_table(table, country, country_code, db):
        """Add every usable operator row of one MCC/MNC table to ``db``.

        The first ``<tr>`` of ``table`` must contain ``<th>`` cells whose text
        is exactly MCC, MNC, Brand and Operator, in that order.  Each following
        row contributes one ``add_operator`` call, except that a row is skipped
        when:

        * it has fewer than four ``<td>`` cells,
        * its MCC or MNC cell is empty after stripping whitespace,
        * its MNC contains ``'?'``, or
        * its MNC contains ``'-'`` (MNC ranges are not expanded yet).

        Args:
            table: object exposing ``find_all`` the way a BeautifulSoup tag does.
            country: country name passed through to ``add_operator``.
            country_code: country code passed through to ``add_operator``.
            db: dictionary that is updated in place.

        Raises:
            AssertionError: if the header row does not have the expected titles.
            IndexError: if the table has no rows or fewer than four header cells.
        """
        rows = table.find_all('tr')
        hdr = rows[0].find_all('th')
        assert hdr[0].text == u'MCC'
        assert hdr[1].text == u'MNC'
        assert hdr[2].text == u'Brand'
        assert hdr[3].text == u'Operator'
        for row in rows[1:]:
            td = row.find_all('td')
            if len(td) < 4:
                # Not a full data row; indexing td[0..3] would raise IndexError.
                continue
            # Strip so whitespace-only cells count as empty and stored values
            # do not keep stray leading/trailing whitespace.
            mcc = td[0].text.strip()
            mnc = td[1].text.strip()
            brand = td[2].text.replace('[citation needed]', '').strip()
            operator = td[3].text.replace('[citation needed]', '').strip()
            if not (mcc and mnc) or '?' in mnc:
                continue
            if '-' in mnc:
                # TODO: mnc range
                continue
            add_operator(mcc, mnc, brand, operator, country, country_code, db)


    def contains_headline(tag):
        """Return True when ``tag.find`` locates an element of class ``mw-headline``."""
        headline = tag.find(class_='mw-headline')
        if headline is None:
            return False
        return True


    def main():
        """Download the Wikipedia MCC page and dump its operator tables as JSON.

        Every table that has a ``<th>`` with the text ``MCC`` is scanned.  The
        table's title is taken from the nearest preceding sibling that contains
        an ``mw-headline`` element; it is split on ``' - '`` into a country name
        and an optional country code.  The collected data is written to
        ``mccmnc.json`` in the current directory.

        Raises:
            requests.exceptions.RequestException: if the download fails, times
                out, or returns an HTTP error status.
            AssertionError: if a table title has more than two ``' - '`` parts.
        """
        db = {}
        # A timeout keeps the script from hanging forever, and checking the
        # status prevents an error page from being parsed into an empty result
        # that would then overwrite mccmnc.json.
        response = requests.get('https://en.wikipedia.org/wiki/Mobile_country_code', timeout=30)
        response.raise_for_status()
        # NOTE(review): 'xml' selects BeautifulSoup's XML parser although the
        # page is HTML; left unchanged here because switching parsers may alter
        # which tables are found -- confirm before changing.
        soup = BeautifulSoup(response.text, 'xml')
        for th in soup.find_all('th', text='MCC'):
            table = th.find_parent('table')
            heading = table.find_previous_sibling(contains_headline)
            tab_title = heading.find(class_='mw-headline').findAll(text=True)
            tab_title = ''.join(tab_title).split(' - ')
            assert (len(tab_title) == 1) or (len(tab_title) == 2)
            country = tab_title.pop(0)
            # Empty string when the title carried no country code part.
            country_code = ''.join(tab_title)
            scan_table(table, country, country_code, db)

        with open('mccmnc.json', 'w') as f:
            json.dump(db, f, indent=4, sort_keys=True)


    # Run the scraper only when this file is executed as a script, not on import.
    if __name__ == '__main__':
        main()