Skip to content

Instantly share code, notes, and snippets.

@edrabc
Forked from vladimirgamalyan/mccmnc.py
Last active August 29, 2015 14:24
Show Gist options
  • Select an option

  • Save edrabc/5665e0043317ea7842f9 to your computer and use it in GitHub Desktop.

Select an option

Save edrabc/5665e0043317ea7842f9 to your computer and use it in GitHub Desktop.
```sh
pip install -r requirements.txt
python mccmnc.py
```
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import json
import re
def add_operator(mcc, mnc, brand, operator, status, country, country_code, db):
    """Record one network operator in ``db`` under its MCC and MNC.

    ``db`` is a two-level dict (``db[mcc][mnc] -> details``) and is updated
    in place. If an entry already exists for the same MCC/MNC pair it is
    overwritten, so the last operator added wins.

    Args:
        mcc: Mobile country code; exactly three ASCII digits once
            surrounding whitespace is removed.
        mnc: Mobile network code; two or three ASCII digits once
            surrounding whitespace is removed.
        brand: Stored under the ``'brand'`` key.
        operator: Stored under the ``'operator'`` key.
        status: Stored under the ``'status'`` key.
        country: Stored under the ``'country'`` key.
        country_code: Stored under the ``'countryCode'`` key.
        db: Dict to update.

    Raises:
        ValueError: If ``mcc`` or ``mnc`` does not have the expected shape.
    """
    # Strip first so a stray newline or space around the code can never end
    # up inside a dictionary key.
    mcc = mcc.strip()
    mnc = mnc.strip()

    # Explicit raises instead of ``assert``: assertions vanish under
    # ``python -O``. ``\Z`` anchors at the real end of the string, whereas
    # ``$`` would also accept a trailing newline.
    if not re.match(r'[0-9]{3}\Z', mcc):
        raise ValueError('invalid MCC: %r' % (mcc,))
    if not re.match(r'[0-9]{2,3}\Z', mnc):
        raise ValueError('invalid MNC: %r' % (mnc,))

    # Duplicate MCC/MNC pairs are tolerated; the later entry replaces the
    # earlier one.
    db.setdefault(mcc, {})[mnc] = {
        'brand': brand,
        'operator': operator,
        'country': country,
        'countryCode': country_code,
        'status': status,
    }
def scan_table(table, country, country_code, db):
    """Add every usable operator row of one MCC/MNC table to ``db``.

    The first ``<tr>`` of ``table`` must be a header whose first five
    ``<th>`` cells read MCC, MNC, Brand, Operator and Status. Each following
    row is passed to ``add_operator`` unless it is skipped (see below).

    Rows are skipped when they have fewer than five ``<td>`` cells, when the
    MCC or MNC cell is empty, when the MNC contains ``?``, or when the MNC
    is a range (contains ``-``), which is not supported yet.

    Args:
        table: Parsed table element; only ``find_all`` on it and ``.text``
            on its cells are used.
        country: Country name recorded with every operator of this table.
        country_code: Country code recorded with every operator.
        db: Dict updated in place through ``add_operator``.

    Raises:
        ValueError: If the table has no rows or its header does not start
            with the expected five column names.
    """
    expected_header = ['MCC', 'MNC', 'Brand', 'Operator', 'Status']

    rows = table.find_all('tr')
    header_cells = rows[0].find_all('th') if rows else []
    # Compare stripped text so whitespace around a header label does not
    # cause a false layout mismatch.
    header = [cell.text.strip() for cell in header_cells[:len(expected_header)]]
    if header != expected_header:
        raise ValueError('unexpected table header: %r' % (header,))

    for row in rows[1:]:
        cells = row.find_all('td')
        if len(cells) < len(expected_header):
            # Too few cells to be an operator entry; skip it rather than
            # fail with an IndexError below.
            continue

        mcc = cells[0].text.strip()
        mnc = cells[1].text.strip()
        brand = cells[2].text.replace('[citation needed]', '').strip()
        operator = cells[3].text.replace('[citation needed]', '').strip()
        status = cells[4].text.strip()

        if not mcc or not mnc or '?' in mnc:
            continue
        if '-' in mnc:
            # TODO: mnc range
            continue
        add_operator(mcc, mnc, brand, operator, status, country, country_code, db)
def contains_headline(tag):
    """Return True when ``tag.find`` locates an element with class ``mw-headline``."""
    headline = tag.find(class_='mw-headline')
    return headline is not None
def main():
    """Scrape Wikipedia's mobile country code page into ``mccmnc.json``.

    Every table with an ``MCC`` header cell is located, the nearest
    preceding sibling that contains an ``mw-headline`` element supplies the
    section title ("Country" or "Country - CC"), and the table's rows are
    collected through ``scan_table``. The result is written as indented,
    key-sorted JSON to ``mccmnc.json`` in the current directory.

    Raises:
        requests.exceptions.RequestException: If the download fails, times
            out, or returns an HTTP error status.
        ValueError: If a table has no section headline or the headline does
            not have the expected shape.
    """
    url = 'https://en.wikipedia.org/wiki/Mobile_country_code'
    # requests never times out unless told to; bound the wait so the script
    # cannot hang forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of parsing an error page and
    # silently writing an empty database.
    response.raise_for_status()

    # NOTE(review): 'xml' selects the XML parser although the page is HTML;
    # kept as in the original -- confirm this is intentional.
    soup = BeautifulSoup(response.text, 'xml')

    db = {}
    for th in soup.find_all('th', text='MCC'):
        table = th.find_parent('table')
        heading = table.find_previous_sibling(contains_headline)
        if heading is None:
            raise ValueError('no section headline found before an MCC table')

        title = ''.join(heading.find(class_='mw-headline').find_all(text=True))
        parts = title.split(' - ')
        # Expect "Country" or "Country - CC"; anything else means the page
        # layout differs from what this script understands.
        if len(parts) not in (1, 2):
            raise ValueError('unexpected section title: %r' % (title,))

        country = parts[0]
        country_code = ''.join(parts[1:])
        scan_table(table, country, country_code, db)

    with open('mccmnc.json', 'w') as f:
        json.dump(db, f, indent=4, sort_keys=True)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
beautifulsoup4==4.3.2
lxml==3.4.4
requests==2.7.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment