edrabc · August 29, 2015 14:24 · Jul 1, 2015 · Jun 14, 2015
diff --git a/mcc-mnc-wiki b/mcc-mnc-wiki
@@ -0,0 +1,4 @@
+```sh
+pip install -r requirements.txt
+python mccmnc.py
+```
diff --git a/mccmnc.py b/mccmnc.py
@@ -7,7 +7,7 @@
 import re
 
 
-def add_operator(mcc, mnc, brand, operator, country, country_code, db):
+def add_operator(mcc, mnc, brand, operator, status, country, country_code, db):
     assert re.match('^\d{3}$', mcc)
     assert re.match('^\d{2,3}$', mnc)
     if mcc not in db:
@@ -17,7 +17,8 @@ def add_operator(mcc, mnc, brand, operator, country, country_code, db):
         'brand': brand,
         'operator': operator,
         'country': country,
-        'countryCode': country_code
+        'countryCode': country_code,
+        'status': status
     }
 
 
@@ -28,23 +29,25 @@ def scan_table(table, country, country_code, db):
     assert hdr[1].text == u'MNC'
     assert hdr[2].text == u'Brand'
     assert hdr[3].text == u'Operator'
+    assert hdr[4].text == u'Status'
     for row in rows:
         td = row.find_all('td')
         mcc = td[0].text
         mnc = td[1].text
         brand = td[2].text.replace('[citation needed]', '')
         operator = td[3].text.replace('[citation needed]', '')
+        status = td[4].text
         if mcc and mnc and '?' not in mnc:
             if '-' in mnc:
                 # TODO: mnc range
                 pass
             else:
-                add_operator(mcc, mnc, brand, operator, country, country_code, db)
-    
+                add_operator(mcc, mnc, brand, operator, status, country, country_code, db)
+
 
 def contains_headline(tag):
     return tag.find(class_='mw-headline') is not None
-    
+
 
 def main():
     db = {}
@@ -57,7 +60,7 @@ def main():
         country = tab_title.pop(0)
         country_code = ''.join(tab_title)
         scan_table(table, country, country_code, db)
-  
+
     with open('mccmnc.json', 'w') as f:
         json.dump(db, f, indent=4, sort_keys=True)
 

diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,3 @@
+beautifulsoup4==4.3.2
+lxml==3.4.4
+requests==2.7.0
diff --git a/mccmnc.py b/mccmnc.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import requests
+from bs4 import BeautifulSoup
+import json
+import re
+
+
+def add_operator(mcc, mnc, brand, operator, country, country_code, db):
+    """Record one operator in ``db`` under ``db[mcc][mnc]``.
+
+    ``mcc`` must be exactly three digits and ``mnc`` two or three digits.
+    Surrounding whitespace (for example a trailing newline left over from
+    scraped cell text) is stripped before validation so it never ends up in
+    the dictionary keys. An existing entry for the same MCC/MNC pair is
+    overwritten.
+
+    Raises:
+        ValueError: if ``mcc`` or ``mnc`` is not in the expected format.
+    """
+    mcc = mcc.strip()
+    mnc = mnc.strip()
+    # Explicit checks instead of ``assert`` so validation survives
+    # ``python -O``; raw strings keep ``\d`` from being read as a string escape.
+    if not re.match(r'\d{3}$', mcc):
+        raise ValueError('invalid MCC: %r' % (mcc,))
+    if not re.match(r'\d{2,3}$', mnc):
+        raise ValueError('invalid MNC: %r' % (mnc,))
+    db.setdefault(mcc, {})[mnc] = {
+        'brand': brand,
+        'operator': operator,
+        'country': country,
+        'countryCode': country_code,
+    }
+
+
+def scan_table(table, country, country_code, db):
+    """Add every usable operator row of one table to ``db``.
+
+    The first ``tr`` must be a header whose first four ``th`` cells read
+    MCC / MNC / Brand / Operator (checked with asserts). Rows with an empty
+    MCC or MNC, an MNC containing ``?``, or an MNC containing ``-`` (a range,
+    not handled yet) are skipped; all others go to ``add_operator``.
+    """
+    all_rows = table.find_all('tr')
+    header_cells = all_rows.pop(0).find_all('th')
+    expected_titles = (u'MCC', u'MNC', u'Brand', u'Operator')
+    for index, title in enumerate(expected_titles):
+        assert header_cells[index].text == title
+    for data_row in all_rows:
+        cells = data_row.find_all('td')
+        mcc = cells[0].text
+        mnc = cells[1].text
+        brand = cells[2].text.replace('[citation needed]', '')
+        operator = cells[3].text.replace('[citation needed]', '')
+        if not (mcc and mnc) or '?' in mnc:
+            continue
+        if '-' in mnc:
+            # TODO: mnc range
+            continue
+        add_operator(mcc, mnc, brand, operator, country, country_code, db)
+
+
+def contains_headline(tag):
+    """Return True when ``tag.find(class_='mw-headline')`` finds a match."""
+    headline = tag.find(class_='mw-headline')
+    return headline is not None
+
+
+def main():
+    """Scrape Wikipedia's mobile country code page into ``mccmnc.json``.
+
+    Every table that has a ``th`` cell with the text ``MCC`` is scanned. The
+    nearest preceding sibling containing an ``mw-headline`` element supplies
+    the table title, which is split on `` - `` into the country name and an
+    optional country code.
+
+    Raises:
+        requests.RequestException: if the page cannot be fetched (network
+            error, timeout, or an HTTP error status).
+    """
+    db = {}
+    # Time out instead of hanging forever, and fail on HTTP errors before an
+    # existing mccmnc.json gets overwritten with the result of an error page.
+    response = requests.get('https://en.wikipedia.org/wiki/Mobile_country_code', timeout=30)
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, 'xml')
+    for th in soup.find_all('th', text='MCC'):
+        table = th.find_parent('table')
+        headline = table.find_previous_sibling(contains_headline).find(class_='mw-headline')
+        tab_title = ''.join(headline.findAll(text=True)).split(' - ')
+        # The title is either "Country" or "Country - CODE".
+        assert (len(tab_title) == 1) or (len(tab_title) == 2)
+        country = tab_title.pop(0)
+        country_code = ''.join(tab_title)
+        scan_table(table, country, country_code, db)
+
+    with open('mccmnc.json', 'w') as f:
+        json.dump(db, f, indent=4, sort_keys=True)
+
+
+# Run the scraper only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()
No results found