Last active
June 14, 2022 18:48
-
-
Save nickolasclarke/fe353a1801bb6c91902f27f5d974b5b7 to your computer and use it in GitHub Desktop.
Revisions
-
nickolasclarke revised this gist
Jun 14, 2022. 2 changed files with 101 additions and 372 deletions. There are no files selected for viewing
"""Scrape Bureau of Reclamation power plant project pages into a CSV file.

Running this module fetches one page per entry in ``power_plants`` and writes
the collected records to ``power_plant_data.csv``.
"""
import requests
import pandas as pd
from bs4 import BeautifulSoup

BASE_URL = 'https://www.usbr.gov/projects/index.php?id='

# Seconds to wait on the server. Without a timeout requests.get() can block
# forever on a stalled connection and hang the whole run.
REQUEST_TIMEOUT = 30


def _attempt(getter):
    """Return ``getter()``, or ``"missing"`` if the value is empty or absent.

    The lookup is passed as a callable so that a missing element (which shows
    up as an AttributeError/IndexError/TypeError while walking the parse
    tree) only blanks out that one field instead of aborting the whole page.
    """
    try:
        value = getter()
    except (AttributeError, IndexError, TypeError):
        return "missing"
    return value if value else "missing"


def scrape_power_plant(id: int) -> dict:
    """Return power plant information from the "Main-well" div as a dict.

    Args:
        id: Project id appended to ``BASE_URL``. (The name shadows the
            builtin but is kept so existing callers keep working.)

    Returns:
        A dict with the keys ``name``, ``state``, ``region``, ``overview``,
        ``plan`` and ``contact``, plus one key per label/value pair found in
        the page's "Details" table. Fields that cannot be located are set to
        ``"missing"``. An empty dict is returned when the page cannot be
        fetched or has no usable "Details" section.

    Note: highly brittle -- it depends on the exact markup of the site.
    """
    try:
        page = requests.get(BASE_URL + str(id), timeout=REQUEST_TIMEOUT)
        # Fail on HTTP errors instead of parsing an error page as a plant.
        page.raise_for_status()
        # Name the parser explicitly so results do not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(page.content, "html.parser")
        # main div class "Main-well"
        main_div = soup.find("div", {"class": "Main-well"})

        # NOTE: a "related_links" field (hrefs of the links in the fourth <p>
        # of the main div) was disabled in the original and stays disabled.
        res = {
            "name": _attempt(lambda: main_div.h1.string),
            "state": _attempt(
                lambda: soup.select('a[href^="/projects/facilities.php?state="]')[0].text
            ),
            "region": _attempt(
                lambda: soup.select('a[href^="/projects/facilities.php?region="]')[0].text
            ),
            "overview": _attempt(lambda: main_div.find("div", id='History').p.text),
            "plan": _attempt(lambda: main_div.find("div", id='Plan').p.text),
            "contact": _attempt(
                lambda: list(main_div.find("div", {"class": "contactRow"}).stripped_strings)
            ),
        }

        # Extract details and flatten into the main dict: the table cells
        # alternate label, value, label, value, ... (a trailing unpaired cell
        # is ignored).
        cells = [cell.text for cell in main_div.find("div", {"id": "Details"}).find_all('td')]
        res.update(dict(zip(cells[0::2], cells[1::2])))

        print(f'{res["name"]} processed. . .')
        return res
    except Exception as e:
        # Best-effort scrape: report the failure and carry on with the next
        # plant rather than aborting the whole run.
        print(f'Plant {id} failed to process. . .', e)
        return {}


# One single-entry dict per plant, mapping the plant name to its project id.
power_plants = [
    {'Alcova Powerplant': 524},
    {'Anderson Ranch Powerplant': 525},
    {'Big Thompson Powerplant': 578},
    {'Black Canyon Powerplant': 527},
    {'Blue Mesa Powerplant': 529},
    {'Boise River Diversion Powerplant': 530},
    {'Boysen Powerplant': 531},
    {'Buffalo Bill Powerplant': 533},
    {'Canyon Ferry Powerplant': 536},
    {'Chandler Powerplant': 538},
    {'Crystal Powerplant': 539},
    {'Davis Powerplant': 541},
    {'Deer Creek Powerplant': 542},
    {'Elephant Butte Powerplant': 543},
    {'Estes Powerplant': 544},
    {'Flaming Gorge Powerplant': 545},
    {'Flatiron Powerplant': 546},
    {'Folsom Powerplant': 547},
    {'Fontenelle Powerplant': 549},
    {'Fremont Canyon Powerplant': 550},
    {'Glen Canyon Powerplant': 522},
    {'Glendo Powerplant': 523},
    {'Grand Coulee Powerplant': 526},
    {'Green Mountain Powerplant': 528},
    {'Green Springs Powerplant': 534},
    {'Guernsey Powerplant': 535},
    {'Heart Mountain Powerplant': 537},
    {'Hoover Powerplant': 540},
    {'Hungry Horse Powerplant': 548},
    {'Judge Francis Carr Powerplant': 532},
    {'Keswick Powerplant': 579},
    {'Kortes Powerplant': 555},
    {'Lewiston Powerplant': 557},
    {'Lower Molina Powerplant': 558},
    {'Marys Lake Powerplant': 561},
    {'McPhee Powerplant': 563},
    {'Minidoka Powerplant': 565},
    {'Morrow Point Powerplant': 567},
    {'Mount Elbert Powerplant': 568},
    {'New Melones Powerplant': 569},
    {'Nimbus Powerplant': 570},
    {'O`Neill Powerplant': 571},
    {'Palisades Powerplant': 572},
    {'Parker Powerplant': 573},
    {'Pilot Butte Powerplant': 574},
    {'Pole Hill Powerplant': 575},
    {'Roza Powerplant': 576},
    {'San Luis (William R. Gianelli) Powerplant': 577},
    {'Seminoe Powerplant': 551},
    {'Shasta Powerplant': 552},
    {'Shoshone Powerplant': 553},
    {'Spirit Mountain Powerplant': 554},
    {'Spring Creek Powerplant': 556},
    {'Stampede Powerplant': 559},
    {'Towaoc Powerplant': 560},
    {'Trinity Powerplant': 562},
    {'Upper Molina Powerplant': 564},
    {'Yellowtail Powerplant': 566},
]

results = [
    scrape_power_plant(plant_id)
    for plant in power_plants
    for plant_id in plant.values()
]

# Failed scrapes come back as {}; drop them so the CSV has no all-empty rows.
pd.DataFrame([row for row in results if row]).to_csv('power_plant_data.csv')
nickolasclarke created this gist
Jun 14, 2022. There are no files selected for viewing
"""Scrape Bureau of Reclamation dam project pages into a CSV file.

Running this module fetches one page per active entry in ``dams`` and writes
the collected records to ``dam_data.csv``.
"""
import requests
import pandas as pd
from bs4 import BeautifulSoup

BASE_URL = 'https://www.usbr.gov/projects/index.php?id='

# Seconds to wait on the server. Without a timeout requests.get() can block
# forever on a stalled connection and hang the whole run.
REQUEST_TIMEOUT = 30


def scrape_dam(id: int) -> dict:
    """Return dam information from the "Main-well" div as a dict.

    Args:
        id: Project id appended to ``BASE_URL``. Both ``int`` and ``str``
            values are accepted (the ``dams`` table stores ids as strings).
            The name shadows the builtin but is kept so existing callers
            keep working.

    Returns:
        A dict with the keys ``name``, ``state``, ``region``,
        ``related_links``, ``overview``, ``geology`` and ``contact``, plus one
        key per label/value pair found in the page's "Details" table. An
        empty dict is returned when the page cannot be fetched or does not
        have the expected layout.

    Note: highly brittle -- fields are located by their position in the page.
    """
    try:
        # str() so an int id (as the annotation promises) does not raise
        # TypeError on concatenation.
        page = requests.get(BASE_URL + str(id), timeout=REQUEST_TIMEOUT)
        # Fail on HTTP errors instead of parsing an error page as a dam.
        page.raise_for_status()
        # Name the parser explicitly so results do not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(page.content, "html.parser")
        # main div class "Main-well"
        main_div = soup.find("div", {"class": "Main-well"})
        paragraphs = main_div.find_all("p")

        res = {
            "name": main_div.h1.string,
            "state": paragraphs[0].find_all('a')[0].string,
            "region": paragraphs[0].find_all('a')[1].string,
            "related_links": [link.get('href') for link in paragraphs[3].find_all('a')],
            "overview": paragraphs[4].find_all('p')[0].string,
            "geology": paragraphs[4].find_all('p')[2].string,
            "contact": main_div.find_all("div", {"class": "contactRow"})[0].text,
        }

        # Extract details and flatten into the main dict: the table cells
        # alternate label, value, label, value, ... (a trailing unpaired cell
        # is ignored).
        cells = [cell.text for cell in main_div.find("div", {"id": "Details"}).find_all('td')]
        res.update(dict(zip(cells[0::2], cells[1::2])))

        print(f'Dam {res["name"]} processed. . .')
        return res
    except Exception as e:
        # Best-effort scrape: report the failure and carry on with the next
        # dam rather than aborting the whole run.
        print(f'Dam {id} failed to process. . .', e)
        return {}


# Dam name -> {'id': project id}. Only the uncommented entries are scraped;
# the commented-out entries are kept as a catalog of the remaining ids.
dams = {
    "Agate Dam": {'id': '20'},
    "Agency Valley Dam": {'id': '261'},
    "Alcova Dam": {'id': '21'},
    # "Almena Diversion Dam":{'id':'22'},
    # "Altus Dam":{'id':'23'},
    # "American Diversion Dam":{'id':'263'},
    # "American Falls Dam":{'id':'24'},
    # "Anchor Dam":{'id':'25'},
    # "Anderson Ranch Dam":{'id':'259'},
    # "Angostura Dam":{'id':'4'},
    # "Angostura Diversion Dam":{'id':'3'},
    # "Anita Dam":{'id':'5'},
    # "Arbuckle Dam":{'id':'269'},
    # "Arrowrock Dam":{'id':'6'},
    # "Arthur R Bowman Dam":{'id':'45'},
    # "Arthur V Watkins Dam":{'id':'48'},
    # "Avalon Dam":{'id':'49'},
    # "B F Sisk Dam":{'id':'57'},
    # "Barretts Diversion Dam":{'id':'271'},
    # "Bartlett Dam":{'id':'51'},
    # "Bartley Diversion Dam":{'id':'54'},
    # "Belle Fourche Dam":{'id':'71'},
    # "Belle Fourche Diversion Dam":{'id':'56'},
    # "Big Sandy Dam":{'id':'272'},
    # "Black Canyon Diversion Dam":{'id':'7'},
    # "Blue Mesa Dam":{'id':'62'},
    # "Boca Dam":{'id':'64'},
    # "Boise River Diversion Dam":{'id':'8'},
    # "Bonny Dam":{'id':'66'},
    # "Box Butte Dam":{'id':'67'},
    # "Boysen Dam":{'id':'26'},
    # "Bradbury Dam":{'id':'254'},
    # "Brantley Dam":{'id':'28'},
    # "Bretch Diversion Dam":{'id':'273'},
    # "Broadhead Diversion Dam":{'id':'274'},
    # "Buckhorn Dam":{'id':'31'},
    # "Buffalo Bill Dam":{'id':'33'},
    # "Bull Lake Dam":{'id':'36'},
    # "Bully Creek Dam":{'id':'37'},
    # "Bumping Lake Dam":{'id':'40'},
    # "Caballo Dam":{'id':'42'},
    # "Cambridge Diversion Dam":{'id':'43'},
    # "Camp Creek Diversion Dam":{'id':'275'},
    # "Camp Dyer Diversion Dam":{'id':'46'},
    # "Canyon Ferry Dam":{'id':'50'},
    # "Carpinteria Dam":{'id':'9'},
    # "Carson River Diversion Dam":{'id':'52'},
    # "Carter Creek Diversion Dam":{'id':'53'},
    # "Carter Lake Dam":{'id':'55'},
    # "Cascade Dam":{'id':'10'},
    # "Casitas Dam":{'id':'276'},
    # "Causey Dam":{'id':'58'},
    # "Cedar Bluff Dam":{'id':'59'},
    # "Chapman Diversion Dam":{'id':'60'},
    # "Cheney Dam":{'id':'63'},
    # "Choke Canyon Dam":{'id':'65'},
    # "Clark Canyon Dam":{'id':'68'},
    # "Cle Elum Dam":{'id':'306'},
    # "Clear Creek Dam":{'id':'69'},
    # "Clear Lake Dam":{'id':'27'},
    # "Cold Springs Dam":{'id':'29'},
    # "Como Dam":{'id':'30'},
    # "Conconully Dam":{'id':'32'},
    # "Contra Loma Dam":{'id':'34'},
    # "Corbett Diversion Dam":{'id':'35'},
    # "Crane Prairie Dam":{'id':'38'},
    # "Crawford Dam":{'id':'39'},
    # "Crystal Dam":{'id':'41'},
    # "Culbertson Diversion Dam":{'id':'277'},
    # "Currant Creek Dam":{'id':'44'},
    # "Davis Creek Dam":{'id':'70'},
    # "Davis Dam":{'id':'47'},
    # "Deadwood Dam":{'id':'11'},
    # "Deaver Dam":{'id':'75'},
    # "Deer Creek Dam":{'id':'76'},
    # "Deer Flat Lower Embankment":{'id':'13'},
    # "Deer Flat Middle Embankment":{'id':'14'},
    # "Deer Flat Upper Embankment":{'id':'15'},
    # "Deerfield Dam":{'id':'100'},
    # "Derby Diversion Dam":{'id':'77'},
    # "Dickinson Dam":{'id':'78'},
    # "Dille Diversion Dam":{'id':'61'},
    # "Dixon Canyon Dam":{'id':'79'},
    # "Dodson Diversion Dam":{'id':'80'},
    # "Dressler Diversion Dam":{'id':'81'},
    # "Dry Creek Diversion Dam":{'id':'82'},
    # "Dry Falls Dam":{'id':'84'},
    # "Dry Spotted Tail Diversion Dam":{'id':'285'},
    # "Dunlap Diversion Dam":{'id':'286'},
    # "East Canyon Dam":{'id':'88'},
    # "East Park Dam":{'id':'89'},
    # "East Portal Diversion Dam":{'id':'287'},
    # "Easton Diversion Dam":{'id':'103'},
    # "Echo Dam":{'id':'92'},
    # "Eden Dam":{'id':'93'},
    # "El Vado Dam":{'id':'96'},
    # "Elephant Butte Dam":{'id':'94'},
    # "Emigrant Dam":{'id':'97'},
    # "Enders Dam":{'id':'99'},
    # "Fish Lake Dam":{'id':'104'},
    # "Flaming Gorge Dam":{'id':'105'},
    # "Flatiron Afterbay Dam":{'id':'72'},
    # "Folsom Dam":{'id':'74'},
    # "Fontenelle Dam":{'id':'288'},
    # "Fort Cobb Dam":{'id':'289'},
    # "Fort Shaw Diversion Dam and Canal":{'id':'117'},
    # "Foss Dam":{'id':'121'},
    # "French Canyon Dam":{'id':'127'},
    # "Fresno Dam":{'id':'128'},
    # "Friant Dam":{'id':'133'},
    # "Fruitgrowers Dam":{'id':'138'},
    # "Fryingpan Diversion Dam":{'id':'296'},
    # "Funks Dam":{'id':'139'},
    # "Garnet Diversion Dam":{'id':'283'},
    # "Gerber Dam":{'id':'73'},
    # "Gibson Dam":{'id':'284'},
    # "Glen Anne Dam":{'id':'16'},
    # "Glen Canyon Dam":{'id':'144'},
    # "Glen Elder Dam":{'id':'149'},
    # "Glendo Dam":{'id':'290'},
    # "Granby Dam":{'id':'291'},
    # "Grand Coulee Dam":{'id':'155'},
    # "Granite Creek Diversion Dam":{'id':'161'},
    # "Granite Reef Diversion Dam":{'id':'165'},
    # "Grassy Lake Dam":{'id':'166'},
    # "Gray Reef Dam":{'id':'292'},
    # "Green Mountain Dam":{'id':'174'},
    # "Guernsey Dam":{'id':'175'},
    # "Halfmoon Diversion Dam":{'id':'179'},
    # "Haystack Dam":{'id':'183'},
    # "Heart Butte Dam":{'id':'112'},
    # "Helena Valley Dam":{'id':'113'},
    # "Heron Dam":{'id':'118'},
    # "Hoover Dam":{'id':'122'},
    # "Horse Creek Diversion Dam":{'id':'123'},
    # "Horse Mesa Dam":{'id':'129'},
    # "Horseshoe Dam":{'id':'134'},
    # "Horsetooth Dam":{'id':'119'},
    # "Howard Prairie Dam":{'id':'124'},
    # "Hubbard Dam":{'id':'17'},
    # "Hungry Horse Dam":{'id':'255'},
    # "Hunter Creek Diversion Dam":{'id':'293'},
    # "Huntington North Dam":{'id':'140'},
    # "Hyatt Dam":{'id':'141'},
    # "Hyrum Dam":{'id':'145'},
    # "Imperial Diversion Dam":{'id':'150'},
    # "Island Park Dam":{'id':'151'},
    # "Isleta Diversion Dam":{'id':'130'},
    # "Ivanhoe Diversion Dam":{'id':'131'},
    # "Jackson Gulch Dam":{'id':'156'},
    # "Jackson Lake Dam":{'id':'162'},
    # "James Diversion Dam":{'id':'157'},
    # "Jamestown Dam":{'id':'167'},
    # "Joes Valley Dam":{'id':'168'},
    # "John Franchi Diversion Dam":{'id':'171'},
    # "Jordanelle Dam":{'id':'176'},
    # "Kachess Dam":{'id':'135'},
    # "Keechelus Dam":{'id':'294'},
    # "Keene Creek Dam":{'id':'295'},
    # "Kent Diversion Dam":{'id':'184'},
    # "Keswick Dam":{'id':'185'},
    # "Keyhole Dam":{'id':'107'},
    # "Kirwin Dam":{'id':'108'},
    # "Kortes Dam":{'id':'114'},
    # "Laguna Diversion Dam":{'id':'297'},
    # "Lahontan Dam":{'id':'142'},
    # "Lake Alice No 1 Dam":{'id':'146'},
    # "Lake Alice No 1and 1 Half Dam":{'id':'298'},
    # "Lake Alice No 2 Dam":{'id':'152'},
    # "Lake Sherburne Dam":{'id':'158'},
    # "Lake Tahoe Dam":{'id':'159'},
    # "Lauro Dam":{'id':'18'},
    # "Lemon Dam":{'id':'163'},
    # "Lewiston Dam":{'id':'169'},
    # "Lily Pad Diversion Inlet Dam":{'id':'170'},
    # "Link River Diversion Dam":{'id':'172'},
    # "Little Hell Creek Diversion Dam":{'id':'278'},
    # "Little Panoche Detention Dam":{'id':'279'},
    # "Little Wood River Dam":{'id':'180'},
    # "Los Banos Creek Detention Dam":{'id':'280'},
    # "Lost Creek Dam":{'id':'109'},
    # "Lost River Diversion Dam":{'id':'115'},
    # "Lovewell Dam":{'id':'116'},
    # "Lower Yellowstone Diversion Dam":{'id':'120'},
    # "Malone Diversion Dam":{'id':'125'},
    # "Mann Creek Dam":{'id':'126'},
    # "Marble Bluff Dam":{'id':'132'},
    # "Martinez Dam":{'id':'136'},
    # "Marys Lake Dike Dam":{'id':'137'},
    # "Mason Dam":{'id':'143'},
    # "McGee Creek Dam":{'id':'147'},
    # "McKay Dam":{'id':'148'},
    # "McPhee Dam":{'id':'153'},
    # "Medicine Creek Dam":{'id':'154'},
    # "Meeks Cabin Dam":{'id':'160'},
    # "Merritt Dam":{'id':'164'},
    # "Middle Cunningham Creek Diversion Dam":{'id':'303'},
    # "Midview Dam":{'id':'304'},
    # "Midway Creek Diversion Dam":{'id':'173'},
    # "Miller Diversion Dam":{'id':'177'},
    # "Minatare Dam":{'id':'178'},
    # "Minidoka Dam":{'id':'181'},
    # "Monticello Dam":{'id':'186'},
    # "Moon Lake Dam":{'id':'187'},
    # "Mormon Creek Diversion Dam":{'id':'106'},
    # "Mormon Flat Dam":{'id':'110'},
    # "Mormon Island Auxilliary Dam":{'id':'111'},
    # "Morrow Point Dam":{'id':'197'},
    # "Mountain Park Dam":{'id':'200'},
    # "Mt Elbert Forebay Dam":{'id':'308'},
    # "Nambe Falls Dam":{'id':'309'},
    # "Navajo Dam":{'id':'310'},
    # "Nelson Dam":{'id':'206'},
    # "New Melones Dam":{'id':'211'},
    # "New Waddell Dam":{'id':'311'},
    # "Newton Dam":{'id':'217'},
    # "Nimbus Dam":{'id':'222'},
    # "No Name Creek Diversion Dam":{'id':'223'},
    # "Norman Dam":{'id':'227'},
    # "North Cunningham Creek Diversion Dam":{'id':'312'},
    # "North Dam":{'id':'228'},
    # "North Fork Diversion Dam":{'id':'313'},
    # "Northside Diversion Dam":{'id':'237'},
    # "Norton Dam":{'id':'238'},
    # "Ochoco Dam":{'id':'248'},
    # "Olympus Dam":{'id':'249'},
    # "Ortega Dam":{'id':'19'},
    # "Owyhee Dam":{'id':'315'},
    # "Pactola Dam":{'id':'316'},
    # "Palisades Dam":{'id':'317'},
    # "Palo Verde Diversion Dam":{'id':'201'},
    # "Paonia Dam":{'id':'203'},
    # "Paradise Diversion Dam":{'id':'204'},
    # "Parker Dam":{'id':'207'},
    # "Pathfinder Dam":{'id':'212'},
    # "Pathfinder Dike Dam":{'id':'213'},
    # "Pilot Butte Dam":{'id':'318'},
    # "Pineview Dam":{'id':'218'},
    # "Pinto Dam":{'id':'219'},
    # "Pishkun Dikes":{'id':'319'},
    # "Platoro Dam":{'id':'224'},
    # "Pole Hill Afterbay Dam":{'id':'229'},
    # "Prosser Creek Dam":{'id':'320'},
    # "Pueblo Dam":{'id':'232'},
    # "Putah Dam":{'id':'234'},
    # "Rainbow Diversion Dam":{'id':'239'},
    # "Ralston Dam":{'id':'240'},
    # "Rattlesnake Dam":{'id':'243'},
    # "Red Bluff Diversion Dam":{'id':'244'},
    # "Red Fleet Dam":{'id':'250'},
    # "Red Willow Creek Diversion Dam":{'id':'189'},
    # "Red Willow Dam":{'id':'321'},
    # "Reservoir A Dam":{'id':'267'},
    # "Ridges Basin Dam":{'id':'581'},
    # "Ridgway Dam":{'id':'195'},
    # "Rifle Gap Dam":{'id':'198'},
    # "Ririe Dam":{'id':'199'},
    # "Robles Dam":{'id':'322'},
    # "Roza Diversion Dam":{'id':'323'},
    # "Ruedi Dam":{'id':'208'},
    # "Rye Patch Dam":{'id':'268'},
    # "Salmon Lake Dam":{'id':'214'},
    # "San Acacia Diversion Dam":{'id':'324'},
    # "San Justo Dam":{'id':'325'},
    # "Sanford Dam":{'id':'270'},
    # "Satanka Dike Dam":{'id':'220'},
    # "Sawyer Diversion Dam":{'id':'326'},
    # "Scofield Dam":{'id':'327'},
    # "Scoggins Dam":{'id':'225'},
    # "Seminoe Dam":{'id':'233'},
    # "Senator Wash Dam":{'id':'328'},
    # "Shadehill Dam":{'id':'329'},
    # "Shadow Mountain Dam":{'id':'235'},
    # "Shasta Dam":{'id':'241'},
    # "Silver Jack Dam":{'id':'245'},
    # "Sly Park Dam":{'id':'246'},
    # "Soldier Canyon Dam":{'id':'251'},
    # "Soldier Creek Dam":{'id':'190'},
    # "Soldiers Meadow Dam":{'id':'191'},
    # "South Cunningham Creek Diversion Dam":{'id':'196'},
    # "South Fork Diversion Dam":{'id':'330'},
    # "Spring Canyon Dam":{'id':'331'},
    # "Spring Creek Debris Dam":{'id':'202'},
    # "Stampede Dam":{'id':'205'},
    # "Starvation Dam":{'id':'209'},
    # "Stateline Dam":{'id':'210'},
    # "Steinaker Dam":{'id':'215'},
    # "Stewart Mountain Dam":{'id':'216'},
    # "Stony Gorge Dam":{'id':'333'},
    # "Sugar Loaf Dam":{'id':'221'},
    # "Sumner Dam":{'id':'226'},
    # "Sun River Diversion Dam":{'id':'230'},
    # "Superior Courtland Diversion Dam":{'id':'231'},
    # "Swift Current Dike":{'id':'334'},
    # "Taylor Park Dam":{'id':'236'},
    # "Terminal Dam":{'id':'299'},
    # "Theodore Roosevelt Dam":{'id':'242'},
    # "Thief Valley Dam":{'id':'300'},
    # "Three Mile Falls Diversion Dam":{'id':'247'},
    # "Tiber Dam":{'id':'252'},
    # "Tieton Dam":{'id':'253'},
    # "Trenton Dam":{'id':'188'},
    # "Trinity Dam":{'id':'266'},
    # "Tub Springs Creek Diversion Dam":{'id':'192'},
    # "Twin Buttes Dam":{'id':'307'},
    # "Twin Lakes Dam":{'id':'305'},
    # "Twitchell Dam":{'id':'193'},
    # "Unity Dam":{'id':'256'},
    # "Upper Slaven Diversion Dam":{'id':'301'},
    # "Upper Stillwater Dam":{'id':'257'},
    # "Vallecito Dam":{'id':'258'},
    # "Vandalia Diversion Dam":{'id':'335'},
    # "Vega Dam":{'id':'260'},
    # "Virginia Smith Dam":{'id':'302'},
    # "Wanship Dam":{'id':'83'},
    # "Warm Springs Dam":{'id':'85'},
    # "Wasco Dam":{'id':'86'},
    # "Webster Dam":{'id':'281'},
    # "Whalen Diversion Dam":{'id':'87'},
    # "Whiskeytown Dam":{'id':'90'},
    # "Wickiup Dam":{'id':'91'},
    # "Willow Creek Co Dam":{'id':'98'},
    # "Willow Creek Dam":{'id':'95'},
    # "Willwood Diversion Dam":{'id':'282'},
    # "Wind River Diversion Dam":{'id':'101'},
    # "Woodston Diversion Dam":{'id':'102'},
    # "Yellowstone River Diversion Dam":{'id':'262'},
    # "Yellowtail Afterbay Dam":{'id':'265'},
    # "Yellowtail Dam":{'id':'264'}
}

results = [scrape_dam(dam['id']) for dam in dams.values()]

# Failed scrapes come back as {}; drop them so the CSV has no all-empty rows.
pd.DataFrame([row for row in results if row]).to_csv('dam_data.csv')