Skip to content

Instantly share code, notes, and snippets.

@akshaybaweja
Created April 5, 2021 03:08
Show Gist options
  • Select an option

  • Save akshaybaweja/81b9a223f78828f44b9a077abf45f373 to your computer and use it in GitHub Desktop.

Select an option

Save akshaybaweja/81b9a223f78828f44b9a077abf45f373 to your computer and use it in GitHub Desktop.
import re
import urllib.request
import markovify
from time import sleep
# Comment that precedes the lyrics text on a song page; everything between it
# and the next closing </div> is treated as the lyrics.
_LYRICS_MARKER = '<!-- Usage of azlyrics.com content by any third-party lyrics provider is prohibited by our licensing agreement. Sorry about that. -->'


def _fetch_html(url):
    """Download *url* and return the response body decoded to ``str``."""
    with urllib.request.urlopen(url) as response:
        # Decode the bytes instead of calling str() on them: str(bytes) yields
        # the "b'...'" repr, with escaped newlines/quotes and non-ASCII
        # characters rendered as \xNN sequences.
        charset = response.headers.get_content_charset() or "utf-8"
        return response.read().decode(charset, errors="replace")


def _extract_lyrics(song_html):
    """Return the cleaned lyrics contained in *song_html*, or ``None``.

    ``None`` is returned when the marker comment is not present in the page,
    so the caller can skip the song instead of failing on a missing split.
    """
    _, marker, after_marker = song_html.partition(_LYRICS_MARKER)
    if not marker:
        return None
    lyrics = after_marker.split('</div>', 1)[0]
    # A <br> together with the line break that follows it in the markup
    # becomes exactly one newline (the earlier '\nn' -> '\n' replacement was
    # doing this on the escaped repr text).
    lyrics = re.sub(r'<br\s*/?>\r?\n?', '\n', lyrics)
    lyrics = lyrics.replace('\r', '')
    for token in ('<i>', '</i>', '[Chorus]'):
        lyrics = lyrics.replace(token, '')
    return lyrics


def scrape():
    """Scrape Linkin Park lyrics from azlyrics.com into local text files.

    Fetches the artist page, collects every ``href`` containing
    ``lyrics/linkinpark``, writes the resulting absolute URLs to
    ``linkinpark-links.txt`` (one per line), then downloads each song page
    and appends its cleaned lyrics to ``lyrics.txt``. Both files are
    overwritten. Progress is printed to stdout, and the function sleeps five
    seconds after every song request.

    Pages without the lyrics marker comment are reported and skipped.
    Network and file errors propagate to the caller; the ``with`` blocks make
    sure open files and responses are closed in that case.

    Returns:
        None
    """
    url = "https://www.azlyrics.com/l/linkinpark.html"
    # Fetch before opening any output file so a failed request does not
    # truncate a previously scraped corpus.
    artist_html = _fetch_html(url)

    songLinks = []
    with open("linkinpark-links.txt", 'w') as links_file:
        for link in re.findall(r'href="([^"]+)"', artist_html):
            if "lyrics/linkinpark" in link:
                # Drop the ".." path segment and make the link absolute.
                link = "https://www.azlyrics.com" + link.replace("..", "")
                songLinks.append(link)
                links_file.write(link + '\n')
    # To resume without re-fetching the artist page, the links can instead be
    # read back from linkinpark-links.txt (strip the trailing newlines).
    print("# Links:", len(songLinks))

    # The platform default encoding is kept so that a plain
    # open('lyrics.txt', 'r') reads the file back consistently; characters it
    # cannot represent are replaced rather than aborting the scrape.
    with open('lyrics.txt', 'w', errors='replace') as lyrics_file:
        for song_url in songLinks:
            lyrics = _extract_lyrics(_fetch_html(song_url))
            if lyrics is None:
                print("\t-- skipped (no lyrics marker):", song_url)
            else:
                lyrics_file.write(lyrics)
                print("\t--", song_url)
            # Pause between requests.
            sleep(5)
# Uncomment the call below to (re)build lyrics.txt before generating text.
# scrape()

# Read the whole corpus; the context manager closes the file even if the read
# fails.
with open('lyrics.txt', 'r') as file:
    text = file.read()

# Build a Markov-chain text model from the corpus and print one generated
# sentence.
# NOTE(review): make_sentence() may return None when it cannot produce a
# sentence that passes markovify's checks, in which case "None" is printed --
# confirm whether a retry or fallback is wanted.
markovifyTextModel = markovify.Text(text)
generatedLyrics = markovifyTextModel.make_sentence()
print(generatedLyrics)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment