Skip to content

Instantly share code, notes, and snippets.

@akshaybaweja
Created April 5, 2021 03:08
Show Gist options
  • Save akshaybaweja/81b9a223f78828f44b9a077abf45f373 to your computer and use it in GitHub Desktop.
Save akshaybaweja/81b9a223f78828f44b9a077abf45f373 to your computer and use it in GitHub Desktop.

Revisions

  1. akshaybaweja created this gist Apr 5, 2021.
    59 changes: 59 additions & 0 deletions MarkovLinkinPark.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,59 @@
    import re
    import urllib.request
    import markovify
    from time import sleep

    # HTML comment that precedes the lyrics text on a song page; everything
    # between it and the next closing </div> is treated as the lyrics.
    _LYRICS_MARKER = (
        '<!-- Usage of azlyrics.com content by any third-party lyrics provider '
        'is prohibited by our licensing agreement. Sorry about that. -->'
    )


    def _fetch_text(url):
        """Download *url* and return its body decoded to ``str``."""
        with urllib.request.urlopen(url) as response:
            # Decode the bytes properly instead of str()-ing them: str(bytes)
            # yields the "b'...'" repr with escaped newlines and \xNN sequences.
            charset = response.headers.get_content_charset() or "utf-8"
            return response.read().decode(charset, errors="replace")


    def _extract_lyrics(page):
        """Return the cleaned lyrics found in *page*, or ``None`` if the marker is absent."""
        import html  # function-scope import: the file's import block is left untouched

        _, marker, tail = page.partition(_LYRICS_MARKER)
        if not marker:
            return None
        body = tail.split('</div>', 1)[0]
        body = body.replace('\r\n', '\n')
        # A <br> is normally followed by a source newline; collapse the pair
        # so each lyric line ends with exactly one newline.
        body = re.sub(r'<br\s*/?>\n?', '\n', body)
        for fragment in ('<i>', '</i>', '[Chorus]'):
            body = body.replace(fragment, '')
        return html.unescape(body).strip()


    def scrape(artist_url="https://www.azlyrics.com/l/linkinpark.html",
               lyrics_path='lyrics.txt',
               links_path="linkinpark-links.txt",
               delay=5):
        """Download every Linkin Park song linked from the artist page and save the lyrics.

        Args:
            artist_url: Page listing the songs. Every ``href`` on it that contains
                ``lyrics/linkinpark`` is treated as a song link.
            lyrics_path: File that receives the lyrics of all songs (overwritten).
            links_path: File that receives one resolved song URL per line (overwritten).
            delay: Seconds to wait after each song request; ``0`` disables the wait.

        Raises:
            urllib.error.URLError: If the artist page or a song page cannot be fetched.
            OSError: If an output file cannot be written.

        Song pages without the lyrics marker are reported and skipped.
        """
        from urllib.parse import urljoin  # function-scope import, see _extract_lyrics

        artist_page = _fetch_text(artist_url)
        hrefs = re.findall(r'href="([^"]+)"', artist_page)
        # urljoin resolves "../lyrics/...", "/lyrics/..." and absolute hrefs alike;
        # dict.fromkeys drops repeated links while keeping page order.
        song_links = list(dict.fromkeys(
            urljoin(artist_url, href)
            for href in hrefs
            if "lyrics/linkinpark" in href
        ))

        # Output files use the platform default encoding so that the plain
        # open('lyrics.txt', 'r') reader at module level can read them back;
        # characters that encoding cannot represent are replaced, not fatal.
        with open(links_path, 'w', errors='replace') as links_file:
            links_file.writelines(link + '\n' for link in song_links)

        print("# Links:", len(song_links))

        # Opened only after the link list was fetched, so a failed first
        # request does not wipe an existing lyrics file.
        with open(lyrics_path, 'w', errors='replace') as lyrics_file:
            for link in song_links:
                lyrics = _extract_lyrics(_fetch_text(link))
                if lyrics is None:
                    print("\t!! no lyrics marker found, skipped:", link)
                else:
                    lyrics_file.write(lyrics + '\n')
                    print("\t--", link)
                if delay:
                    # Pause between requests to avoid hammering the server.
                    sleep(delay)

    # Uncomment to (re)build lyrics.txt from the website before generating.
    # scrape()

    # Read the whole corpus; the context manager closes the handle, which the
    # bare open() call used to leak.
    with open('lyrics.txt', 'r') as lyrics_file:
        text = lyrics_file.read()

    # Build a Markov chain model over the corpus and print one generated sentence.
    markovifyTextModel = markovify.Text(text)
    # NOTE(review): make_sentence() appears to return None when it cannot build
    # a sentence, in which case this prints "None" - confirm against markovify docs.
    generatedLyrics = markovifyTextModel.make_sentence()
    print(generatedLyrics)