Trusted97 · July 30, 2022 11:10
diff --git a/video_sitemap_generator.py b/video_sitemap_generator.py
 import re
 from bs4 import BeautifulSoup
 import urllib.request
 from urllib.request import urlopen
 import json
 import requests



 def get_embed_code(youtube_url):
     """Collect the YouTube embed codes found in the iframes of a page.

     Downloads ``youtube_url``, parses the markup with BeautifulSoup's
     ``html.parser`` and inspects the ``src`` attribute of every ``<iframe>``.

     Args:
         youtube_url: URL of the page to scan (passed straight to ``urlopen``).

     Returns:
         A list with one string per iframe whose ``src`` contains
         ``https://www.youtube.com/embed/``: the ``src`` value with that
         prefix removed, in document order. Anything that follows the video
         id in ``src`` (for example a query string) is kept as-is.
     """
     embed_prefix = 'https://www.youtube.com/embed/'

     # Release the HTTP response as soon as the markup has been read.
     with urlopen(youtube_url) as response:
         page = BeautifulSoup(response.read(), features="html.parser")

     embed_codes = []
     for iframe in page.find_all('iframe'):
         src = iframe.get('src')
         # An iframe without a src attribute makes .get() return None; a
         # substring test against None would raise TypeError, so skip it.
         if src and embed_prefix in src:
             embed_codes.append(src.replace(embed_prefix, ''))

     return embed_codes


 def get_info_from_embed(embed_code):
     """Fetch a video's title and thumbnail URL from YouTube's oEmbed endpoint.

     Args:
         embed_code: Video identifier, substituted into the ``watch?v=`` URL
             that is handed to the oEmbed endpoint.

     Returns:
         A dict with the keys ``'title'`` and ``'thumb_url'`` when the
         endpoint answers a HEAD request with status 200, otherwise ``None``.
     """
     raw_url = 'https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v={EMBED_CODE}&format=json'
     url = raw_url.replace('{EMBED_CODE}', embed_code)

     # Existence check first: any non-200 answer means "no info available".
     head_response = requests.head(url)
     if head_response.status_code != 200:
         return None

     # Read the JSON body inside ``with`` so the connection is released.
     with urllib.request.urlopen(urllib.request.Request(url)) as response:
         payload = json.loads(response.read().decode('utf-8'))

     return {
         'title': payload['title'],
         'thumb_url': payload['thumbnail_url'],
     }

 def get_sitemap_section(thumbs_pic_url,title,desc,embed_url):
     """Build the ``<video:video>`` XML fragment for a single video.

     Every value is entity-escaped (``&``, ``<``, ``>``) before insertion so
     that titles or URLs containing those characters still give valid XML.
     All placeholders are filled in one pass, so a value that happens to
     contain placeholder-looking text such as ``{DESC}`` is left untouched.

     Args:
         thumbs_pic_url: Text for ``<video:thumbnail_loc>``.
         title: Text for ``<video:title>``.
         desc: Text for ``<video:description>``.
         embed_url: Text for ``<video:content_loc>``.

     Returns:
         The fragment as a string, starting with a newline and ending with
         ``</video:video>``.
     """
     # Local import: the file's import block does not provide an XML escaper.
     from xml.sax.saxutils import escape

     # NOTE(review): the parameter name suggests an embed/player URL; Google's
     # video sitemap documentation uses <video:player_loc> for player URLs and
     # <video:content_loc> for the media file itself -- confirm the tag.
     sitemap_section = (
         "\n"
         "    <video:video>\n"
         "        <video:thumbnail_loc>{THUMB_PIC_URL}</video:thumbnail_loc>\n"
         "        <video:title>{TITLE}</video:title>\n"
         "        <video:description>{DESC}</video:description>\n"
         "        <video:content_loc>{EMBED_URL}</video:content_loc>\n"
         "        <video:family_friendly>yes</video:family_friendly>\n"
         "    </video:video>"
     )

     return sitemap_section.format(
         THUMB_PIC_URL=escape(thumbs_pic_url),
         TITLE=escape(title),
         DESC=escape(desc),
         EMBED_URL=escape(embed_url),
     )




 # Script body: read page URLs from ``filepath`` and write one <url> element
 # per page, containing a <video:video> section for every YouTube iframe found.
 from xml.sax.saxutils import escape  # stdlib; entity-escapes the <loc> text

 filepath = 'urls.txt' #File that contains the urls that you want add in Video Sitemap

 # ``with`` closes both files even when a download fails half-way through.
 # The output is opened first (as before) and written as UTF-8, the encoding
 # the sitemap protocol requires, so non-ASCII titles cannot fail on write.
 with open("sitemap_videos.txt","w", encoding="utf-8") as song_file, open(filepath) as fp:
     for line in fp:
         # split() with no argument splits on any run of whitespace and
         # yields nothing for blank lines, so no empty URL is ever fetched.
         for current_url in line.split():
             song_file.write('<url><loc>{URL}</loc>'.replace('{URL}', escape(current_url)))

             for embed_code in get_embed_code(current_url):
                 info = get_info_from_embed(embed_code)
                 if info is None:
                     # No oEmbed data for this video: leave it out.
                     continue

                 title = info.get('title')
                 thumb_url = info.get('thumb_url')
                 desc = 'Custom Text:'+title
                 embed_video_url = 'https://www.youtube.com/embed/'+embed_code
                 song_file.write(get_sitemap_section(thumb_url,title,desc,embed_video_url))

             # Close the element once per URL so a line that holds several
             # URLs still produces balanced <url>...</url> pairs.
             song_file.write('</url>')
	import re
	from bs4 import BeautifulSoup
	import urllib.request
	from urllib.request import urlopen
	import json
	import requests



	def get_embed_code(youtube_url):
	    """Collect the YouTube embed codes found in the iframes of a page.

	    Downloads ``youtube_url``, parses the markup with BeautifulSoup's
	    ``html.parser`` and inspects the ``src`` attribute of every ``<iframe>``.

	    Args:
	        youtube_url: URL of the page to scan (passed straight to ``urlopen``).

	    Returns:
	        A list with one string per iframe whose ``src`` contains
	        ``https://www.youtube.com/embed/``: the ``src`` value with that
	        prefix removed, in document order. Anything that follows the video
	        id in ``src`` (for example a query string) is kept as-is.
	    """
	    embed_prefix = 'https://www.youtube.com/embed/'

	    # Release the HTTP response as soon as the markup has been read.
	    with urlopen(youtube_url) as response:
	        page = BeautifulSoup(response.read(), features="html.parser")

	    embed_codes = []
	    for iframe in page.find_all('iframe'):
	        src = iframe.get('src')
	        # An iframe without a src attribute makes .get() return None; a
	        # substring test against None would raise TypeError, so skip it.
	        if src and embed_prefix in src:
	            embed_codes.append(src.replace(embed_prefix, ''))

	    return embed_codes


	def get_info_from_embed(embed_code):
	    """Fetch a video's title and thumbnail URL from YouTube's oEmbed endpoint.

	    Args:
	        embed_code: Video identifier, substituted into the ``watch?v=`` URL
	            that is handed to the oEmbed endpoint.

	    Returns:
	        A dict with the keys ``'title'`` and ``'thumb_url'`` when the
	        endpoint answers a HEAD request with status 200, otherwise ``None``.
	    """
	    raw_url = 'https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v={EMBED_CODE}&format=json'
	    url = raw_url.replace('{EMBED_CODE}', embed_code)

	    # Existence check first: any non-200 answer means "no info available".
	    head_response = requests.head(url)
	    if head_response.status_code != 200:
	        return None

	    # Read the JSON body inside ``with`` so the connection is released.
	    with urllib.request.urlopen(urllib.request.Request(url)) as response:
	        payload = json.loads(response.read().decode('utf-8'))

	    return {
	        'title': payload['title'],
	        'thumb_url': payload['thumbnail_url'],
	    }

	def get_sitemap_section(thumbs_pic_url,title,desc,embed_url):
	    """Build the ``<video:video>`` XML fragment for a single video.

	    Every value is entity-escaped (``&``, ``<``, ``>``) before insertion so
	    that titles or URLs containing those characters still give valid XML.
	    All placeholders are filled in one pass, so a value that happens to
	    contain placeholder-looking text such as ``{DESC}`` is left untouched.

	    Args:
	        thumbs_pic_url: Text for ``<video:thumbnail_loc>``.
	        title: Text for ``<video:title>``.
	        desc: Text for ``<video:description>``.
	        embed_url: Text for ``<video:content_loc>``.

	    Returns:
	        The fragment as a string, starting with a newline and ending with
	        ``</video:video>``.
	    """
	    # Local import: the file's import block does not provide an XML escaper.
	    from xml.sax.saxutils import escape

	    # NOTE(review): the parameter name suggests an embed/player URL; Google's
	    # video sitemap documentation uses <video:player_loc> for player URLs and
	    # <video:content_loc> for the media file itself -- confirm the tag.
	    # The inter-element whitespace is written with explicit escapes so it
	    # does not depend on how this source file is indented.
	    sitemap_section = (
	        "\n"
	        "\t<video:video>\n"
	        "\t<video:thumbnail_loc>{THUMB_PIC_URL}</video:thumbnail_loc>\n"
	        "\t<video:title>{TITLE}</video:title>\n"
	        "\t<video:description>{DESC}</video:description>\n"
	        "\t<video:content_loc>{EMBED_URL}</video:content_loc>\n"
	        "\t<video:family_friendly>yes</video:family_friendly>\n"
	        "\t</video:video>"
	    )

	    return sitemap_section.format(
	        THUMB_PIC_URL=escape(thumbs_pic_url),
	        TITLE=escape(title),
	        DESC=escape(desc),
	        EMBED_URL=escape(embed_url),
	    )




	# Script body: read page URLs from ``filepath`` and write one <url> element
	# per page, containing a <video:video> section for every YouTube iframe found.
	from xml.sax.saxutils import escape  # stdlib; entity-escapes the <loc> text

	filepath = 'urls.txt' #File that contains the urls that you want add in Video Sitemap

	# ``with`` closes both files even when a download fails half-way through.
	# The output is opened first (as before) and written as UTF-8, the encoding
	# the sitemap protocol requires, so non-ASCII titles cannot fail on write.
	with open("sitemap_videos.txt","w", encoding="utf-8") as song_file, open(filepath) as fp:
	    for line in fp:
	        # split() with no argument splits on any run of whitespace and
	        # yields nothing for blank lines, so no empty URL is ever fetched.
	        for current_url in line.split():
	            song_file.write('<url><loc>{URL}</loc>'.replace('{URL}', escape(current_url)))

	            for embed_code in get_embed_code(current_url):
	                info = get_info_from_embed(embed_code)
	                if info is None:
	                    # No oEmbed data for this video: leave it out.
	                    continue

	                title = info.get('title')
	                thumb_url = info.get('thumb_url')
	                desc = 'Custom Text:'+title
	                embed_video_url = 'https://www.youtube.com/embed/'+embed_code
	                song_file.write(get_sitemap_section(thumb_url,title,desc,embed_video_url))

	            # Close the element once per URL so a line that holds several
	            # URLs still produces balanced <url>...</url> pairs.
	            song_file.write('</url>')