# YouTube video: https://youtu.be/dwCe9l_geK4
import requests
import json
import os
import unicodedata
from bs4 import BeautifulSoup

# Listing pages to scrape. Any trailing slash is stripped so that the last
# path segment of each URL is never empty when it is later used as a file name.
urls = [
    page.rstrip('/')
    for page in (
        'https://www.bigbearcoolcabins.com/big-bear-cabin-rentals/moonridge-cali-bear-cabin/',
        'https://www.bigbearcoolcabins.com/big-bear-cabin-rentals/switzerland',
        'https://www.bigbearcoolcabins.com/big-bear-cabin-rentals/pines',
    )
]

# Fetch each listing page, pull out its description text, and save it as
# "<last URL path segment>.json" in the current working directory.
for url in urls:
    try:
        # A timeout keeps one unresponsive server from hanging the whole run;
        # requests waits indefinitely when none is given.
        response = requests.get(url, timeout=30)
        # Treat 4xx/5xx replies as failures instead of parsing an error page
        # and writing a JSON file with empty content.
        response.raise_for_status()
    except requests.RequestException as exc:
        # Report the problem and move on so the remaining URLs still run.
        print(f'Skipping {url}: {exc}')
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the text of the paragraphs and list items inside the
    # property-description container.
    paragraphs = soup.select(
        '#node-vr-listing-full-group-vr-property-desc p, '
        '#node-vr-listing-full-group-vr-property-desc ul li'
    )
    # Every fragment is followed by a single space (including the last one).
    content = ''.join(paragraph.get_text() + ' ' for paragraph in paragraphs)

    # Decompose characters with NFKD, then drop everything that has no ASCII
    # representation.
    content = unicodedata.normalize('NFKD', content).encode('ascii', 'ignore').decode()

    # Extract the file name from the URL (its last path segment).
    file_name = os.path.basename(url)

    # Create a dictionary with the data
    data = {'content': content}

    # Save the data to a .json file. The encoding is stated explicitly so the
    # result does not depend on the platform's default locale.
    with open(file_name + '.json', 'w', encoding='utf-8') as outfile:
        json.dump(data, outfile, separators=(',', ':'), indent=2)