#!/usr/bin/env python
"""Remove duplicate entries from a wallabag account.

Entries are grouped by their ``url`` field. For every URL that occurs more
than once, the first entry returned by the API is kept and each later entry
with the same URL is deleted.

Fill in ``my_host`` and the credentials inside ``main`` before running.
"""
# Requires Python > 3.5.2
import asyncio
from collections import defaultdict

import aiohttp
from wallabag_api.wallabag import Wallabag

# If you use wallabag.it this is: https://app.wallabag.it
my_host = 'MY_HOST'

# The default get_entries query fetches just 30 entries. Use a number big
# enough to get all entries with one request.
# NOTE: only this single page is fetched, so entries beyond ITEMS_PER_PAGE
# are never inspected for duplicates.
ITEMS_PER_PAGE = 2137


def _duplicate_ids(articles):
    """Return the ids of every entry whose URL was already seen earlier.

    :param articles: iterable of mappings that each have ``'url'`` and
        ``'id'`` keys.
    :return: list of ids to delete; the first entry of each URL is excluded.
    """
    ids_by_url = defaultdict(list)
    for article in articles:
        ids_by_url[article['url']].append(article['id'])

    to_delete = []
    for ids in ids_by_url.values():
        # Keep the first id of each URL, schedule the rest for deletion.
        to_delete.extend(ids[1:])
    return to_delete


async def main(loop):
    """Fetch the entries from wallabag and delete the duplicated ones.

    :param loop: event loop handed to the aiohttp client session.
    """
    # IMPORTANT: Define callback url when you create client id and secret
    # or you'll get HTTP 400 getting the token
    params = {'username': 'MY_USERNAME',
              'password': 'MY_PASSWORD',
              'client_id': 'CLIENT_ID',
              'client_secret': 'CLIENT_SECRET'}

    # get a new token
    token = await Wallabag.get_token(host=my_host, **params)

    # initializing
    # NOTE(review): newer aiohttp releases appear to deprecate the explicit
    # `loop` argument -- confirm against the installed version before
    # dropping it.
    async with aiohttp.ClientSession(loop=loop) as session:
        wall = Wallabag(host=my_host,
                        client_secret=params.get('client_secret'),
                        client_id=params.get('client_id'),
                        token=token,
                        aio_sess=session)

        # get all the articles (a single page of ITEMS_PER_PAGE entries)
        my_wallabag = await wall.get_entries(perPage=ITEMS_PER_PAGE)
        all_article = my_wallabag['_embedded']['items']

        # Delete duplicates one by one, in the order they were returned.
        for article_id in _duplicate_ids(all_article):
            await wall.delete_entries(article_id)

    print("Done")


if __name__ == '__main__':
    # Create the loop explicitly: asyncio.get_event_loop() without a current
    # loop is deprecated on recent Python versions, while new_event_loop()
    # is also available on the Python 3.5 baseline this script targets.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main(loop))
    finally:
        loop.close()