From 29172a729e713ddf394caad9e114b103ec8e1638 Mon Sep 17 00:00:00 2001 From: Aleteoryx Date: Sun, 5 Oct 2025 21:50:29 -0400 Subject: [PATCH] add items.txt --- README.md | 8 ++++---- bcdl.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4fa2e88ce057c502ff8f492bf41bffc2042ac2a1..f29dcca695010b3685b8f31e8db8a144fc482a23 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Not Shit Bandcamp collection downloader. This tool automatically downloads your entire bandcamp collection, in flac quality, to a folder. -If it fails or crashes, the download can be resumed. +If it fails or crashes, the download can be restarted. This is NOT a piracy tool. It uses your Bandcamp token to get the download links for music you have purchased. @@ -19,9 +19,9 @@ Download the script [here](https://git.amehut.dev/~aleteoryx/bcdl/blob/master/bc ## usage -Run the single `bcdl.py` script with no arguments. +Run the single `bcdl.py` script with no arguments. Follow the instructions. The './bcdl/' folder is currently hardcoded, and you should download the script to a device with a lot of free space. If './bcdl/' is symlinked to a directory, bcdl will use the symlinked path. -If you do not want to keep every downloaded file in the './bcdl/' directory, replace each file with an empty one. -bcdl does not check the contents of files, only filenames. +'items.txt' is used to log previous downloads. To redo a download for any reason, remove the line +with the name of the item in question. diff --git a/bcdl.py b/bcdl.py index e0f0a05d114fc63f543ef1775318017623c4fc3f..8ebb71c7342b1560511b928a738967ae18fadf6f 100644 --- a/bcdl.py +++ b/bcdl.py @@ -22,6 +22,7 @@ import sys import time import os import math +from datetime import datetime failed = 0 @@ -132,15 +133,38 @@ else: print('Creating directory...') p.mkdir() + +if pathlib.Path('./bcdl/items.txt').exists(): + with open('./bcdl/items.txt', 'rt') as fp: + blacklist = set(line.split(':', 1)[0] for line in fp.readlines() if line[0] != '#') + logfile = open('./bcdl/items.txt', 'at') +else: + blacklist = set() + logfile = open('./bcdl/items.txt', 'wt') + print('# bcdl download log. lines are of the form ID:TITLE, but only ID is checked', file=logfile) + +print(f'# bcdl.py started at {datetime.now().isoformat()}', file=logfile) + + for i,item in enumerate(items): i += 1 + dlid = f'{item["sale_item_type"]}{item["sale_item_id"]}' testglob = clean_filename(f'{item["band_name"]} - {item["item_title"]}')+'.*' - if len([*p.glob(testglob)]): + + skip = False + if dlid in blacklist: + skip = True + elif len([*p.glob(testglob)]): + print(f'{dlid}:{item["band_name"]} - {item["item_title"]}', file=logfile) + skip = True + + if skip: print(f'Already have {i}/{len(items)}...', end="\r", flush=True) skipped += 1 continue - url = download_urls[f'{item["sale_item_type"]}{item["sale_item_id"]}'] + + url = download_urls[dlid] doc = bs4.BeautifulSoup(requests.get(url).text, 'html.parser') pagedata = json.loads(doc.find(id='pagedata')['data-blob']) for n,dlitem in enumerate(pagedata['download_items']): @@ -171,6 +195,7 @@ for i,item in enumerate(items): print(f'{i}/{len(items)}: {filename}') download_bc_file(download, filepath) + print(f'{dlid}:{item["band_name"]} - {item["item_title"]}', file=logfile) nlen = str(int( max(