From a789b4081c67a898930b64b447bd53d36e194579 Mon Sep 17 00:00:00 2001 From: Aleteoryx Date: Tue, 29 Jul 2025 16:38:00 -0400 Subject: [PATCH] initial --- COPYING | 13 ++++ README.md | 27 ++++++++ bcdl.py | 199 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 239 insertions(+) create mode 100644 COPYING create mode 100644 README.md create mode 100644 bcdl.py diff --git a/COPYING b/COPYING new file mode 100644 index 0000000000000000000000000000000000000000..b8ca881d06291d9bdc1abcee2738aa1d783ac8a4 --- /dev/null +++ b/COPYING @@ -0,0 +1,13 @@ +Copyright (C) 2025 by Aleteoryx + +Permission to use, copy, modify, and/or distribute this software for +any purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR +BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES +OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4fa2e88ce057c502ff8f492bf41bffc2042ac2a1 --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +# bcdl + +Not Shit Bandcamp collection downloader. + +This tool automatically downloads your entire bandcamp collection, in flac quality, to a folder. +If it fails or crashes, the download can be resumed. + +This is NOT a piracy tool. +It uses your Bandcamp token to get the download links for music you have purchased. +I pinky-promise to not send your token to evilserver.c2.virus.aleteoryx.me. + +*** + +bcdl is written in Python. It requires the `bs4` and `requests` libraries. +On Linux, your distro should package these as `python-XYZ` or `python3-XYZ`. 
+On MacOS or Windows, IDK, google around. + +Download the script [here](https://git.amehut.dev/~aleteoryx/bcdl/blob/master/bcdl.py). + +## usage + +Run the single `bcdl.py` script with no arguments. +The './bcdl/' folder is currently hardcoded, and you should download the script to a device with a lot of free space. +If './bcdl/' is symlinked to a directory, bcdl will use the symlinked path. + +If you do not want to keep every downloaded file in the './bcdl/' directory, replace each file with an empty one. +bcdl does not check the contents of files, only filenames. diff --git a/bcdl.py b/bcdl.py new file mode 100644 index 0000000000000000000000000000000000000000..aa5a37082836daf72e73e25540f9515246ed34b3 --- /dev/null +++ b/bcdl.py @@ -0,0 +1,199 @@ +''' +Copyright (C) 2025 by Aleteoryx + +Permission to use, copy, modify, and/or distribute this software for +any purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR +BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES +OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS +SOFTWARE. 
"""bcdl: download the items of a Bandcamp collection into ./bcdl/.

Run as a script with no arguments.  It prompts for a browser ``Cookie``
header and a Bandcamp username, collects the re-download links of the
collection, and saves each item in the first available format of
``CODEC_PREFERENCE``.  Items whose file already exists are skipped, so an
interrupted run can simply be restarted.
"""

import glob
import json
import math
import os
import pathlib
import sys
import time

try:
    import bs4
    import requests
except ImportError:
    # Reported by main() with a readable message instead of a traceback;
    # also keeps the pure helpers importable without the dependencies.
    bs4 = None
    requests = None


# Formats tried for every download item, most preferred first.
CODEC_PREFERENCE = ('flac', 'alac', 'aiff-lossless', 'wav', 'mp3-320')

# Seconds to wait on a silent connection before giving up on a request.
REQUEST_TIMEOUT = 60

USER_AGENT = ('Mozilla/5.0 (X11; Linux x86_64; rv:140.0) '
              'Gecko/20100101 Firefox/140.0')

COOKIE_HELP = """
First, login to Bandcamp in a web browser, then open devtools. Open the
"network" tab, and click on the first row with "document" in the
initiator column. In the sidebar that opens, scroll down to
"request headers".

Find the line starting with "Cookie", and copy everything after the
colon. Then, paste it here.""".strip()

# Characters replaced by '_' in generated file names.
_UNSAFE_CHARS = str.maketrans(dict.fromkeys('/\\*"<>:|?', '_'))

# Run statistics, updated by download_bc_file() and main().
failed = 0
skipped = 0
downloaded = 0
transfer = 0

# Raw Cookie header supplied by the user; set by main().
cookie = ''


def _discard(path):
    """Delete *path*, ignoring the case where it was never created."""
    try:
        path.unlink()
    except FileNotFoundError:
        pass


def _progress_line(done, total):
    """Format the progress message; *total* is 0 when the size is unknown."""
    if total:
        return f'{done/total:7.2%} of {total:14,d} bytes downloaded...'
    return f'{done:14,d} bytes downloaded...'


def download_bc_file(url, path):
    """Resolve a Bandcamp download link and stream the file to *path*.

    The ``/download/`` URL is first turned into its ``/statdownload/``
    form and polled (up to six times) until it reports ``result == 'ok'``
    and hands out the real download URL.

    Updates the module counters: ``downloaded`` and ``transfer`` on
    success, ``failed`` when the link never becomes ready or the server
    answers with an HTTP error status.

    Args:
        url: Download URL as found in the item's download page.
        path: ``pathlib.Path`` the file is written to.

    Raises:
        SystemExit: on Ctrl-C, after removing the partial file.
        Exception: any other error is re-raised after removing the
            partial file.
    """
    global failed, transfer, downloaded
    url = url.replace('/download/', '/statdownload/') + '&.vrs=1'

    res = {}
    for attempt in range(6):
        if attempt:
            print('Download not ready, sleeping 5 seconds...')
            time.sleep(5)
        else:
            time.sleep(1)

        res = requests.get(
            url,
            headers={'Accept': 'application/json', 'Cookie': cookie},
            timeout=REQUEST_TIMEOUT,
        ).json()
        if res.get('result') == 'ok':
            download = res['download_url']
            break
    else:
        print('Skipping download: timed out!')
        if res.get('result') == 'err' and 'errortype' in res:
            print(f'Last error: {res["errortype"]}')
        failed += 1
        return

    print('Starting download...')
    received = 0
    try:
        with requests.get(download, stream=True,
                          timeout=REQUEST_TIMEOUT) as res:
            # Without this an HTML error page would be saved as the music
            # file and treated as "already downloaded" on the next run.
            res.raise_for_status()
            # The header may be absent; 0 means "size unknown".
            length = int(res.headers.get('content-length', 0))
            with open(path, 'wb') as output:
                for chunk in res.iter_content(32768):
                    output.write(chunk)
                    received += len(chunk)
                    print(_progress_line(received, length),
                          end='\r', flush=True)
    except KeyboardInterrupt:
        _discard(path)
        print('\nExiting!')
        sys.exit(1)
    except requests.HTTPError as exc:
        _discard(path)
        print(f'Skipping download: {exc}')
        failed += 1
        return
    except Exception:
        # Never leave a partial file behind: only file names are checked
        # when deciding what to skip.
        _discard(path)
        raise

    print(f'100.00% of {received:14,d} bytes downloaded...')
    transfer += received
    downloaded += 1


def _name_max():
    """Return the longest file name allowed in the current directory."""
    try:
        return os.pathconf('.', 'PC_NAME_MAX')
    except (AttributeError, OSError, ValueError):
        # os.pathconf is missing on Windows and may be unsupported
        # elsewhere; 255 is the common limit.
        return 255


def clean_filename(filename):
    """Return *filename* made safe for use as a file name stem.

    Path separators and the characters ``* " < > : | ?`` become ``_``.
    The result is shortened so that, encoded as UTF-8, it leaves five
    bytes of the name limit free for the file extension.
    """
    cleaned = filename.translate(_UNSAFE_CHARS)
    budget = _name_max() - 5  # file extension
    # The limit counts bytes, not characters; cut the encoded form and
    # drop a character that was split by the cut.
    return cleaned.encode('utf-8')[:budget].decode('utf-8', errors='ignore')


def _load_pagedata(html):
    """Return the decoded ``data-blob`` of the page's ``#pagedata`` element.

    Returns None when the element or the attribute is missing.
    """
    node = bs4.BeautifulSoup(html, 'html.parser').find(id='pagedata')
    blob = node.get('data-blob') if node is not None else None
    if blob is None:
        return None
    return json.loads(blob)


def _pick_download(downloads):
    """Return ``(codec, url)`` for the preferred codec, or None if none fit."""
    for codec in CODEC_PREFERENCE:
        if codec in downloads:
            return codec, downloads[codec]['url']
    return None


def main():
    """Prompt for credentials, then download the whole collection."""
    global cookie, skipped, failed

    if bs4 is None or requests is None:
        print('Error: bcdl requires the `bs4` and `requests` libraries.')
        sys.exit(1)

    print(COOKIE_HELP)
    cookie = input('$ ')

    user = input('\nWhat is your bandcamp username? ')

    print("\nAttempting to get collection download URLs...")
    headers = {'Cookie': cookie, 'User-Agent': USER_AGENT}

    res = requests.get(f'https://bandcamp.com/{user}', headers=headers,
                       timeout=REQUEST_TIMEOUT)
    pagedata = _load_pagedata(res.text)
    if pagedata is None:
        print('Error: could not find the collection data on that page!')
        sys.exit(1)

    items = list(pagedata['item_cache']['collection'].values())
    download_urls = dict(pagedata['collection_data']['redownload_urls'])
    download_token = pagedata['collection_data']['last_token']
    fan_id = pagedata['fan_data']['fan_id']
    collection_count = pagedata['collection_count']

    # The page only embeds part of the collection; request the remainder.
    res = requests.post(
        'https://bandcamp.com/api/fancollection/1/collection_items',
        json={'fan_id': fan_id, 'count': collection_count,
              'older_than_token': download_token},
        headers=headers,
        timeout=REQUEST_TIMEOUT).json()
    download_urls.update(res['redownload_urls'])
    items += res['items']
    items = [
        entry for entry in items
        if f'{entry["sale_item_type"]}{entry["sale_item_id"]}' in download_urls
    ]
    total = len(items)

    print(f'Downloading {total} albums to ./bcdl/...')
    p = pathlib.Path('./bcdl')
    if not p.is_dir():
        # is_dir() follows symlinks, so a link to a directory is accepted;
        # anything else that occupies the name is an error.
        if p.exists() or p.is_symlink():
            print('Error: ./bcdl already exists, but it\'s not a directory!')
            sys.exit(-1)
        print('Creating directory...')
        p.mkdir()

    for i, item in enumerate(items, start=1):
        stem = clean_filename(f'{item["band_name"]} - {item["item_title"]}')
        # Escape the stem so titles containing '[' or ']' still match.
        if any(p.glob(glob.escape(stem) + '.*')):
            print(f'Already have {i}/{total}...', end="\r", flush=True)
            skipped += 1
            continue

        url = download_urls[f'{item["sale_item_type"]}{item["sale_item_id"]}']
        pagedata = _load_pagedata(
            requests.get(url, timeout=REQUEST_TIMEOUT).text)
        if pagedata is None:
            print(f'Skipping {i}/{total}: download page could not be read!')
            failed += 1
            continue

        for dlitem in pagedata['download_items']:
            label = f'{dlitem["artist"]} - {dlitem["title"]}'
            kind = dlitem['download_type']
            if kind not in ('a', 't'):
                print(f'Skipping {label}: unknown type {kind}')
                skipped += 1
                continue

            choice = _pick_download(dlitem.get('downloads') or {})
            if choice is None:
                print(f'Skipping {label}: no supported format available')
                skipped += 1
                continue
            codec, download = choice

            # Type 'a' is saved as a .zip, type 't' under its codec name.
            extension = 'zip' if kind == 'a' else codec.split('-')[0]
            filename = f'{clean_filename(label)}.{extension}'
            filepath = p.joinpath(filename)
            if filepath.exists():
                print(f'Already have {i}/{total}...', end="\r", flush=True)
                skipped += 1
                continue

            print(f'{i}/{total}: {filename}')
            download_bc_file(download, filepath)

    # Right-align the three counters to the widest of them.
    width = len(str(max(downloaded, skipped, failed)))
    fmt = '{n:' + str(width) + '} items {v}.'

    print('\nFinished!\n')
    if transfer:
        print(f'{transfer:,d} bytes transferred.')
    if downloaded:
        print(fmt.format(n=downloaded, v='downloaded'))
    if skipped:
        print(fmt.format(n=skipped, v='skipped'))
    if failed:
        print(fmt.format(n=failed, v='failed'))

    if failed == 0:
        print('Full collection downloaded!')
    else:
        # failed counts individual downloads, so clamp at zero.
        done = max(total - failed, 0)
        print(f'{done/total:.2%} of collection downloaded.')
        print('Try rerunning the script! If issues persist, contact the dev.')
        print('Make sure to include the errors printed above.')


if __name__ == '__main__':
    main()