From a789b4081c67a898930b64b447bd53d36e194579 Mon Sep 17 00:00:00 2001
From: Aleteoryx <alyx@aleteoryx.me>
Date: Tue, 29 Jul 2025 16:38:00 -0400
Subject: [PATCH] initial

---
 COPYING   |  13 ++++
 README.md |  27 ++++++++
 bcdl.py   | 199 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 239 insertions(+)
 create mode 100644 COPYING
 create mode 100644 README.md
 create mode 100644 bcdl.py

diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000000000000000000000000000000000000..b8ca881d06291d9bdc1abcee2738aa1d783ac8a4
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,13 @@
+Copyright (C) 2025 by Aleteoryx <alyx@aleteoryx.me>
+
+Permission to use, copy, modify, and/or distribute this software for
+any purpose with or without fee is hereby granted.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR
+BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
+OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4fa2e88ce057c502ff8f492bf41bffc2042ac2a1
--- /dev/null
+++ b/README.md
@@ -0,0 +1,27 @@
+# bcdl
+
+Not Shit Bandcamp collection downloader.
+
+This tool automatically downloads your entire Bandcamp collection, in FLAC quality where available, to a folder.
+If it fails or crashes, the download can be resumed.
+
+This is NOT a piracy tool.
+It uses your Bandcamp token to get the download links for music you have purchased.
+I pinky-promise to not send your token to evilserver.c2.virus.aleteoryx.me.
+
+***
+
+bcdl is written in Python. It requires the `bs4` and `requests` libraries.
+On Linux, your distro should package these as `python-XYZ` or `python3-XYZ`.
+On MacOS or Windows, IDK, google around.
+
+Download the script [here](https://git.amehut.dev/~aleteoryx/bcdl/blob/master/bcdl.py).
+
+## usage
+
+Run the single `bcdl.py` script with no arguments.
+The './bcdl/' folder is currently hardcoded, and you should download the script to a device with a lot of free space.
+If './bcdl/' is symlinked to a directory, bcdl will use the symlinked path.
+
+If you do not want to keep every downloaded file in the './bcdl/' directory, replace each file with an empty one.
+bcdl does not check the contents of files, only filenames.
diff --git a/bcdl.py b/bcdl.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa5a37082836daf72e73e25540f9515246ed34b3
--- /dev/null
+++ b/bcdl.py
@@ -0,0 +1,199 @@
+'''
+Copyright (C) 2025 by Aleteoryx <alyx@aleteoryx.me>
+
+Permission to use, copy, modify, and/or distribute this software for
+any purpose with or without fee is hereby granted.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR
+BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
+OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+'''
+
+import bs4
+import requests
+import json
+import pathlib
+import sys
+import time
+import os
+import math
+
+
+failed = 0
+skipped = 0
+downloaded = 0
+transfer = 0
+
+def download_bc_file(url, path):
+	'''Wait for Bandcamp to prepare the file behind url, then stream it to path (a pathlib.Path).'''
+	global failed,transfer,downloaded
+	# The statdownload endpoint answers with JSON saying whether the file is ready yet.
+	url = url.replace('/download/', '/statdownload/') + '&.vrs=1'
+	for i in range(6):
+		if i != 0:
+			print('Download not ready, sleeping 5 seconds...')
+			time.sleep(5)
+		else:
+			time.sleep(1)
+		res = requests.get(url, headers={'Accept':'application/json','Cookie':cookie}).json()
+		if res['result'] == 'ok':
+			download = res['download_url']
+			break
+	else:
+		print('Skipping download: timed out!')
+		if res['result'] == 'err' and 'errortype' in res:
+			print(f'Last error: {res["errortype"]}')
+		failed += 1
+		return
+	print('Starting download...')
+	try:
+		with open(path, 'wb') as output:
+			res = requests.get(download, stream=True)
+			# Fail on an HTTP error instead of saving its body: later runs only look at filenames.
+			res.raise_for_status()
+			length = int(res.headers['content-length'])
+			progress = 0
+			for chunk in res.iter_content(32768):
+				progress += len(chunk)
+				print(f'{progress/length:7.2%} of {length:14,d} bytes downloaded...', end='\r', flush=True)
+				output.write(chunk)
+			transfer += length
+	except KeyboardInterrupt as e:
+		path.unlink()
+		print('\nExiting!')
+		sys.exit(1)
+	except Exception as e:
+		path.unlink()
+		raise e
+	print(f"100.00% of {length:14,d} bytes downloaded...")
+	downloaded += 1
+
+def clean_filename(filename):
+	'''Return filename with characters unsafe in file names replaced by "_", cut to fit the name limit.'''
+	# os.pathconf is POSIX-only, so fall back to the common 255 limit elsewhere; 5 is reserved for the file extension.
+	limit = (os.pathconf('.','PC_NAME_MAX') if hasattr(os,'pathconf') else 255) - 5
+	cleaned = filename.translate(str.maketrans('/\\*"<>:|?', '_'*9))
+	return cleaned.encode()[:limit].decode(errors='ignore') # the limit counts bytes, not characters
+
+print("""
+First, login to Bandcamp in a web browser, then open devtools. Open the
+"network" tab, and click on the first row with "document" in the
+initiator column. In the sidebar that opens, scroll down to
+"request headers".
+
+Find the line starting with "Cookie", and copy everything after the
+colon. Then, paste it here.""".strip())
+cookie = input('$ ')
+
+user = input('\nWhat is your bandcamp username? ')
+
+# Step 1: the fan page embeds a JSON blob (the data-blob attribute of #pagedata) describing the collection.
+print("\nAttempting to get collection download URLs...")
+headers = {'Cookie': cookie, 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0'}
+
+res = requests.get(f'https://bandcamp.com/{user}', headers=headers)
+doc = bs4.BeautifulSoup(res.text, 'html.parser')
+pagedata = json.loads(doc.find(id='pagedata')['data-blob'])
+# Keep the items already listed on the page, their redownload URLs, and the values needed to request more.
+items = [*pagedata['item_cache']['collection'].values()]
+download_urls = pagedata['collection_data']['redownload_urls']
+download_token = pagedata['collection_data']['last_token']
+fan_id = pagedata['fan_data']['fan_id']
+collection_count = pagedata['collection_count']
+
+del res # the page objects are not used again once the fields above are copied out
+del doc
+del pagedata
+
+# Step 2: request collection items older than last_token from the API and merge them in.
+res = requests.post(
+	'https://bandcamp.com/api/fancollection/1/collection_items',
+	json={'fan_id':fan_id,'count':collection_count,'older_than_token':download_token},
+	headers=headers).json()
+download_urls = {**download_urls, **res['redownload_urls']}
+items += res['items']
+items = [*filter(lambda x: (f'{x["sale_item_type"]}{x["sale_item_id"]}' in download_urls), items)] # drop items that have no redownload URL
+
+del res
+
+
+print(f'Downloading {len(items)} albums to ./bcdl/...')
+p = pathlib.Path('./bcdl')
+if p.exists() or p.is_symlink(): # is_symlink() also catches a dangling link, which exists() reports as missing
+	if not p.is_dir(): # is_dir() follows symlinks, so a link to a directory passes
+		print('Error: ./bcdl already exists, but it\'s not a directory!')
+		sys.exit(-1)
+else:
+	print('Creating directory...')
+	p.mkdir()
+
+# Walk the collection; i is the 1-based position shown in progress messages.
+for i,item in enumerate(items, 1):
+	testglob = clean_filename(f'{item["band_name"]} - {item["item_title"]}')+'.*'
+	if any(p.glob(testglob)): # a match here avoids fetching the download page at all
+		print(f'Already have {i}/{len(items)}...', end="\r", flush=True)
+		skipped += 1
+		continue
+
+	url = download_urls[f'{item["sale_item_type"]}{item["sale_item_id"]}']
+	doc = bs4.BeautifulSoup(requests.get(url).text, 'html.parser')
+	pagedata = json.loads(doc.find(id='pagedata')['data-blob'])
+	for dlitem in pagedata['download_items']:
+		label = f'{dlitem["artist"]} - {dlitem["title"]}'
+		if dlitem['download_type'] not in ('a', 't'):
+			print(f'Skipping {label}: unknown type {dlitem["download_type"]}')
+			skipped += 1
+			continue
+
+		# Take the first format on offer, in order of preference.
+		for codec in ['flac','alac','aiff-lossless','wav','mp3-320']:
+			if codec in dlitem['downloads']:
+				break
+		else:
+			# No usable format: skip rather than reuse a URL left over from a previous item.
+			print(f'Skipping {label}: no supported format available')
+			skipped += 1
+			continue
+		download = dlitem['downloads'][codec]['url']
+		# 'a' downloads are saved as .zip, 't' downloads take the codec name (presumably album vs. track).
+		ext = 'zip' if dlitem['download_type'] == 'a' else codec.split('-')[0]
+		filename = clean_filename(label)+'.'+ext
+		filepath = p.joinpath(filename)
+		if filepath.exists():
+			print(f'Already have {i}/{len(items)}...', end="\r", flush=True)
+			skipped += 1
+			continue
+
+		print(f'{i}/{len(items)}: {filename}')
+		download_bc_file(download, filepath)
+
+nlen = str(int( # digit count of the largest counter; used as the column width in fmt
+	max(
+		math.log10(downloaded+0.1), # +0.1 keeps log10 defined when a counter is 0
+		math.log10(skipped+0.1),
+		math.log10(failed+0.1))
+	)+1)
+fmt = '{n:'+nlen+'} items {v}.'
+# Report the totals, leaving out any counter that is zero.
+print('\nFinished!\n')
+if transfer:
+	print(f'{transfer:,d} bytes transferred.')
+if downloaded:
+	print(fmt.format(n=downloaded,v='downloaded'))
+if skipped:
+	print(fmt.format(n=skipped,v='skipped'))
+if failed:
+	print(fmt.format(n=failed,v='failed'))
+
+if failed == 0:
+	print('Full collection downloaded!')
+else:
+	print(f'{(len(items)-failed)/len(items):.02%} of collection downloaded.') # must be an f-string for the percentage to be evaluated
+	print('Try rerunning the script! If issues persist, contact the dev.')
+	print('Make sure to include the errors printed above.')
+