1
0
Fork 0
mirror of https://github.com/Eggbertx/gochan.git synced 2025-08-27 11:26:22 -07:00

Add option to move orphans to a backup directory instead of deleting them

This commit is contained in:
Eggbertx 2025-04-05 20:41:08 -07:00
parent 739fdcd3fc
commit d0dbf82574

View file

@ -3,17 +3,25 @@
from argparse import ArgumentParser
from os import path
import os
import glob
from glob import glob
from shutil import move
"""
Searches for orphaned files (files that don't appear to be attached to a post but weren't deleted) and either deletes them or moves them to a backup directory
"""
def delete_orphans(board:str, dry_run:bool=False, thread_subdir:str="res", thumb_subdir:str="thumb", upload_subdir:str="src"):
def delete_orphans(board:str, dry_run:bool=False, backup_dir:str="", thread_subdir:str="res", thumb_subdir:str="thumb", upload_subdir:str="src"):
board_path = path.abspath(board)
if not path.exists(board) or not path.isdir(board):
raise FileNotFoundError(f"Board directory '{board_path}' does not exist or is not a directory")
use_backup = backup_dir != "" and backup_dir is not None
if use_backup:
if not path.exists(backup_dir):
os.mkdir(backup_dir)
elif not path.isdir(backup_dir):
raise FileNotFoundError(f"Backup directory '{backup_dir}' already exists but is not a directory")
print(f"Checking for orphaned files in {board_path} (dry run: {dry_run})")
res_path = path.abspath(path.join(board, thread_subdir))
src_path = path.abspath(path.join(board, upload_subdir))
@ -27,27 +35,31 @@ def delete_orphans(board:str, dry_run:bool=False, thread_subdir:str="res", thumb
# load all HTML files in res_path into a single string, not as efficient as parsing and storing thread info but more portable
thread_data = ""
for root, _, files in os.walk(res_path):
for file in files:
with open(path.join(root, file), "r", encoding="utf-8") as f:
thread_data += f.read()
for file in glob(path.join(res_path, "*.html")):
with open(file, "r", encoding="utf-8") as f:
thread_data += f.read()
for root, _, files in os.walk(src_path):
for file in files:
if not file in thread_data:
file_path = path.join(src_path, file)
print(f"Deleting {file_path}")
if not dry_run:
os.remove(file_path)
remove_orphan(file_path, backup_dir, dry_run)
for root, _, files in os.walk(thumb_path):
for file in files:
if not file in thread_data:
file_path = path.join(thumb_path, file)
print(f"Deleting {file_path}")
if not dry_run:
os.remove(file_path)
remove_orphan(file_path, backup_dir, dry_run)
def remove_orphan(file_path:str, backup_dir:str, dry_run:bool=False):
    """
    Removes a single orphaned file, either by moving it into a backup directory or by deleting it.

    file_path: path of the orphaned file to remove
    backup_dir: directory to move the file into; if it is None or an empty string, the file is deleted instead
    dry_run: if True, only print what would be done and leave the filesystem untouched
    """
    if backup_dir:
        # The file keeps only its base name and is placed directly in backup_dir.
        # NOTE(review): a file with the same name already in backup_dir may be
        # overwritten, depending on the platform's rename semantics - confirm
        # this is acceptable when several directories share one backup directory.
        backup_path = path.join(backup_dir, path.basename(file_path))
        print(f"Backing up {file_path} to {backup_path}")
        if not dry_run:
            move(file_path, backup_path)
    else:
        print(f"Deleting {file_path}")
        if not dry_run:
            os.remove(file_path)
if __name__ == "__main__":
parser = ArgumentParser(description="Delete orphaned files (files not associated with a post) in the specified board directory.")
@ -69,6 +81,9 @@ if __name__ == "__main__":
type=str,
default="src",
help="The subdirectory in the board directory where uploads are stored (default: 'src')")
# Optional backup directory; its value is passed on to delete_orphans as backup_dir
parser.add_argument("--backup-dir",
    type=str,
    help="If set, the directory to back up files to instead of deleting them")
args = parser.parse_args()
for board in args.boards:
delete_orphans(board, args.dry_run)
delete_orphans(board, args.dry_run, args.backup_dir, args.thread_subdir, args.thumb_subdir, args.upload_subdir)