• 0 Posts
  • 4 Comments
Joined 1 month ago
cake
Cake day: January 13th, 2025

help-circle
  • m_f@lemm.eetoCalvin and Hobbes@lemmy.world15 February 1988
    link
    fedilink
    English
    arrow-up
    11
    ·
    5 days ago

    Neat! You should definitely post then; it’s better if I’m not the single point of failure for comics posting. I’ll post my still-somewhat-WIP script for posting the Peanuts comic, which uses GoComics and probably has a lot of overlap with C&H.

    I run python post_stuff.py, which imports other scripts for each comic and runs them. get_lemmy.py handles caching auth credentials for up to several days (I’m sure it could go longer but I haven’t checked the docs yet). Caching the creds and running it all from a wrapper entry point keeps the number of logins down, which I was getting rate limited on. It doesn’t currently handle accounts with MFA enabled, but it should be pretty resilient to stuff like file upload failing.

    You can just ignore the image munging bits and probably some other bits, that was for turning the 1x4 Peanuts strip into a 2x2 post here. You might find interesting stuff by poking around on the site, Peanuts has a transcript for each comic in the HTML, though I don’t see that for C&H.

    I’ve also been meaning to publish the git repo somewhere, hopefully this will motivate me to get around to doing that.

    pyproject.toml dependencies section
    dependencies = [
        "backoff~=2.2",
        "beautifulsoup4~=4.12",
        "pythorhead~=0.26",
        "requests~=2.32",
        "urllib3>=2.2.3",
    ]
    
    get_lemmy.py
    import json
    import logging
    import os
    from base64 import b64decode
    from datetime import datetime
    
    from pythorhead import Lemmy
    
    
    def load_token(lemmy: Lemmy):
        """Authenticate ``lemmy``, reusing a cached JWT when one is still fresh.

        The cache file ``./tmp/lemmy-token.txt`` holds a JSON object with the
        keys ``token`` and ``logged_in_username``. The token's ``iat`` claim is
        read from the JWT payload; if the token was issued less than three days
        ago it is injected straight into ``lemmy``. Otherwise (missing, expired
        or unreadable cache) a fresh login is performed with ``LEMMY_USERNAME``
        and ``LEMMY_PASSWORD`` from the environment and the new token is cached.

        Raises:
            KeyError: if the login environment variables are not set.
            Exception: if ``lemmy.log_in`` reports failure.
        """
        # Local import: the module header only brings in ``b64decode``.
        from base64 import urlsafe_b64decode

        token_path = "./tmp/lemmy-token.txt"
        max_age_seconds = 3 * 24 * 3600

        try:
            with open(token_path) as f:
                token = json.load(f)

            print("Cached token found")
            payload = token["token"].split(".")[1]
            # JWT segments are base64url without padding; restore the padding
            # so decoding does not depend on the payload length.
            payload += "=" * (-len(payload) % 4)
            iat = datetime.fromtimestamp(json.loads(urlsafe_b64decode(payload))["iat"])
            # Read every needed key before touching ``lemmy`` so a bad cache
            # never leaves it half-configured.
            cached_username = token["logged_in_username"]

            # total_seconds() spans the whole delta; ``.seconds`` is only the
            # sub-day remainder and could never exceed the three-day limit.
            if (datetime.now() - iat).total_seconds() <= max_age_seconds:
                print("Token hasn't expired, using")
                lemmy._requestor._auth.token = token["token"]
                lemmy._requestor.logged_in_username = cached_username
                return

            print("Token has expired, ignoring")
        except FileNotFoundError:
            print("Cached token not found, logging in")
        except (ValueError, KeyError, IndexError, TypeError) as exc:
            # Covers invalid JSON, bad base64 and missing fields in the cache.
            print(f"Cached token is unusable ({exc!r}), logging in")

        username = os.environ["LEMMY_USERNAME"]
        password = os.environ["LEMMY_PASSWORD"]
        if not lemmy.log_in(username, password):
            raise Exception("Couldn't log in!")

        # The cache directory may not exist on a first run.
        os.makedirs(os.path.dirname(token_path), exist_ok=True)
        with open(token_path, "w") as f:
            json.dump({"token": lemmy._requestor._auth.token, "logged_in_username": username}, f)
    
    
    def get_authed_lemmy() -> Lemmy:
        """Build a ``Lemmy`` client for ``LEMMY_DOMAIN`` and authenticate it.

        Settings are read from a ``.env`` file in the working directory
        (``KEY=VALUE`` per line) and copied into ``os.environ``. Blank lines and
        lines starting with ``#`` are ignored, and only the first ``=`` on a
        line separates key from value, so values may themselves contain ``=``.

        Raises:
            FileNotFoundError: if ``.env`` does not exist.
            ValueError: if a non-blank, non-comment line has no ``=``.
            KeyError: if ``LEMMY_DOMAIN`` ends up undefined.
        """
        settings = {}
        with open(".env") as f:
            for line_number, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue
                key, separator, value = line.partition("=")
                if not separator:
                    raise ValueError(f".env line {line_number} is not KEY=VALUE: {line!r}")
                settings[key.strip()] = value.strip()
        # Apply everything at once, after the whole file parsed cleanly.
        os.environ.update(settings)

        # Surface backoff's retry messages on stderr.
        logging.getLogger("backoff").addHandler(logging.StreamHandler())

        lemmy = Lemmy(os.environ["LEMMY_DOMAIN"])

        load_token(lemmy)

        return lemmy
    
    post_peanuts.py
    import json
    import subprocess
    from dataclasses import dataclass
    from datetime import date, datetime
    from html import unescape
    from pathlib import Path
    from tempfile import TemporaryFile
    
    import backoff
    import requests
    from bs4 import BeautifulSoup
    from pythorhead import Lemmy
    from pythorhead.types import LanguageType, SortType
    
    from get_lemmy import get_authed_lemmy
    
    
    @dataclass
    class Strip:
        """One comic strip: its processed local image, transcript and upload URL."""

        # Filesystem path of the processed PNG.
        local_path: str
        # Transcript text, already formatted as quoted lines for the post body.
        transcript: str
        # URL of the uploaded image; None until the caller assigns it.
        image_url: str | None = None
    
    
    def munge_image(input_image_file) -> bytes:
        """Re-tile a strip image through ImageMagick and return it as PNG bytes.

        ``input_image_file`` is handed to the first process as its stdin. Three
        commands are chained through pipes: ``magick -crop 25%x100%`` slices
        the image, ``montage -tile 2x2`` reassembles the slices in a grid, and
        a final ``magick`` call resizes to 200%, reduces to 256 colours at
        depth 8, sharpens, and writes PNG to stdout.

        Raises:
            subprocess.CalledProcessError: if the final ``magick`` call fails.
        """
        slice_argv = ["magick", "-", "-crop", "25%x100%", "+repage", "+adjoin", "miff:-"]
        tile_argv = ["montage", "-", "-tile", "2x2", "-geometry", "+0+0", "miff:-"]
        finish_argv = ["magick", "-"]
        finish_argv += ["-resize", "200%"]
        finish_argv += ["-colors", "256"]
        finish_argv += ["-depth", "8"]
        finish_argv += ["-filter", "Box"]
        finish_argv += ["-sharpen", "0x2.0"]
        finish_argv += ["png:-"]

        pipe = subprocess.PIPE
        with (
            subprocess.Popen(slice_argv, stdin=input_image_file, stdout=pipe) as slicer,
            subprocess.Popen(tile_argv, stdin=slicer.stdout, stdout=pipe) as tiler,
        ):
            # Only the last stage is run to completion with check=True.
            finished = subprocess.run(finish_argv, stdin=tiler.stdout, stdout=pipe, check=True)
            return finished.stdout
    
    
    def ensure_downloaded(date: date) -> Strip | None:
        """Return the Peanuts strip for ``date``, downloading it if not cached.

        The processed image is cached as ``./tmp/peanuts-<isodate>.png`` with a
        ``.json`` sidecar holding the transcript. The cache is used only when
        both files exist, so an interrupted earlier run is repaired by
        downloading again.

        Returns:
            A ``Strip`` with ``local_path`` and ``transcript`` set, or ``None``
            when the page has no comic and the real current day is a Tuesday.

        Raises:
            requests.exceptions.RequestException: on HTTP failure or timeout.
            Exception: when the page has no comic on a non-Tuesday, or lacks
                the image URL or transcript metadata.
        """
        isodate = date.isoformat()
        formatted_date = date.strftime("%Y/%m/%d")

        local_path = Path("./tmp/") / f"peanuts-{isodate}.png"
        metadata_path = local_path.with_suffix(".json")
        if local_path.exists() and metadata_path.exists():
            strip = json.loads(metadata_path.read_text())
            transcript = strip["transcript"]
            print(f"Found existing file, using {local_path}")
            return Strip(local_path=str(local_path), transcript=transcript)

        base_url = "https://www.gocomics.com/peanuts/"
        url = f"{base_url}{formatted_date}"

        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        picture_element = soup.find("picture", class_="item-comic-image")
        if picture_element is None:
            # ``date`` here is the parameter (an instance); ``today()`` is a
            # classmethod, so this checks the real current weekday.
            if date.today().weekday() == 1:
                print("No comic, expected since it's Tuesday")
                return None
            raise Exception("It's not Tuesday and found no comic!")

        image_element = picture_element.find("img")
        image_url = image_element.get("src") if image_element is not None else None
        if not image_url:
            raise Exception("Found the comic element but no image URL!")
        print(f"Image URL found: {image_url}")

        # Extract the transcript before writing anything, so a page without it
        # cannot leave a PNG behind with no sidecar.
        description = soup.find("meta", property="og:description")
        content = description.get("content") if description is not None else None
        if content is None:
            raise Exception("Found the comic but no og:description transcript!")
        transcript = "\n>\n".join(
            f"> {line.strip()}"
            for line in unescape(content).replace("<BR>", "\n").splitlines()
            if line
        )

        image_response = requests.get(image_url, timeout=30)
        image_response.raise_for_status()

        # "w+b" so the file is explicitly readable when handed on as stdin.
        with TemporaryFile("w+b") as f:
            f.write(image_response.content)
            f.seek(0)
            munged_bytes = munge_image(f)

        # The cache directory may not exist on a first run.
        local_path.parent.mkdir(parents=True, exist_ok=True)
        local_path.write_bytes(munged_bytes)
        print(f"Comic saved as: {local_path}")

        metadata_path.write_text(json.dumps({"transcript": transcript}))

        return Strip(local_path=str(local_path), transcript=transcript)
    
    
    @backoff.on_exception(
        backoff.expo,
        (requests.exceptions.RequestException, ValueError),
        max_tries=16,
        base=6,
        max_time=120,
    )
    def ensure_uploaded(lemmy, path):
        """Return the hosted URL for the image at ``path``, uploading it once.

        The URL is stored under ``image_url`` in the ``.json`` file next to the
        image, so later calls return the recorded URL without uploading again.
        The decorator retries the call on ``RequestException`` and
        ``ValueError``; an empty upload result is raised as ``ValueError`` so
        that it is retried too.

        Raises:
            Exception: if the ``.json`` sidecar does not exist (not retried).
            ValueError: if the upload returned nothing.
        """
        print(f"Ensuring {path=} is uploaded...")

        marker_path = Path(path).with_suffix(".json")

        if not marker_path.exists():
            raise Exception(f"Does not exist! {marker_path=}")

        print(f"Reading uploaded path from existing {marker_path=}")
        metadata = json.loads(marker_path.read_text())
        if "image_url" in metadata:
            return metadata["image_url"]

        # The marker exists at this point; it just has no upload recorded yet.
        print(f"No image_url recorded in {marker_path=}, uploading")
        uploaded = lemmy.image.upload(path)
        if not uploaded:
            # Covers both None and an empty result.
            raise ValueError(f"Got {uploaded!r} for `uploaded`")
        metadata["image_url"] = uploaded[0]["image_url"]
        marker_path.write_text(json.dumps(metadata))
        return metadata["image_url"]
    
    
    def ensure_posted(lemmy: Lemmy, community_id: int, date: date, strip: Strip):
        """Post ``strip`` to the community unless ``date`` is already the newest post.

        Existing posts are listed newest-first and their titles parsed as
        ``"<day> <Month> <Year>"``; titles that do not parse are ignored. If the
        most recent parsed date equals ``date`` nothing is posted. A community
        with no date-titled posts yet is treated as needing the post.

        Nothing is posted when ``date.weekday() == 6``.

        Raises:
            Exception: if creating the post returns a falsy result.
        """
        if date.weekday() == 6:
            print("It's Tuesday (i.e. Sunday in comic time), skipping post.")
            return

        posts = lemmy.post.list(community_id, sort=SortType.New)
        posted_dates = []
        for post in posts:
            try:
                comic_date = datetime.strptime(post["post"]["name"], "%d %B %Y").date()
            except ValueError:
                # Tribute Tuesday post, probably. Ignore
                continue
            posted_dates.append(comic_date)

        # default=None keeps an empty community from raising.
        newest_post_date = max(posted_dates, default=None)
        if newest_post_date == date:
            print("All caught up!")
            return

        print("Need to post latest comic")

        body = f"Transcript:\n\n{strip.transcript}"
        # ``%-d`` is a platform-specific strftime flag; build the unpadded day
        # from the attribute instead.
        title = f"{date.day} {date.strftime('%B %Y')}"

        created = lemmy.post(
            community_id,
            title,
            url=strip.image_url,
            body=body,
            language_id=LanguageType.EN,
        )

        if created:
            print(created)
        else:
            raise Exception("Failed to post!")
    
    
    def main(lemmy: Lemmy):
        """Download, upload and post the Peanuts strip matching today's date.

        Today's date is mapped onto the comic's timeline by replacing the year
        with ``1950 + (current year - 2024)``. When that date does not exist
        (29 February mapped to a non-leap year) the run is skipped rather than
        crashing.
        """
        # Read the clock once so the year and the month/day cannot disagree.
        real_today = date.today()
        comic_year = 1950 + (real_today.year - 2024)
        try:
            today = real_today.replace(year=comic_year)
        except ValueError:
            print(f"{real_today.isoformat()} has no matching date in {comic_year}, skipping")
            return

        strip = ensure_downloaded(today)

        if not strip:
            print(f"Got {strip=}, it's probably Tuesday, so ignoring...")
            return
        else:
            print(strip)

        community_id = lemmy.discover_community("peanuts@midwest.social")
        strip.image_url = ensure_uploaded(lemmy, strip.local_path)
        ensure_posted(lemmy, community_id, today, strip)
    
    
    if __name__ == "__main__":
        # Standalone run: authenticate first, then post the Peanuts strip.
        authed_lemmy = get_authed_lemmy()
        main(authed_lemmy)
    
    post_stuff.py
    import post_oglaf
    import post_peanuts
    import post_smbc
    from get_lemmy import get_authed_lemmy
    
    
    def main():
        """Log in once and run each comic poster with the shared client."""
        lemmy = get_authed_lemmy()

        # Same order as before: Peanuts, then SMBC, then Oglaf.
        for comic_module in (post_peanuts, post_smbc, post_oglaf):
            comic_module.main(lemmy=lemmy)
    
    
    if __name__ == "__main__":
        # Entry point when this file is executed as a script.
        main()
    

    cc @db0@lemmy.dbzer0.com on ☝️ — btw, pythorhead has been super useful! You might be interested in the way I handle cached creds there; maybe that’s something the library could do itself?