Add write-image option to bulk-download command

This commit is contained in:
Babibubebon 2023-04-03 04:16:28 +09:00
parent c16282aa28
commit d7fa51f7d5
Signed by: Babibubebon
GPG key ID: 78C8FB2A2FEA1EE3
3 changed files with 43 additions and 13 deletions

View file

@ -5,13 +5,20 @@ import requests
from . import ISDNRecord, __version__
from .parser import ISDNJpXMLParser
# Bare prefix form of the XML endpoint (no `{isdn}` placeholder).
ISDN_API_ENDPOINT = "https://isdn.jp/xml/"
# URL templates: `{isdn}` is a str.format placeholder filled with a normalized ISDN.
ISDN_XML_ENDPOINT = "https://isdn.jp/xml/{isdn}"
ISDN_IMAGE_ENDPOINT = "https://isdn.jp/images/thumbs/{isdn}.png"
# Default sitemap URL handed to ISDNClient.
ISDN_SITEMAP = "https://isdn.jp/sitemap.xml"
class ISDNClient:
def __init__(
    self,
    xml_endpoint_url: str = ISDN_XML_ENDPOINT,
    image_endpoint_url: str = ISDN_IMAGE_ENDPOINT,
    sitemap_url: str = ISDN_SITEMAP,
):
    """Create a client bound to the given endpoint URLs.

    Args:
        xml_endpoint_url: URL template for XML records; must contain an
            ``{isdn}`` placeholder, which is filled via ``str.format``.
        image_endpoint_url: URL template for cover images, using the same
            ``{isdn}`` placeholder.
        sitemap_url: URL of the sitemap.
    """
    self.xml_endpoint_url = xml_endpoint_url
    self.image_endpoint_url = image_endpoint_url
    self.sitemap_url = sitemap_url
    # A single Session is shared by every request this client makes.
    self.s = requests.Session()
    self.set_user_agent(f"isdn-python/{__version__}")
@ -23,17 +30,21 @@ class ISDNClient:
def normalize_isdn(isdn: str) -> str:
    """Return *isdn* with every hyphen and all whitespace removed.

    Whitespace is dropped anywhere in the string, not only at the ends, so
    a value such as ``"278 4 123456 12 3"`` normalizes the same way as its
    hyphenated form.
    """
    # str.split() with no argument splits on any whitespace run and
    # discards empty pieces, so joining the parts removes all whitespace.
    return "".join(isdn.split()).replace("-", "")
def _get(self, isdn: str, endpoint_url: str) -> requests.Response:
    """Fetch the resource for *isdn* from a URL template.

    Args:
        isdn: ISDN in any form accepted by ``normalize_isdn``.
        endpoint_url: URL template containing an ``{isdn}`` placeholder.

    Returns:
        The response object returned by the session.

    Raises:
        Whatever ``Response.raise_for_status()`` raises for an error
        status (requests documents this as ``HTTPError`` for 4xx/5xx).
    """
    url = endpoint_url.format(isdn=self.normalize_isdn(isdn))
    r = self.s.get(url)
    r.raise_for_status()
    return r
def get(self, isdn: str) -> ISDNRecord:
    """Fetch the XML record for *isdn* and return it parsed.

    The record is requested once from ``self.xml_endpoint_url`` and the
    response body is handed to ``ISDNJpXMLParser.parse_record``.
    """
    r = self._get(isdn, self.xml_endpoint_url)
    return ISDNJpXMLParser.parse_record(r.content)
def get_raw(self, isdn: str) -> bytes:
    """Fetch the XML record for *isdn* and return the unparsed body.

    The record is requested once from ``self.xml_endpoint_url``; the
    response content is returned as-is.
    """
    r = self._get(isdn, self.xml_endpoint_url)
    return r.content
def get_image(self, isdn: str) -> bytes:
    """Fetch the cover image for *isdn* and return its raw bytes.

    The request goes through ``self._get`` using ``self.image_endpoint_url``.
    """
    return self._get(isdn, self.image_endpoint_url).content
def _list(self) -> requests.Response:

View file

@ -7,6 +7,7 @@ from requests.exceptions import HTTPError
from . import ISDN, __version__
from .client import ISDNClient
from .parser import ISDNJpXMLParser
@click.group()
@ -46,24 +47,42 @@ def list_isdns():
@cli.command(help="Download all xml record files from isdn.jp")
@click.argument("directory", type=click.Path(exists=True, file_okay=False, writable=True))
@click.option("--force", "-f", is_flag=True, help="Overwrite existing files")
@click.option("--stop-on-error", is_flag=True, help="Stop when error occurs during download")
@click.option("--sleep-time", "-s", type=int, default=500, help="Sleep interval for download (millisecond)")
@click.option("--write-image", is_flag=True, help="Write cover image to file")
@click.option(
    "--write-image-path",
    type=click.Path(exists=True, file_okay=False, writable=True),
    help="Directory path to write cover images",
)
def bulk_download(
    directory: str, force: bool, stop_on_error: bool, sleep_time: int, write_image: bool, write_image_path: str
):
    """Download every listed record's XML (and optionally its image).

    Args:
        directory: Existing directory that receives ``<isdn>.xml`` files.
        force: Re-download even when the target files already exist.
        stop_on_error: Re-raise the first ``HTTPError`` instead of moving
            on to the next ISDN.
        sleep_time: Pause after each successful download, in milliseconds.
        write_image: Also save ``<isdn>.png`` for records that have a
            sample image URI.
        write_image_path: Directory for images; when falsy, images are
            written next to the XML files in ``directory``.

    NOTE(review): a record without ``sample_image_uri`` never gets an image
    file, so with ``--write-image`` its XML is fetched again on every run.
    """
    c = ISDNClient()
    # The listing is materialized before being handed to the progress bar
    # (presumably so the bar knows the total -- confirm against c.list()).
    with click.progressbar(list(c.list()), show_pos=True) as bar:
        for isdn in bar:
            path = os.path.join(directory, f"{isdn}.xml")
            image_path = os.path.join(write_image_path or directory, f"{isdn}.png")
            # Skip only when everything requested is already on disk.
            if not force and os.path.exists(path) and (not write_image or os.path.exists(image_path)):
                continue

            try:
                res = c.get_raw(isdn)
                with open(path, "wb") as out:
                    out.write(res)

                if write_image:
                    record = ISDNJpXMLParser.parse_record(res)
                    if record.sample_image_uri:
                        img = c.get_image(isdn)
                        with open(image_path, "wb") as out:
                            out.write(img)
            except HTTPError:
                if stop_on_error:
                    # Bare raise keeps the original traceback intact.
                    raise
                continue

            time.sleep(sleep_time / 1000)

View file

@ -53,7 +53,7 @@ class ISDNJpXMLParser:
for event, elm in etree.iterparse(
file, events=("end",), tag=[f"{{{namespaces['sitemap']}}}loc"], remove_blank_text=True
):
m = re.match("https://isdn.jp/(\d{13})", elm.text)
m = re.match(r"https://isdn.jp/(\d{13})", elm.text)
if not m:
continue
yield m.group(1)