mirror of
https://github.com/Babibubebon/isdn-python.git
synced 2024-09-22 17:24:20 +09:00
Add write-image option to bulk-download command
This commit is contained in:
parent
c16282aa28
commit
d7fa51f7d5
3 changed files with 43 additions and 13 deletions
|
@ -5,13 +5,20 @@ import requests
|
|||
from . import ISDNRecord, __version__
|
||||
from .parser import ISDNJpXMLParser
|
||||
|
||||
ISDN_API_ENDPOINT = "https://isdn.jp/xml/"
|
||||
ISDN_XML_ENDPOINT = "https://isdn.jp/xml/{isdn}"
|
||||
ISDN_IMAGE_ENDPOINT = "https://isdn.jp/images/thumbs/{isdn}.png"
|
||||
ISDN_SITEMAP = "https://isdn.jp/sitemap.xml"
|
||||
|
||||
|
||||
class ISDNClient:
|
||||
def __init__(self, endpoint_url: str = ISDN_API_ENDPOINT, sitemap_url: str = ISDN_SITEMAP):
|
||||
self.endpoint_url = endpoint_url
|
||||
def __init__(
|
||||
self,
|
||||
xml_endpoint_url: str = ISDN_XML_ENDPOINT,
|
||||
image_endpoint_url: str = ISDN_IMAGE_ENDPOINT,
|
||||
sitemap_url: str = ISDN_SITEMAP,
|
||||
):
|
||||
self.xml_endpoint_url = xml_endpoint_url
|
||||
self.image_endpoint_url = image_endpoint_url
|
||||
self.sitemap_url = sitemap_url
|
||||
self.s = requests.Session()
|
||||
self.set_user_agent(f"isdn-python/{__version__}")
|
||||
|
@ -23,17 +30,21 @@ class ISDNClient:
|
|||
def normalize_isdn(isdn: str) -> str:
|
||||
return isdn.replace("-", "").strip()
|
||||
|
||||
def _get(self, isdn: str) -> requests.Response:
|
||||
r = self.s.get(self.endpoint_url + self.normalize_isdn(isdn))
|
||||
def _get(self, isdn: str, endpoint_url: str) -> requests.Response:
|
||||
r = self.s.get(endpoint_url.format(isdn=self.normalize_isdn(isdn)))
|
||||
r.raise_for_status()
|
||||
return r
|
||||
|
||||
def get(self, isdn: str) -> ISDNRecord:
|
||||
r = self._get(isdn)
|
||||
r = self._get(isdn, self.xml_endpoint_url)
|
||||
return ISDNJpXMLParser.parse_record(r.content)
|
||||
|
||||
def get_raw(self, isdn: str) -> bytes:
|
||||
r = self._get(isdn)
|
||||
r = self._get(isdn, self.xml_endpoint_url)
|
||||
return r.content
|
||||
|
||||
def get_image(self, isdn: str) -> bytes:
|
||||
r = self._get(isdn, self.image_endpoint_url)
|
||||
return r.content
|
||||
|
||||
def _list(self) -> requests.Response:
|
||||
|
|
|
@ -7,6 +7,7 @@ from requests.exceptions import HTTPError
|
|||
|
||||
from . import ISDN, __version__
|
||||
from .client import ISDNClient
|
||||
from .parser import ISDNJpXMLParser
|
||||
|
||||
|
||||
@click.group()
|
||||
|
@ -46,24 +47,42 @@ def list_isdns():
|
|||
@cli.command(help="Download all xml record files from isdn.jp")
|
||||
@click.argument("directory", type=click.Path(exists=True, file_okay=False, writable=True))
|
||||
@click.option("--force", "-f", is_flag=True, help="Overwrite existing files")
|
||||
@click.option("--stop-on-error", is_flag=True, help="Stops when error occurs during download")
|
||||
@click.option("--stop-on-error", is_flag=True, help="Stop when error occurs during download")
|
||||
@click.option("--sleep-time", "-s", type=int, default=500, help="Sleep interval for download (millisecond)")
|
||||
def bulk_download(directory: str, force: bool, stop_on_error: bool, sleep_time: int):
|
||||
@click.option("--write-image", is_flag=True, help="Write cover image to file")
|
||||
@click.option(
|
||||
"--write-image-path",
|
||||
type=click.Path(exists=True, file_okay=False, writable=True),
|
||||
help="Directory path to write cover images",
|
||||
)
|
||||
def bulk_download(
|
||||
directory: str, force: bool, stop_on_error: bool, sleep_time: int, write_image: bool, write_image_path: str
|
||||
):
|
||||
c = ISDNClient()
|
||||
with click.progressbar(list(c.list()), show_pos=True) as bar:
|
||||
for isdn in bar:
|
||||
path = os.path.join(directory, f"{isdn}.xml")
|
||||
if not force and os.path.exists(path):
|
||||
image_path = os.path.join(write_image_path or directory, f"{isdn}.png")
|
||||
if not force and os.path.exists(path) and (not write_image or write_image and os.path.exists(image_path)):
|
||||
continue
|
||||
|
||||
try:
|
||||
res = c.get_raw(isdn)
|
||||
with open(path, "wb") as out:
|
||||
out.write(res)
|
||||
|
||||
if write_image:
|
||||
record = ISDNJpXMLParser.parse_record(res)
|
||||
if record.sample_image_uri:
|
||||
img = c.get_image(isdn)
|
||||
with open(image_path, "wb") as out:
|
||||
out.write(img)
|
||||
except HTTPError as err:
|
||||
if stop_on_error:
|
||||
raise err
|
||||
else:
|
||||
continue
|
||||
with open(path, "wb") as out:
|
||||
out.write(res)
|
||||
|
||||
time.sleep(sleep_time / 1000)
|
||||
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ class ISDNJpXMLParser:
|
|||
for event, elm in etree.iterparse(
|
||||
file, events=("end",), tag=[f"{{{namespaces['sitemap']}}}loc"], remove_blank_text=True
|
||||
):
|
||||
m = re.match("https://isdn.jp/(\d{13})", elm.text)
|
||||
m = re.match(r"https://isdn.jp/(\d{13})", elm.text)
|
||||
if not m:
|
||||
continue
|
||||
yield m.group(1)
|
||||
|
|
Loading…
Reference in a new issue