Support gzip compressed output

This commit is contained in:
Babibubebon 2024-06-19 01:29:40 +09:00
parent 18970dd8aa
commit 79df6f1a90
Signed by: Babibubebon
GPG key ID: 78C8FB2A2FEA1EE3
3 changed files with 21 additions and 7 deletions

View file

@ -1,4 +1,5 @@
import csv
import gzip
import os
import shutil
import time
@ -127,11 +128,18 @@ MAPPER_TYPES = [
@click.option(
"--format",
"-f",
"_format",
type=click.Choice([v.name for v in RDFFormatType]),
default=RDFFormatType.nq.name,
)
@click.option("--compress", "-c", is_flag=True)
def convert(
work_dir: str, mappers: list[str], processes: int, output_dir: str, format: str
work_dir: str,
mappers: list[str],
processes: int,
output_dir: str,
_format: str,
compress: bool,
):
if not mappers:
mappers = MAPPER_TYPES
@ -175,11 +183,15 @@ def convert(
case _:
raise NotImplementedError
output_file = os.path.join(output_dir, f"{m}.{format}")
output_file = os.path.join(
output_dir, f"{m}.{_format}" + (".gz" if compress else "")
)
click.echo(f"output: {output_file}")
click.echo(f"Running {m} mapper ...")
with open(output_file, "w") as f:
mapper.run(n_jobs=processes, output=f, format=RDFFormatType[format])
f = gzip.open(output_file, "wt") if compress else open(output_file, "w")
mapper.run(n_jobs=processes, output=f, format=RDFFormatType[_format])
f.close()
@cli.command(help="Fetch CSV data from OutputCSV endpoint")

View file

@ -1,16 +1,17 @@
import csv
import sys
from abc import ABC, abstractmethod
from typing import IO, Iterator, Tuple, Union
from enum import Enum
from typing import IO, Iterator, Tuple, Union
from joblib import Parallel, delayed
from rdflib import BNode, URIRef
from rdflib import BNode
from rdflib import Literal as LiteralRdflib
from rdflib import URIRef
from rdflib.graph import _ObjectType, _PredicateType, _SubjectType, _TripleType
from rdflib.namespace import RDF
from rdflib.plugins.serializers.nt import _nt_row
from rdflib.plugins.serializers.nquads import _nq_row
from rdflib.plugins.serializers.nt import _nt_row
_TripleMapType = Tuple[
_SubjectType, _PredicateType, Union[str, _ObjectType, "BlankPredicateObjectMap"]

View file

@ -1,4 +1,5 @@
from rdflib import URIRef
from ..namespace import *
from . import _TripleMapType
from ._katsudo import GbizInfoKatsudoMapper