mirror of
https://github.com/Babibubebon/gbizinfo-lod.git
synced 2024-09-22 22:54:21 +09:00
Support gzip compressed output
This commit is contained in:
parent
18970dd8aa
commit
79df6f1a90
3 changed files with 21 additions and 7 deletions
|
@@ -1,4 +1,5 @@
|
||||||
import csv
|
import csv
|
||||||
|
import gzip
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import time
|
import time
|
||||||
|
@@ -127,11 +128,18 @@ MAPPER_TYPES = [
|
||||||
@click.option(
|
@click.option(
|
||||||
"--format",
|
"--format",
|
||||||
"-f",
|
"-f",
|
||||||
|
"_format",
|
||||||
type=click.Choice([v.name for v in RDFFormatType]),
|
type=click.Choice([v.name for v in RDFFormatType]),
|
||||||
default=RDFFormatType.nq.name,
|
default=RDFFormatType.nq.name,
|
||||||
)
|
)
|
||||||
|
@click.option("--compress", "-c", is_flag=True)
|
||||||
def convert(
|
def convert(
|
||||||
work_dir: str, mappers: list[str], processes: int, output_dir: str, format: str
|
work_dir: str,
|
||||||
|
mappers: list[str],
|
||||||
|
processes: int,
|
||||||
|
output_dir: str,
|
||||||
|
_format: str,
|
||||||
|
compress: bool,
|
||||||
):
|
):
|
||||||
if not mappers:
|
if not mappers:
|
||||||
mappers = MAPPER_TYPES
|
mappers = MAPPER_TYPES
|
||||||
|
@@ -175,11 +183,15 @@ def convert(
|
||||||
case _:
|
case _:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
output_file = os.path.join(output_dir, f"{m}.{format}")
|
output_file = os.path.join(
|
||||||
|
output_dir, f"{m}.{_format}" + (".gz" if compress else "")
|
||||||
|
)
|
||||||
click.echo(f"output: {output_file}")
|
click.echo(f"output: {output_file}")
|
||||||
click.echo(f"Running {m} mapper ...")
|
click.echo(f"Running {m} mapper ...")
|
||||||
with open(output_file, "w") as f:
|
|
||||||
mapper.run(n_jobs=processes, output=f, format=RDFFormatType[format])
|
f = gzip.open(output_file, "wt") if compress else open(output_file, "w")
|
||||||
|
mapper.run(n_jobs=processes, output=f, format=RDFFormatType[_format])
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
|
||||||
@cli.command(help="Fetch CSV data from OutputCSV endpoint")
|
@cli.command(help="Fetch CSV data from OutputCSV endpoint")
|
||||||
|
|
|
@@ -1,16 +1,17 @@
|
||||||
import csv
|
import csv
|
||||||
import sys
|
import sys
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import IO, Iterator, Tuple, Union
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from typing import IO, Iterator, Tuple, Union
|
||||||
|
|
||||||
from joblib import Parallel, delayed
|
from joblib import Parallel, delayed
|
||||||
from rdflib import BNode, URIRef
|
from rdflib import BNode
|
||||||
from rdflib import Literal as LiteralRdflib
|
from rdflib import Literal as LiteralRdflib
|
||||||
|
from rdflib import URIRef
|
||||||
from rdflib.graph import _ObjectType, _PredicateType, _SubjectType, _TripleType
|
from rdflib.graph import _ObjectType, _PredicateType, _SubjectType, _TripleType
|
||||||
from rdflib.namespace import RDF
|
from rdflib.namespace import RDF
|
||||||
from rdflib.plugins.serializers.nt import _nt_row
|
|
||||||
from rdflib.plugins.serializers.nquads import _nq_row
|
from rdflib.plugins.serializers.nquads import _nq_row
|
||||||
|
from rdflib.plugins.serializers.nt import _nt_row
|
||||||
|
|
||||||
_TripleMapType = Tuple[
|
_TripleMapType = Tuple[
|
||||||
_SubjectType, _PredicateType, Union[str, _ObjectType, "BlankPredicateObjectMap"]
|
_SubjectType, _PredicateType, Union[str, _ObjectType, "BlankPredicateObjectMap"]
|
||||||
|
|
|
@@ -1,4 +1,5 @@
|
||||||
from rdflib import URIRef
|
from rdflib import URIRef
|
||||||
|
|
||||||
from ..namespace import *
|
from ..namespace import *
|
||||||
from . import _TripleMapType
|
from . import _TripleMapType
|
||||||
from ._katsudo import GbizInfoKatsudoMapper
|
from ._katsudo import GbizInfoKatsudoMapper
|
||||||
|
|
Loading…
Reference in a new issue