mirror of
https://github.com/Babibubebon/gbizinfo-lod.git
synced 2024-09-22 14:50:52 +09:00
Support gzip compressed output
This commit is contained in:
parent
18970dd8aa
commit
79df6f1a90
3 changed files with 21 additions and 7 deletions
|
@ -1,4 +1,5 @@
|
|||
import csv
|
||||
import gzip
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
|
@ -127,11 +128,18 @@ MAPPER_TYPES = [
|
|||
@click.option(
|
||||
"--format",
|
||||
"-f",
|
||||
"_format",
|
||||
type=click.Choice([v.name for v in RDFFormatType]),
|
||||
default=RDFFormatType.nq.name,
|
||||
)
|
||||
@click.option("--compress", "-c", is_flag=True)
|
||||
def convert(
|
||||
work_dir: str, mappers: list[str], processes: int, output_dir: str, format: str
|
||||
work_dir: str,
|
||||
mappers: list[str],
|
||||
processes: int,
|
||||
output_dir: str,
|
||||
_format: str,
|
||||
compress: bool,
|
||||
):
|
||||
if not mappers:
|
||||
mappers = MAPPER_TYPES
|
||||
|
@ -175,11 +183,15 @@ def convert(
|
|||
case _:
|
||||
raise NotImplementedError
|
||||
|
||||
output_file = os.path.join(output_dir, f"{m}.{format}")
|
||||
output_file = os.path.join(
|
||||
output_dir, f"{m}.{_format}" + (".gz" if compress else "")
|
||||
)
|
||||
click.echo(f"output: {output_file}")
|
||||
click.echo(f"Running {m} mapper ...")
|
||||
with open(output_file, "w") as f:
|
||||
mapper.run(n_jobs=processes, output=f, format=RDFFormatType[format])
|
||||
|
||||
f = gzip.open(output_file, "wt") if compress else open(output_file, "w")
|
||||
mapper.run(n_jobs=processes, output=f, format=RDFFormatType[_format])
|
||||
f.close()
|
||||
|
||||
|
||||
@cli.command(help="Fetch CSV data from OutputCSV endpoint")
|
||||
|
|
|
@ -1,16 +1,17 @@
|
|||
import csv
|
||||
import sys
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import IO, Iterator, Tuple, Union
|
||||
from enum import Enum
|
||||
from typing import IO, Iterator, Tuple, Union
|
||||
|
||||
from joblib import Parallel, delayed
|
||||
from rdflib import BNode, URIRef
|
||||
from rdflib import BNode
|
||||
from rdflib import Literal as LiteralRdflib
|
||||
from rdflib import URIRef
|
||||
from rdflib.graph import _ObjectType, _PredicateType, _SubjectType, _TripleType
|
||||
from rdflib.namespace import RDF
|
||||
from rdflib.plugins.serializers.nt import _nt_row
|
||||
from rdflib.plugins.serializers.nquads import _nq_row
|
||||
from rdflib.plugins.serializers.nt import _nt_row
|
||||
|
||||
_TripleMapType = Tuple[
|
||||
_SubjectType, _PredicateType, Union[str, _ObjectType, "BlankPredicateObjectMap"]
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from rdflib import URIRef
|
||||
|
||||
from ..namespace import *
|
||||
from . import _TripleMapType
|
||||
from ._katsudo import GbizInfoKatsudoMapper
|
||||
|
|
Loading…
Reference in a new issue