Module library.validator

from functools import cached_property
from typing import List, Literal

import yaml
from pydantic import BaseModel, ValidationError

VALID_ACL_VALUES = ("public-read", "private")
VALID_GEOMETRY_TYPES = (
    "NONE",
    "GEOMETRY",
    "POINT",
    "LINESTRING",
    "POLYGON",
    "GEOMETRYCOLLECTION",
    "MULTIPOINT",
    "MULTIPOLYGON",
    "MULTILINESTRING",
    "CIRCULARSTRING",
    "COMPOUNDCURVE",
    "CURVEPOLYGON",
    "MULTICURVE",
)
VALID_SOCRATA_FORMATS = ("csv", "geojson")

# Schema definitions
class GeometryType(BaseModel):
    SRS: str
    type: Literal[VALID_GEOMETRY_TYPES]


class Url(BaseModel):
    path: str  # Specify field name and data type
    subpath: str = ""  # Set default value


class Socrata(BaseModel):
    uid: str
    format: Literal[
        VALID_SOCRATA_FORMATS
    ]  # Use Literal with a tuple of values to restrict the field to specific, valid values


class SourceSection(BaseModel):
    url: Url = None  # Pass another schema as a data type
    socrata: Socrata = None
    script: str = None
    geometry: GeometryType
    options: List[str] = []  # Use List[dtype] for a list field value


class DestinationSection(BaseModel):
    name: str
    geometry: GeometryType
    options: List[str] = []
    fields: List[str] = []
    sql: str = None


class InfoSection(BaseModel):
    info: str = None
    url: str = None
    dependents: List[str] = None


class Dataset(BaseModel):
    name: str
    version: str
    acl: Literal[VALID_ACL_VALUES]
    source: SourceSection
    destination: DestinationSection
    info: InfoSection = None


class Validator:
    """
    Validator takes as input the path of a configuration file
    and will run the necessary checks to determine whether the structure
    and values of the file are valid according to the requirements of
    the library.
    """

    def __init__(self, path):
        # Abort if file path is not valid
        if not self.__check_extension(path):
            raise Exception("File path must point to a .yml or .yaml file")

        self.path = path
        self.fname = path.split("/")[-1].split(".")[0]

    def __check_extension(self, path):
        # Check that the path ends with a .yml or .yaml extension
        extension = path.split("/")[-1].split(".")[-1]
        return extension in ["yml", "yaml"]

    @cached_property
    def __file(self):
        with open(self.path, "r") as stream:
            y = yaml.load(stream, Loader=yaml.FullLoader)
            return y

    def __call__(self):
        assert self.tree_is_valid, "Some fields are not valid. Please review your file"
        assert (
            self.dataset_name_matches
        ), "Dataset name must match file and destination name"
        assert (
            self.has_only_one_source
        ), "Source can only have one property from either url, socrata or script"

        return True

    @property
    def tree_is_valid(self) -> bool:
        if self.__file["dataset"] is None:
            return False

        try:
            Dataset(**self.__file["dataset"])  # construct only to trigger validation

        except ValidationError as e:
            print(e.json())
            return False

        return True

    # Check that the dataset name matches the filename and the destination name
    @property
    def dataset_name_matches(self) -> bool:
        dataset = self.__file["dataset"]
        return (dataset["name"] == self.fname) and (
            dataset["name"] == dataset["destination"]["name"]
        )

    # Check that source defines exactly one of url, socrata or script
    @property
    def has_only_one_source(self):
        dataset = self.__file["dataset"]
        source_fields = list(dataset["source"].keys())
        # In other words: if url is in source, neither socrata nor script can be.
        # If url is NOT in source, exactly one of socrata or script must be (XOR operator ^).
        return (
            (("socrata" not in source_fields) and ("script" not in source_fields))
            if ("url" in source_fields)
            else (("socrata" in source_fields) ^ ("script" in source_fields))
        )
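
A minimal usage sketch (the path and file name are hypothetical; as the checks below require, the dataset name in the YAML must match both the file stem and destination.name):

from library.validator import Validator

validator = Validator("recipes/bike_count.yml")
validator()  # returns True, or raises AssertionError naming the failed check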

Classes

class Dataset (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class Dataset(BaseModel):
    name: str
    version: str
    acl: Literal[VALID_ACL_VALUES]
    source: SourceSection
    destination: DestinationSection
    info: InfoSection = None

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var acl : Literal['public-read', 'private']
var destination : DestinationSection
var info : InfoSection
var name : str
var source : SourceSection
var version : str
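
As a quick sketch (all field values hypothetical), nested sections can be passed as plain dicts and pydantic coerces them into the section models:

Dataset(
    name="example",
    version="2021-01-01",
    acl="private",
    source={"geometry": {"SRS": "EPSG:4326", "type": "POINT"}},
    destination={"name": "example", "geometry": {"SRS": "EPSG:4326", "type": "POINT"}},
)
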
class DestinationSection (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class DestinationSection(BaseModel):
    name: str
    geometry: GeometryType
    options: List[str] = []
    fields: List[str] = []
    sql: str = None

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var fields : List[str]
var geometry : GeometryType
var name : str
var options : List[str]
var sql : str
class GeometryType (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class GeometryType(BaseModel):
    SRS: str
    type: Literal[VALID_GEOMETRY_TYPES]

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var SRS : str
var type : Literal['NONE', 'GEOMETRY', 'POINT', 'LINESTRING', 'POLYGON', 'GEOMETRYCOLLECTION', 'MULTIPOINT', 'MULTIPOLYGON', 'MULTILINESTRING', 'CIRCULARSTRING', 'COMPOUNDCURVE', 'CURVEPOLYGON', 'MULTICURVE']
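
For illustration, the Literal annotation rejects any geometry type outside the list above (values hypothetical):

GeometryType(SRS="EPSG:4326", type="POINT")     # valid
GeometryType(SRS="EPSG:4326", type="TRIANGLE")  # raises ValidationError
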
class InfoSection (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class InfoSection(BaseModel):
    info: str = None
    url: str = None
    dependents: List[str] = None

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var dependents : List[str]
var info : str
var url : str
class Socrata (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class Socrata(BaseModel):
    uid: str
    format: Literal[
        VALID_SOCRATA_FORMATS
    ]  # Use Literal with a tuple of values to restrict the field to specific, valid values

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var format : Literal['csv', 'geojson']
var uid : str
class SourceSection (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class SourceSection(BaseModel):
    url: Url = None  # Pass another schema as a data type
    socrata: Socrata = None
    script: str = None
    geometry: GeometryType
    options: List[str] = []  # Use List[dtype] for a list field value

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var geometry : GeometryType
var options : List[str]
var script : str
var socrata : Socrata
var url : Url
class Url (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class Url(BaseModel):
    path: str  # Specify field name and data type
    subpath: str = ""  # Set default value

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var path : str
var subpath : str
class Validator (path)

Validator takes as input the path of a configuration file and will run the necessary checks to determine whether the structure and values of the file are valid according to the requirements of the library.

class Validator:
    """
    Validator takes as input the path of a configuration file
    and will run the necessary checks to determine whether the structure
    and values of the file are valid according to the requirements of
    the library.
    """

    def __init__(self, path):
        # Abort if file path is not valid
        if not self.__check_extension(path):
            raise Exception("File path must point to a .yml or .yaml file")

        self.path = path
        self.fname = path.split("/")[-1].split(".")[0]

    def __check_extension(self, path):
        # Check that the path ends with a .yml or .yaml extension
        extension = path.split("/")[-1].split(".")[-1]
        return extension in ["yml", "yaml"]

    @cached_property
    def __file(self):
        with open(self.path, "r") as stream:
            y = yaml.load(stream, Loader=yaml.FullLoader)
            return y

    def __call__(self):
        assert self.tree_is_valid, "Some fields are not valid. Please review your file"
        assert (
            self.dataset_name_matches
        ), "Dataset name must match file and destination name"
        assert (
            self.has_only_one_source
        ), "Source can only have one property from either url, socrata or script"

        return True

    @property
    def tree_is_valid(self) -> bool:
        if self.__file["dataset"] is None:
            return False

        try:
            Dataset(**self.__file["dataset"])  # construct only to trigger validation

        except ValidationError as e:
            print(e.json())
            return False

        return True

    # Check that the dataset name matches the filename and the destination name
    @property
    def dataset_name_matches(self) -> bool:
        dataset = self.__file["dataset"]
        return (dataset["name"] == self.fname) and (
            dataset["name"] == dataset["destination"]["name"]
        )

    # Check that source defines exactly one of url, socrata or script
    @property
    def has_only_one_source(self):
        dataset = self.__file["dataset"]
        source_fields = list(dataset["source"].keys())
        # In other words: if url is in source, neither socrata nor script can be.
        # If url is NOT in source, exactly one of socrata or script must be (XOR operator ^).
        return (
            (("socrata" not in source_fields) and ("script" not in source_fields))
            if ("url" in source_fields)
            else (("socrata" in source_fields) ^ ("script" in source_fields))
        )
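
For reference, a configuration sketch that would pass all three checks, assuming it is saved as bike_count.yml (all values are hypothetical; the structure follows the models above):

dataset:
  name: bike_count
  version: "2021"
  acl: public-read
  source:
    url:
      path: https://example.com/bike_count.csv
    geometry:
      SRS: EPSG:4326
      type: NONE
  destination:
    name: bike_count
    geometry:
      SRS: EPSG:2263
      type: NONE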

Instance variables

var dataset_name_matches : bool
@property
def dataset_name_matches(self) -> bool:
    dataset = self.__file["dataset"]
    return (dataset["name"] == self.fname) and (
        dataset["name"] == dataset["destination"]["name"]
    )
var has_only_one_source
@property
def has_only_one_source(self):
    dataset = self.__file["dataset"]
    source_fields = list(dataset["source"].keys())
    # In other words: if url is in source, neither socrata nor script can be.
    # If url is NOT in source, exactly one of socrata or script must be (XOR operator ^).
    return (
        (("socrata" not in source_fields) and ("script" not in source_fields))
        if ("url" in source_fields)
        else (("socrata" in source_fields) ^ ("script" in source_fields))
    )
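
Concretely, under this rule the accepted key combinations in the source mapping are (a sketch; non-source keys such as geometry are ignored by the check):

{"url": ...}                     # True
{"socrata": ...}                 # True
{"script": ...}                  # True
{"url": ..., "socrata": ...}     # False
{"socrata": ..., "script": ...}  # False
{}                               # False (no source given)
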
var tree_is_valid : bool
@property
def tree_is_valid(self) -> bool:
    if self.__file["dataset"] is None:
        return False

    try:
        Dataset(**self.__file["dataset"])  # construct only to trigger validation

    except ValidationError as e:
        print(e.json())
        return False

    return True