Module library.validator

from functools import cached_property
from typing import List, Literal

import yaml
from pydantic import BaseModel, ValidationError

VALID_ACL_VALUES = ("public-read", "private")
VALID_GEOMETRY_TYPES = (
    "NONE",
    "GEOMETRY",
    "POINT",
    "LINESTRING",
    "POLYGON",
    "GEOMETRYCOLLECTION",
    "MULTIPOINT",
    "MULTIPOLYGON",
    "MULTILINESTRING",
    "CIRCULARSTRING",
    "COMPOUNDCURVE",
    "CURVEPOLYGON",
    "MULTICURVE",
)
VALID_SOCRATA_FORMATS = ("csv", "geojson")

# Schema definitions
class GeometryType(BaseModel):
    SRS: str
    type: Literal[VALID_GEOMETRY_TYPES]


class Url(BaseModel):
    path: str  # Specify field name and data type
    subpath: str = ""  # Set default value


class Socrata(BaseModel):
    uid: str
    format: Literal[
        VALID_SOCRATA_FORMATS
    ]  # Use Literal with a tuple of values to restrict the field to specific, valid values


class SourceSection(BaseModel):
    url: Url = None  # Pass another schema as a data type
    socrata: Socrata = None
    script: str = None
    geometry: GeometryType
    options: List[str] = []  # Use List[dtype] for a list field value


class DestinationSection(BaseModel):
    name: str
    geometry: GeometryType
    options: List[str] = []
    fields: List[str] = []
    sql: str = None


class InfoSection(BaseModel):
    info: str = None
    url: str = None
    dependents: List[str] = None


class Dataset(BaseModel):
    name: str
    version: str
    acl: Literal[VALID_ACL_VALUES]
    source: SourceSection
    destination: DestinationSection
    info: InfoSection = None


class Validator:
    """
    Validator takes as input the path of a configuration file
    and will run the necessary checks to determine whether the structure
    and values of the file are valid according to the requirements of
    the library.
    """

    def __init__(self, path):
        # Abort if file path is not valid
        if not self.__check_extension(path):
            raise Exception("File path must point to a .yml or .yaml file")

        self.path = path
        self.fname = path.split("/")[-1].split(".")[0]

    def __check_extension(self, path):
        # Check that the path ends with a .yml or .yaml extension
        extension = path.split("/")[-1].split(".")[-1]
        return extension in ["yml", "yaml"]

    @cached_property
    def __file(self):
        with open(self.path, "r") as stream:
            y = yaml.load(stream, Loader=yaml.FullLoader)
            return y

    def __call__(self):
        assert self.tree_is_valid, "Some fields are not valid. Please review your file"
        assert (
            self.dataset_name_matches
        ), "Dataset name must match file and destination name"
        assert (
            self.has_only_one_source
        ), "Source can only have one property from either url, socrata or script"

        return True

    @property
    def tree_is_valid(self) -> bool:
        if self.__file["dataset"] is None:
            return False

        try:
            Dataset(**self.__file["dataset"])  # construct only to trigger validation

        except ValidationError as e:
            print(e.json())
            return False

        return True

    # Check that the dataset name matches the filename and the destination name
    @property
    def dataset_name_matches(self) -> bool:
        dataset = self.__file["dataset"]
        return (dataset["name"] == self.fname) and (
            dataset["name"] == dataset["destination"]["name"]
        )

    # Check that source defines exactly one of url, socrata or script
    @property
    def has_only_one_source(self):
        dataset = self.__file["dataset"]
        source_fields = list(dataset["source"].keys())
        # In other words: if url is in source, neither socrata nor script can be.
        # If url is NOT in source, exactly one of socrata or script must be (XOR operator ^).
        return (
            (("socrata" not in source_fields) and ("script" not in source_fields))
            if ("url" in source_fields)
            else (("socrata" in source_fields) ^ ("script" in source_fields))
        )
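
A minimal usage sketch (the path and file name are hypothetical; as the checks below require, the dataset name in the YAML must match both the file stem and destination.name):

from library.validator import Validator

validator = Validator("recipes/bike_count.yml")
validator()  # returns True, or raises AssertionError naming the failed check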

Classes

class Dataset (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class Dataset(BaseModel):
    name: str
    version: str
    acl: Literal[VALID_ACL_VALUES]
    source: SourceSection
    destination: DestinationSection
    info: InfoSection = None

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var acl : Literal['public-read', 'private']
var destination : DestinationSection
var info : InfoSection
var name : str
var source : SourceSection
var version : str
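
As a quick sketch (all field values hypothetical), nested sections can be passed as plain dicts and pydantic coerces them into the section models:

Dataset(
    name="example",
    version="2021-01-01",
    acl="private",
    source={"geometry": {"SRS": "EPSG:4326", "type": "POINT"}},
    destination={"name": "example", "geometry": {"SRS": "EPSG:4326", "type": "POINT"}},
)
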
class DestinationSection (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class DestinationSection(BaseModel):
    name: str
    geometry: GeometryType
    options: List[str] = []
    fields: List[str] = []
    sql: str = None

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var fields : List[str]
var geometry : GeometryType
var name : str
var options : List[str]
var sql : str
class GeometryType (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class GeometryType(BaseModel):
    SRS: str
    type: Literal[VALID_GEOMETRY_TYPES]

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var SRS : str
var type : Literal['NONE', 'GEOMETRY', 'POINT', 'LINESTRING', 'POLYGON', 'GEOMETRYCOLLECTION', 'MULTIPOINT', 'MULTIPOLYGON', 'MULTILINESTRING', 'CIRCULARSTRING', 'COMPOUNDCURVE', 'CURVEPOLYGON', 'MULTICURVE']
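
For illustration, the Literal annotation rejects any geometry type outside the list above (values hypothetical):

GeometryType(SRS="EPSG:4326", type="POINT")     # valid
GeometryType(SRS="EPSG:4326", type="TRIANGLE")  # raises ValidationError
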
class InfoSection (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class InfoSection(BaseModel):
    info: str = None
    url: str = None
    dependents: List[str] = None

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var dependents : List[str]
var info : str
var url : str
class Socrata (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class Socrata(BaseModel):
    uid: str
    format: Literal[
        VALID_SOCRATA_FORMATS
    ]  # Use Literal with a tuple of values to restrict the field to specific, valid values

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var format : Literal['csv', 'geojson']
var uid : str
class SourceSection (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class SourceSection(BaseModel):
    url: Url = None  # Pass another schema as a data type
    socrata: Socrata = None
    script: str = None
    geometry: GeometryType
    options: List[str] = []  # Use List[dtype] for a list field value

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var geometry : GeometryType
var options : List[str]
var script : str
var socrata : Socrata
var url : Url
class Url (**data: Any)

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

class Url(BaseModel):
    path: str  # Specify field name and data type
    subpath: str = ""  # Set default value

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var path : str
var subpath : str
class Validator (path)

Validator takes as input the path of a configuration file and will run the necessary checks to determine whether the structure and values of the file are valid according to the requirements of the library.

class Validator:
    """
    Validator takes as input the path of a configuration file
    and will run the necessary checks to determine whether the structure
    and values of the file are valid according to the requirements of
    the library.
    """

    def __init__(self, path):
        # Abort if file path is not valid
        if not self.__check_extension(path):
            raise Exception("File path must point to a .yml or .yaml file")

        self.path = path
        self.fname = path.split("/")[-1].split(".")[0]

    def __check_extension(self, path):
        # Check that the path ends with a .yml or .yaml extension
        extension = path.split("/")[-1].split(".")[-1]
        return extension in ["yml", "yaml"]

    @cached_property
    def __file(self):
        with open(self.path, "r") as stream:
            y = yaml.load(stream, Loader=yaml.FullLoader)
            return y

    def __call__(self):
        assert self.tree_is_valid, "Some fields are not valid. Please review your file"
        assert (
            self.dataset_name_matches
        ), "Dataset name must match file and destination name"
        assert (
            self.has_only_one_source
        ), "Source can only have one property from either url, socrata or script"

        return True

    @property
    def tree_is_valid(self) -> bool:
        if self.__file["dataset"] is None:
            return False

        try:
            Dataset(**self.__file["dataset"])  # construct only to trigger validation

        except ValidationError as e:
            print(e.json())
            return False

        return True

    # Check that the dataset name matches the filename and the destination name
    @property
    def dataset_name_matches(self) -> bool:
        dataset = self.__file["dataset"]
        return (dataset["name"] == self.fname) and (
            dataset["name"] == dataset["destination"]["name"]
        )

    # Check that source defines exactly one of url, socrata or script
    @property
    def has_only_one_source(self):
        dataset = self.__file["dataset"]
        source_fields = list(dataset["source"].keys())
        # In other words: if url is in source, neither socrata nor script can be.
        # If url is NOT in source, exactly one of socrata or script must be (XOR operator ^).
        return (
            (("socrata" not in source_fields) and ("script" not in source_fields))
            if ("url" in source_fields)
            else (("socrata" in source_fields) ^ ("script" in source_fields))
        )
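
For reference, a configuration sketch that would pass all three checks, assuming it is saved as bike_count.yml (all values are hypothetical; the structure follows the models above):

dataset:
  name: bike_count
  version: "2021"
  acl: public-read
  source:
    url:
      path: https://example.com/bike_count.csv
    geometry:
      SRS: EPSG:4326
      type: NONE
  destination:
    name: bike_count
    geometry:
      SRS: EPSG:2263
      type: NONE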

Instance variables

var dataset_name_matches : bool
@property
def dataset_name_matches(self) -> bool:
    dataset = self.__file["dataset"]
    return (dataset["name"] == self.fname) and (
        dataset["name"] == dataset["destination"]["name"]
    )
var has_only_one_source
@property
def has_only_one_source(self):
    dataset = self.__file["dataset"]
    source_fields = list(dataset["source"].keys())
    # In other words: if url is in source, neither socrata nor script can be.
    # If url is NOT in source, exactly one of socrata or script must be (XOR operator ^).
    return (
        (("socrata" not in source_fields) and ("script" not in source_fields))
        if ("url" in source_fields)
        else (("socrata" in source_fields) ^ ("script" in source_fields))
    )
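
Concretely, under this rule the accepted key combinations in the source mapping are (a sketch; non-source keys such as geometry are ignored by the check):

{"url": ...}                     # True
{"socrata": ...}                 # True
{"script": ...}                  # True
{"url": ..., "socrata": ...}     # False
{"socrata": ..., "script": ...}  # False
{}                               # False (no source given)
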
var tree_is_valid : bool
@property
def tree_is_valid(self) -> bool:
    if self.__file["dataset"] is None:
        return False

    try:
        Dataset(**self.__file["dataset"])  # construct only to trigger validation

    except ValidationError as e:
        print(e.json())
        return False

    return True