Module `library.script.uscourts_courts`

Expand source code

import json
import ssl
from urllib.request import Request, urlopen

import pandas as pd
import requests
from bs4 import BeautifulSoup

from . import df_to_tempfile


class Scriptor:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)
        self.sites = {
            "NY": "https://www.uscourts.gov/fedcf-query?query={%22by%22:%22location%22,%22page%22:0,%22description%22:%22New%20York,%20NY,%20USA%22,%22county%22:%22New%20York%22,%22state%22:%22NY%22,%22zip%22:%2210007%22,%22country%22:%22US%22,%22locationType%22:%22other%22,%22lat%22:40.7127503,%22lng%22:-74.00597649999997,%22filters%22:%22default%22}",
            "BK": "https://www.uscourts.gov/fedcf-query?query={%22by%22:%22location%22,%22page%22:0,%22description%22:%22Brooklyn,%20NY,%20USA%22,%22county%22:%22Kings%22,%22state%22:%22NY%22,%22zip%22:%2211216%22,%22country%22:%22US%22,%22locationType%22:%22other%22,%22lat%22:40.6781281,%22lng%22:-73.94416899999999,%22filters%22:%22default%22}",
        }

    def get_location(self, boro):
        response = requests.get(self.sites.get(boro)).content
        locations = json.loads(response).get("results").get("locations")
        return locations

    # deduplicate, --> in reality there's no duplicated items
    def removeduplicate(self, it):
        seen = []
        for x in it:
            t = tuple(x.items())
            if t not in seen:
                yield x
                seen.append(t)

    def ingest(self) -> pd.DataFrame:
        data = self.get_location("NY") + self.get_location("BK")
        data = list(self.removeduplicate(data))
        df = pd.DataFrame.from_dict(data, orient="columns")
        return df

    def runner(self) -> str:
        df = self.ingest()
        local_path = df_to_tempfile(df)
        return local_path

Classes

class Scriptor (**kwargs)

Expand source code

class Scriptor:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)
        self.sites = {
            "NY": "https://www.uscourts.gov/fedcf-query?query={%22by%22:%22location%22,%22page%22:0,%22description%22:%22New%20York,%20NY,%20USA%22,%22county%22:%22New%20York%22,%22state%22:%22NY%22,%22zip%22:%2210007%22,%22country%22:%22US%22,%22locationType%22:%22other%22,%22lat%22:40.7127503,%22lng%22:-74.00597649999997,%22filters%22:%22default%22}",
            "BK": "https://www.uscourts.gov/fedcf-query?query={%22by%22:%22location%22,%22page%22:0,%22description%22:%22Brooklyn,%20NY,%20USA%22,%22county%22:%22Kings%22,%22state%22:%22NY%22,%22zip%22:%2211216%22,%22country%22:%22US%22,%22locationType%22:%22other%22,%22lat%22:40.6781281,%22lng%22:-73.94416899999999,%22filters%22:%22default%22}",
        }

    def get_location(self, boro):
        response = requests.get(self.sites.get(boro)).content
        locations = json.loads(response).get("results").get("locations")
        return locations

    # deduplicate, --> in reality there's no duplicated items
    def removeduplicate(self, it):
        seen = []
        for x in it:
            t = tuple(x.items())
            if t not in seen:
                yield x
                seen.append(t)

    def ingest(self) -> pd.DataFrame:
        data = self.get_location("NY") + self.get_location("BK")
        data = list(self.removeduplicate(data))
        df = pd.DataFrame.from_dict(data, orient="columns")
        return df

    def runner(self) -> str:
        df = self.ingest()
        local_path = df_to_tempfile(df)
        return local_path

Methods

def get_location(self, boro)

Expand source code

def get_location(self, boro):
    response = requests.get(self.sites.get(boro)).content
    locations = json.loads(response).get("results").get("locations")
    return locations

def ingest(self) ‑> pandas.core.frame.DataFrame

Expand source code

def ingest(self) -> pd.DataFrame:
    data = self.get_location("NY") + self.get_location("BK")
    data = list(self.removeduplicate(data))
    df = pd.DataFrame.from_dict(data, orient="columns")
    return df

def removeduplicate(self, it)

Expand source code

def removeduplicate(self, it):
    seen = []
    for x in it:
        t = tuple(x.items())
        if t not in seen:
            yield x
            seen.append(t)

def runner(self) ‑> str

Expand source code

def runner(self) -> str:
    df = self.ingest()
    local_path = df_to_tempfile(df)
    return local_path