💦

Code ✂️ : Data Wrangling with Functional Programming

💡
The following is an implementation of a utility class that is similar to a NumPy array but holds arbitrary items; it lets one work on lists of objects using modular, composable operations that are passed in as callbacks.
import importlib.machinery
import pandas as pd
import numpy as np
from functools import partial, wraps
import functools
import itertools
from operator import mul, add
from typing import Iterable
from tqdm.auto import tqdm
from collections import UserList
from project.utils import parallel, asynchronous

class ItemArray(UserList):
    """List extension utility class for functional programming.

    Behaves like a regular list (the underlying list is ``self.data``, as
    provided by ``UserList``) and adds ``map``/``filter``/``reduce``-style
    helpers. Helpers that produce a collection return a new ``ItemArray`` so
    calls can be chained.
    """

    # Private marker meaning "no initializer supplied", so that ``None`` stays
    # usable as an explicit starting value in ``reduce``.
    _NO_INITIALIZER = object()

    def __init__(self, items=None):
        """Build the array from ``items`` (any iterable); ``None`` means empty."""
        super().__init__(items)

    def reduce(self, f, initializer=_NO_INITIALIZER):
        """Fold the items into a single value, applying ``f`` left to right.

        Args:
            f: Callable invoked as ``f(accumulator, item)``.
            initializer: Optional starting accumulator. When omitted, the
                first item seeds the fold (as with ``functools.reduce``).

        Returns:
            The folded value, or ``None`` when the array is empty and no
            initializer was supplied.
        """
        if initializer is self._NO_INITIALIZER:
            if not self.data:
                # Empty fold without a seed: keep returning None rather than
                # raising, which is what callers of the earlier version saw.
                return None
            # Passing None here would make it the first accumulator and break
            # reducers such as operator.add, so omit the argument entirely.
            return functools.reduce(f, self.data)
        return functools.reduce(f, self.data, initializer)

    def filter(self, f, **kwargs):
        """Return a new ``ItemArray`` with the items for which ``f`` is truthy.

        Any ``kwargs`` are bound to ``f`` as keyword arguments before filtering.
        """
        if kwargs:
            f = partial(f, **kwargs)
        return ItemArray(filter(f, self))

    def map(self, f, **kwargs):
        """Return a new ``ItemArray`` holding ``f(item)`` for every item.

        Any ``kwargs`` are bound to ``f`` as keyword arguments before mapping.
        """
        if kwargs:
            f = partial(f, **kwargs)
        return ItemArray(map(f, self))

    def map_parallel(self, f, processes, **kwargs):  # opts
        """Apply ``f`` through the project's ``parallel.map_parallel`` helper.

        Args:
            f: Callable to apply.
            processes: Forwarded to the helper as ``n_workers``.
            **kwargs: ``cb`` and ``prefer`` are removed and forwarded to the
                helper under the same names; every remaining keyword is bound
                to ``f``.

        Returns:
            A new ``ItemArray`` built from whatever the helper returns.

        NOTE(review): the items are unpacked as separate positional arguments
        to the helper; the exact meaning of ``cb`` and ``prefer`` is defined
        by ``project.utils.parallel`` -- confirm against that module.
        """
        cb = kwargs.pop('cb', None)
        prefer = kwargs.pop('prefer', None)
        if kwargs:
            f = partial(f, **kwargs)
        res = parallel.map_parallel(
            f,
            *self.toTuple(),
            n_workers=processes,
            prefer=prefer,
            cb=cb
        )
        return ItemArray(res)

    def map_async(self, f, processes, **kwargs):  # opts
        """Apply ``f`` through the project's ``asynchronous.map_async`` helper.

        Args:
            f: Callable to apply.
            processes: Forwarded to the helper as ``n_workers``.
            **kwargs: ``cb`` is removed and forwarded to the helper; every
                remaining keyword is bound to ``f``.

        Returns:
            A new ``ItemArray`` built from whatever the helper returns.
        """
        cb = kwargs.pop('cb', None)
        if kwargs:
            f = partial(f, **kwargs)
        res = asynchronous.map_async(f, self.data, n_workers=processes, cb=cb)
        return ItemArray(res)

    def chunk(self, n):
        """Split into consecutive chunks of at most ``n`` items.

        Returns an ``ItemArray`` of ``ItemArray`` chunks; the last chunk may be
        shorter. ``n == 0`` raises ``ValueError`` (from ``range``).
        """
        chunks = [ItemArray(self[i:i + n]) for i in range(0, len(self), n)]
        return ItemArray(chunks)

    def chain(self):
        """Flatten one level: concatenate the iterables held as items."""
        return ItemArray(itertools.chain.from_iterable(self))

    def push(self, item):
        """Append ``item`` in place."""
        self.data.append(item)

    def sum(self):
        """Combine all items with ``+``; returns ``None`` for an empty array."""
        return self.reduce(add)

    def product(self):
        """Combine all items with ``*``; returns ``None`` for an empty array."""
        return self.reduce(mul)

    def unique(self):
        """Return the distinct items in first-occurrence order.

        Items must be hashable because they are used as dictionary keys.
        """
        return ItemArray(dict.fromkeys(self).keys())

    def toDataFrame(self, **kwargs):
        """Build a ``pandas.DataFrame`` from the items; ``kwargs`` are forwarded."""
        return pd.DataFrame(self.data, **kwargs)

    def toSeries(self, **kwargs):
        """Build a ``pandas.Series`` from the items; ``kwargs`` are forwarded."""
        return pd.Series(self.data, **kwargs)

    def toTuple(self):
        """Return the items as a tuple.

        When the first item is a ``list`` or ``ItemArray``, every item is
        converted to a tuple as well, giving a tuple of tuples. An empty array
        yields ``()``.
        """
        data = tuple(self.data)
        # Guard the empty case: inspecting data[0] would raise IndexError.
        if data and isinstance(data[0], (list, ItemArray)):
            # Materialise as a real tuple (not a one-shot generator) so the
            # result can be indexed, measured and iterated more than once.
            data = tuple(tuple(d) for d in data)
        return data