How to Get Started With Testing

Once you have reviewed the tutorials and have a basic understanding of datatest, you should be ready to start testing your own data.

1. Create a File and Add Some Sample Code

A simple way to get started is to create a .py file in the same folder as the data you want to test. It’s a good idea to follow established testing conventions and make sure your filename starts with “test_”.

Then, copy one of the following pytest or unittest code samples to use as a template for writing your own tests:

Pytest Samples

#!/usr/bin/env python3
import pytest
import pandas as pd
import datatest as dt
from datatest import (
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@pytest.fixture(scope='session')
@dt.working_directory(__file__)
def df():
    """Session-scoped fixture that reads 'example.csv' into a DataFrame.

    ``working_directory(__file__)`` lets the relative file name be
    resolved from the folder that contains this test file.
    """
    frame = pd.read_csv('example.csv')
    return frame


@pytest.mark.mandatory
def test_column_names(df):
    """Validate the DataFrame's column labels against the required names."""
    expected_columns = {'A', 'B', 'C'}
    actual_columns = df.columns
    dt.validate(actual_columns, expected_columns)


def test_a(df):
    """Validate the values of column 'A' against the requirement set."""
    required_values = {'x', 'y', 'z'}
    column_a = df['A']
    dt.validate(column_a, required_values)


# ...add more tests here...


if __name__ == '__main__':
    # Allow running this file directly as a script: pytest receives the
    # command-line arguments and its result becomes the exit status.
    import sys
    exit_status = pytest.main(sys.argv)
    sys.exit(exit_status)

#!/usr/bin/env python3
import pytest
import pandas as pd
import datatest as dt
from datatest import (
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@pytest.fixture(scope='session')
@dt.working_directory(__file__)
def df():
    """Load 'example.csv' once per test session and return the DataFrame.

    The file name is relative; ``working_directory(__file__)`` makes it
    resolve from the folder that holds this test file.
    """
    loaded = pd.read_csv('example.csv')
    return loaded


@pytest.fixture(scope='session', autouse=True)
def pandas_integration():
    """Register datatest's pandas accessors once for the whole session.

    Declared with ``autouse=True`` so no test needs to request it; the
    tests in this file rely on the ``validate`` accessor it registers.
    """
    dt.register_accessors()


@pytest.mark.mandatory
def test_column_names(df):
    """Validate the column labels through the registered accessor."""
    expected_columns = {'A', 'B', 'C'}
    columns = df.columns
    columns.validate(expected_columns)


def test_a(df):
    """Validate column 'A' through the registered accessor."""
    required_values = {'x', 'y', 'z'}
    column_a = df['A']
    column_a.validate(required_values)


# ...add more tests here...


if __name__ == '__main__':
    # Script entry point: hand the command-line arguments to pytest and
    # exit with whatever status it reports.
    import sys
    status = pytest.main(sys.argv)
    sys.exit(status)

#!/usr/bin/env python3
import pytest
import squint
from datatest import (
    validate,
    accepted,
    working_directory,
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@pytest.fixture(scope='session')
@working_directory(__file__)
def select():
    """Session-scoped fixture returning a squint ``Select`` for 'example.csv'.

    ``working_directory(__file__)`` lets the relative file name be
    resolved from the folder that contains this test file.
    """
    source = squint.Select('example.csv')
    return source


@pytest.mark.mandatory
def test_column_names(select):
    """Validate the Select object's field names against the required names."""
    expected_fields = {'A', 'B', 'C'}
    actual_fields = select.fieldnames
    validate(actual_fields, expected_fields)


def test_a(select):
    """Validate the result of selecting column 'A' against the requirement."""
    required_values = {'x', 'y', 'z'}
    column_a = select('A')
    validate(column_a, required_values)


# ...add more tests here...


if __name__ == '__main__':
    # Running the file directly delegates to pytest; its result is used
    # as the process exit status.
    import sys
    result = pytest.main(sys.argv)
    sys.exit(result)

#!/usr/bin/env python3
import pytest
import sqlite3
from datatest import (
    validate,
    accepted,
    working_directory,
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@pytest.fixture(scope='session')
def connection():
    """Session-scoped fixture yielding a connection to 'example.sqlite3'.

    The connection is closed after the code that follows ``yield`` runs,
    i.e. once the session is done with the fixture.
    """
    # Only the connect call runs inside the working_directory block, so
    # the relative database path is resolved from this file's folder.
    with working_directory(__file__):
        db = sqlite3.connect('example.sqlite3')
    yield db
    db.close()


@pytest.fixture(scope='function')
def cursor(connection):
    """Function-scoped fixture: a fresh cursor per test, closed afterwards."""
    db_cursor = connection.cursor()
    yield db_cursor
    db_cursor.close()


@pytest.mark.mandatory
def test_column_names(cursor):
    """Validate the column names of ``mytable`` against the required names."""
    # LIMIT 0 asks for no rows; the column names are then read from the
    # first item of each entry in cursor.description.
    cursor.execute('SELECT * FROM mytable LIMIT 0;')
    actual_names = [name for name, *_ in cursor.description]
    expected_names = {'A', 'B', 'C'}
    validate(actual_names, expected_names)


def test_a(cursor):
    """Validate the values selected from column A against the requirement."""
    required_values = {'x', 'y', 'z'}
    cursor.execute('SELECT A FROM mytable;')
    # The executed cursor itself is passed to validate() as the data.
    validate(cursor, required_values)


# ...add more tests here...


if __name__ == '__main__':
    # Direct execution: run pytest with the command-line arguments and
    # propagate its result as the exit status.
    import sys
    outcome = pytest.main(sys.argv)
    sys.exit(outcome)

Unittest Samples

#!/usr/bin/env python3
import pandas as pd
import datatest as dt
from datatest import (
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@dt.working_directory(__file__)
def setUpModule():
    """Load 'example.csv' into the module-level ``df`` used by the tests.

    ``working_directory(__file__)`` lets the relative file name be
    resolved from the folder that contains this test file.
    """
    global df
    frame = pd.read_csv('example.csv')
    df = frame


class TestMyData(dt.DataTestCase):
    # Test cases for the module-level ``df`` that setUpModule() loads.

    @dt.mandatory
    def test_column_names(self):
        # Validate the column labels against the required names.
        expected_columns = {'A', 'B', 'C'}
        actual_columns = df.columns
        self.assertValid(actual_columns, expected_columns)

    def test_a(self):
        # Validate the values of column 'A' against the requirement set.
        required_values = {'x', 'y', 'z'}
        column_a = df['A']
        self.assertValid(column_a, required_values)

    # ...add more tests here...


if __name__ == '__main__':
    # Allow running this file directly with datatest's test runner.
    dt.main()

#!/usr/bin/env python3
import pandas as pd
import datatest as dt
from datatest import (
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@dt.working_directory(__file__)
def setUpModule():
    """Load 'example.csv' into the module-level ``df`` and enable accessors.

    ``working_directory(__file__)`` lets the relative file name be
    resolved from the folder that contains this test file.
    """
    global df
    frame = pd.read_csv('example.csv')
    df = frame
    # The tests below call the ``validate`` accessor on pandas objects,
    # so the accessors are registered here before any test runs.
    dt.register_accessors()


class TestMyData(dt.DataTestCase):
    # Test cases that validate the module-level ``df`` through the
    # pandas accessors registered in setUpModule().

    @dt.mandatory
    def test_column_names(self):
        # Validate the column labels against the required names.
        expected_columns = {'A', 'B', 'C'}
        columns = df.columns
        columns.validate(expected_columns)

    def test_a(self):
        # Validate the values of column 'A' against the requirement set.
        required_values = {'x', 'y', 'z'}
        column_a = df['A']
        column_a.validate(required_values)

    # ...add more tests here...


if __name__ == '__main__':
    # Script entry point: run the tests with datatest's test runner.
    dt.main()

#!/usr/bin/env python3
import squint
from datatest import (
    DataTestCase,
    mandatory,
    working_directory,
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@working_directory(__file__)
def setUpModule():
    """Create the module-level squint ``select`` object for 'example.csv'.

    ``working_directory(__file__)`` lets the relative file name be
    resolved from the folder that contains this test file.
    """
    global select
    source = squint.Select('example.csv')
    select = source


class TestMyData(DataTestCase):
    # Test cases for the module-level ``select`` created by setUpModule().

    @mandatory
    def test_column_names(self):
        # Validate the field names against the required names.
        expected_fields = {'A', 'B', 'C'}
        actual_fields = select.fieldnames
        self.assertValid(actual_fields, expected_fields)

    def test_a(self):
        # Validate the result of selecting column 'A'.
        required_values = {'x', 'y', 'z'}
        column_a = select('A')
        self.assertValid(column_a, required_values)

    # ...add more tests here...


if __name__ == '__main__':
    # Allow running this file directly with datatest's test runner.
    import datatest
    datatest.main()

#!/usr/bin/env python3
import sqlite3
from datatest import (
    DataTestCase,
    mandatory,
    working_directory,
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@working_directory(__file__)
def setUpModule():
    """Open the module-level ``connection`` to 'example.sqlite3'.

    ``working_directory(__file__)`` lets the relative database path be
    resolved from the folder that contains this test file.
    """
    global connection
    db = sqlite3.connect('example.sqlite3')
    connection = db


def tearDownModule():
    """Close the module-level ``connection`` opened by setUpModule()."""
    connection.close()


class MyTest(DataTestCase):
    # Test cases that query ``mytable`` through the module-level
    # ``connection`` opened by setUpModule().

    def setUp(self):
        # Each test gets its own cursor; addCleanup() closes it afterwards.
        self.cursor = connection.cursor()
        self.addCleanup(self.cursor.close)

    @mandatory
    def test_column_names(self):
        # LIMIT 0 asks for no rows; the column names are then read from
        # the first item of each entry in cursor.description.
        self.cursor.execute('SELECT * FROM mytable LIMIT 0;')
        actual_names = [name for name, *_ in self.cursor.description]
        expected_names = {'A', 'B', 'C'}
        self.assertValid(actual_names, expected_names)

    def test_a(self):
        # The executed cursor itself is passed to assertValid() as the data.
        required_values = {'x', 'y', 'z'}
        self.cursor.execute('SELECT A FROM mytable;')
        self.assertValid(self.cursor, required_values)


if __name__ == '__main__':
    # Script entry point: run the tests with datatest's test runner.
    import datatest
    datatest.main()

2. Adapt the Sample Code to Suit Your Data

After copying the sample code into your own file, begin adapting it to suit your data:

- Change the fixture to use your data (instead of “example.csv”).
- Update the set in test_column_names() to require the names your data should contain (instead of “A”, “B”, and “C”).
- Rename test_a() and change it to check values in one of the columns in your data.
- Add more tests appropriate for your own data requirements.

3. Refactor Your Tests as They Grow

As your tests grow, look to structure them into related groups. Start by creating separate classes to contain groups of related test cases. And as you develop more and more classes, create separate modules to hold groups of related classes. If you are using pytest, move your fixtures into a conftest.py file.