How to Test File Properties

In some cases, you might need to check the properties of several files at once. One way to do this is to collect the properties of each file in a fixture (as a DataFrame or a similar object) and then validate that object in your tests.

Example

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import datetime
import os
import pathlib
import pytest
import pandas as pd
import datatest as dt


def get_properties(file_path):
    """Return a dict of properties for the file at *file_path*.

    *file_path* must be a pathlib.Path. The result holds the path as a
    string, the file name, the last-modified date, the size in megabytes
    (rounded to two decimal places), and the read/write access flags
    reported by os.access().
    """
    file_stat = file_path.stat()

    properties = {}
    properties['path'] = str(file_path)
    properties['name'] = file_path.name
    properties['modified_date'] = datetime.date.fromtimestamp(file_stat.st_mtime)
    properties['size'] = round(file_stat.st_size / 1024 / 1024, 2)  # Bytes -> MB.
    properties['readable'] = os.access(file_path, os.R_OK)
    properties['writable'] = os.access(file_path, os.W_OK)
    return properties


@pytest.fixture(scope='session')
@dt.working_directory(__file__)
def df():
    """Session fixture: a DataFrame of file properties indexed by path.

    Collects every regular file matching ``*.csv`` in the current
    directory, which the ``working_directory`` decorator is given
    ``__file__`` to set, and stores one row of properties per file.
    """
    csv_files = [
        entry
        for entry in pathlib.Path('.').glob('*.csv')  # CSV files in current dir.
        if entry.is_file()
    ]
    records = [get_properties(entry) for entry in csv_files]
    frame = pd.DataFrame.from_records(records)
    return frame.set_index(['path'])


def test_filename(df):
    """Validate that every value in the 'name' column is lowercase."""
    def is_lower_case(name):  # <- Predicate applied to each file name.
        return name.islower()

    dt.validate(df['name'], is_lower_case, msg='Must be lowercase.')


def test_freshness(df):
    """Validate that no file was last modified more than a week ago."""
    today = datetime.date.today()
    cutoff = today - datetime.timedelta(weeks=1)
    dt.validate.interval(
        df['modified_date'],
        min=cutoff,
        msg='Must be no older than one week.',
    )


def test_filesize(df):
    """Validate that no value in the 'size' column (in MB) exceeds 1.0."""
    sizes_in_mb = df['size']
    dt.validate.interval(
        sizes_in_mb,
        max=1.0,
        msg='Must be 1 MB or less in size.',
    )


def test_permissions(df):
    """Validate that every file is flagged both readable and writable."""
    permission_columns = ['readable', 'writable']
    required = (True, True)  # (readable, writable)
    dt.validate(
        df[permission_columns],
        required,
        msg='Must have read and write permissions.',
    )


if __name__ == '__main__':
    # When executed as a script, hand the command-line arguments to pytest
    # and exit with the status it returns.
    import sys

    exit_status = pytest.main(sys.argv)
    sys.exit(exit_status)

Other Properties

To check other file properties, you can modify or add to the get_properties() function.

Below, we count the number of lines in each file and add a line_count entry to the dictionary of properties:

1
2
3
4
import datetime
import os

...
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
...

def get_properties(file_path):
    """Accepts a pathlib.Path and returns a dict of file properties.

    In addition to the path, name, modification date, size (in MB), and
    read/write access flags, the result includes 'line_count': the
    number of lines in the file.
    """
    stats = file_path.stat()

    size_in_mb = stats.st_size / 1024 / 1024  # Convert bytes to megabytes.

    # Count lines lazily so a large file is never loaded into memory all
    # at once. Undecodable bytes are replaced instead of raising an error
    # because only the line breaks matter here, not the text itself.
    with open(file_path, errors='replace') as fh:
        line_count = sum(1 for _ in fh)

    return {
        'path': str(file_path),
        'name': file_path.name,
        'modified_date': datetime.date.fromtimestamp(stats.st_mtime),
        'size': round(size_in_mb, 2),
        'readable': os.access(file_path, os.R_OK),
        'writable': os.access(file_path, os.W_OK),
        'line_count': line_count,
    }

...