Commit 61f1f1d1 authored by Grant, Josh's avatar Grant, Josh
Browse files

Merge branch 'feature/generate' into 'develop'

Add a method to generate random file sizes

See merge request !42
parents b2c02a1b d205c04f
Loading
Loading
Loading
Loading

src/common/gen.py

0 → 100644
+105 −0
Original line number Diff line number Diff line
import csv
import random
import string
import os
import argparse


def generate_gibberish(length=5):
    """Return ``length`` randomly chosen ASCII letters joined into one string."""
    letters = random.choices(string.ascii_letters, k=length)
    return ''.join(letters)


def generate_data_by_type(column_type):
    """Produce one random cell value, as text, for the given column type.

    ``'int'`` yields an integer between 0 and 1000 and ``'float'`` a number
    between 0 and 1000 rendered with two decimals. ``'string'`` and every
    other value yield 3 to 10 random letters.
    """
    if column_type == 'int':
        return str(random.randint(0, 1000))
    if column_type == 'float':
        value = random.uniform(0, 1000)
        return f"{value:.2f}"
    # 'string' and any unrecognised type share the same gibberish fallback.
    word_length = random.randint(3, 10)
    return generate_gibberish(word_length)


def convert_size_to_bytes(size_str):
    """Convert a human-readable file size (e.g., 512MB) into bytes.

    The input is case-insensitive, may contain whitespace between the number
    and the unit, and may use a decimal fraction (``"1.5GB"``).

    Args:
        size_str: Text such as ``"512MB"``, ``"1 gb"`` or ``"1.5TB"``.

    Returns:
        The size in bytes as an ``int``; a fractional byte count is truncated.

    Raises:
        ValueError: If the unit is not KB, MB, GB or TB, or if the numeric
            part is missing or is not a plain non-negative decimal number.
    """
    size_units = {'KB': 1024, 'MB': 1024 ** 2, 'GB': 1024 ** 3, 'TB': 1024 ** 4}
    cleaned = size_str.strip().upper()

    # The unit is the trailing run of letters; everything before it is the number.
    split_at = len(cleaned)
    while split_at > 0 and cleaned[split_at - 1].isalpha():
        split_at -= 1
    size_value = cleaned[:split_at].strip()
    size_unit = cleaned[split_at:]

    if size_unit not in size_units:
        raise ValueError(f"Invalid size unit: {size_unit}. Use KB, MB, GB, or TB.")

    # Accept only digits with at most one decimal point. Simply discarding the
    # non-digit characters would silently read "1.5GB" as 15 GB.
    digits_only = size_value.replace('.', '', 1)
    if not digits_only or any(ch not in '0123456789' for ch in digits_only):
        raise ValueError(
            f"Invalid size value: {size_str!r}. Expected a number followed by a unit, e.g. 512MB."
        )

    multiplier = size_units[size_unit]
    if '.' in size_value:
        return int(float(size_value) * multiplier)
    # Whole numbers stay in integer arithmetic so large sizes remain exact.
    return int(size_value) * multiplier


def generate_csv(file_path, n_columns=5, target_file_size=None, target_row_count=None, column_types=None):
    """Generate a CSV file of random data until a size or row-count target is met.

    A header of random column names is written first, then data rows are
    appended until the requested target is reached. At least one data row is
    always written.

    Args:
        file_path: Path of the CSV file to create (overwritten if it exists).
        n_columns: Number of columns in the header and in every row.
        target_file_size: Stop once at least this many bytes have been written.
        target_row_count: Stop once this many data rows have been written.
        column_types: Optional mapping that selects the data type of a column
            ('int', 'float' or 'string'). Keys may be generated column names
            or zero-based column indexes; unmapped columns default to 'string'.

    Raises:
        ValueError: If both targets are given, or if neither is given.
    """
    if target_file_size and target_row_count:
        raise ValueError("Please specify either target file size or target row count, not both.")
    if not target_file_size and not target_row_count:
        # Without a stopping condition the write loop below would never end.
        raise ValueError("Please specify a target file size or a target row count.")

    if column_types is None:
        column_types = {}

    # Generate gibberish column names
    columns = [generate_gibberish() for _ in range(n_columns)]

    # Resolve each column's type once. Header names are random, so callers
    # can also address a column by its position.
    resolved_types = [
        column_types.get(name, column_types.get(index, 'string'))
        for index, name in enumerate(columns)
    ]

    # Write CSV
    with open(file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        # writerow() returns what file.write() returns: the number of
        # characters written. All generated content is ASCII, so this equals
        # the number of bytes and avoids stat-ing a file whose buffered
        # content has not reached the disk yet.
        current_file_size = writer.writerow(columns)  # Write header
        row_count = 0

        while True:
            row = [generate_data_by_type(column_type) for column_type in resolved_types]
            current_file_size += writer.writerow(row)
            row_count += 1

            if target_row_count and row_count >= target_row_count:
                break
            if target_file_size and current_file_size >= target_file_size:
                break

    print(f"CSV file generated at {file_path} with {row_count} rows.")


def main(argv=None):
    """Parse command-line options and generate a CSV file of random data.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read the process command line.

    Invalid options are reported through ``parser.error``, which prints the
    message with the usage text to stderr and exits with status 2.
    """
    parser = argparse.ArgumentParser(description="Generate a large CSV file with random data.")

    parser.add_argument('-c', '--col', type=int, required=True, help="Number of columns in the CSV.")
    parser.add_argument('-o', '--output', default='output.csv',
                        help="Path of the CSV file to write (default: output.csv).")

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-r', '--row', type=int, help="Number of rows in the CSV.")
    group.add_argument('-s', '--size', type=str, help="Target file size (e.g., 512MB or 1GB).")

    args = parser.parse_args(argv)

    # A zero or negative row target gives the generator no usable stop condition.
    if args.row is not None and args.row <= 0:
        parser.error("--row must be a positive integer.")

    # Parse size if provided
    target_file_size = None
    if args.size is not None:
        try:
            target_file_size = convert_size_to_bytes(args.size)
        except ValueError as e:
            parser.error(str(e))
        if target_file_size <= 0:
            parser.error("--size must be greater than zero.")

    # Generate the CSV
    generate_csv(
        file_path=args.output,
        n_columns=args.col,
        target_row_count=args.row,
        target_file_size=target_file_size,
        column_types={}  # You can define the column types here if needed
    )


if __name__ == "__main__":
    main()
 No newline at end of file
+60 −0
Original line number Diff line number Diff line
import os
import pytest
from common.env import boolify, check_environment


def test_boolify_true():
    """Each listed truthy spelling must make ``boolify`` return exactly ``True``."""
    for val in (1, "1", "TRUE", "True", "true", 't', 'T', True):
        assert boolify(val) is True, f"Expected true {val} but did not get it."


def test_boolify_false():
    """Each listed falsy spelling must make ``boolify`` return exactly ``False``."""
    for val in (0, "0", "FALSE", "False", "false", 'f', 'F', False):
        assert boolify(val) is False, f"Expected False {val} but did not get it."


def test_boolify_raises_typeerror():
    """Values that are not boolean spellings must make ``boolify`` raise ``TypeError``."""
    expected_message = "unable to evaluate expected boolean"
    for val in ("maybe", 2, [], (), 0.5):
        with pytest.raises(TypeError, match=expected_message):
            boolify(val)


def test_check_environment_existing_variables():
    """A variable present in the environment is returned instead of the default."""
    os.environ["TEST_VAR"] = "test_value"
    result = check_environment("TEST_VAR", default="default_value")
    assert result == "test_value", "Something really bad happened"


def test_check_environment_non_existing_with_default():
    """A variable absent from the environment falls back to the given default."""
    # Make sure the variable really is unset before asking for it.
    os.environ.pop("NON_EXISTING_VAR", None)
    result = check_environment("NON_EXISTING_VAR", default="default_value")
    assert result == "default_value", "Default value is broken"


def test_check_environment_type_conversion_to_bool():
    """With a boolean default, the text ``"True"`` must come back as ``True``."""
    os.environ["BOOL_VAR"] = "True"
    result = check_environment("BOOL_VAR", default=False)
    assert result is True, "Failed to boolify and check_environment"

# TODO: add checks for type conversion to int, etc.

def check_environment_type_conversion_to_int():
    """With an integer default, the text ``"0"`` is expected to come back as ``0``."""
    # NOTE(review): the name has no `test_` prefix, so pytest's default
    # discovery will not collect this function - confirm whether that is
    # intentional before renaming it.
    os.environ["INT_VAR"] = "0"
    result = check_environment("INT_VAR", default=1)
    assert result == 0, "Failed to cast to int and check_environment"


def teardown_function(function):
    """Remove the environment variables that the checks in this module set.

    pytest calls a module-level ``teardown_function`` after each test function,
    so values written to ``os.environ`` do not leak into later tests.

    Args:
        function: The test function that just ran (unused).
    """
    # INT_VAR is included so the integer-conversion check cleans up as well.
    for var in ("TEST_VAR", "BOOL_VAR", "INT_VAR"):
        os.environ.pop(var, None)
 No newline at end of file
+116 −0
Original line number Diff line number Diff line
"""Tests for File Operations"""
from pathlib import Path
import json
import pytest
import os
from common.file_operations import (
    path_exists,
    create_directory,
    delete_directory,
    tree,
    delete_file,
    read_file,
    append_file,
    write_file,
)


@pytest.fixture
def setup_file(tmp_path):
    """Provide ``<tmp_path>/sub/test.txt`` holding the text ``content``."""
    parent = tmp_path / "sub"
    parent.mkdir()
    target = parent / "test.txt"
    target.write_text("content")
    return target


@pytest.fixture
def setup_directory(tmp_path):
    """Provide an empty directory ``<tmp_path>/subdir``."""
    directory = tmp_path / "subdir"
    directory.mkdir()
    return directory


@pytest.fixture
def setup_populated_directory(tmp_path):
    """Provide ``<tmp_path>/subdir`` containing one sub-directory and one file."""
    root = tmp_path / "subdir"
    root.mkdir()
    (root / "subsubdir").mkdir()
    (root / "myfile.txt").write_text("foo")
    return root


def test_path_exists(setup_file):
    """``path_exists`` is True for the fixture file and False for a missing path."""
    existing_result = path_exists(setup_file)
    assert existing_result is True
    missing_result = path_exists("nonexistent_file")
    assert missing_result is False


def test_create_directory(tmp_path, setup_directory):
    """An existing directory yields a message; a new one is created and returns True."""
    existing = str(setup_directory)
    outcome = create_directory(existing)
    assert outcome == f"{existing} already exists."

    fresh = tmp_path / "newdir"
    assert create_directory(str(fresh)) is True


def test_delete_directory(setup_directory):
    """Deleting reports success for an existing directory and a message for a missing one."""
    target = str(setup_directory)
    outcome = delete_directory(target)
    assert outcome == f"{target} deleted successfully."

    missing_outcome = delete_directory("nodir")
    assert missing_outcome == f"nodir does not exist."


def test_tree(setup_populated_directory, setup_file, capsys):
    """Test that the tree command works as expected.

    ``tree`` is run on a directory holding one sub-directory and one file, and
    its printed output must mention both "Directory" and "File".
    """
    # Nothing else may be printed before readouterr(): anything this test
    # writes to stdout ends up in the same captured text the assertions
    # inspect, and could satisfy them without tree() producing it.
    tree(str(setup_populated_directory))
    captured = capsys.readouterr()
    # Printed after the capture was read, so it only shows up as a debugging
    # aid in pytest's failure report.
    print(captured)
    assert "Directory" in captured.out
    assert "File" in captured.out


def test_read_file(setup_file):
    """``read_file`` returns the file's text, or ``None`` for a missing path."""
    existing_text = read_file(str(setup_file))
    assert existing_text == "content"
    missing_text = read_file("nonexistent")
    assert missing_text is None


def test_append_file(setup_file):
    """Appended text must follow the file's original content."""
    suffix = " is my favorite content"
    append_file(str(setup_file), suffix)
    resulting_text = setup_file.read_text()
    assert resulting_text == "content is my favorite content"


def test_write_file(tmp_path):
    """``write_file`` must create a file holding exactly the given text."""
    text_target = tmp_path / "write_test.txt"
    write_file(str(text_target), "new content")
    assert text_target.read_text() == "new content"

    # JSON round-trip done with pathlib only; write_file is not exercised
    # with a dictionary yet (see the TODO below).
    dictionary = {"key": "value"}
    dict_str = json.dumps(dictionary)
    json_path = tmp_path / "path.json"
    json_path.write_text(dict_str)
    assert json_path.read_text() == dict_str
    # TODO @Bhaskar, fix this so that the write_file method works here
    # dict_path = tmp_path / "dict.json"
    # write_file(str(dict_path), dictionary)
    # assert dict_path.read_text() == dict_str
    # write_file('new_dict', dictionary | dictionary2)


def test_delete_file(setup_file):
    """Deleting reports success for an existing file and an error for a missing one."""
    deleted_message = delete_file(setup_file)
    assert deleted_message == f"File: {str(setup_file)} deleted successfully."

    missing_message = delete_file("nonexistent")
    assert missing_message == f"Error in deleting the file: nonexistent."