#!/usr/bin/env python3
# Customizable script to blend multiple raw csv files into a single labeled training file.

import numpy as np

# load files in label order
names = ['near.csv', 'receding.csv', 'approaching.csv', 'far.csv']

# Comment header emitted at the top of the training file, ahead of the samples.
HEADER = """\
# Each line is a sample array prefixed with an integer label, with values separated by commas.
# E.g. in this example:
#   data is label,distance,velocity
#   labels: 0 is 'near', 1 is 'receding', 2 is 'approaching', 3 is 'far'
"""


def _load_labeled(name, label):
    """Load one raw csv file and prefix every sample row with its integer label.

    Args:
        name: path of a comma-separated file of integer values, one sample per line.
        label: integer written as the first column of every returned row.

    Returns:
        A 2-D integer array with one row per sample: the label followed by
        the values read from the file.
    """
    # ndmin=2 keeps the (samples, columns) shape even when the file holds a
    # single row or a single column.  Without it loadtxt returns a 1-D array,
    # so the row count is wrong and hstack fails on mismatched dimensions.
    data = np.loadtxt(name, delimiter=',', dtype=int, ndmin=2)
    labels = np.full((data.shape[0], 1), label, dtype=int)
    return np.hstack((labels, data))


def main(filenames=None, output_path='training.csv'):
    """Blend the raw csv files into a single labeled training file.

    Args:
        filenames: raw csv files in label order; the position of a file in
            this sequence is the label given to its samples.  Defaults to
            the module-level ``names`` list.
        output_path: file to write; it is created or overwritten.
    """
    if filenames is None:
        filenames = names

    # Load everything before opening the output, so a missing or malformed
    # input does not leave a truncated training file behind.
    blocks = []
    for label, name in enumerate(filenames):
        print(f"Loading {name} and labeling it {label}.")
        blocks.append(_load_labeled(name, label))

    with open(output_path, 'w') as output:
        output.write(HEADER)

        # emit each block of labeled samples
        for block in blocks:
            np.savetxt(output, block, fmt='%d', delimiter=',')


if __name__ == '__main__':
    main()
