import os

def _in_base(*parts):
    """Join path components under the NABirds base directory."""
    return os.path.join(base_dir, *parts)

# Locations of the dataset's images and metadata files.
img_dir = _in_base("images")
bb_file = _in_base("bounding_boxes.txt")
classes_translation_file = _in_base("classes_fixed.txt")
class_labels_file = _in_base("image_class_labels.txt")
img_file = _in_base("images.txt")
photographers_file = _in_base("photographers_fixed.txt")
sizes_file = _in_base("sizes.txt")
train_test_split_file = _in_base("train_test_split.txt")

# --- Compiling the metadata ---
In this section, we process some of the metadata associated with the NABirds dataset by creating a Polars DataFrame collecting all the information we will need while processing the images and training our model.
Polars is a modern, very fast DataFrame library; prefer it over pandas whenever you can if performance matters.
Fix problem files
The metadata comes in various space-separated CSV files. Two of them are problematic because they are jagged: the number of elements per line is inconsistent.
First of all, let’s create new copies of these files with all commas removed. This would be easy to do in the Python function that we will use later but, for a reason I could never figure out, I failed to implement it in Python. So I am cheating and doing it in Bash (or Zsh) using the utility sed, which makes such transformations easy.
In Bash or Zsh:
sed 's/,//g' <path-of-the-nabirds-dir>/photographers.txt >
<path-of-the-nabirds-dir>/photographers_nocommas.txt
sed 's/,//g' <path-of-the-nabirds-dir>/classes.txt >
<path-of-the-nabirds-dir>/classes_nocommas.txt

Here, <path-of-the-nabirds-dir> is the path in which you downloaded the NABirds dataset.
Alternatively:
cd <path-of-the-nabirds-dir>
sed 's/,//g' photographers.txt > photographers_nocommas.txt
sed 's/,//g' classes.txt > classes_nocommas.txt

We could finish fixing these files in Bash (which would be a lot easier and much less wordy!), but because this is a Python course, let’s do the rest in Python.
Let’s create a function that will do the rest of the cleaning and write two new files that won’t be problematic:
base_dir = "<path-of-the-nabirds-dir>"  # To be replaced by the proper path.
import os
import csv
def replace_spaces_except_first(input_filepath, output_filepath):
    """
    Replace all spaces with underscores in a CSV file, except the first
    space on each line, and strip double quotes.

    This turns a jagged space-separated file (where the second "column"
    itself contains spaces) into a well-formed two-column file.

    Args:
        input_filepath (str): the path of the input file.
        output_filepath (str): the path of the output file.
    """
    # newline="" is required when handing files to the csv module:
    # without it, the writer's "\r\n" line terminator gets doubled to
    # "\r\r\n" on Windows.
    with open(input_filepath, 'r', newline='') as infile, \
            open(output_filepath, 'w', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        for row in reader:
            processed_row = []
            for item in row:
                # Remove stray double quotes left over in the raw data.
                item = item.replace('"', '')
                # Split at the first space (if any); str.partition keeps
                # items without a space unchanged, since sep and tail are
                # then both empty.
                head, sep, tail = item.partition(' ')
                processed_row.append(head + sep + tail.replace(' ', '_'))
            writer.writerow(processed_row)
# Apply the fixer to both problem files.
for _src_name, _dst_name in (
    ("photographers_nocommas.txt", "photographers_fixed.txt"),
    ("classes_nocommas.txt", "classes_fixed.txt"),
):
    replace_spaces_except_first(
        os.path.join(base_dir, _src_name),
        os.path.join(base_dir, _dst_name),
    )

# --- Create variables ---
Create a metadata Dataframe
First, we create a series of DataFrames from each CSV file:
import polars as pl

def _read_meta(path, columns):
    """Load one space-separated, headerless metadata file as a DataFrame."""
    return pl.read_csv(
        path,
        separator=" ",
        has_header=False,
        new_columns=columns,
    )

# One DataFrame per metadata file, all keyed by the image UUID
# (except classes_translation, which is keyed by the class number).
bb = _read_meta(bb_file, ["UUID", "bb_x", "bb_y", "bb_width", "bb_height"])
classes = _read_meta(class_labels_file, ["UUID", "class"])
classes_translation = _read_meta(classes_translation_file, ["class", "id"])
img_paths = _read_meta(img_file, ["UUID", "path"])
photographers = _read_meta(photographers_file, ["UUID", "photographer"])
sizes = _read_meta(sizes_file, ["UUID", "img_width", "img_height"])
train_test_split = _read_meta(train_test_split_file, ["UUID", "is_training_img"])

# Then we can combine the classes DataFrames so that the bird
# identifications become associated with the bird UUIDs:
# Attach the human-readable species id to each image via its class number.
classes_metadata = classes.join(classes_translation, on="class")

# Finally, merge all the per-UUID tables into a single metadata DataFrame.
metadata = bb.join(classes_metadata, on="UUID")
for _extra in (img_paths, photographers, sizes, train_test_split):
    metadata = metadata.join(_extra, on="UUID")

# --- Sanity checks ---
Let’s see what our DataFrame looks like:
print(metadata)shape: (48_562, 12)
┌──────────────┬──────┬──────┬──────────┬───┬──────────────┬───────────┬────────────┬──────────────┐
│ UUID ┆ bb_x ┆ bb_y ┆ bb_width ┆ … ┆ photographer ┆ img_width ┆ img_height ┆ is_training_ │
│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ img │
│ str ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i64 ┆ --- │
│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ i64 │
╞══════════════╪══════╪══════╪══════════╪═══╪══════════════╪═══════════╪════════════╪══════════════╡
│ 0000139e-21d ┆ 83 ┆ 59 ┆ 128 ┆ … ┆ Ruth_Cantwel ┆ 296 ┆ 341 ┆ 0 │
│ c-4d0c-bfe1- ┆ ┆ ┆ ┆ ┆ l ┆ ┆ ┆ │
│ 4cae3c… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 0000d9fc-4e0 ┆ 328 ┆ 88 ┆ 163 ┆ … ┆ Christopher_ ┆ 640 ┆ 427 ┆ 0 │
│ 2-4c06-a0af- ┆ ┆ ┆ ┆ ┆ L._Wood_Chri ┆ ┆ ┆ │
│ a55cfb… ┆ ┆ ┆ ┆ ┆ s_Wood ┆ ┆ ┆ │
│ 00019306-9d8 ┆ 174 ┆ 367 ┆ 219 ┆ … ┆ Ryan_Schain ┆ 730 ┆ 1024 ┆ 0 │
│ 3-4334-b255- ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ a44774… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 0001afd4-99a ┆ 307 ┆ 179 ┆ 492 ┆ … ┆ Laura_Ericks ┆ 1024 ┆ 680 ┆ 1 │
│ 1-4a67-b940- ┆ ┆ ┆ ┆ ┆ on ┆ ┆ ┆ │
│ d41941… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 000332b8-997 ┆ 395 ┆ 139 ┆ 262 ┆ … ┆ Dan_Irizarry ┆ 1024 ┆ 682 ┆ 0 │
│ c-4540-9647- ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 2f0a84… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │
│ fff86e8b-795 ┆ 344 ┆ 163 ┆ 291 ┆ … ┆ Nancy_Landry ┆ 1024 ┆ 819 ┆ 1 │
│ f-400a-91e8- ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 565bbb… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ fff926d7-cca ┆ 330 ┆ 180 ┆ 339 ┆ … ┆ Ruth_Sulliva ┆ 1024 ┆ 956 ┆ 1 │
│ d-4788-839e- ┆ ┆ ┆ ┆ ┆ n ┆ ┆ ┆ │
│ 97af2d… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ fffa33ef-a76 ┆ 184 ┆ 94 ┆ 258 ┆ … ┆ Gerry_Dewagh ┆ 640 ┆ 800 ┆ 1 │
│ 5-408d-8d66- ┆ ┆ ┆ ┆ ┆ e ┆ ┆ ┆ │
│ 6efc7f… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ ffff0d87-bc8 ┆ 102 ┆ 210 ┆ 461 ┆ … ┆ Muriel_Nedde ┆ 731 ┆ 1024 ┆ 0 │
│ 4-4ef2-a47e- ┆ ┆ ┆ ┆ ┆ rmeyer ┆ ┆ ┆ │
│ a4bfa4… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ fffff3a5-2a7 ┆ 281 ┆ 164 ┆ 524 ┆ … ┆ Dominic_Sher ┆ 1024 ┆ 683 ┆ 0 │
│ 5-47d0-887f- ┆ ┆ ┆ ┆ ┆ ony ┆ ┆ ┆ │
│ 03871e… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
└──────────────┴──────┴──────┴──────────┴───┴──────────────┴───────────┴────────────┴──────────────┘
And then let’s explore a number of characteristics:
print(metadata.columns)
print(metadata.row(0))
print(metadata.row(-1))['UUID', 'bb_x', 'bb_y', 'bb_width', 'bb_height', 'class', 'id', 'path', 'photographer', 'img_width', 'img_height', 'is_training_img']
('0000139e-21dc-4d0c-bfe1-4cae3c85c829', 83, 59, 128, 228, 817, 'Oak_Titmouse', '0817/0000139e21dc4d0cbfe14cae3c85c829.jpg', 'Ruth_Cantwell', 296, 341, 0)
('fffff3a5-2a75-47d0-887f-03871e3f9a37', 281, 164, 524, 279, 880, 'Black-throated_Gray_Warbler', '0880/fffff3a52a7547d0887f03871e3f9a37.jpg', 'Dominic_Sherony', 1024, 683, 0)
print(metadata.head())shape: (5, 12)
┌──────────────┬──────┬──────┬──────────┬───┬──────────────┬───────────┬────────────┬──────────────┐
│ UUID ┆ bb_x ┆ bb_y ┆ bb_width ┆ … ┆ photographer ┆ img_width ┆ img_height ┆ is_training_ │
│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ img │
│ str ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i64 ┆ --- │
│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ i64 │
╞══════════════╪══════╪══════╪══════════╪═══╪══════════════╪═══════════╪════════════╪══════════════╡
│ 0000139e-21d ┆ 83 ┆ 59 ┆ 128 ┆ … ┆ Ruth_Cantwel ┆ 296 ┆ 341 ┆ 0 │
│ c-4d0c-bfe1- ┆ ┆ ┆ ┆ ┆ l ┆ ┆ ┆ │
│ 4cae3c… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 0000d9fc-4e0 ┆ 328 ┆ 88 ┆ 163 ┆ … ┆ Christopher_ ┆ 640 ┆ 427 ┆ 0 │
│ 2-4c06-a0af- ┆ ┆ ┆ ┆ ┆ L._Wood_Chri ┆ ┆ ┆ │
│ a55cfb… ┆ ┆ ┆ ┆ ┆ s_Wood ┆ ┆ ┆ │
│ 00019306-9d8 ┆ 174 ┆ 367 ┆ 219 ┆ … ┆ Ryan_Schain ┆ 730 ┆ 1024 ┆ 0 │
│ 3-4334-b255- ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ a44774… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 0001afd4-99a ┆ 307 ┆ 179 ┆ 492 ┆ … ┆ Laura_Ericks ┆ 1024 ┆ 680 ┆ 1 │
│ 1-4a67-b940- ┆ ┆ ┆ ┆ ┆ on ┆ ┆ ┆ │
│ d41941… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 000332b8-997 ┆ 395 ┆ 139 ┆ 262 ┆ … ┆ Dan_Irizarry ┆ 1024 ┆ 682 ┆ 0 │
│ c-4540-9647- ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 2f0a84… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
└──────────────┴──────┴──────┴──────────┴───┴──────────────┴───────────┴────────────┴──────────────┘
print(metadata.tail())shape: (5, 12)
┌──────────────┬──────┬──────┬──────────┬───┬──────────────┬───────────┬────────────┬──────────────┐
│ UUID ┆ bb_x ┆ bb_y ┆ bb_width ┆ … ┆ photographer ┆ img_width ┆ img_height ┆ is_training_ │
│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ img │
│ str ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i64 ┆ --- │
│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ i64 │
╞══════════════╪══════╪══════╪══════════╪═══╪══════════════╪═══════════╪════════════╪══════════════╡
│ fff86e8b-795 ┆ 344 ┆ 163 ┆ 291 ┆ … ┆ Nancy_Landry ┆ 1024 ┆ 819 ┆ 1 │
│ f-400a-91e8- ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 565bbb… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ fff926d7-cca ┆ 330 ┆ 180 ┆ 339 ┆ … ┆ Ruth_Sulliva ┆ 1024 ┆ 956 ┆ 1 │
│ d-4788-839e- ┆ ┆ ┆ ┆ ┆ n ┆ ┆ ┆ │
│ 97af2d… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ fffa33ef-a76 ┆ 184 ┆ 94 ┆ 258 ┆ … ┆ Gerry_Dewagh ┆ 640 ┆ 800 ┆ 1 │
│ 5-408d-8d66- ┆ ┆ ┆ ┆ ┆ e ┆ ┆ ┆ │
│ 6efc7f… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ ffff0d87-bc8 ┆ 102 ┆ 210 ┆ 461 ┆ … ┆ Muriel_Nedde ┆ 731 ┆ 1024 ┆ 0 │
│ 4-4ef2-a47e- ┆ ┆ ┆ ┆ ┆ rmeyer ┆ ┆ ┆ │
│ a4bfa4… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ fffff3a5-2a7 ┆ 281 ┆ 164 ┆ 524 ┆ … ┆ Dominic_Sher ┆ 1024 ┆ 683 ┆ 0 │
│ 5-47d0-887f- ┆ ┆ ┆ ┆ ┆ ony ┆ ┆ ┆ │
│ 03871e… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
└──────────────┴──────┴──────┴──────────┴───┴──────────────┴───────────┴────────────┴──────────────┘
# NOTE: seeding Python's `random` module has no effect on Polars —
# DataFrame.sample uses its own RNG, seeded through its `seed` argument.
print(metadata.sample(seed=123))
┌──────────────┬──────┬──────┬──────────┬───┬──────────────┬───────────┬────────────┬──────────────┐
│ UUID ┆ bb_x ┆ bb_y ┆ bb_width ┆ … ┆ photographer ┆ img_width ┆ img_height ┆ is_training_ │
│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ img │
│ str ┆ i64 ┆ i64 ┆ i64 ┆ ┆ str ┆ i64 ┆ i64 ┆ --- │
│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ i64 │
╞══════════════╪══════╪══════╪══════════╪═══╪══════════════╪═══════════╪════════════╪══════════════╡
│ b20cc001-80f ┆ 382 ┆ 236 ┆ 308 ┆ … ┆ Alex_Burdo ┆ 1024 ┆ 723 ┆ 0 │
│ 0-4280-9cd5- ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ b9b569… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
└──────────────┴──────┴──────┴──────────┴───┴──────────────┴───────────┴────────────┴──────────────┘
print(metadata.schema)
print(metadata.shape)Schema({'UUID': String, 'bb_x': Int64, 'bb_y': Int64, 'bb_width': Int64, 'bb_height': Int64, 'class': Int64, 'id': String, 'path': String, 'photographer': String, 'img_width': Int64, 'img_height': Int64, 'is_training_img': Int64})
(48562, 12)
print(metadata.glimpse())Rows: 48562
Columns: 12
$ UUID <str> '0000139e-21dc-4d0c-bfe1-4cae3c85c829', '0000d9fc-4e02-4c06-a0af-a55cfb16b12b', '00019306-9d83-4334-b255-a447742edce3', '0001afd4-99a1-4a67-b940-d419413e23b3', '000332b8-997c-4540-9647-2f0a8495aecf', '000343bd-5215-49ba-ab9c-7c97a70ac1a5', '0004ff8d-0cc8-47ee-94ba-43352a8b9eb4', '0007181f-a727-4481-ad89-591200c61b9d', '00071e20-8156-4bd8-b5ca-6445c2560ee5', '0007acfc-c0e6-4393-9ab6-02215a82ef63'
$ bb_x <i64> 83, 328, 174, 307, 395, 120, 417, 47, 260, 193
$ bb_y <i64> 59, 88, 367, 179, 139, 210, 109, 194, 146, 291
$ bb_width <i64> 128, 163, 219, 492, 262, 587, 221, 819, 578, 526
$ bb_height <i64> 228, 298, 378, 224, 390, 357, 467, 573, 516, 145
$ class <i64> 817, 860, 900, 645, 929, 652, 951, 900, 988, 400
$ id <str> 'Oak_Titmouse', 'Ovenbird', 'Savannah_Sparrow', 'Eared_Grebe_(Nonbreeding/juvenile)', 'Eastern_Phoebe', 'Yellow-crowned_Night-Heron_(Immature)', 'Florida_Scrub-Jay', 'Savannah_Sparrow', 'Yellow-headed_Blackbird_(Female/Immature_Male)', 'Herring_Gull_(Adult)'
$ path <str> '0817/0000139e21dc4d0cbfe14cae3c85c829.jpg', '0860/0000d9fc4e024c06a0afa55cfb16b12b.jpg', '0900/000193069d834334b255a447742edce3.jpg', '0645/0001afd499a14a67b940d419413e23b3.jpg', '0929/000332b8997c454096472f0a8495aecf.jpg', '0652/000343bd521549baab9c7c97a70ac1a5.jpg', '0951/0004ff8d0cc847ee94ba43352a8b9eb4.jpg', '0900/0007181fa7274481ad89591200c61b9d.jpg', '0988/00071e2081564bd8b5ca6445c2560ee5.jpg', '0400/0007acfcc0e643939ab602215a82ef63.jpg'
$ photographer <str> 'Ruth_Cantwell', 'Christopher_L._Wood_Chris_Wood', 'Ryan_Schain', 'Laura_Erickson', 'Dan_Irizarry', 'Ken_Schneider', 'Velma_Knowles', 'Matt_Tillett', 'Terry_Gray', 'Cory_Gregory'
$ img_width <i64> 296, 640, 730, 1024, 1024, 1024, 1024, 1024, 1024, 1024
$ img_height <i64> 341, 427, 1024, 680, 682, 768, 683, 819, 768, 681
$ is_training_img <i64> 0, 0, 0, 1, 0, 0, 0, 1, 1, 0
None
print(metadata.describe())shape: (9, 13)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ statistic ┆ UUID ┆ bb_x ┆ bb_y ┆ … ┆ photograp ┆ img_width ┆ img_heigh ┆ is_train │
│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ her ┆ --- ┆ t ┆ ing_img │
│ str ┆ str ┆ f64 ┆ f64 ┆ ┆ --- ┆ f64 ┆ --- ┆ --- │
│ ┆ ┆ ┆ ┆ ┆ str ┆ ┆ f64 ┆ f64 │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ count ┆ 48562 ┆ 48562.0 ┆ 48562.0 ┆ … ┆ 48562 ┆ 48562.0 ┆ 48562.0 ┆ 48562.0 │
│ null_coun ┆ 0 ┆ 0.0 ┆ 0.0 ┆ … ┆ 0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │
│ t ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ mean ┆ null ┆ 221.68531 ┆ 158.53412 ┆ … ┆ null ┆ 898.50866 ┆ 712.33555 ┆ 0.492752 │
│ ┆ ┆ ┆ 1 ┆ ┆ ┆ 9 ┆ ┆ │
│ std ┆ null ┆ 133.05486 ┆ 80.976264 ┆ … ┆ null ┆ 173.17803 ┆ 152.49441 ┆ 0.499953 │
│ ┆ ┆ 4 ┆ ┆ ┆ ┆ 8 ┆ ┆ │
│ min ┆ 0000139e- ┆ 0.0 ┆ 0.0 ┆ … ┆ A._Walton ┆ 90.0 ┆ 98.0 ┆ 0.0 │
│ ┆ 21dc-4d0c ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ ┆ -bfe1-4ca ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ ┆ e3c… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 25% ┆ null ┆ 115.0 ┆ 99.0 ┆ … ┆ null ┆ 800.0 ┆ 639.0 ┆ 0.0 │
│ 50% ┆ null ┆ 205.0 ┆ 149.0 ┆ … ┆ null ┆ 1024.0 ┆ 683.0 ┆ 0.0 │
│ 75% ┆ null ┆ 315.0 ┆ 208.0 ┆ … ┆ null ┆ 1024.0 ┆ 780.0 ┆ 1.0 │
│ max ┆ fffff3a5- ┆ 837.0 ┆ 799.0 ┆ … ┆ www.burly ┆ 1024.0 ┆ 1024.0 ┆ 1.0 │
│ ┆ 2a75-47d0 ┆ ┆ ┆ ┆ bird.com ┆ ┆ ┆ │
│ ┆ -887f-038 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ ┆ 71e… ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘
Create a training DataFrame
Now we can get a subset of our metadata DataFrame with the training metadata data only:
# Restrict the metadata to the training images only.
train_mask = pl.col("is_training_img") == 1
metadata_train = metadata.filter(train_mask)

# Quick sanity checks:
print(metadata_train.shape)
print(metadata_train.row(0))
print(metadata_train.columns)
('0001afd4-99a1-4a67-b940-d419413e23b3', 307, 179, 492, 224, 645, 'Eared_Grebe_(Nonbreeding/juvenile)', '0645/0001afd499a14a67b940d419413e23b3.jpg', 'Laura_Erickson', 1024, 680, 1)
['UUID', 'bb_x', 'bb_y', 'bb_width', 'bb_height', 'class', 'id', 'path', 'photographer', 'img_width', 'img_height', 'is_training_img']
Our metadata is ready. We can now start working with the pictures.