metrics

compute_patch_metrics(test_img_info, test_results, overlap, idx_to_class, patch_num_h=None, patch_num_w=None, patch_w=None, patch_h=None, return_polygon=False, patch_reconstruction_method='priority', annotated_good=None)

Compute the metrics of a patch dataset.

Parameters:

  • test_img_info (List[PatchDatasetFileFormat]) –

    List of observation paths and mask paths

  • test_results (pd.DataFrame) –

    Pandas dataframe containing the results of an SklearnClassificationTrainer utility

  • patch_num_h (Optional[int]) –

    Number of vertical patches (required if patch_w and patch_h are None)

  • patch_num_w (Optional[int]) –

    Number of horizontal patches (required if patch_w and patch_h are None)

  • patch_h (Optional[int]) –

    Patch height (required if patch_num_h and patch_num_w are None)

  • patch_w (Optional[int]) –

    Patch width (required if patch_num_h and patch_num_w are None)

  • overlap (float) –

    Percentage of overlap between the patches

  • idx_to_class (Dict) –

    Dict mapping an index to the corresponding class name

  • return_polygon (bool) –

    If set to true, convert the reconstructed mask into polygons; otherwise return the mask.

  • patch_reconstruction_method (str) –

    How to compute the label of overlapping patches. Can be either "priority" (assign the top-priority label, i.e. the one with the greater index, to overlapping regions) or "major_voting" (assign the most frequent label among the patches overlapping a pixel).

  • annotated_good (Optional[List[int]]) –

    List of indices of annotations to be treated as good.

Returns:

  • Tuple[int, int, int, List[Dict]]

    Tuple containing:

      false_region_bad: Number of false bad regions detected in the dataset
      false_region_good: Number of missed defects
      true_region_bad: Number of correctly identified defects
      reconstructions: If return_polygon is true this is a list of dicts containing

        {
            "file_path": image_path,
            "mask_path": mask_path,
            "file_name": observation_name,
            "prediction": [{
                "label": predicted_label,
                "points": List of dict coordinates "x" and "y" representing the points of a polygon that
                    surrounds an image area covered by patches of label = predicted_label
            }]
        }

      otherwise it is a list of dicts containing

        {
            "file_path": image_path,
            "mask_path": mask_path,
            "file_name": observation_name,
            "prediction": numpy array containing the reconstructed mask
        }

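A minimal usage sketch, assuming test_img_info and test_results were produced by the earlier patch dataset preparation and SklearnClassificationTrainer steps (the overlap, class mapping and patch grid below are illustrative placeholders):

from quadra.utils.patch.metrics import compute_patch_metrics

# test_img_info: List[PatchDatasetFileFormat] describing each test image and its mask (placeholder)
# test_results: pd.DataFrame with at least the "sample" and "pred_label" columns (placeholder)
false_bad, missed_defects, detected_defects, reconstructions = compute_patch_metrics(
    test_img_info=test_img_info,
    test_results=test_results,
    overlap=0.5,                               # 50% overlap between neighbouring patches
    idx_to_class={0: "good", 1: "defect"},     # illustrative index -> class name mapping
    patch_num_h=4,                             # reconstruct from a 4x4 grid of patches
    patch_num_w=4,
    return_polygon=True,
    patch_reconstruction_method="priority",
)
print(f"false bad regions: {false_bad}, missed defects: {missed_defects}, detected defects: {detected_defects}")
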
Source code in quadra/utils/patch/metrics.py
def compute_patch_metrics(
    test_img_info: List[PatchDatasetFileFormat],
    test_results: pd.DataFrame,
    overlap: float,
    idx_to_class: Dict,
    patch_num_h: Optional[int] = None,
    patch_num_w: Optional[int] = None,
    patch_w: Optional[int] = None,
    patch_h: Optional[int] = None,
    return_polygon: bool = False,
    patch_reconstruction_method: str = "priority",
    annotated_good: Optional[List[int]] = None,
) -> Tuple[int, int, int, List[Dict]]:
    """Compute the metrics of a patch dataset.

    Args:
        test_img_info: List of observation paths and mask paths
        test_results: Pandas dataframe containing the results of an SklearnClassificationTrainer utility
        patch_num_h: Number of vertical patches (required if patch_w and patch_h are None)
        patch_num_w: Number of horizontal patches (required if patch_w and patch_h are None)
        patch_h: Patch height (required if patch_num_h and patch_num_w are None)
        patch_w: Patch width (required if patch_num_h and patch_num_w are None)
        overlap: Percentage of overlap between the patches
        idx_to_class: Dict mapping an index to the corresponding class name
        return_polygon: if set to true convert the reconstructed mask into polygons, otherwise return the mask
        patch_reconstruction_method: How to compute the label of overlapping patches, can either be:
            priority: Assign the top priority label (i.e. the one with the greater index) to overlapping regions
            major_voting: Assign the most frequent label among the patches overlapping a pixel
        annotated_good: List of indices of annotations to be treated as good.

    Returns:
        Tuple containing:
            false_region_bad: Number of false bad regions detected in the dataset
            false_region_good: Number of missed defects
            true_region_bad: Number of correctly identified defects
            reconstructions: If return_polygon is true this is a list of dicts containing
                {
                    "file_path": image_path,
                    "mask_path": mask_path,
                    "file_name": observation_name,
                    "prediction": [{
                        "label": predicted_label,
                        "points": List of dict coordinates "x" and "y" representing the points of a polygon that
                        surrounds an image area covered by patches of label = predicted_label
                    }]
                }
            else it is a list of dicts containing
                {
                    "file_path": image_path,
                    "mask_path": mask_path,
                    "file_name": observation_name,
                    "prediction": numpy array containing the reconstructed mask
                }
    """
    assert patch_reconstruction_method in [
        "priority",
        "major_voting",
    ], "Patch reconstruction method not recognized, valid values are priority, major_voting"

    if (patch_h is not None and patch_w is not None) and (patch_num_h is not None and patch_num_w is not None):
        raise ValueError("Either number of patches or patch size is required for reconstruction")

    assert (patch_h is not None and patch_w is not None) or (
        patch_num_h is not None and patch_num_w is not None
    ), "Either number of patches or patch size is required for reconstruction"

    if patch_h is not None and patch_w is not None and patch_num_h is not None and patch_num_w is not None:
        warnings.warn(
            "Both number of patches and patch dimension are specified, using number of patches by default", UserWarning
        )

    log.info("Computing patch metrics!")

    false_region_bad = 0
    false_region_good = 0
    true_region_bad = 0
    reconstructions = []
    test_results["filename"] = test_results["sample"].apply(
        lambda x: "_".join(os.path.basename(x).replace("#DISCARD#", "").split("_")[0:-1])
    )

    for info in tqdm(test_img_info):
        img_path = info.image_path
        mask_path = info.mask_path

        img_json_entry = {
            "image_path": img_path,
            "mask_path": mask_path,
            "file_name": os.path.basename(img_path),
            "prediction": None,
        }

        test_img = cv2.imread(img_path)

        img_name = os.path.basename(img_path)

        h = test_img.shape[0]
        w = test_img.shape[1]

        gt_img = None

        if mask_path is not None and os.path.exists(mask_path):
            gt_img = cv2.imread(mask_path, 0)
            if test_img.shape[0:2] != gt_img.shape:
                # TODO: This is a patch to handle a bug in the backend code of ai-go-aml
                #  and must be removed in the future
                gt_img = np.pad(
                    gt_img, ((0, test_img.shape[0] - gt_img.shape[0]), (0, test_img.shape[1] - gt_img.shape[1]))
                )
        if patch_num_h is not None and patch_num_w is not None:
            patch_size, step = compute_patch_info(h, w, patch_num_h, patch_num_w, overlap)
        elif patch_h is not None and patch_w is not None:
            [patch_num_h, patch_num_w], step = compute_patch_info_from_patch_dim(h, w, patch_h, patch_w, overlap)
            patch_size = (patch_h, patch_w)
        else:
            raise ValueError(
                "Either number of patches or patch size is required for reconstruction, this should not happen"
                " at this stage"
            )

        img_patches = get_sorted_patches_by_image(test_results, img_name)
        pred = img_patches["pred_label"].to_numpy().reshape(patch_num_h, patch_num_w)

        # Treat annotated good predictions as background, this is an optimistic assumption that assumes that the
        # remaining background is good, but it is not always true so maybe on non annotated areas we are missing
        # defects and it would be necessary to handle this in a different way.
        if annotated_good is not None:
            pred[np.isin(pred, annotated_good)] = 0
        if patch_num_h is not None and patch_num_w is not None:
            output_mask, predicted_defect = reconstruct_patch(
                input_img_shape=test_img.shape,
                patch_size=patch_size,
                pred=pred,
                patch_num_h=patch_num_h,
                patch_num_w=patch_num_w,
                idx_to_class=idx_to_class,
                step=step,
                return_polygon=return_polygon,
                method=patch_reconstruction_method,
            )
        else:
            raise ValueError("`patch_num_h` and `patch_num_w` cannot be None at this point")

        if return_polygon:
            img_json_entry["prediction"] = predicted_defect
        else:
            img_json_entry["prediction"] = output_mask

        reconstructions.append(img_json_entry)
        if gt_img is not None:
            if annotated_good is not None:
                gt_img[np.isin(gt_img, annotated_good)] = 0

            gt_img_binary = (gt_img > 0).astype(bool)
            regions_pred = label(output_mask).astype(np.uint8)

            for k in range(1, regions_pred.max() + 1):
                region = (regions_pred == k).astype(bool)
                # If there's no overlap with the gt
                if np.sum(np.bitwise_and(region, gt_img_binary)) == 0:
                    false_region_bad += 1

            output_mask = (output_mask > 0).astype(np.uint8)
            gt_img = label(gt_img)

            for i in range(1, gt_img.max() + 1):
                region = (gt_img == i).astype(bool)
                if np.sum(np.bitwise_and(region, output_mask)) == 0:
                    false_region_good += 1
                else:
                    true_region_bad += 1

    return false_region_bad, false_region_good, true_region_bad, reconstructions

from_mask_to_polygon(mask_img)

Convert a pattern mask to a list of polygon vertices.

Parameters:

  • mask_img (np.ndarray) –

    masked patch reconstruction image

Returns:

  • list

    a list of lists containing the coordinates of the polygons containing each region of the mask:

        [
            [
                {"x": 1.1, "y": 2.2},
                {"x": 2.1, "y": 3.2}
            ],
            ...
        ]

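A small self-contained sketch of the expected input and output (the mask below is synthetic):

import numpy as np

from quadra.utils.patch.metrics import from_mask_to_polygon

# Synthetic binary mask with a single square region
mask = np.zeros((10, 10), dtype=np.uint8)
mask[2:6, 3:7] = 1

polygons = from_mask_to_polygon(mask)
# One polygon, returned as a list of {"x": ..., "y": ...} vertices
print(len(polygons), polygons[0])
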
Source code in quadra/utils/patch/metrics.py
def from_mask_to_polygon(mask_img: np.ndarray) -> list:
    """Convert a mask of pattern to a list of polygon vertices.

    Args:
        mask_img: masked patch reconstruction image
    Returns:
        a list of lists containing the coordinates of the polygons containing each region of the mask:
        [
            [
                {
                    "x": 1.1,
                    "y": 2.2
                },
                {
                    "x": 2.1,
                    "y": 3.2
                }
            ], ...
        ].
    """
    points_dict = []
    # find vertices of polygon: points -> list of array of dim n_vertex, 1, 2(x,y)
    polygon_points, hier = cv2.findContours(mask_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_TC89_L1)

    if not hier[:, :, 2:].all(-1).all():  # there are holes
        holes = ndimage.binary_fill_holes(mask_img).astype(int)
        holes -= mask_img
        holes = (holes > 0).astype(np.uint8)
        if holes.sum() > 0:  # there are holes
            for hole in regionprops(label(holes)):
                a, _, _, _d = hole.bbox
                mask_img[a] = 0

        polygon_points, hier = cv2.findContours(mask_img, cv2.RETR_LIST, cv2.CHAIN_APPROX_TC89_L1)

    for pol in polygon_points:
        #  pol: n_vertex, 1, 2
        current_poly = []
        for point in pol:
            current_poly.append({"x": int(point[0, 0]), "y": int(point[0, 1])})
        points_dict.append(current_poly)

    return points_dict

get_sorted_patches_by_image(test_results, img_name)

Gets the patches of a given image sorted by patch number.

Parameters:

  • test_results (pd.DataFrame) –

    Pandas dataframe containing test results like the one produced by SklearnClassificationTrainer

  • img_name (str) –

    name of the image used to filter the results.

Returns:

  • pd.DataFrame

    test results filtered by image name and sorted by patch number

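A self-contained sketch of the expected dataframe layout: the function relies on a "filename" column (which compute_patch_metrics derives from the "sample" paths) and on patch file names ending with the patch index; the toy values below are illustrative:

import pandas as pd

from quadra.utils.patch.metrics import get_sorted_patches_by_image

# Toy results: patch files are named <image>_<patch_index>.png
test_results = pd.DataFrame(
    {
        "sample": ["img1_2.png", "img1_0.png", "img1_1.png", "img2_0.png"],
        "filename": ["img1", "img1", "img1", "img2"],
        "pred_label": [1, 0, 0, 1],
    }
)

sorted_patches = get_sorted_patches_by_image(test_results, "img1.png")
print(sorted_patches["sample"].tolist())  # ['img1_0.png', 'img1_1.png', 'img1_2.png']
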
Source code in quadra/utils/patch/metrics.py
def get_sorted_patches_by_image(test_results: pd.DataFrame, img_name: str) -> pd.DataFrame:
    """Gets the patches of a given image sorted by patch number.

    Args:
        test_results: Pandas dataframe containing test results like the one produced by SklearnClassificationTrainer
        img_name: name of the image used to filter the results.

    Returns:
        test results filtered by image name and sorted by patch number
    """
    img_patches = test_results[test_results["filename"] == os.path.splitext(img_name)[0]]
    patches_idx = np.array(
        [int(os.path.basename(x).split("_")[-1].replace(".png", "")) for x in img_patches["sample"].tolist()]
    )
    patches_idx = np.argsort(patches_idx).tolist()
    img_patches = img_patches.iloc[patches_idx]

    return img_patches

reconstruct_patch(input_img_shape, patch_size, pred, patch_num_h, patch_num_w, idx_to_class, step, return_polygon=True, method='priority')

Reconstructs the prediction image from the patches.

Parameters:

  • input_img_shape (Tuple[int, ...]) –

    The size of the reconstructed image

  • patch_size (Tuple[int, int]) –

    Array defining the patch size

  • pred (np.ndarray) –

    Numpy array containing reconstructed prediction (patch_num_h x patch_num_w)

  • patch_num_h (int) –

    Number of vertical patches

  • patch_num_w (int) –

    Number of horizontal patches

  • idx_to_class (Dict) –

    Dictionary mapping indices to labels

  • step (Tuple[int, int]) –

    Array defining the step size to be used for reconstruction

  • return_polygon (bool) –

    If true compute predicted polygons. Defaults to True.

  • method (str) –

    Reconstruction method to be used. Currently supported: "priority" and "major_voting"

Returns:

  • Tuple[np.ndarray, List[Dict]]

    (reconstructed_prediction_image, predictions) where predictions is a list of objects:

        [{
            "label": predicted_label,
            "points": List of dict coordinates "x" and "y" representing the points of a polygon that
                surrounds an image area covered by patches of label = predicted_label
        }]

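A sketch of a direct call, assuming a 2x2 non-overlapping grid of 50x50 patches over a 100x100 image; in the usual pipeline patch_size and step come from compute_patch_info, so the hard-coded values below are illustrative:

import numpy as np

from quadra.utils.patch.metrics import reconstruct_patch

pred = np.array([[0, 1], [0, 0]])  # only the top-right patch is predicted as defective
output_mask, predictions = reconstruct_patch(
    input_img_shape=(100, 100, 3),
    patch_size=(50, 50),
    pred=pred,
    patch_num_h=2,
    patch_num_w=2,
    idx_to_class={0: "good", 1: "defect"},  # illustrative class mapping
    step=(50, 50),
    return_polygon=True,
    method="priority",
)
print(output_mask.shape, predictions)
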
Source code in quadra/utils/patch/metrics.py
def reconstruct_patch(
    input_img_shape: Tuple[int, ...],
    patch_size: Tuple[int, int],
    pred: np.ndarray,
    patch_num_h: int,
    patch_num_w: int,
    idx_to_class: Dict,
    step: Tuple[int, int],
    return_polygon: bool = True,
    method: str = "priority",
) -> Tuple[np.ndarray, List[Dict]]:
    """Reconstructs the prediction image from the patches.

    Args:
        input_img_shape: The size of the reconstructed image
        patch_size: Array defining the patch size
        pred: Numpy array containing reconstructed prediction (patch_num_h x patch_num_w)
        patch_num_h: Number of vertical patches
        patch_num_w: Number of horizontal patches
        idx_to_class: Dictionary mapping indices to labels
        step: Array defining the step size to be used for reconstruction
        return_polygon: If true compute predicted polygons. Defaults to True.
        method: Reconstruction method to be used. Currently supported: "priority" and "major_voting"

    Returns:
        (reconstructed_prediction_image, predictions) where predictions is an array of objects
            [{
                "label": Predicted_label,
                "points": List of dict coordinates "x" and "y" representing the points of a polygon that
                    surrounds an image area covered by patches of label = predicted_label
            }]
    """
    if method == "priority":
        return _reconstruct_patch_priority(
            input_img_shape,
            patch_size,
            pred,
            patch_num_h,
            patch_num_w,
            idx_to_class,
            step,
            return_polygon,
        )
    if method == "major_voting":
        return _reconstruct_patch_major_voting(
            input_img_shape,
            patch_size,
            pred,
            patch_num_h,
            patch_num_w,
            idx_to_class,
            step,
            return_polygon,
        )

    raise ValueError(f"Invalid reconstruction method {method}")