Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/training/degradeimage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer,
input = pix;
int width = pixGetWidth(input);
int height = pixGetHeight(input);

if (exposure >= 2) {
// An erosion simulates the spreading darkening of a dark copy.
// This is backwards to binary morphology,
Expand Down Expand Up @@ -167,6 +168,12 @@ Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer,
}
data += input->wpl;
}

// pix = input;
// input = PrepareDistortedPix(pix, false, true, true, true, true,
// 1, randomizer, nullptr);
// pixDestroy(&pix);

return input;
}

Expand All @@ -181,8 +188,6 @@ Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
GenericVector<TBOX>* boxes) {
Pix* distorted = pixCopy(nullptr, const_cast<Pix*>(pix));
// Things to do to synthetic training data.
if (invert && randomizer->SignedRand(1.0) < 0)
pixInvert(distorted, distorted);
if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
// TODO(rays) Cook noise in a more thread-safe manner than rand().
// Attempt to make the sequences reproducible.
Expand Down Expand Up @@ -210,6 +215,8 @@ Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
(*boxes)[b].set_right((*boxes)[b].left() + 1);
}
}
if (invert && randomizer->SignedRand(1.0) < -0.9)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why -0.9?

I think it should be kept as 0 (50% of images inverted).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also,

  • Todo -> TODO (like in other places in the codebase).
  • Why are you keeping the commented code in DegradeImage()?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0 (50% of images inverted).

Is that a realistic expectation of the types of images which will be OCRed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Todo -> TODO (like in other places in the codebase).

OK.

  • Why are you keeping the commented code in DegradeImage()?

I meant to delete it after the changes to text2image worked fine. I will fix that.

pixInvert(distorted, distorted);
return distorted;
}

Expand Down
28 changes: 28 additions & 0 deletions src/training/text2image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,28 @@ BOOL_PARAM_FLAG(rotate_image, true, "Rotate the image in a random way.");
// Degradation to apply to the image.
// Passed as the exposure argument of DegradeImage().
INT_PARAM_FLAG(exposure, 0, "Exposure level in photocopier");

// Master switch for the extra distortion pass: when set, Main() runs the
// rendered page through PrepareDistortedPix(), and the bool flags below
// select which individual distortions that call is allowed to apply.
// Off by default.
BOOL_PARAM_FLAG(distort_image, false,
"Degrade rendered image with noise, blur, invert.");

// Allows PrepareDistortedPix() to invert the image. Only consulted when
// distort_image is set. The inversion is additionally gated by a random
// draw inside PrepareDistortedPix(), so not every page is inverted.
BOOL_PARAM_FLAG(invert, true, "Invert the image");

// Allows PrepareDistortedPix() to add white noise. Only consulted when
// distort_image is set. Noise is applied on a random subset of calls when
// either white_noise or smooth_noise is enabled.
BOOL_PARAM_FLAG(white_noise, true, "Add Gaussian Noise");

// Companion to white_noise; forwarded to PrepareDistortedPix() as its
// smooth-noise switch. Only consulted when distort_image is set.
// NOTE(review): presumably smooths the generated noise - confirm against
// PrepareDistortedPix().
BOOL_PARAM_FLAG(smooth_noise, true, "Smoothen Noise");

// Allows PrepareDistortedPix() to blur the image. Only consulted when
// distort_image is set.
BOOL_PARAM_FLAG(blur, true, "Blur the image");

// Disabled: perspective distortion is not exposed as a flag; Main() passes
// a hard-coded false for it.
//BOOL_PARAM_FLAG(perspective, false, "Generate Perspective Distortion");

// Disabled: box reduction is not exposed as a flag; Main() passes a
// hard-coded 1 for it.
//INT_PARAM_FLAG(box_reduction, 0, "Integer reduction factor box_scale");

// Output image resolution.
INT_PARAM_FLAG(resolution, 300, "Pixels per inch");

Expand Down Expand Up @@ -619,6 +641,12 @@ static int Main() {
pix = DegradeImage(pix, FLAGS_exposure, &randomizer,
FLAGS_rotate_image ? &rotation : nullptr);
}
if (FLAGS_distort_image) {
// TODO: perspective is set to false and box_reduction to 1.
pix = PrepareDistortedPix(pix, false, FLAGS_invert,
FLAGS_white_noise, FLAGS_smooth_noise, FLAGS_blur,
1, &randomizer, nullptr);
}
render.RotatePageBoxes(rotation);

if (pass == 0) {
Expand Down