The source code notes: July 2021

Saturday, 31 July 2021

Colour image histogram in Python

If you have missed the previous post on image channel decomposition, follow this link and then come back here.

Colour image histogram

We can have two main types of colour image histogram: luminance histogram and component histogram. The principle used is the same as for the grey-level histogram in both the types, with one main difference between them: the first type is obtained from a grey-scale version of the colour image, while the second type, from the image's three separate channels.

Component histogram

To plot the component histogram of a colour image, after decomposing the image's channels into three separate images, each one containing only the colour for each channel, we must convert these images to grayscale. This step should account for the difference in human perception between red, green and blue, not just average the three channels' intensity, as explained here.

In the following picture, we can see the result of the above procedure executed on a 24 bits RGB image:

Clockwise from top left: original image, red, blue and green channels.

Some problems with the contrast are soon very evident, especially with the blue channel.

Once we have the arrays of the intensity values for each channel, we can apply the same steps to each channel's intensity array, as we would do for a grayscale image. That would produce the histogram for each one of the image's channels, as we can see in the next picture:

Interpreting the histogram

Analysing the histogram above makes it evident that what seems, at a quick glance, a pretty and well-taken picture is in reality a poor-quality one, with a lot of room for improvement.

The dynamic range of the red and blue channels, for example, is too low. The green channel is a little better, but it still doesn't cover enough intensity values. As a consequence, the image lacks contrast, and hence the colours look washed out.

However, we can use for colour images the same contrast improvement technique we learned here, like histogram equalization, which we will learn how to implement in the next post, together with other histogram operations.

The full source code for colour histogram:

from PIL import Image, ImageTk
import tkinter as tk
import sys

def rgb2gray(input_image):
    """Convert an RGB image to an 8-bit greyscale ('L') image.

    Every output pixel is the weighted sum 0.299*R + 0.587*G + 0.114*B of
    the matching input pixel, truncated to an integer.

    Returns the new image, or None when ``input_image.mode`` is not 'RGB'.
    """
    if input_image.mode != 'RGB':
        return None

    output_image = Image.new('L', (input_image.width, input_image.height))
    for x in range(input_image.width):
        for y in range(input_image.height):
            pix = input_image.getpixel((x, y))
            grey = int(pix[0]*0.299 + pix[1]*0.587 + pix[2]*0.114)
            output_image.putpixel((x, y), grey)
    return output_image

def histogram(input_image):
    """Compute the normalised grey-level histogram of an image.

    ``input_image`` must expose ``mode``, ``width``, ``height`` and
    ``getpixel((x, y))`` returning an integer in 0..255.

    Returns a list of 256 floats in which entry ``i`` is the fraction of
    pixels whose value is ``i``, or None when the mode is neither 'L' nor
    'P'.
    """
    if input_image.mode not in ('L', 'P'):
        return None

    counts = [0] * 256
    for x in range(input_image.width):
        for y in range(input_image.height):
            counts[input_image.getpixel((x, y))] += 1

    total = input_image.width * input_image.height
    if total == 0:
        # An image without pixels has no distribution; avoid dividing by 0.
        return [0.0] * 256
    return [count / total for count in counts]

def draw_histogram(canvas, IHIST, hist_w, hist_h, color, bins=False):
    """Plot a 256-entry histogram on a canvas.

    canvas -- object providing ``create_line(x0, y0, x1, y1, fill=...)``
    IHIST  -- sequence of at least 256 non-negative values (not modified)
    hist_w -- reference width; sets the bin spacing and the left offset
              (``hist_w + 20``) of the plot
    hist_h -- plot height; the tallest bin is scaled to this height
    color  -- line colour passed through as ``fill``
    bins   -- True draws one vertical bar per bin, False a connected line
    """
    hist_max = max(IHIST)
    # Scale into a new list so the caller's histogram stays untouched; an
    # all-zero histogram is drawn on the baseline instead of dividing by 0.
    heights = [
        (IHIST[i] / hist_max) * hist_h if hist_max else 0.0
        for i in range(256)
    ]

    # A bin is a bar of the histogram; never let its width round down to 0,
    # otherwise every bin would be drawn on the same x coordinate.
    bin_w = max(1, round(hist_w / 256))
    offset = hist_w + 20  # x coordinate of the first bin

    prev_x, prev_y = offset, hist_h
    for i, bar_h in enumerate(heights):
        x = bin_w*i + offset
        y = hist_h - bar_h
        if bins:
            canvas.create_line(x, hist_h, x, y, fill=color)
        else:
            canvas.create_line(prev_x, prev_y, x, y, fill=color)
            prev_x, prev_y = x, y

def get_channel(input_image, channel_key):
    """Return an RGB copy of ``input_image`` that keeps a single channel.

    ``channel_key`` is 'R', 'G' or 'B'; the two other components of every
    output pixel are set to 0. Returns None when the image mode is not
    'RGB'.
    """
    if input_image.mode == 'RGB':
        slot_of = {'R': 0, 'G': 1, 'B': 2}
        isolated = Image.new('RGB', (input_image.width, input_image.height))
        slot = slot_of[channel_key]
        for col in range(input_image.width):
            for row in range(input_image.height):
                # Start from black and copy only the selected component.
                components = [0, 0, 0]
                components[slot] = input_image.getpixel((col, row))[slot]
                isolated.putpixel((col, row), tuple(components))
        return isolated
    return None

if __name__ == "__main__":
    # Image.open raises on failure (it does not return None), so a failed
    # load is reported by catching the exception.
    try:
        img = Image.open("retriver.png")
    except OSError:
        print("Error opening image file")
        sys.exit(1)

    root = tk.Tk()
    width = img.width*2+20
    height = img.height+20
    root.title("RGB Image histogram")
    root.geometry(f'{width}x{height}')

    # get_channel returns None for non-RGB input; check that before handing
    # the results to rgb2gray, which would otherwise fail on None.
    channels = [get_channel(img, key) for key in ('R', 'G', 'B')]

    if all(channel is not None for channel in channels):
        # NOTE(review): rgb2gray weights each isolated channel by 0.299,
        # 0.587 or 0.114, so the red and blue histograms only occupy the
        # lower part of the 0-255 range - confirm this is intended.
        histograms = [histogram(rgb2gray(channel)) for channel in channels]

        hist_w = img.width
        hist_h = img.height

        # Keep a reference to the PhotoImage for as long as the window lives.
        input_im = ImageTk.PhotoImage(img)

        canvas = tk.Canvas(root, width=width, height=height, bg="#ffffff")
        canvas.create_image(width/4-1, height/2-1, image=input_im, state="normal")

        # The three component histograms are overlaid on the same axes.
        for channel_hist, colour in zip(histograms, ("#f00", "#0f0", "#00f")):
            draw_histogram(canvas, channel_hist, hist_w, hist_h, colour)

        canvas.place(x=0, y=0)
        canvas.pack()

        root.mainloop()
    else:
        print("Input image must be in 'RGB' colour space")

The code has been already explained in the previous posts. But, if something is not clear, ask for it in the comment section.

Friday, 30 July 2021

RGB channel decomposition - Python implementation

In the previous post, we have seen histogram equalization for grayscale images. Before learning about colour image histogram, we must know how to separate the channels of an image to be able to process them separately. We will learn how to do that in this post.

For this, we will use an RGB image with a bit depth of 8 bits per channel. I wrote a simple function which returns a copy of the input image containing only the data for a single channel. It's possible to select the required channel using a key ('R', 'G' or 'B') passed as an argument to the function:

def get_channel(input_image, channel_key):
    """Return an RGB copy of ``input_image`` that keeps a single channel.

    ``channel_key`` is 'R', 'G' or 'B'; the two other components of every
    output pixel are set to 0. Returns None when the image mode is not
    'RGB'.
    """
    if input_image.mode == 'RGB':
        slot_of = {'R': 0, 'G': 1, 'B': 2}
        isolated = Image.new('RGB', (input_image.width, input_image.height))
        slot = slot_of[channel_key]
        for col in range(input_image.width):
            for row in range(input_image.height):
                # Start from black and copy only the selected component.
                components = [0, 0, 0]
                components[slot] = input_image.getpixel((col, row))[slot]
                isolated.putpixel((col, row), tuple(components))
        return isolated
    return None

the function above takes an input image and a key as arguments. The key is used to choose the relevant channel's index for the pixel's tuple. Let's go through the most relevant lines:

channels_indices dict to map keys to indices,

channels_indices = {'R':0 ,'G':1 ,'B':2}

ch_idx gets 0, 1 or 2 based on channel_key value

ch_idx = channels_indices[channel_key]

first we get the pixel's tuple, then we get the channel's value from it:

pix = input_image.getpixel((x, y))
pix = pix[ch_idx]

we also use ch_idx to select the output channel:

if ch_idx == 0:
   output_image.putpixel((x,y), (pix,0,0))
elif ch_idx == 1:
   output_image.putpixel((x,y), (0,pix,0))
elif ch_idx == 2:
   output_image.putpixel((x,y), (0,0,pix))

In the following picture we can see the original image with the three channels separated:

and this is the full code:

from PIL import Image, ImageTk
import tkinter as tk

def get_channel(input_image, channel_key):
    """Return an RGB copy of ``input_image`` that keeps a single channel.

    ``channel_key`` is 'R', 'G' or 'B'; the two other components of every
    output pixel are set to 0. Returns None when the image mode is not
    'RGB'.
    """
    if input_image.mode == 'RGB':
        slot_of = {'R': 0, 'G': 1, 'B': 2}
        isolated = Image.new('RGB', (input_image.width, input_image.height))
        slot = slot_of[channel_key]
        for col in range(input_image.width):
            for row in range(input_image.height):
                # Start from black and copy only the selected component.
                components = [0, 0, 0]
                components[slot] = input_image.getpixel((col, row))[slot]
                isolated.putpixel((col, row), tuple(components))
        return isolated
    return None

if __name__ == "__main__":
    root = tk.Tk()
    img = Image.open("retriver.png")

    # The window holds a 2x2 grid: the original plus its three channels.
    width = img.width*2+20
    height = img.height*2+20
    root.title("RGB channel decomposition")
    root.geometry(f'{width}x{height}')

    channels = [get_channel(img, key) for key in ('R', 'G', 'B')]

    # get_channel returns None when the image is not in 'RGB' mode.
    if all(channel is not None for channel in channels):
        # Keep named references to the PhotoImage objects while the window
        # is shown.
        input_im = ImageTk.PhotoImage(img)
        output_R, output_G, output_B = [ImageTk.PhotoImage(channel)
                                        for channel in channels]

        canvas = tk.Canvas(root, width=width, height=height, bg="#ffffff")
        canvas.create_image(width/4-1, height/4-1, image=input_im, state="normal")
        canvas.create_image(3*width/4, height/4-1, image=output_R, state="normal")
        canvas.create_image(width/4-1, 3*height/4-1, image=output_G, state="normal")
        canvas.create_image(3*width/4, 3*height/4-1, image=output_B, state="normal")

        canvas.place(x=0, y=0)
        canvas.pack()

        root.mainloop()
    else:
        print("Input image must be in 'RGB' colour space")

Now we can learn how to plot an histogram for a colour image. Follow me in the next post to see how it is done and how it can help us.

Wednesday, 28 July 2021

Histogram equalization - python implementation

As I have mentioned in the previous post, it's possible to process an image's histogram to spread the distribution of grey level values. This procedure is called histogram equalization. By equalizing the histogram, we redistribute the image's grey level values uniformly so that the number of pixels at any value becomes almost equivalent. More formally, this technique gives a linear trend to the cumulative probability function associated with the image.

Suppose that we have an image f(x,y), and its histogram h(i). The cumulative distribution function of h(i) is as follows:

\[c(i) = \sum_{j=0}^{i} h(j)\]

It can be proved that the above transform makes y = c(i) follow a uniform distribution. Thus, for a 256 grey-level image, the following equation defines the transformation that equalizes its histogram:

\[T(i) = \mathrm{round}\left(\frac{255}{n}\sum_{j=0}^{i} n_j\right)\]

where n_j is the number of pixels at grey level j and n is the total number of pixels in the image.

This operation increases the contrast of the image, which becomes much clearer and richer in detail.

Histogram equalization in Python

As a first thing, we need a function to compute the image's histogram, about which you can read in the previous post:

def histogram(input_image):
    """Compute the normalised grey-level histogram of an image.

    ``input_image`` must expose ``mode``, ``width``, ``height`` and
    ``getpixel((x, y))`` returning an integer in 0..255.

    Returns a list of 256 floats in which entry ``i`` is the fraction of
    pixels whose value is ``i``, or None when the mode is neither 'L' nor
    'P'.
    """
    if input_image.mode not in ('L', 'P'):
        return None

    counts = [0] * 256
    for x in range(input_image.width):
        for y in range(input_image.height):
            counts[input_image.getpixel((x, y))] += 1

    total = input_image.width * input_image.height
    if total == 0:
        # An image without pixels has no distribution; avoid dividing by 0.
        return [0.0] * 256
    return [count / total for count in counts]

then we can write our histogram equalization function:

def histogram_equalization(input_image):
    """Equalize the grey-level histogram of an image.

    The cumulative histogram c(i) = h(0) + ... + h(i) of the input is
    turned into a lookup table ``int(255*c(i) + 0.5)``, and every pixel is
    replaced by the table entry for its grey level.

    Returns ``(lookup_table, output_image)``, where ``lookup_table`` has
    256 integer entries and ``output_image`` is a new 'L' image, or None
    when the mode is neither 'L' nor 'P'.
    """
    if input_image.mode not in ('L', 'P'):
        return None

    HIST = histogram(input_image)

    # Running sum: bin i must be included in c(i), otherwise the brightest
    # level present in the image can never be mapped to 255.
    HISTEQ = []
    cumulative = 0.0
    for probability in HIST:
        cumulative += probability
        HISTEQ.append(min(255, int(255*cumulative + 0.5)))

    output_image = Image.new('L', (input_image.width, input_image.height))
    for y in range(input_image.height):
        for x in range(input_image.width):
            pix = input_image.getpixel((x, y))
            output_image.putpixel((x, y), HISTEQ[pix])
    return HISTEQ, output_image

The function above computes the histogram of the input image. Then, from this histogram, the cumulative histogram is obtained (first nested loop). Finally, a new image is created and populated using the grey-level values stored in the cumulative histogram (second nested loop). This image is then returned together with the cumulative histogram.

Below we can see the original greyscale image and its histogram:

original image and its histogram

the next picture shows the image after histogram equalization and its new histogram:

Picture A

Even though the plot doesn't show a scale, it's easy to see how the grey level values of the pixels are now more spread than in the previous image.

In the next picture we can see the plot of the cumulative histogram:

Picture B

The above histogram is that returned by the histogram_equalization function.

If we compute and plot the above picture's cumulative histogram, we obtain the results shown in the next picture:

Picture C

we can see how the resulting cumulative histogram approximates a uniformly distributed image.

Histogram equalization implementation in Python

To obtain the results visible in the above pictures, we only need the two functions given at the beginning of this post, but we need to combine them in the following manner:

Picture A:

HISTEQ, out_im = histogram_equalization(img)
HISTEQ = histogram(out_im)

Picture B:

HISTEQ, out_im = histogram_equalization(img)

Picture C:

HISTEQ, out_im = histogram_equalization(img)
HISTEQ, out_im = histogram_equalization(out_im)

the following is the complete code for this post:

from PIL import Image, ImageTk
import tkinter as tk

def histogram(input_image):
    """Compute the normalised grey-level histogram of an image.

    ``input_image`` must expose ``mode``, ``width``, ``height`` and
    ``getpixel((x, y))`` returning an integer in 0..255.

    Returns a list of 256 floats in which entry ``i`` is the fraction of
    pixels whose value is ``i``, or None when the mode is neither 'L' nor
    'P'.
    """
    if input_image.mode not in ('L', 'P'):
        return None

    counts = [0] * 256
    for x in range(input_image.width):
        for y in range(input_image.height):
            counts[input_image.getpixel((x, y))] += 1

    total = input_image.width * input_image.height
    if total == 0:
        # An image without pixels has no distribution; avoid dividing by 0.
        return [0.0] * 256
    return [count / total for count in counts]

def histogram_equalization(input_image):
    """Equalize the grey-level histogram of an 'L' or 'P' image.

    Builds a 256-entry lookup table from the cumulative histogram
    (``int(255*c(i) + .5)`` with bin ``i`` included in ``c(i)``) and maps
    every pixel through it.

    Returns ``(lookup_table, output_image)``, or None for any other mode.
    """
    if input_image.mode not in ('L', 'P'):
        return None

    probabilities = histogram(input_image)

    # Accumulate left to right; entry i holds the sum of bins 0..i.
    levels = []
    running = 0.0
    for level in range(256):
        running += probabilities[level]
        levels.append(int(255*running + .5))

    equalized = Image.new('L', (input_image.width, input_image.height))
    for row in range(input_image.height):
        for col in range(input_image.width):
            source_level = input_image.getpixel((col, row))
            equalized.putpixel((col, row), levels[source_level])
    return levels, equalized

def draw_histogram(canvas, IHIST, hist_w, hist_h):
    """Draw a 256-bin histogram as vertical bars on a canvas.

    canvas -- object providing ``create_line(x0, y0, x1, y1)``
    IHIST  -- at least 256 bar heights, already scaled to 0..hist_h
    hist_w -- reference width; sets the bar spacing and the left offset
              (``hist_w + 20``) of the plot
    hist_h -- y coordinate of the baseline the bars grow up from
    """
    # A bin is a bar of the histogram. round() yields 0 for images up to
    # 256 px wide, which would stack every bar on one x coordinate, so the
    # spacing is never allowed to drop below one pixel.
    bin_w = max(1, round(hist_w / 512))
    offset = hist_w + 20  # where we draw the first bin
    for i in range(256):
        x = bin_w*i + offset
        canvas.create_line(x, hist_h, x, hist_h - IHIST[i])
if __name__=="__main__":
    root = tk.Tk()
    root.title("IMAGE GREY-LEVEL HISTOGRAM EQUALIZED")
    img = Image.open("retriver_gray.png")

    width = img.width*2
    height = img.height
    root.geometry(f'{width}x{height}')

    # histogram_equalization returns None for unsupported modes, so the
    # result is checked before it is unpacked.
    result = histogram_equalization(img)

    if result is not None:
        _, out_im = result
        # Histogram of the equalized image (Picture A).
        HISTEQ = histogram(out_im)
        ## For the cumulative histogram of the equalized image (Picture C)
        ## use instead: HISTEQ, out_im = histogram_equalization(out_im)

        hist_w = img.width
        hist_h = img.height

        hist_max = max(HISTEQ) ## get the max value
        ## Normalize between 0 and hist_h
        for i in range(256):
            HISTEQ[i] = float(HISTEQ[i]/hist_max) * hist_h

        output_im = ImageTk.PhotoImage(out_im)

        canvas = tk.Canvas(root, width=width, height=height, bg="#ffffff")
        canvas.create_image(width/4-1, height/2-1, image=output_im, state="normal")

        draw_histogram(canvas, HISTEQ, hist_w, hist_h)

        canvas.place(x=0, y=0)
        canvas.pack()

        root.mainloop()
    else:
        # The literals are joined by the parser, so the separator between
        # the two sentences has to be part of the first one.
        print("Input image's mode must be 'L' or 'P'. "
              "Check https://pillow.readthedocs.io/en/"
              "latest/handbook/concepts.html#concept-modes")

That's all for now. I hope you're enjoying and finding interesting this series on image processing in Python. Please, comment, share and get in touch if you find any error or if you would like to request a specific algorithm. In the next post we will learn about more histogram operations and colour image's histogram. Stay tuned.

Monday, 19 July 2021

Grey-level image histogram - Python implementation

As promised in the previous post, we will talk about the image's histogram.

Plotting data obtained from images is helpful to study and understand them. One plot type that can be helpful for any image processing practitioner is the grey-level histogram. The grey-level histogram of an image gives us, as it's easy to deduce, the grey-level distribution of the pixels within the image.

The histogram of an image is a set of M numbers, each of which represents the percentage of the image that is at the corresponding grey level. The following formula defines the histogram of an image:

\[h_i = \frac{n_i}{n_t} \quad \text{for } i = 0, \dots, M-1\]

in which n_i is the number of pixels within the image at the i-th grey level value and n_t is the total number of pixels in the image.

The following function implements a grey-level histogram for a greyscale image:

def histogram(input_image):
    """Compute the normalised grey-level histogram of an image.

    ``input_image`` must expose ``mode``, ``width``, ``height`` and
    ``getpixel((x, y))`` returning an integer in 0..255.

    Returns a list of 256 floats in which entry ``i`` is the fraction of
    pixels whose value is ``i``, or None when the mode is neither 'L' nor
    'P'.
    """
    if input_image.mode not in ('L', 'P'):
        return None

    counts = [0] * 256  # pixels counted per grey level
    for x in range(input_image.width):
        for y in range(input_image.height):
            counts[input_image.getpixel((x, y))] += 1

    total = input_image.width * input_image.height
    if total == 0:
        # An image without pixels has no distribution; avoid dividing by 0.
        return [0.0] * 256
    return [count / total for count in counts]  # histogram array

The picture below shows the image and its grey-level histogram:

Histogram interpretation

Histogram interpretation deserves a tutorial of its own, especially if we want to talk also about colour images histogram.
For now, we should at least understand that by examining the histogram of an image it is possible to detect some problems introduced during its acquisition, such as poor or excessive contrast, or a low dynamic range.
We should also be aware of the existence of some techniques involving histograms that can help us to improve our images, as histogram equalization, which we'll discuss in the next post.

The complete code:

from PIL import Image, ImageTk
import tkinter as tk

def histogram(input_image):
    """Compute the normalised grey-level histogram of an image.

    ``input_image`` must expose ``mode``, ``width``, ``height`` and
    ``getpixel((x, y))`` returning an integer in 0..255.

    Returns a list of 256 floats in which entry ``i`` is the fraction of
    pixels whose value is ``i``, or None when the mode is neither 'L' nor
    'P'.
    """
    if input_image.mode not in ('L', 'P'):
        return None

    counts = [0] * 256
    for x in range(input_image.width):
        for y in range(input_image.height):
            counts[input_image.getpixel((x, y))] += 1

    total = input_image.width * input_image.height
    if total == 0:
        # An image without pixels has no distribution; avoid dividing by 0.
        return [0.0] * 256
    return [count / total for count in counts]

def draw_histogram(canvas, IHIST, hist_w, hist_h):
    """Draw a 256-bin histogram as vertical bars on a canvas.

    canvas -- object providing ``create_line(x0, y0, x1, y1)``
    IHIST  -- at least 256 bar heights, already scaled to 0..hist_h
    hist_w -- reference width; sets the bar spacing and the left offset
              (``hist_w + 20``) of the plot
    hist_h -- y coordinate of the baseline the bars grow up from
    """
    # A bin is a bar of the histogram. round() yields 0 for images up to
    # 256 px wide, which would stack every bar on one x coordinate, so the
    # spacing is never allowed to drop below one pixel.
    bin_w = max(1, round(hist_w / 512))
    offset = hist_w + 20  # where we draw the first bin
    for i in range(256):
        x = bin_w*i + offset
        canvas.create_line(x, hist_h, x, hist_h - IHIST[i])

if __name__=="__main__":
    root = tk.Tk()
    root.title("IMAGE GREY-LEVEL HISTOGRAM")
    img = Image.open("retriver_gray.png")

    # Left half of the window: the image; right half: its histogram.
    width = img.width*2
    height = img.height
    root.geometry(f'{width}x{height}')

    IHIST = histogram(img)
    # histogram returns None when the image mode is not 'L' or 'P'.
    if IHIST is not None:
        hist_w = img.width
        hist_h = img.height

        hist_max = max(IHIST) ## get the max value
        ## Normalize between 0 and hist_h
        IHIST = [float(value/hist_max) * hist_h for value in IHIST]

        input_im = ImageTk.PhotoImage(img)

        canvas = tk.Canvas(root, width=width, height=height, bg="#ffffff")
        canvas.create_image(width/4-1, height/2-1, image=input_im, state="normal")

        draw_histogram(canvas, IHIST, hist_w, hist_h)

        canvas.place(x=0, y=0)
        canvas.pack()

        root.mainloop()
    else:
        # The literals are joined by the parser, so the separator between
        # the two sentences has to be part of the first one.
        print("Input image's mode must be 'L' or 'P'. "
              "Check https://pillow.readthedocs.io/en/"
              "latest/handbook/concepts.html#concept-modes")

Saturday, 17 July 2021

Contrast stretching in Python

Contrast can be defined as the spread in the value of brightness, within a specific range, in an image. An image with a high contrast contains a low number of shades or colours, so high contrast isn't always good, like in pictures of people or scenery, for example, as these might have lost informative details. But, this property of images can be helpful if we need to isolate some feature that could be revealed or made clear by increasing the contrast. And, in the opposite case, we could reduce the contrast for making an area more uniform and smooth.

Contrast stretching

The formula to modify the contrast is similar to that for brightness given in the previous post, where we use multiplication, with the difference that, to avoid saturation and too much brightening, it is common to first subtract some value from the pixels values and then perform the multiplication:

P0(x,y) = (P1(x,y) - 100) * 2.0

where P1 is the pixel at (x,y) position in the input image, and P0 is the pixel of the output one. For RGB images, the formula must be applied to all the channels. Note that I chose 2.0 as the value for the multiplication because it was the value that, to my eyes, returned the best result. I could never stress enough the importance of our personal judgement when we try to get an optimal result.

Here is the code implementing contrast stretching in a greyscale image:

from PIL import Image, ImageTk
import tkinter as tk

def contrast(input_image, value):
    """Stretch the contrast of a greyscale image.

    Each output pixel is ``int((pixel - 100) * value)`` limited to the
    0..255 range of an 8-bit image.

    Returns a new 'L' image, or None when ``input_image.mode`` is neither
    'L' nor 'P'.
    """
    if input_image.mode not in ('L', 'P'):
        return None

    output_image = Image.new('L', (input_image.width, input_image.height))
    for x in range(input_image.width):
        for y in range(input_image.height):
            stretched = int((input_image.getpixel((x, y)) - 100) * value)
            # Saturate explicitly: values below 100 go negative and bright
            # ones exceed 255, and the result should not depend on how the
            # imaging library treats out-of-range pixel values.
            output_image.putpixel((x, y), max(0, min(255, stretched)))
    return output_image

def main():
    """Show the source image side by side with its contrast-stretched copy."""
    root = tk.Tk()
    img = Image.open("retriver_gray.png", formats=['PNG'])

    # Two images next to each other, separated by a 20 px gap.
    width = img.width*2+20
    height = img.height
    root.geometry(f'{width}x{height}')

    output_im = contrast(img, 2.0)

    # contrast returns None when the image mode is not 'L' or 'P'.
    if output_im is not None:
        output_im = ImageTk.PhotoImage(output_im)
        input_im = ImageTk.PhotoImage(img)

        canvas = tk.Canvas(root, width=width, height=height, bg="#ffffff")
        canvas.create_image(width/4-1, height/2-1, image=input_im, state="normal")
        canvas.create_image(20+3*img.width/2-1, img.height/2-1, image=output_im, state="normal")
        canvas.place(x=0, y=0)
        canvas.pack()

        root.mainloop()
    else:
        # The literals are joined by the parser, so the separator between
        # the two sentences has to be part of the first one.
        print("Input image's mode must be 'L' or 'P'. "
              "Check https://pillow.readthedocs.io/en/"
              "latest/handbook/concepts.html#concept-modes")


if __name__ == "__main__":
    main()

This is the result produced:

The following is a bonus for those of you who came here especially for the code: contrast stretching for RGB images:

def contrast_rgb(input_image, value):
    """Stretch the contrast of a greyscale or RGB image.

    Every channel value ``c`` becomes ``int((c - 100) * value)`` limited to
    0..255. A ``value`` of None is treated as 0 for both image kinds.

    Returns a new image ('L' for 'L'/'P' input, 'RGB' for 'RGB' input), or
    None for any other mode.
    """
    mode = input_image.mode
    if mode not in ('L', 'P', 'RGB'):
        return None

    # Resolve the default once instead of once per pixel.
    gain = 0 if value is None else value

    def stretch(level):
        # Saturate explicitly so the result does not depend on how the
        # imaging library treats out-of-range pixel values.
        return max(0, min(255, int((level - 100) * gain)))

    size = (input_image.width, input_image.height)
    if mode == 'RGB':
        output_image = Image.new('RGB', size)
        for x in range(size[0]):
            for y in range(size[1]):
                pix = input_image.getpixel((x, y))
                output_image.putpixel(
                    (x, y), (stretch(pix[0]), stretch(pix[1]), stretch(pix[2])))
    else:
        output_image = Image.new('L', size)
        for x in range(size[0]):
            for y in range(size[1]):
                output_image.putpixel((x, y),
                                      stretch(input_image.getpixel((x, y))))
    return output_image

The result:

If you enjoy this content and would like to see more of it, or if you wish to know about a specific algorithm, please comment and also share this series. If you haven't read the older posts, head for the first one at this link, or read the next one.

What is brightness and why we care

In this tutorial, we'll learn about brightness and contrast in greyscale and RGB images, and how these properties, can be optimized to our advantage. This is the second part of a series on image processing in python and this is the first part.

What is brightness?

The brightness is the intensity of the light represented at a pixel. This brightness is related to the amount of light energy a pixel adds to the scene at its location. In grayscale images, 0 is the smallest amount, and 255 is the greatest. In a colour image, like an RGB image, the value of brightness is computed by averaging the three channels. Thus, we can say that the brightness of pure red in an RGB image is 85 ((255+0+0)/3).

Sometimes, an image can be either too bright or too dark, so we need to apply some correction. All we need to do in such cases is increasing or decreasing the value of the pixels. In a greyscale image we would apply the following formula:

\[P0(x,y) = P1(x,y) + 10\]

where P1 is the pixel in the original image at (x,y) position and P0 is the pixel in the corrected image at the same position. For an RGB image, we should apply the operation on each channel, paying attention not to desaturate the image. Desaturation happens when the value of the three channels become more or less similar. Let's consider a pixel which triplet is (235,160,200). Adding 50 would give us this new triplet: (255,210,250). Red would be set to 255, as it cannot go further, while the brightness of green and blue would increase disproportionately. Things would get even worse when we are dealing with primary colours, as these colours would lose their purity.

To mitigate the effects of desaturation, we can use multiplication:

\[P0(x,y) = P1(x,y)*1.3\]

Switching to multiplication doesn't always work as we can still desaturate an image, and in the end, we must trust our judgement when trying to obtain the best result.

Brightness correction implementation

The following code implements a simple function to increase or decrease an image brightness.

from PIL import Image, ImageTk
import tkinter as tk

def brightness(input_image, value):
    """Raise or lower the brightness of a greyscale image.

    ``value`` is added to every pixel (negative values darken the image)
    and the result is limited to the 0..255 range of an 8-bit image.

    Returns a new 'L' image, or None when ``input_image.mode`` is neither
    'L' nor 'P'.
    """
    if input_image.mode not in ('L', 'P'):
        return None

    output_image = Image.new('L', (input_image.width, input_image.height))
    for x in range(input_image.width):
        for y in range(input_image.height):
            shifted = int(input_image.getpixel((x, y)) + value)
            # Saturate explicitly so the result does not depend on how the
            # imaging library treats out-of-range pixel values.
            output_image.putpixel((x, y), max(0, min(255, shifted)))
    return output_image

def main():
    """Show the source image side by side with its brightened copy."""
    root = tk.Tk()
    img = Image.open("retriver_gray.png", formats=['PNG'])

    # Two images next to each other, separated by a 20 px gap.
    width = img.width*2+20
    height = img.height
    root.geometry(f'{width}x{height}')

    output_im = brightness(img, 50)

    # brightness returns None when the image mode is not 'L' or 'P'.
    if output_im is not None:
        output_im = ImageTk.PhotoImage(output_im)
        input_im = ImageTk.PhotoImage(img)

        canvas = tk.Canvas(root, width=width, height=height, bg="#ffffff")
        canvas.create_image(width/4-1, height/2-1, image=input_im, state="normal")
        canvas.create_image(20+3*img.width/2-1, img.height/2-1, image=output_im, state="normal")
        canvas.place(x=0, y=0)
        canvas.pack()

        root.mainloop()
    else:
        # The literals are joined by the parser, so the separator between
        # the two sentences has to be part of the first one.
        print("Input image's mode must be 'L' or 'P'. "
              "Check https://pillow.readthedocs.io/en/"
              "latest/handbook/concepts.html#concept-modes")


if __name__ == "__main__":
    main()

This is the result of increasing the image's brightness by 50:

There is very little to say about the code apart from that I used addition to increase brightness. I leave to the reader experimenting with multiplication. If you are interested in more image processing algorithms, read the next post about contrast stretching in Python.

Thursday, 15 July 2021

Image processing in Python

This is the first post of a series on image processing theory and practice. We will study some of the most important algorithms used in this field and we will learn how to implement them. I chose Python as the implementation language because this will make this series approachable for a large audience.

Python has become the language of choice for many scientists, students and engineers involved in machine vision and A.I. in general. Probably, the reasons for that are Python's easy syntax and Python's portability. But, without doubt, Python also owes a good part of its success to the large number of powerful and well-established libraries existing for the language, which allow users to perform nearly any kind of task with images and other types of data. Why bother to follow this tutorial, then? Firstly, because learning is fun. Secondly, because if you are serious about your domain of expertise, you should have a good understanding of what a library (or whatever tool you use) is doing with your data, so as to be able to choose the right functionality for your tasks, and in the right order.

What's image processing and why do we need it

A digital image is a signal, thus as such, it can be analysed and processed, to modify or to improve its properties. As with all signals, an image can have some noise in it that has to be removed, or maybe it needs to go through some transformation step before it can be used for some scope. Image processing is the study and the application of the mathematics needed to perform these improvements and transformations, utilizing computers.

A digital image can be represented as a bidimensional array of pixels, where each pixel is a sample of the image. Depending on the type of image, a pixel can have 1, 3 or 4 values associated with it. Such values are also called channels. Each channel holds a component of the image, like the level of one of the primary colours. The components of a pixel may vary a lot, depending on the encoding of the image (e.g. the file format) and the implementation algorithm. Also, the bit depth of an image can vary. The bit depth is the number of memory's bits used to hold the value associated with one pixel's channel.

When we consider all the components of a pixel and the range of values they can hold, we call these the image's colour space. Among all, the most common colour space used in digital images is probably the RGB, used also in PC's monitors and scanners. Printers use CMYK (cyan, magenta, yellow, and black).

The pixel of an RGB image has usually 8 bits per channel, thus in such a case, we say that the image has a bit depth of 24 bits, for a total of 3 bytes of data per pixel.

Another important type of image is the greyscale. This image is characterized by the absence of colours, except for the black, white and a range of shades of grey obtained by computing the amount of light carried by each pixel. If only black and white are present, we have a binary image, another important image type for machine vision applications. Grayscale images are lightweight images as they only need 1 byte per pixel (8 bits). In grayscale images, each pixel can be rendered with a value between 0 and 255, and that takes the same amount of memory as an ASCII character. These properties make this type of image more suitable for tasks where a lot of memory and computational power is needed. It is no coincidence that in many algorithms, the first processing step on a colour image is to convert it from its colour space to grayscale.

Colour space conversion

As I mentioned above, sometimes we need to convert the colour space of our images to a model more suitable for our needs. We can convert any colour space to another, but the most important conversion is, without doubt, the conversion to greyscale. The simplest method to achieve this with an RGB image is to average the value of the three channels:

\[P = (R+G+B)/3\]

where P is the value of the pixel in the output image. Other algorithms are less naive, and take into consideration how we perceive the colours and their properties, as in the case of the gamma compression and the channel-dependent luminance algorithms. The former considers how luminance affects the way we perceive differences in colours, the latter how our eyes are more sensitive to green than to blue or red. The implementation of gamma correction is expensive but gives a more accurate result, while the opposite is true for the channel-dependent luminance. Another algorithm exists that provides a good balance between computational cost and luminance correctness: the linear approximation, also used by some popular imaging libraries, like OpenCV and Pillow:

\[P = 0.299R + 0.587G + 0.114B\]

Implementing RGB to Grayscale conversion

If you don't have Pillow (PIL fork) installed, you need it if you want to try the code in this tutorial. If you use pip as a package manager, just install it with the command pip install pillow.

Pillow has quite a few ready-made filters implemented, but we will use it only because makes it very easy for us to access an image's data. We will also use Tkinter, from Python's standard library, as a display interface.

Let's import our libraries:

from PIL import Image, ImageTk

import tkinter as tk

Our conversion function follows:

def rgb2gray(input_image):
    """Convert an RGB Pillow image to an 8-bit grayscale ('L') image.

    Each output pixel is the weighted sum 0.299*R + 0.587*G + 0.114*B of
    the matching input pixel, rounded to the nearest integer.

    Args:
        input_image: image exposing ``mode``, ``width``, ``height`` and
            ``getpixel``; only mode 'RGB' is accepted.

    Returns:
        A new 'L' mode image with the same size as the input, or None
        when the input image is not in 'RGB' mode.
    """
    if input_image.mode != 'RGB':
        return None

    output_image = Image.new('L', (input_image.width, input_image.height))
    for x in range(input_image.width):
        for y in range(input_image.height):
            pix = input_image.getpixel((x, y))
            # The weights are scaled by 1000 so the sum stays in integers;
            # adding 500 before the division rounds to the nearest grey
            # level instead of truncating towards zero.
            pix = (299*pix[0] + 587*pix[1] + 114*pix[2] + 500) // 1000
            output_image.putpixel((x, y), pix)

    return output_image

These lines check that the input image is compatible with this function:

if input_image.mode != 'RGB':

return None

If the image is in 'RGB' mode, we create a new image in 'L' mode, which means 8 bits per pixel. Then, we iterate over all the input image's pixels, retrieving the pixel's tuple for each position (x,y) visited:

output_image = Image.new('L', (input_image.width,

input_image.height))

for x in range(input_image.width):

for y in range(input_image.height):

pix = input_image.getpixel((x, y))

We use the tuple's three values (r,g,b) to create the gray pixel by applying the linear approximation. Note that we reuse the pix variable. As the next step, we set the output image's pixel:

# The weights are scaled by 1000 so the sum stays in integers; adding 500
# before the division rounds to the nearest grey level. Truncating each
# term separately would map pure white (255, 255, 255) to 254.
pix = (299*pix[0] + 587*pix[1] + 114*pix[2] + 500) // 1000

output_image.putpixel((x, y), pix)

finally, the driver's code:

if __name__ == "__main__":
    # Open and convert the image first, so that no window is created when
    # the input cannot be converted.
    img = Image.open("retriver.png")
    output_im = rgb2gray(img)

    if output_im is None:
        # rgb2gray returns None for any mode other than 'RGB'.
        print("Input image must be in 'RGB' colour space")
    else:
        # The window holds both images side by side plus 20 extra pixels.
        width = img.width*2 + 20
        height = img.height

        root = tk.Tk()
        root.geometry(f'{width}x{height}')

        # Wrap both images so the canvas can display them.
        output_im = ImageTk.PhotoImage(output_im)
        input_im = ImageTk.PhotoImage(img)

        canvas = tk.Canvas(root, width=width, height=height, bg="#ffffff")
        # Original image on the left, grayscale result on the right.
        canvas.create_image(width/4-1, height/2-1, image=input_im, state="normal")
        canvas.create_image(20+3*img.width/2-1, img.height/2-1, image=output_im, state="normal")
        # A single geometry manager is enough: pack() alone lays out the
        # canvas, so the earlier place() call was redundant.
        canvas.pack()

        root.mainloop()

below you can see the result after applying the rgb2gray function to the input image:

The complete code:

from PIL import Image, ImageTk
import tkinter as tk

def rgb2gray(input_image):
    """Convert an RGB Pillow image to an 8-bit grayscale ('L') image.

    Each output pixel is the weighted sum 0.299*R + 0.587*G + 0.114*B of
    the matching input pixel, rounded to the nearest integer.

    Args:
        input_image: image exposing ``mode``, ``width``, ``height`` and
            ``getpixel``; only mode 'RGB' is accepted.

    Returns:
        A new 'L' mode image with the same size as the input, or None
        when the input image is not in 'RGB' mode.
    """
    if input_image.mode != 'RGB':
        return None

    output_image = Image.new('L', (input_image.width, input_image.height))
    for x in range(input_image.width):
        for y in range(input_image.height):
            pix = input_image.getpixel((x, y))
            # The weights are scaled by 1000 so the sum stays in integers;
            # adding 500 before the division rounds to the nearest grey
            # level. Truncating each term separately would map pure white
            # (255, 255, 255) to 254.
            pix = (299*pix[0] + 587*pix[1] + 114*pix[2] + 500) // 1000
            output_image.putpixel((x, y), pix)

    return output_image

if __name__ == "__main__":
    # Open and convert the image first, so that no window is created when
    # the input cannot be converted.
    img = Image.open("retriver.png")
    output_im = rgb2gray(img)

    if output_im is None:
        # rgb2gray returns None for any mode other than 'RGB'.
        print("Input image must be in 'RGB' colour space")
    else:
        # The window holds both images side by side plus 20 extra pixels.
        width = img.width*2 + 20
        height = img.height

        root = tk.Tk()
        root.geometry(f'{width}x{height}')

        # Wrap both images so the canvas can display them.
        output_im = ImageTk.PhotoImage(output_im)
        input_im = ImageTk.PhotoImage(img)

        canvas = tk.Canvas(root, width=width, height=height, bg="#ffffff")
        # Original image on the left, grayscale result on the right.
        canvas.create_image(width/4-1, height/2-1, image=input_im, state="normal")
        canvas.create_image(20+3*img.width/2-1, img.height/2-1, image=output_im, state="normal")
        # A single geometry manager is enough: pack() alone lays out the
        # canvas, so the earlier place() call was redundant.
        canvas.pack()

        root.mainloop()

Final words

In this first post, we have seen what image processing is and how to implement a simple RGB-to-greyscale converter. In the next post, we will study and implement another algorithm to deepen our knowledge about image processing and machine vision.

MathJax 3

Saturday, 31 July 2021

Friday, 30 July 2021

Wednesday, 28 July 2021

Monday, 19 July 2021

Saturday, 17 July 2021

Thursday, 15 July 2021