Lecture 2 updates and fixes

af744144 · Škoviera, Radoslav, Mgr., Ph.D. · c1b66a81 · af744144 · af744144
Commit af744144 authored 1 month ago by Škoviera, Radoslav, Mgr., Ph.D.
--- a/src/pge_lectures/lecture_02/l2_matrices_processing.ipynb
+++ b/src/pge_lectures/lecture_02/l2_matrices_processing.ipynb
--- a/src/pge_lectures/lecture_02/l2_matrices_processing.qmd
+++ b/src/pge_lectures/lecture_02/l2_matrices_processing.qmd
@@ -192,7 +192,8 @@ def shuffle_matrix(matrix):
    for r in range(n_row):
        for c in range(m_cols):
            flat_index = r * m_cols + c
-            shuffled_matrix[r][c] = matrix[int(flat_indices[flat_index] / m_cols)][flat_indices[flat_index] % m_cols]
+            shuffled_matrix[r][c] = matrix[int(flat_indices[flat_index]
+            / m_cols)][flat_indices[flat_index] % m_cols]

    return shuffled_matrix

@@ -270,8 +271,9 @@ def multiply_two_matrices(mat_A, mat_B):
    n_col_B = len(mat_B[0])

    if n_col_A != n_row_B:  # the matrices need to have the correct shapes
-        raise ValueError(f"Matrices A and B cannot be multiplied. "
-                         f"Matrix B needs to have the same number of columns (has {n_col_B}) as matrix A has rows (has {n_row_A}).")
+        raise ValueError("Matrices A and B cannot be multiplied. "
+                         "Matrix B needs to have the same number of columns "
+                         f"(has {n_col_B}) as matrix A has rows (has {n_row_A}).")
    # this function will not be able to deal with broadcasting!
    mat_C = create_empty(n_row_A, n_col_B)
    for r in range(n_row_A):
@@ -323,7 +325,8 @@ def find_equal_values(mat_A, mat_B, unique=False):
    for r in range(n_row_B):
        flat_values_B.extend(mat_B[r])

-    if unique:  # remove duplicates, otherwise the `remove` method below might not be enough
+    if unique:  # remove duplicates,
+        # otherwise the `remove` method below might not be enough
        remove_duplicates(flat_values_B)

    equal_values = []
@@ -417,7 +420,7 @@ img = [
    [  0,   0,   0,   0, 100, 100,   0,   0,   0,   0],
 ]

-print(img)
+print(pretty_print_as_rows(img))

 from matplotlib import pyplot as plt

@@ -432,7 +435,7 @@ plt.show()

 #### Linear search

-Requires iterating over the array, asymptotic runtime is $O(n)$.
+Requires iterating over the array until the item with the correct value is found.

 ```{python}
 import numpy as np
@@ -452,7 +455,7 @@ print("Linear search run time:")

 ##### Computational complexity side-quest

-Even though the 'asymptotic' time of linear search is $O(n)$, the 'actual' time depends on the 'properties' of the array - where the item is located in the array.
+The asymptotic time of the linear search is $O(n)$, but the 'actual' time depends on the 'properties' of the array - specifically, where the item is located in the array.

 ```{python}
 N = 100
@@ -472,7 +475,7 @@ print("Searching for an item at the end of the array:")

 #### Binary search

-Requires pre-sorted array but asymptotic runtime is $O(\log n)$ - we only look which "half" of the array might contain the item.
+Requires a pre-sorted array, but the asymptotic runtime is $O(\log n)$ - we only check which "half" of the array might contain the item, then discard the other half.

 ```{python}
 def binary_search_loop(a, x):
@@ -503,7 +506,8 @@ def binary_search_recursion(a, x):
        # otherwise, we look at the left half of the array
        return binary_search_recursion(a[:mid_idx], x)

-# recursion is slower in this case, as we are copying the whole array, instead of just using the index.
+# recursion is slower in this case, as we are copying the whole array,
+# instead of just using the index.
 sa = np.sort(a)
 print("Binary search run time:")
 %timeit binary_search_loop(sa, 5)
@@ -518,22 +522,25 @@ print("'Fair' binary search run time (includes sorting):")
 The advantage of binary search is relatively constant search time. As you can see from the following, the search time is shorter than "average" search time for linear search: $O(\log n) < O(n)$. This includes even sorting the array.

 ```{python}
-print("Searching for an item in the middle of the array, using binary search:")
+print("Searching for an item in the middle of the array, "
+"using binary search:")
 searched_item = sa[N // 2]
 %timeit binary_search_loop(np.sort(a), searched_item)

-print("Searching for an item at the beginning of the array, using binary search:")
+print("Searching for an item at the beginning of the array, "
+"using binary search:")
 searched_item = sa[0]
 %timeit binary_search_loop(np.sort(a), searched_item)

-print("Searching for an item at the end of the array, using binary search:")
+print("Searching for an item at the end of the array, "
+"using binary search:")
 searched_item = sa[-1]
 %timeit binary_search_loop(np.sort(a), searched_item)
 ```

 #### Interpolation search

-Even more restrictive requirements than binary search - the array needs to be 'uniformly' (equal distribution of values) sorted. It uses similar approach as binary search but uses "an educated guess" of where the item might be (this is where the uniformity comes into play). The is $O(\log n)$
+Even more restrictive requirements than binary search - the array needs to be 'uniformly' (equal distribution of values) sorted. It uses a similar approach to binary search but makes "an educated guess" of where the item might be (this is where the uniformity comes into play). The asymptotic run time is $O(\log(\log n))$.

 ```{python}
 def interpolation_search(a, x):
@@ -615,7 +622,8 @@ N = 100
 a = np.random.choice(N**3, size=N, replace=True)
 searched_item = a[N // 2]
 occurrences = find_all(a, searched_item)
-print("The value", searched_item, "occurs", len(occurrences), "times in the array at indices", occurrences)
+print("The value", searched_item, "occurs", len(occurrences),
+    "times in the array at indices", occurrences)
 ```

 ### 'Statistical' computations on arrays
@@ -687,7 +695,8 @@ Summing items between two indices is useful for many tasks. For example, computi
 ```{python}
 import numpy as np
 temp_measurements = (np.random.rand(200) * 10).tolist()
-print("Input array: " + ', '.join([f"{x:.2f}" for x in temp_measurements[:10]]) + ", ...")
+print("Input array: " + ', '.join([f"{x:.2f}"
+    for x in temp_measurements[:10]]) + ", ...")

 def range_sum(a, start, end):
    s = 0
@@ -756,9 +765,11 @@ def range_sum_cs(a_cumsum, start, end):

 print(my_list)
 start, end = 3, 7
-print(f"Sum between indices {start} and {end}:", range_sum_cs(cumulative_sum(my_list), start, end))
+print(f"Sum between indices {start} and {end}:",
+    range_sum_cs(cumulative_sum(my_list), start, end))
 # sanity check with the "simple" method:
-print("This is the same as with the simple `range_sum` method:", range_sum(my_list, start, end) == range_sum_cs(cumulative_sum(my_list), start, end))
+print("This is the same as with the simple `range_sum` method:",
+    range_sum(my_list, start, end) == range_sum_cs(cumulative_sum(my_list), start, end))
 ```

 Now, let's say we have the cumulative sum precomputed and we want to compute the range sum for 100 different ranges. How long will it take?
@@ -793,7 +804,8 @@ def range_average(csa, start, end):
 a = list(range(10))
 csa = cumulative_sum(a)
 print("Input array:", a)
-print("Average value of elements between indices 2 and 5 (inclusive):", range_average(csa, 2, 5))
+print("Average value of elements between indices 2 and 5 (inclusive):",
+    range_average(csa, 2, 5))
 ```

 ### Integral image (summed-area table, cumulative sum in 2D)
@@ -831,11 +843,11 @@ bottom_right = (7, 6)  # c, d

 Let's break it down: We want to compute the sum of the gray area - area of interest (AOI) in @fig-integral_image_full. To do that, we need to take the "total sum" (delimited by red rectangle in @fig-integral_image_full) of the area from $[0, 0]$ to $[c, d]$. That is, the sum from the origin of the image to the bottom_right corner of the AOI. This sum has the value at $ii[c][d]$. Then, we subtract the two areas from origin to the top-right corner of the AOI (delimited by blue rectangle in @fig-integral_image_full) and the area from the origin to the bottom-left corner of the AOI (delimited by green rectangle in @fig-integral_image_full). The sums of these areas are located at $ii[a-1][d]$ and $ii[c][b-1]$. This way, we subtracted twice the sum of the area from the origin to just above the top-left corner of the AOI. We need to add it once back-in. Therefore, the last step is to add the sum of this area (delimited by orange rectangle in @fig-integral_image_full) that is located at $ii[a-1][b-1]$. To recap, the full equation is:

-$$area_sum[[a, b], [c, d]] = ii[c][d] - ii[a-1][d] - ii[c][b-1] + ii[a-1][b-1]$$
+$$area\_sum[[a, b], [c, d]] = ii[c][d] - ii[a-1][d] - ii[c][b-1] + ii[a-1][b-1]$$

 Specifically, in our case:

-$$area_sum[[5, 3], [7, 6]] = ii[7][6] - ii[4][6] - ii[7][3] + ii[4][3]$$
+$$area\_sum[[5, 3], [7, 6]] = ii[7][6] - ii[4][6] - ii[7][3] + ii[4][3]$$

 | general term     | current case term | color in @fig-integral_image_full |
 |:-----------------|:------------------|:----------------------------------|
@@ -897,16 +909,16 @@ def draw_rectangle(ax, top_left, bottom_right, color, width=6, fill_only=False):
    )
    ax.add_patch(rect)

-ax = draw_matrix(matrix)
+ax = draw_matrix(ii)

 a, b = top_left
 c, d = bottom_right
 draw_rectangle(ax, top_left, bottom_right, 'k', fill_only=True)
-draw_rectangle(ax, [0, 0], bottom_right, 'r', width=16)
+draw_rectangle(ax, [0, 0], bottom_right, 'r', width=22)
 draw_rectangle(ax, bottom_right, bottom_right, 'r', fill_only=True)
-draw_rectangle(ax, [0, 0], [a-1, d], 'b', width=12)
+draw_rectangle(ax, [0, 0], [a-1, d], 'b', width=16)
 draw_rectangle(ax, [a-1, d], [a-1, d], 'b', fill_only=True)
-draw_rectangle(ax, [0, 0], [c, b-1], 'g', width=8)
+draw_rectangle(ax, [0, 0], [c, b-1], 'g', width=10)
 draw_rectangle(ax, [c, b-1], [c, b-1], 'g', fill_only=True)
 draw_rectangle(ax, [0, 0], [a-1, b-1], 'orange', width=6)
 draw_rectangle(ax, [a-1, b-1], [a-1, b-1], 'orange', fill_only=True)
@@ -916,7 +928,10 @@ plt.tight_layout()
 plt.show()
 ```

+{{< pagebreak >}}
+
 Let's first compute the sum "manually":
+
 ```{python}
 #| code-fold: true
 #| echo: false
@@ -936,15 +951,19 @@ def print_str_nicely(string, max_line_length=80):
    for i in range(0, len(string), max_line_length):
        print(string[i:i+max_line_length])

-print(f'Sum ("manual" approach): {sum_value}')
 print("Summed elements:")
 print_str_nicely(sum_string_op)
 print("Summed values:")
 print_str_nicely(sum_string_value)
+print(f'Sum ("manual" approach): {sum_value}')
 ```

 Now, let's compute the sum from the integral image:
+
 ```{python}
 sum_value = ii[c][d] - ii[a-1][d] - ii[c][b-1] + ii[a-1][b-1]
-print(f'Sum (integral image approach): {sum_value}')
+
+print("Sum (integral image approach):\n\t"
+    f"{ii[c][d]} - {ii[a-1][d]} - {ii[c][b-1]} + {ii[a-1][b-1]}"
+    f" = {sum_value}")
 ```
\ No newline at end of file