Commit 762927e3 authored by nd-02110114's avatar nd-02110114
Browse files

🐛 fix bug

parent 774156f9
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -22,6 +22,6 @@ class TestHyperparamOpt(unittest.TestCase):

    try:
      _ = dc.hyper.HyperparamOpt(rf_model_builder)
    except:
    except ValueError:
      initialized = False
    assert not initialized
+175 −175
Original line number Diff line number Diff line
@@ -4,179 +4,6 @@ import numpy as np
from scipy.spatial import ConvexHull


def intersect_interval(interval1: Tuple[int, int],
                       interval2: Tuple[int, int]) -> Tuple[int, int]:
  """Return the overlap of two closed integer intervals.

  Parameters
  ----------
  interval1: Tuple[int, int]
    First interval, given as `(x1_min, x1_max)`.
  interval2: Tuple[int, int]
    Second interval, given as `(x2_min, x2_max)`.

  Returns
  -------
  x_intersect: Tuple[int, int]
    `(max(x1_min, x2_min), min(x1_max, x2_max))` when the intervals
    overlap (sharing a single endpoint counts as overlapping). When
    they are disjoint, `(0, 0)` is returned as the stand-in for the
    empty set.
  """
  lo1, hi1 = interval1
  lo2, hi2 = interval2
  # The intervals are disjoint exactly when one ends before the other
  # begins, in either order.
  if hi1 < lo2 or hi2 < lo1:
    return (0, 0)
  return (max(lo1, lo2), min(hi1, hi2))


def intersection(box1: CoordinateBox, box2: CoordinateBox) -> CoordinateBox:
  """Build the box obtained by intersecting two boxes axis by axis.

  Parameters
  ----------
  box1: `CoordinateBox`
    First box.
  box2: `CoordinateBox`
    Box to intersect with `box1`.

  Returns
  -------
  A `CoordinateBox` whose x, y and z ranges are the results of
  `intersect_interval` applied to the matching ranges of the inputs.
  Any axis on which the boxes do not overlap gets the range `(0, 0)`.
  """
  # Intervals are intersected in x, y, z order and passed straight on
  # to the constructor.
  return CoordinateBox(
      intersect_interval(box1.x_range, box2.x_range),
      intersect_interval(box1.y_range, box2.y_range),
      intersect_interval(box1.z_range, box2.z_range))


def union(box1: CoordinateBox, box2: CoordinateBox) -> CoordinateBox:
  """Compute the smallest box enclosing both provided boxes.

  Parameters
  ----------
  box1: `CoordinateBox`
    First box to enclose.
  box2: `CoordinateBox`
    Second box to enclose.

  Returns
  -------
  Smallest `CoordinateBox` that contains both `box1` and `box2`. On
  each axis its range runs from the smaller of the two lower bounds
  to the larger of the two upper bounds.
  """
  x_bounds = (min(box1.x_range[0], box2.x_range[0]),
              max(box1.x_range[1], box2.x_range[1]))
  y_bounds = (min(box1.y_range[0], box2.y_range[0]),
              max(box1.y_range[1], box2.y_range[1]))
  z_bounds = (min(box1.z_range[0], box2.z_range[0]),
              max(box1.z_range[1], box2.z_range[1]))
  return CoordinateBox(x_bounds, y_bounds, z_bounds)


def merge_overlapping_boxes(boxes: List[CoordinateBox],
                            threshold: float = 0.8) -> List[CoordinateBox]:
  """Merge boxes whose mutual overlap reaches `threshold`.

  Every input box is compared against every box in `boxes`. Whenever
  the intersection volume is at least `threshold` times the volume of
  either box of the pair, the running box is replaced by the union of
  the pair and the scan continues with that grown box.

  Parameters
  ----------
  boxes: list[CoordinateBox]
    A list of `CoordinateBox` objects.
  threshold: float, default 0.8
    The volume fraction of either box that the intersection must
    reach for the two boxes to be merged.

  Returns
  -------
  list[CoordinateBox] of merged boxes. A merged box is dropped when a
  box already placed in the result contains it, so the result is never
  longer than `boxes`.
  """
  merged: List[CoordinateBox] = []
  for seed in boxes:
    current = seed
    for other in boxes:
      if current == other:
        continue
      overlap_volume = intersection(current, other).volume()
      if (overlap_volume >= threshold * current.volume() or
          overlap_volume >= threshold * other.volume()):
        current = union(current, other)
    # Only keep boxes that are not already covered by an earlier result.
    already_covered = any(kept.contains(current) for kept in merged)
    if not already_covered:
      merged.append(current)
  return merged


def get_face_boxes(coords: np.ndarray, pad: int = 5) -> List[CoordinateBox]:
  """For each face of the convex hull, compute a coordinate box around it.

  The convex hull of a macromolecule will have a series of triangular
  faces. For each such triangular face, we construct a bounding box
  around this triangle. Think of this box as attempting to capture
  some binding interaction region whose exterior is controlled by the
  box. Note that this box will likely be a crude approximation, but
  the advantage of this technique is that it only uses simple geometry
  to provide some basic biological insight into the molecule at hand.

  The `pad` parameter is used to control the amount of padding around
  the face to be used for the coordinate box.

  Parameters
  ----------
  coords: np.ndarray
    Of shape `(N, 3)`. The coordinates of a molecule.
  pad: int, optional (default 5)
    The number of angstroms to pad.

  Returns
  -------
  boxes: List[CoordinateBox]
    One `CoordinateBox` per hull face. On every axis the box spans
    from `floor(min) - pad` to `ceil(max) + pad` of the face's vertex
    coordinates, expressed as Python ints.

  Examples
  --------
  >>> coords = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]])
  >>> boxes = get_face_boxes(coords, pad=5)
  """
  hull = ConvexHull(coords)
  boxes = []
  # Each triangle in the simplices is a set of 3 atom indices into
  # `coords` which form the vertices of an exterior triangle on the
  # convex hull of the macromolecule.
  for triangle in hull.simplices:
    # Coordinates of the three atoms that make up this face.
    points = np.array(
        [coords[triangle[0]], coords[triangle[1]], coords[triangle[2]]])
    # Compute the padded integer bounds once per axis (x, y, z) instead
    # of repeating the same arithmetic three times.
    bounds = []
    for axis in range(3):
      axis_values = points[:, axis]
      lower = int(np.floor(np.amin(axis_values))) - pad
      upper = int(np.ceil(np.amax(axis_values))) + pad
      bounds.append((lower, upper))
    boxes.append(CoordinateBox(*bounds))
  return boxes


class CoordinateBox(object):
  """A coordinate box that represents a block in space.

@@ -263,7 +90,7 @@ class CoordinateBox(object):
    z_cont = (z_min <= point[2] and point[2] <= z_max)
    return x_cont and y_cont and z_cont

  def __eq__(self, other: CoordinateBox) -> bool:  # type: ignore
  def __eq__(self, other: "CoordinateBox") -> bool:  # type: ignore
    """Compare two boxes to see if they're equal.

    Parameters
@@ -334,7 +161,7 @@ class CoordinateBox(object):
    z_min, z_max = self.z_range
    return (x_max - x_min) * (y_max - y_min) * (z_max - z_min)

  def contains(self, other: CoordinateBox) -> bool:
  def contains(self, other: "CoordinateBox") -> bool:
    """Test whether this box contains another.

    This method checks whether `other` is contained in this box.
@@ -363,3 +190,176 @@ class CoordinateBox(object):
    return (self_x_min <= other_x_min and other_x_max <= self_x_max and
            self_y_min <= other_y_min and other_y_max <= self_y_max and
            self_z_min <= other_z_min and other_z_max <= self_z_max)


def intersect_interval(interval1: Tuple[int, int],
                       interval2: Tuple[int, int]) -> Tuple[int, int]:
  """Return the overlap of two closed integer intervals.

  Parameters
  ----------
  interval1: Tuple[int, int]
    First interval, given as `(x1_min, x1_max)`.
  interval2: Tuple[int, int]
    Second interval, given as `(x2_min, x2_max)`.

  Returns
  -------
  x_intersect: Tuple[int, int]
    `(max(x1_min, x2_min), min(x1_max, x2_max))` when the intervals
    overlap (sharing a single endpoint counts as overlapping). When
    they are disjoint, `(0, 0)` is returned as the stand-in for the
    empty set.
  """
  lo1, hi1 = interval1
  lo2, hi2 = interval2
  # The intervals are disjoint exactly when one ends before the other
  # begins, in either order.
  if hi1 < lo2 or hi2 < lo1:
    return (0, 0)
  return (max(lo1, lo2), min(hi1, hi2))


def intersection(box1: CoordinateBox, box2: CoordinateBox) -> CoordinateBox:
  """Build the box obtained by intersecting two boxes axis by axis.

  Parameters
  ----------
  box1: `CoordinateBox`
    First box.
  box2: `CoordinateBox`
    Box to intersect with `box1`.

  Returns
  -------
  A `CoordinateBox` whose x, y and z ranges are the results of
  `intersect_interval` applied to the matching ranges of the inputs.
  Any axis on which the boxes do not overlap gets the range `(0, 0)`.
  """
  # Intervals are intersected in x, y, z order and passed straight on
  # to the constructor.
  return CoordinateBox(
      intersect_interval(box1.x_range, box2.x_range),
      intersect_interval(box1.y_range, box2.y_range),
      intersect_interval(box1.z_range, box2.z_range))


def union(box1: CoordinateBox, box2: CoordinateBox) -> CoordinateBox:
  """Compute the smallest box enclosing both provided boxes.

  Parameters
  ----------
  box1: `CoordinateBox`
    First box to enclose.
  box2: `CoordinateBox`
    Second box to enclose.

  Returns
  -------
  Smallest `CoordinateBox` that contains both `box1` and `box2`. On
  each axis its range runs from the smaller of the two lower bounds
  to the larger of the two upper bounds.
  """
  x_bounds = (min(box1.x_range[0], box2.x_range[0]),
              max(box1.x_range[1], box2.x_range[1]))
  y_bounds = (min(box1.y_range[0], box2.y_range[0]),
              max(box1.y_range[1], box2.y_range[1]))
  z_bounds = (min(box1.z_range[0], box2.z_range[0]),
              max(box1.z_range[1], box2.z_range[1]))
  return CoordinateBox(x_bounds, y_bounds, z_bounds)


def merge_overlapping_boxes(boxes: List[CoordinateBox],
                            threshold: float = 0.8) -> List[CoordinateBox]:
  """Merge boxes whose mutual overlap reaches `threshold`.

  Every input box is compared against every box in `boxes`. Whenever
  the intersection volume is at least `threshold` times the volume of
  either box of the pair, the running box is replaced by the union of
  the pair and the scan continues with that grown box.

  Parameters
  ----------
  boxes: list[CoordinateBox]
    A list of `CoordinateBox` objects.
  threshold: float, default 0.8
    The volume fraction of either box that the intersection must
    reach for the two boxes to be merged.

  Returns
  -------
  list[CoordinateBox] of merged boxes. A merged box is dropped when a
  box already placed in the result contains it, so the result is never
  longer than `boxes`.
  """
  merged: List[CoordinateBox] = []
  for seed in boxes:
    current = seed
    for other in boxes:
      if current == other:
        continue
      overlap_volume = intersection(current, other).volume()
      if (overlap_volume >= threshold * current.volume() or
          overlap_volume >= threshold * other.volume()):
        current = union(current, other)
    # Only keep boxes that are not already covered by an earlier result.
    already_covered = any(kept.contains(current) for kept in merged)
    if not already_covered:
      merged.append(current)
  return merged


def get_face_boxes(coords: np.ndarray, pad: int = 5) -> List[CoordinateBox]:
  """For each face of the convex hull, compute a coordinate box around it.

  The convex hull of a macromolecule will have a series of triangular
  faces. For each such triangular face, we construct a bounding box
  around this triangle. Think of this box as attempting to capture
  some binding interaction region whose exterior is controlled by the
  box. Note that this box will likely be a crude approximation, but
  the advantage of this technique is that it only uses simple geometry
  to provide some basic biological insight into the molecule at hand.

  The `pad` parameter is used to control the amount of padding around
  the face to be used for the coordinate box.

  Parameters
  ----------
  coords: np.ndarray
    Of shape `(N, 3)`. The coordinates of a molecule.
  pad: int, optional (default 5)
    The number of angstroms to pad.

  Returns
  -------
  boxes: List[CoordinateBox]
    One `CoordinateBox` per hull face. On every axis the box spans
    from `floor(min) - pad` to `ceil(max) + pad` of the face's vertex
    coordinates, expressed as Python ints.

  Examples
  --------
  >>> coords = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]])
  >>> boxes = get_face_boxes(coords, pad=5)
  """
  hull = ConvexHull(coords)
  boxes = []
  # Each triangle in the simplices is a set of 3 atom indices into
  # `coords` which form the vertices of an exterior triangle on the
  # convex hull of the macromolecule.
  for triangle in hull.simplices:
    # Coordinates of the three atoms that make up this face.
    points = np.array(
        [coords[triangle[0]], coords[triangle[1]], coords[triangle[2]]])
    # Compute the padded integer bounds once per axis (x, y, z) instead
    # of repeating the same arithmetic three times.
    bounds = []
    for axis in range(3):
      axis_values = points[:, axis]
      lower = int(np.floor(np.amin(axis_values))) - pad
      upper = int(np.ceil(np.amax(axis_values))) + pad
      bounds.append((lower, upper))
    boxes.append(CoordinateBox(*bounds))
  return boxes
+8 −1
Original line number Diff line number Diff line
@@ -7,7 +7,14 @@ markers =
ignore_missing_imports = True

[flake8]
ignore = E111, E114, E124, E125, E129, E722, W503,W504
ignore = 
    E111,  # Indentation is not a multiple of four
    E114,  # Indentation is not a multiple of four (comment)
    E124,  # Closing bracket does not match visual indentation
    E125,  # Continuation line with same indent as next logical line
    E129,  # Visually indented line with same indent as next logical line
    W503,  # Line break before binary operator
    W504,  # Line break after binary operator
max-line-length = 300

[yapf]