Source code for mathematical.utils

#!/usr/bin/env python
#
#  utils.py
"""
Utilities for mathematical operations.
"""
#
#  Copyright © 2014-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#  intdiv, roman, and equiv_operators based on ChemPy (https://github.com/bjodah/chempy)
#  |  Copyright (c) 2015-2018, Björn Dahlgren
#  |  All rights reserved.
#  |
#  |  Redistribution and use in source and binary forms, with or without modification,
#  |  are permitted provided that the following conditions are met:
#  |
#  |    Redistributions of source code must retain the above copyright notice, this
#  |    list of conditions and the following disclaimer.
#  |
#  |    Redistributions in binary form must reproduce the above copyright notice, this
#  |    list of conditions and the following disclaimer in the documentation and/or
#  |    other materials provided with the distribution.
#  |
#  |  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#  |  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#  |  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#  |  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
#  |  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#  |  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#  |  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
#  |  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  |  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#  |  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#  gcd_array based on
#     https://www.geeksforgeeks.org/python-program-for-gcd-of-more-than-two-or-array-numbers/
#
#  _precalc_fact, log_factorial, _log_pi_r, _log_pi, _expectation,
#  and _confidence_value based on Pyteomics (https://github.com/levitsky/pyteomics)
#  |  Copyright (c) 2011-2015, Anton Goloborodko & Lev Levitsky
#  |  Licensed under the Apache License, Version 2.0 (the "License");
#  |  you may not use this file except in compliance with the License.
#  |  You may obtain a copy of the License at
#  |
#  |    http://www.apache.org/licenses/LICENSE-2.0
#  |
#  |  Unless required by applicable law or agreed to in writing, software
#  |  distributed under the License is distributed on an "AS IS" BASIS,
#  |  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  |  See the License for the specific language governing permissions and
#  |  limitations under the License.
#  |
#  |  See also:
#  |  Goloborodko, A.A.; Levitsky, L.I.; Ivanov, M.V.; and Gorshkov, M.V. (2013)
#  |  "Pyteomics - a Python Framework for Exploratory Data Analysis and Rapid Software
#  |  Prototyping in Proteomics", Journal of The American Society for Mass Spectrometry,
#  |  24(2), 301–304. DOI: `10.1007/s13361-012-0516-6 <http://dx.doi.org/10.1007/s13361-012-0516-6>`_
#  |
#  |  Levitsky, L.I.; Klein, J.; Ivanov, M.V.; and Gorshkov, M.V. (2018)
#  |  "Pyteomics 4.0: five years of development of a Python proteomics framework",
#  |  Journal of Proteome Research.
#  |  DOI: `10.1021/acs.jproteome.8b00717 <http://dx.doi.org/10.1021/acs.jproteome.8b00717>`_
#

# stdlib
import math
from decimal import ROUND_HALF_UP, Decimal
from math import log10
from operator import eq, ge, gt, le, lt, ne
from typing import Any, Iterator, List, Optional, Sequence, Union, overload

# 3rd party
import numpy
import pandas  # type: ignore
from domdf_python_tools.doctools import prettify_docstrings
from domdf_python_tools.typing import PathLike

pandas.DataFrame.__module__ = "pandas"

__all__ = [
		"intdiv",
		"roman",
		"magnitude",
		"remove_zero",
		"isint",
		"represents_int",
		"rounders",
		"strip_strings",
		"strip_booleans",
		"strip_nonetype",
		"nanmean",
		"nanstd",
		"nanrsd",
		"strip_none_bool_string",
		"gcd",
		"gcd_array",
		"gcd2",
		"lcm",
		"hcf",
		"hcf2",
		"mod_inverse",
		"log_factorial",
		"equiv_operators",
		"FRange",
		"concatenate_csv",
		]


[docs]def intdiv(p: float, q: float) -> int: """ Integer divsions which rounds toward zero. **Examples** >>> intdiv(3, 2) 1 >>> intdiv(-3, 2) -1 >>> -3 // 2 -2 """ r = p // q if r < 0 and q * r != p: r += 1 return int(r)
[docs]def roman(num: float) -> str: """ Retuns the Roman numeral represtation of the given value. **Examples:** .. code-block:: >>> roman(4) 'IV' >>> roman(17) 'XVII' """ tokens = "M CM D CD C XC L XL X IX V IV I".split() values = 1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1 result = '' for t, v in zip(tokens, values): cnt = int(num // v) result += t * cnt num -= v * cnt return result
[docs]def magnitude(x: float) -> int: """ Returns the magnitude of the given value. :param x: Numerical value to find the magnitude of. .. versionchanged:: 0.2.0 Now returns the absolute magnitude of negative numbers. """ if x > 0.0: return int(log10(x)) elif x < 0.0: return int(log10(abs(x))) else: return 0
[docs]def remove_zero(inputlist: Sequence[Union[float, bool, None]]) -> List[float]: """ Remove zero values from the given list. Also removes :py:obj:`False` and :py:obj:`None`. :param inputlist: list to remove zero values from """ array = numpy.array(inputlist) return list(array[numpy.nonzero(array)])
[docs]def isint(num: float) -> bool: """ Checks whether a float is an integer value. .. note:: This function only works with floating-point numbers :param num: value to check """ return num == int(num)
[docs]def represents_int(s: Any) -> bool: """ Checks whether a value can be converted to an :class:`int`. :param s: value to check """ try: int(s) return True except (ValueError, TypeError): return False
RepresentsInt = represents_int
[docs]def rounders(val_to_round: Union[str, float, Decimal], round_format: str) -> Decimal: """ Round a value to the specified number format, e.g. ``"0.000"`` for three decimal places. :param val_to_round: The value to round :param round_format: The rounding format """ return Decimal(Decimal(val_to_round).quantize(Decimal(str(round_format)), rounding=ROUND_HALF_UP))
[docs]def strip_strings(ls: Sequence[Any]) -> List: """ Remove strings from a list. :param ls: the list to remove strings from. :return: The list without strings. """ return [x for x in ls if not isinstance(x, str)]
[docs]def strip_booleans(ls: Sequence[Any]) -> List: """ Remove booleans from a list. :param ls: the list to remove booleans from. :return: The list without boolean values. """ return [x for x in ls if not isinstance(x, bool)]
[docs]def strip_nonetype(ls: Sequence[Any]) -> List: """ Remove :py:obj:`None` from a list. :param ls: the list to remove :py:obj:`None` from. :return: The list without :py:obj:`None` values. """ return [x for x in ls if x is not None]
[docs]def nanmean(ls: Sequence[Any], dtype=float) -> float: """ Returns the mean of the given sequence, ignoring :py:obj:`None` and ``numpy.nan`` values etc. Similar to numpy.nanmean except it handles :py:obj:`None`. :param ls: :param dtype: """ return float(numpy.nanmean(numpy.array(ls, dtype=dtype)))
[docs]def nanstd(ls: Sequence[Any], dtype=float) -> float: """ Returns the standard deviation of the given sequence, ignoring :py:obj:`None` and ``numpy.nan`` values etc. Similar to numpy.nanstd except it handles :py:obj:`None`. :param ls: :param dtype: """ return float(numpy.nanstd(numpy.array(ls, dtype=dtype)))
[docs]def nanrsd(ls: Sequence[Any], dtype=float) -> float: """ Returns the relative standard deviation of the given sequence, ignoring :py:obj:`None` and ``numpy.nan`` values etc. :param ls: :param dtype: """ mean = nanmean(ls, dtype=dtype) std = nanstd(ls, dtype=dtype) return float(std / abs(mean))
[docs]def strip_none_bool_string(ls: Sequence) -> List: """ Remove :py:obj:`None`, boolean and string values from a list. :param ls: The list to remove values from. """ ls = strip_nonetype(ls) ls = strip_booleans(ls) ls = strip_strings(ls) return ls
[docs]def gcd(a: int, b: int) -> int: """ Returns the GCD (HCF) of ``a`` and ``b`` using Euclid's Algorithm. :param a: :param b: """ # while a != 0: # a, b = b % a, a # return b return math.gcd(a, b)
[docs]def gcd_array(array) -> float: """ Returns the GCD for an array of numbers using Euclid's Algorithm. Based on https://www.geeksforgeeks.org/python-program-for-gcd-of-more-than-two-or-array-numbers/ :param array: """ a = array[0] b = array[1] x = math.gcd(a, b) for i in range(2, len(array)): x = math.gcd(x, array[i]) return x
[docs]def gcd2(numbers: Sequence[int]) -> int: """ Returns the GCD (HCF) of a list of numbers using Euclid's Algorithm. :param numbers: """ c = numbers[0] for i in range(1, (len(numbers))): c = gcd(c, numbers[i]) return c
[docs]def lcm(numbers: Sequence[int]) -> float: """ Returns the LCM of a list of numbers using Euclid's Algorithm. :param numbers: """ product = numbers[0] for i in range(1, len(numbers)): product = product * numbers[i] gcd = gcd2(numbers) lcm = product / gcd if product % gcd == 0: return lcm else: return product
hcf = gcd hcf2 = gcd2
[docs]def mod_inverse(a: int, m: int) -> Optional[float]: """ Returns the modular inverse of ``a % m``, which is the number ``x`` such that ``a × x % m = 1``. :param a: :param m: """ # noqa: D400 if gcd(a, m) != 1: return None # No mod inverse exists if a & m aren't relatively prime # Calculation using the Extended Euclidean Algorithm u1, u2, u3 = 1, 0, a v1, v2, v3 = 0, 1, m while v3 != 0: q = u3 // v3 # // forces integer division in Python 3 v1, v2, v3, u1, u2, u3 = (u1 - q * v1), (u2 - q * v2), (u3 - q * v3), v1, v2, v3 return u1 % m
modInverse = mod_inverse equiv_operators = dict(zip("< <= == != >= >".split(), (lt, le, eq, ne, ge, gt))) _precalc_fact = numpy.log([math.factorial(n) for n in range(20)])
[docs]def log_factorial(x: float) -> float: """ Returns the natural logarithm of ``x`` factorial (``ln(x!``). :param x: """ arr = numpy.array(x) m: bool = (arr >= _precalc_fact.size) # type: ignore out = numpy.empty(arr.shape) out[~m] = _precalc_fact[arr[~m].astype(int)] arr = arr[m] out[m] = arr * numpy.log(arr) - arr + 0.5 * numpy.log(2 * numpy.pi * arr) return float(out)
def _log_pi_r(d: float, k: float, p: float = 0.5) -> float: return k * math.log(p) + log_factorial(k + d) - log_factorial(k) - log_factorial(d) def _log_pi(d: float, k: float, p: float = 0.5) -> float: return _log_pi_r(d, k, p) + (d + 1) * math.log(1 - p)
[docs]@prettify_docstrings class FRange(Sequence[float]): """ Returns a range of floating-point numbers. The arguments to the range constructor may be integers or floats. :param start: :param stop: :param step: :raises ValueError: If step is zero, or if any value is larger than 1×10 :superscript:`14`. .. versionadded:: 0.2.0 """ #: The value of the ``start`` parameter (or ``0.0`` if the parameter was not supplied) start: float #: The value of the ``stop`` parameter stop: float #: The value of the ``step`` parameter (or ``1.0`` if the parameter was not supplied) step: float _init = False
[docs] def __setattr__(self, key, value): if self._init: raise AttributeError("Could not set attribute") else: super().__setattr__(key, value)
[docs] def __delattr__(self, key): if self._init: raise AttributeError("Could not delete attribute") else: super().__delattr__(key)
@overload def __init__(self, stop: float) -> None: ... @overload def __init__(self, start: float, stop: float, step: float = ...) -> None: ... def __init__(self, start=None, stop=None, step=1.0) -> None: # type: ignore if start is not None and stop is None: self.stop = float(start) self.start = 0.0 elif start is not None and stop is not None: self.start = float(start) self.stop = float(stop) else: raise TypeError("Invalid argument types.") if step == 0.0: raise ValueError("'step' argument must not be zero") else: self.step = float(step) if magnitude(self.start) > 14: raise ValueError(f"Value {self.start} too large for 'start'") if magnitude(self.stop) > 14: raise ValueError(f"Value {self.stop} too large for 'stop'") if magnitude(self.step) > 14: raise ValueError(f"Value {self.step} too large for 'step'") self._init = True
[docs] def count(self, value: float) -> int: """ Returns ``1`` if the value is within the range, ``0`` otherwise. :param value: """ if value in self: return 1 else: return 0
[docs] def index(self, value: float) -> int: # type: ignore """ Returns the index of ``value`` in the range. :param value: :raises ValueError: if the value is not in the range. """ if value not in self: raise ValueError(f"{value} is not in range") else: return int((value - self.start) / self.step)
[docs] def __len__(self) -> int: """ Returns the number of values in the range. """ if self.stop <= self.start and self.step > 0: return 0 elif self.stop >= self.start and self.step < 0: return 0 else: return math.ceil((self.stop - self.start) / self.step)
[docs] def __contains__(self, o: object) -> bool: """ Returns whether ``o`` is in the range. :param o: """ if isinstance(o, (int, float)): if self.step > 0: return (self.start <= o < self.stop) and not ((o - self.start) % self.step) elif self.step < 0: return (self.start >= o > self.stop) and not ((o - self.start) % self.step) return False
[docs] def __iter__(self) -> Iterator[float]: """ Iterates over values in the range. """ count = 0 while True: value = float(self.start + count * self.step) if self.step > 0 and value >= self.stop: break elif self.step < 0 and value <= self.stop: break else: yield value count += 1
@overload def __getitem__(self, i: int) -> int: ... @overload def __getitem__(self, s: slice) -> "FRange": ...
[docs] def __getitem__(self, item): """ Returns the value in the range at index ``item``. :param item: """ if isinstance(item, int) and item >= 0: value = self.start + (item * self.step) if value >= self.stop: raise IndexError("FRange object index out of range") else: return value elif isinstance(item, int): value = self.stop - (item * self.step) if value < self.start: raise IndexError("FRange object index out of range") else: return value # elif isinstance(item, slice): # step = item.step or 1 # start_idx = item.start or 0 # # if start_idx > len(self): # start = self.stop # else: # start = self[start_idx] # # if self.stop - (item.stop * self.step) < self.start: # stop = self.stop # else: # stop = self[item.stop] # # return self.__class__(start, stop, step) else: raise NotImplementedError(f"Unsupported type for __getitem__: {type(item)}")
[docs] def __repr__(self) -> str: if self.step != 1.0: return f"FRange({self.start}, {self.stop}, {self.step})" else: return f"FRange({self.start}, {self.stop})"
[docs] def __reversed__(self) -> Iterator[float]: """ Returns :func:`reversed(self) <reversed>`. """ # Special case where start == stop if self.start == self.stop: return iter(FRange(self.start, self.stop, -self.step)) # difference between last value and self.stop remainder = ((self.stop - self.start) % self.step) or self.step return iter(FRange( start=(self.stop - remainder), stop=(self.start - self.step), step=-self.step, ))
[docs] def __eq__(self, other) -> bool: if isinstance(other, (range, FRange)): # if self.stop < self.start and self.step > 0: # self_stop = self.start # elif self.stop > self.start and self.step < 0: # self_stop = self.start # else: # self_stop = self.stop # # # difference between last value and self.stop # remainder = ((self_stop - self.start) % self.step) # # if self_stop == self.start: # self_step = 1 # elif remainder: # self_step = 1 # elif self_stop - self.step == self.start: # self_step = 1 # else: # self_step = self.step # # if other.stop < other.start and other.step > 0: # other_stop = other.start # elif other.stop > other.start and other.step < 0: # other_stop = other.start # else: # other_stop = other.stop # # # difference between last value and other.stop # remainder = ((other_stop - other.start) % other.step) # # if other_stop == other.start: # other_step = 1 # elif remainder: # other_step = 1 # elif other_stop - other.step == other.start: # other_step = 1 # else: # other_step = other.step # # if self.start == self_stop and other.start == other_stop and self_step == other_step: # return True # elif self_stop == other_stop and self.start == other.start and self_step == other_step: # return True # else: # return False # for left, right in zip_longest(self, other): # if left != right: # return False # return True return tuple(self) == tuple(other) else: return False
def __hash__(self): return hash(tuple(self))
[docs]def concatenate_csv(*files: PathLike, outfile: Optional[PathLike] = None) -> pandas.DataFrame: r""" Concatenate multiple CSV files together and return a :class:`pandas.DataFrame` representing the output. :param \*files: The files to concatenate. :param outfile: The file to save the output as. If :py:obj:`None` no file will be saved. :return: A :class:`pandas.DataFrame` containing the concatenated CSV data. .. versionadded:: 0.3.0 """ data_frames = [] for csv_file in files: # Read CSV file to data frame results_df = pandas.read_csv(csv_file, header=0, index_col=False, dtype=str) data_frames.append(results_df) concat_df = pandas.concat(data_frames) if outfile is not None: concat_df.to_csv(outfile, index=False) return concat_df