Pyteomics documentation v4.7.4

pyteomics.mgf

Contents

Source code for pyteomics.mgf

"""
mgf - read and write MS/MS data in Mascot Generic Format
========================================================

Summary
-------

`MGF <http://www.matrixscience.com/help/data_file_help.html>`_ is a simple
human-readable format for MS/MS data. It allows storing MS/MS peak lists and
experimental parameters.

This module provides classes and functions for access to data stored in
MGF files.
Parsing is done using :py:class:`MGF` and :py:class:`IndexedMGF` classes.
The :py:func:`read` function can be used as an entry point.
MGF spectra are converted to dictionaries. MS/MS data points are
(optionally) represented as :py:mod:`numpy` arrays.
Also, common parameters can be read from MGF file header with
:py:func:`read_header` function.
:py:func:`write` allows creation of MGF files.

Classes
-------

  :py:class:`MGF` - a text-mode MGF parser. Suitable to read spectra from a file consecutively.
  Needs a file opened in text mode (or will open it if given a file name).

  :py:class:`IndexedMGF` - a binary-mode MGF parser. When created, builds a byte offset index
  for fast random access by spectrum titles. Sequential iteration is also supported.
  Needs a seekable file opened in binary mode (if created from existing file object).

  :py:class:`MGFBase` - abstract class, the common ancestor of the two classes above.
  Can be used for type checking.

Functions
---------

  :py:func:`read` - an alias for :py:class:`MGF` or :py:class:`IndexedMGF`.

  :py:func:`get_spectrum` - read a single spectrum with given title from a file.

  :py:func:`chain` - read multiple files at once.

  :py:func:`chain.from_iterable` - read multiple files at once, using an
  iterable of files.

  :py:func:`read_header` - get a dict with common parameters for all spectra
  from the beginning of MGF file.

  :py:func:`write` - write an MGF file.

-------------------------------------------------------------------------------
"""

#   Copyright 2012 Anton Goloborodko, Lev Levitsky
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

try:
    import numpy as np
except ImportError:
    np = None
import itertools as it
import sys
import warnings
from . import auxiliary as aux


class MGFBase(aux.MaskedArrayConversionMixin):
    """Abstract mixin class representing an MGF file.

    Subclasses implement different approaches to parsing: they provide
    ``_read_header`` and ``get_spectrum``, and feed raw text lines to
    :py:meth:`_read_header_lines` and :py:meth:`_read_spectrum_lines`.
    """

    # Characters that mark a comment line inside a spectrum block.
    _comments = set('#;!/')
    _array_keys = ['m/z array', 'intensity array', 'charge array', 'ion array']
    _array_keys_unicode = [u'm/z array', u'intensity array', u'charge array', u'ion array']
    encoding = None

    def __init__(self, source=None, **kwargs):
        """Create an MGF file object, set MGF-specific parameters.

        Parameters
        ----------

        source : str or file or None, optional
            A file object (or file name) with data in MGF format. Default is
            :py:const:`None`, which means read standard input.

        use_header : bool, optional, keyword only
            Add the info from file header to each dict. Spectrum-specific parameters
            override those from the header in case of conflict.
            Default is :py:const:`True`.

        convert_arrays : one of {0, 1, 2}, optional, keyword only
            If `0`, m/z, intensities and (possibly) charges or (possibly) ions will be returned as regular lists.
            If `1`, they will be converted to regular :py:class:`numpy.ndarray`'s.
            If `2`, charges will be reported as a masked array (default).
            The default option is the slowest. `1` and `2` require :py:mod:`numpy`.

        read_charges : bool, optional, keyword only
            If `True` (default), fragment charges are reported. Disabling it improves performance.

        read_ions : bool, optional
            If `True` (default: False), fragment ions are reported. Disabling it improves performance.
            Note that right now, only one of (read_charges, read_ions) may be True.

        dtype : type or str or dict, optional, keyword only
            dtype argument to :py:mod:`numpy` array constructor, one for all arrays or one for each key.
            Keys should be 'm/z array', 'intensity array', 'charge array' and/or 'ion array'.

        encoding : str, optional, keyword only
            File encoding.
        """
        # The full keyword set is forwarded to the next class in the MRO;
        # the MGF-specific options are then picked out of the same dict.
        super(MGFBase, self).__init__(source, **kwargs)
        self._use_header = kwargs.pop('use_header', True)
        self._read_charges = kwargs.pop('read_charges', True)
        self._read_ions = kwargs.pop('read_ions', False)
        # Make sure no charges are read if ions are read
        if self._read_ions:
            self._read_charges = False
        if self._use_header:
            self._read_header()
        else:
            self._header = None

    def __reduce_ex__(self, protocol):
        """Pickle support: rebuild from the original source, then restore state."""
        return (self.__class__, (self._source_init,), self.__getstate__())

    def __getstate__(self):
        """Extend the parent state with the header and the `use_header` flag."""
        state = super(MGFBase, self).__getstate__()
        state['use_header'] = self._use_header
        state['header'] = self._header
        return state

    def __setstate__(self, state):
        """Restore the parent state, the header and the `use_header` flag."""
        super(MGFBase, self).__setstate__(state)
        self._header = state['header']
        self._use_header = state['use_header']

    @staticmethod
    def parse_precursor_charge(charge_text, list_only=False):
        """Parse a precursor charge string with :py:func:`aux._parse_charge`."""
        return aux._parse_charge(charge_text, list_only=list_only)

    @staticmethod
    def parse_pepmass_charge(pepmass_str):
        """Split a PEPMASS value into its numeric part and an optional charge.

        Parameters
        ----------
        pepmass_str : str
            Whitespace-separated PEPMASS value with at most three fields.

        Returns
        -------
        out : tuple
            ``(pepmass, charge)``. `pepmass` is a 2-tuple of the first two
            fields converted to :py:class:`float`, padded with :py:const:`None`
            when fewer than two fields are present. `charge` is the third
            field as an unparsed string, or :py:const:`None` if absent.

        Raises
        ------
        PyteomicsError
            If there are more than three fields, or if there are exactly three
            and the first two are not numeric.
        ValueError
            If there are fewer than three fields and one of them is not numeric.
        """
        split = pepmass_str.split()
        if len(split) > 3:
            raise aux.PyteomicsError('MGF format error: cannot parse '
                                     'PEPMASS = {}'.format(pepmass_str))
        elif len(split) == 3:
            charge = split[2]
            try:
                pepmass = tuple(map(float, split[:2]))
            except ValueError:
                raise aux.PyteomicsError('MGF format error: cannot parse '
                                         'PEPMASS = {}'.format(pepmass_str))
        else:
            pepmass = tuple(map(float, split[:2]))
            pepmass = pepmass + (None,) * (2 - len(pepmass))
            charge = None
        return pepmass, charge

    @staticmethod
    def parse_peak_charge(charge_text, list_only=False):
        """Parse a fragment (peak) charge string with :py:func:`aux._parse_charge`.

        `list_only` is forwarded to the parser; the default (:py:const:`False`)
        is what :py:meth:`_read_spectrum_lines` relies on.
        """
        # Previously `list_only` was accepted but silently replaced by False.
        return aux._parse_charge(charge_text, list_only=list_only)

    @staticmethod
    def parse_peak_ion(ion_text):
        """Parse a fragment ion annotation with :py:func:`aux._parse_ion`."""
        return aux._parse_ion(ion_text)

    @property
    def header(self):
        """The file header as a dict; read on first access if not loaded yet."""
        if self._header is None:
            self._read_header()
        return self._header

    def _read_header_lines(self, header_lines):
        """Build ``self._header`` from the lines preceding the first spectrum.

        Reading stops at the first 'BEGIN IONS' line. Only lines that contain
        exactly one '=' are kept: the left part, lowercased, is the key and
        the stripped right part is the value. A 'charge' entry, if present,
        is parsed with :py:meth:`parse_precursor_charge` in list mode.
        """
        header = {}
        for line in header_lines:
            if line.strip() == 'BEGIN IONS':
                break
            l = line.split('=')
            if len(l) == 2:
                key = l[0].lower()
                val = l[1].strip()
                header[key] = val
        if 'charge' in header:
            header['charge'] = self.parse_precursor_charge(header['charge'], True)
        self._header = header

    def _read_spectrum_lines(self, lines):
        """Read a single spectrum from an iterable of text lines.

        Parameters
        ----------
        lines : iterable of str
            Lines of one spectrum block. A leading 'BEGIN IONS' line is
            tolerated; parsing ends at 'END IONS'.

        Returns
        -------
        out : dict or None
            The spectrum, or :py:const:`None` if `lines` is exhausted before
            'END IONS' is seen.

        Raises
        ------
        PyteomicsError
            On a 'BEGIN IONS' line that is not the first line, or on a peak
            line whose fields cannot be converted.
        """
        masses = []
        intensities = []
        charges = []
        ions = []
        params = self.header.copy() if self._use_header else {}

        for i, line in enumerate(lines):
            sline = line.strip()
            if sline == 'BEGIN IONS':
                if i == 0:
                    continue
                else:
                    raise aux.PyteomicsError('Error when parsing MGF: unexpected start of spectrum.')
            if not sline or sline[0] in self._comments:
                pass
            elif sline == 'END IONS':
                if 'pepmass' in params:
                    params['pepmass'], charge = self.parse_pepmass_charge(params['pepmass'])
                    # A charge given inside PEPMASS overrides any CHARGE value.
                    if charge is not None:
                        params['charge'] = charge
                if isinstance(params.get('charge'), aux.basestring):
                    params['charge'] = self.parse_precursor_charge(params['charge'], True)
                if 'rtinseconds' in params:
                    params['rtinseconds'] = aux.unitfloat(params['rtinseconds'], 'second')
                out = {'params': params, 'm/z array': masses, 'intensity array': intensities}
                if self._read_charges:
                    out['charge array'] = charges
                if self._read_ions:
                    out['ion array'] = ions
                self._build_all_arrays(out)
                # On Python 2 with an explicit encoding, expose unicode keys.
                if self.encoding and sys.version_info.major == 2:
                    for key, ukey in zip(self._array_keys + ['params'], self._array_keys_unicode + [u'params']):
                        if key in out:
                            out[ukey] = out.pop(key)
                return out

            else:
                if '=' in sline:  # spectrum-specific parameters!
                    l = sline.split('=', 1)
                    params[l[0].lower()] = l[1].strip()
                else:  # this must be a peak list
                    fields = sline.split()
                    # Convert every field before touching the output lists, so
                    # that a line which is skipped (fewer than two fields)
                    # cannot leave the arrays with different lengths.
                    try:
                        mz = float(fields[0])
                        intensity = float(fields[1])
                        if self._read_charges:
                            peak_charge = self.parse_peak_charge(fields[2]) if len(fields) > 2 else 0
                        if self._read_ions:
                            peak_ion = self.parse_peak_ion(fields[2]) if len(fields) > 2 else ""
                    except ValueError:
                        raise aux.PyteomicsError(
                            'Error when parsing %s. Line:\n%s' % (getattr(self._source, 'name', 'MGF file'), line))
                    except IndexError:
                        # Incomplete peak line: ignore it as a whole.
                        continue
                    masses.append(mz)
                    intensities.append(intensity)
                    if self._read_charges:
                        charges.append(peak_charge)
                    if self._read_ions:
                        ions.append(peak_ion)

    def get_spectrum(self, title):
        """Return the spectrum with the given title; implemented by subclasses."""
        raise NotImplementedError()

    @staticmethod
    def _get_time(spectrum):
        """Return the 'rtinseconds' parameter of `spectrum`.

        Raises :py:class:`PyteomicsError` if the spectrum has no such parameter.
        """
        try:
            return spectrum['params']['rtinseconds']
        except KeyError:
            raise aux.PyteomicsError('RT information not found.')
class IndexedMGF(MGFBase, aux.TaskMappingMixin, aux.TimeOrderedIndexedReaderMixin, aux.IndexSavingTextReader):
    """
    A binary-mode reader for an MGF file with a byte offset index.

    Supports the `with` syntax and direct iteration for sequential parsing.
    Specific spectra can be accessed by title using the indexing syntax in constant time.
    If created using a file object, it needs to be opened in binary mode.

    When iterated, :py:class:`IndexedMGF` object yields spectra one by one.
    Each 'spectrum' is a :py:class:`dict` with the keys 'm/z array',
    'intensity array' and 'params', plus 'charge array' or 'ion array'
    depending on `read_charges` / `read_ions`.
    'm/z array' and 'intensity array' hold the peak m/z values and intensities,
    'charge array' holds fragment charges (a :py:class:`numpy.ma.MaskedArray`
    with the default `convert_arrays`), 'ion array' holds fragment ion
    annotations (str) and 'params' stores a :py:class:`dict` of parameters
    (keys corresponding to MGF, lowercased).

    Attributes
    ----------

    header : dict
        The file header.
    time : RTLocator
        A property used for accessing spectra by retention time.
    """

    delimiter = 'BEGIN IONS'

    def __init__(self, source=None, use_header=True, convert_arrays=2, read_charges=True,
                 dtype=None, encoding='utf-8', index_by_scans=False, read_ions=False,
                 _skip_index=False, **kwargs):
        """
        Create an :py:class:`IndexedMGF` (binary-mode) reader for a given MGF file.

        Parameters
        ----------

        source : str or file or None, optional
            A file object (or file name) with data in MGF format. Default is
            :py:const:`None`, which means read standard input.

            .. note :: If a file object is given, it must be opened in binary mode.

        use_header : bool, optional
            Add the info from file header to each dict. Spectrum-specific parameters
            override those from the header in case of conflict.
            Default is :py:const:`True`.

        convert_arrays : one of {0, 1, 2}, optional
            If `0`, m/z, intensities and (possibly) charges will be returned as regular lists.
            If `1`, they will be converted to regular :py:class:`numpy.ndarray`'s.
            If `2`, charges will be reported as a masked array (default).
            The default option is the slowest. `1` and `2` require :py:mod:`numpy`.

        read_charges : bool, optional
            If `True` (default), fragment charges are reported. Disabling it improves performance.

        read_ions : bool, optional
            If `True` (default: False), fragment ion types are reported. Disabling it improves performance.
            Note that right now, only one of (read_charges, read_ions) may be True.

        dtype : type or str or dict, optional
            dtype argument to :py:mod:`numpy` array constructor, one for all arrays or one for each key.
            Keys should be 'm/z array', 'intensity array', 'charge array' and/or 'ion array'.

        encoding : str, optional
            File encoding.

        index_by_scans : bool, optional
            If :py:const:`True`, the index label matches ``SCANS=`` lines
            instead of ``TITLE=`` lines. Default is :py:const:`False`.

        block_size : int, optional
            Size of the chunk (in bytes) used to parse the file when creating the byte offset index.

        Returns
        -------

        out : IndexedMGF
            The reader object.
        """
        self._index_by_scans = index_by_scans
        self._read_ions = read_ions
        # The label pattern must be in place before the parent initializer runs.
        if index_by_scans:
            self.label = r'SCANS=(\d+)\s*'
        else:
            self.label = r'TITLE=([^\n]*\S)\s*'
        super(IndexedMGF, self).__init__(
            source,
            parser_func=self._read,
            pass_file=False,
            args=(),
            kwargs={},
            use_header=use_header,
            convert_arrays=convert_arrays,
            read_charges=read_charges,
            dtype=dtype,
            encoding=encoding,
            read_ions=read_ions,
            _skip_index=_skip_index,
            **kwargs)

    def __reduce_ex__(self, protocol):
        """Pickle support: positional constructor arguments plus saved state."""
        init_args = (
            self._source_init,      # source
            False,                  # use_header
            self._convert_arrays,   # convert_arrays
            self._read_charges,     # read_charges
            None,                   # dtype
            self.encoding,          # encoding
            self._index_by_scans,   # index_by_scans
            self._read_ions,        # read_ions
            True,                   # _skip_index
        )
        return (self.__class__, init_args, self.__getstate__())

    @aux._keepstate_method
    def _read_header(self):
        """Parse everything before the first indexed spectrum as the header."""
        try:
            first_entry = next(iter(self._offset_index.values()))
            header_size = first_entry[0]
        except StopIteration:  # the index is empty, no spectra in file
            header_size = -1
        raw_header = self.read(header_size)
        decoded = raw_header.decode(self.encoding)
        return self._read_header_lines(decoded.split('\n'))

    def _item_from_offsets(self, offsets):
        """Read and parse the spectrum located at the given (start, end) offsets."""
        begin, stop = offsets
        return self._read_spectrum_lines(self._read_lines_from_offsets(begin, stop))

    def _read(self, **kwargs):
        """Yield every indexed spectrum in index order."""
        for entry in self._offset_index.values():
            yield self._item_from_offsets(entry)

    def get_spectrum(self, key):
        """Return the spectrum stored under `key` in the index."""
        return self.get_by_id(key)

    def _warn_empty(self):
        """Warn that indexing the file produced no entries."""
        class_name = self.__class__.__name__
        file_name = getattr(self._source, 'name', self._source_init)
        suggested = not self._index_by_scans
        template = ("{} object has an empty index for file {}. If this is unexpected, consider adjusting `label` or "
                    "setting `index_by_scans={}`.")
        warnings.warn(template.format(class_name, file_name, suggested))
class MGF(MGFBase, aux.FileReader):
    """
    A class representing an MGF file. Supports the `with` syntax and direct iteration for sequential
    parsing. Specific spectra can be accessed by title using the indexing syntax (if the file is seekable),
    but it takes linear time to search through the file. Consider using :py:class:`IndexedMGF` for
    constant-time access to spectra.

    :py:class:`MGF` object behaves as an iterator, **yielding** spectra one by one.
    Each 'spectrum' is a :py:class:`dict` with the keys 'm/z array',
    'intensity array' and 'params', plus 'charge array' or 'ion array'
    depending on `read_charges` / `read_ions`.
    'm/z array' and 'intensity array' hold the peak m/z values and intensities,
    'charge array' holds fragment charges (a :py:class:`numpy.ma.MaskedArray`
    with the default `convert_arrays`), 'ion array' holds fragment ion
    annotations (str) and 'params' stores a :py:class:`dict` of parameters
    (keys corresponding to MGF, lowercased).

    Attributes
    ----------

    header : dict
        The file header.
    """

    def __init__(self, source=None, use_header=True, convert_arrays=2, read_charges=True,
                 read_ions=False, dtype=None, encoding=None):
        """
        Create an :py:class:`MGF` (text-mode) reader for a given MGF file.

        Parameters
        ----------

        source : str or file or None, optional
            A file object (or file name) with data in MGF format. Default is
            :py:const:`None`, which means read standard input.

            .. note :: If a file object is given, it must be opened in text mode.

        use_header : bool, optional
            Add the info from file header to each dict. Spectrum-specific parameters
            override those from the header in case of conflict.
            Default is :py:const:`True`.

        convert_arrays : one of {0, 1, 2}, optional
            If `0`, m/z, intensities and (possibly) charges will be returned as regular lists.
            If `1`, they will be converted to regular :py:class:`numpy.ndarray`'s.
            If `2`, charges will be reported as a masked array (default).
            The default option is the slowest. `1` and `2` require :py:mod:`numpy`.

        read_charges : bool, optional
            If `True` (default), fragment charges are reported. Disabling it improves performance.

        read_ions : bool, optional
            If `True` (default: False), fragment ion types are reported. Disabling it improves performance.
            Note that right now, only one of (read_charges, read_ions) may be True.

        dtype : type or str or dict, optional
            dtype argument to :py:mod:`numpy` array constructor, one for all arrays or one for each key.
            Keys should be 'm/z array', 'intensity array', 'charge array' and/or 'ion array'.

        encoding : str, optional
            File encoding.

        Returns
        -------

        out : MGF
            The reader object.
        """
        super(MGF, self).__init__(source, mode='r', parser_func=self._read, pass_file=False, args=(), kwargs={},
                                  encoding=encoding, use_header=use_header, convert_arrays=convert_arrays,
                                  read_charges=read_charges, read_ions=read_ions, dtype=dtype)

    @aux._keepstate_method
    def _read_header(self):
        """Parse the header from the lines at the current position of the source."""
        return self._read_header_lines(self._source)

    def _read_spectrum(self):
        """Parse one spectrum from the lines following the current position."""
        return self._read_spectrum_lines(self._source)

    def _read(self):
        """Yield spectra one by one, each starting at a 'BEGIN IONS' line."""
        for line in self._source:
            if line.strip() == 'BEGIN IONS':
                yield self._read_spectrum()

    @aux._keepstate_method
    def get_spectrum(self, title):
        """Find a spectrum by title with a linear scan of the source.

        Parsing starts right after the matching TITLE line, so parameters
        that precede TITLE inside the spectrum block are not included.

        Parameters
        ----------
        title : str
            Spectrum title to look for (compared after stripping whitespace).

        Returns
        -------
        out : dict or None
            The spectrum, or :py:const:`None` if no TITLE line matches.
        """
        for line in self._source:
            sline = line.strip()
            if sline[:5] != 'TITLE':
                continue
            # partition() never fails, so a TITLE-prefixed line without '='
            # is skipped instead of raising IndexError.
            _, sep, value = sline.partition('=')
            if sep and value.strip() == title:
                spectrum = self._read_spectrum()
                spectrum['params']['title'] = title
                return spectrum
        return None

    def __getitem__(self, key):
        """Indexing syntax: ``reader[title]`` is ``reader.get_spectrum(title)``."""
        return self.get_spectrum(key)
def read(*args, **kwargs):
    """Return a reader for a given MGF file.

    Most of the parameters repeat the instantiation signature of
    :py:class:`MGF` and :py:class:`IndexedMGF`. The additional parameter
    `use_index` helps decide which class to instantiate for given `source`.

    Parameters
    ----------

    source : str or file or None, optional
        A file object (or file name) with data in MGF format. Default is
        :py:const:`None`, which means read standard input.

    use_header : bool, optional
        Add the info from file header to each dict. Spectrum-specific parameters
        override those from the header in case of conflict.
        Default is :py:const:`True`.

    convert_arrays : one of {0, 1, 2}, optional
        If `0`, m/z, intensities and (possibly) charges will be returned as regular lists.
        If `1`, they will be converted to regular :py:class:`numpy.ndarray`'s.
        If `2`, charges will be reported as a masked array (default).
        The default option is the slowest. `1` and `2` require :py:mod:`numpy`.

    read_charges : bool, optional
        If `True` (default), fragment charges are reported. Disabling it improves performance.

    read_ions : bool, optional
        If `True` (default: False), fragment ion types are reported. Disabling it improves performance.
        Note that right now, only one of (read_charges, read_ions) may be True.

    dtype : type or str or dict, optional
        dtype argument to :py:mod:`numpy` array constructor, one for all arrays or one for each key.
        Keys should be 'm/z array', 'intensity array', 'charge array' and/or 'ion array'.

    encoding : str, optional
        File encoding.

    use_index : bool, optional
        Determines which parsing method to use. If :py:const:`True` (default), an instance of
        :py:class:`IndexedMGF` is created. This facilitates random access by spectrum titles.
        If an open file is passed as `source`, it needs to be open in binary mode.

        If :py:const:`False`, an instance of :py:class:`MGF` is created. It reads
        `source` in text mode and is suitable for iterative parsing. Access by spectrum title
        requires linear search and thus takes linear time.

    block_size : int, optional
        Size of the chunk (in bytes) used to parse the file when creating the byte offset index.
        (Accepted only for :py:class:`IndexedMGF`.)

    Returns
    -------

    out : MGFBase
        Instance of :py:class:`MGF` or :py:class:`IndexedMGF`.
    """
    # The source may arrive positionally or by keyword.
    source = args[0] if args else kwargs.get('source')
    # `use_index` is consumed here and never reaches the reader class.
    requested = kwargs.pop('use_index', None)
    if aux._check_use_index(source, requested, True):
        reader_class = IndexedMGF
    else:
        reader_class = MGF
    return reader_class(*args, **kwargs)
def get_spectrum(source, title, *args, **kwargs):
    """Read one spectrum (with given `title`) from `source`.

    The file is opened with :py:func:`read`, looked up by title using the
    reader's indexing syntax, and closed again.
    See :py:func:`read` for explanation of parameters affecting the output.

    .. note :: Only the key-value pairs after the "TITLE =" line will be included in the output.

    Parameters
    ----------

    source : str or file or None
        File to read from.
    title : str
        Spectrum title.
    *args
        Given to :py:func:`read`.
    **kwargs
        Given to :py:func:`read`.

    Returns
    -------
    out : dict or None
        A dict with the spectrum, if it is found, and None otherwise.
    """
    with read(source, *args, **kwargs) as reader:
        return reader[title]
@aux._keepstate
def read_header(source):
    """
    Read the specified MGF file and collect the parameters given in its
    header (the lines before the first 'BEGIN IONS') as a :py:class:`dict`,
    the keys corresponding to MGF format (lowercased).

    Only lines containing exactly one '=' are taken into account. A 'charge'
    entry, if present, is parsed with :py:func:`aux._parse_charge`.

    Parameters
    ----------

    source : str or file
        File name or file object representing a file in MGF format.

    Returns
    -------

    header : dict
    """
    with aux._file_obj(source, 'r') as handle:
        header = {}
        for raw_line in handle:
            if raw_line.strip() == 'BEGIN IONS':
                break
            pieces = raw_line.split('=')
            if len(pieces) != 2:
                continue
            header[pieces[0].lower()] = pieces[1].strip()
        if 'charge' in header:
            header['charge'] = aux._parse_charge(header['charge'], True)
        return header
# Spectrum parameters that are written first, in this order.
_default_key_order = ['title', 'pepmass', 'rtinseconds', 'charge']


def _pepmass_repr(k, pepmass):
    """Format a PEPMASS line.

    Strings and plain numbers are written as is; any other value is treated
    as an iterable whose non-``None`` items are joined with single spaces.
    Raises :py:class:`PyteomicsError` if the value cannot be iterated.
    """
    prefix = k.upper() + '='
    if isinstance(pepmass, (str, int, float)):
        return prefix + str(pepmass)
    # assume iterable
    try:
        fields = [str(item) for item in pepmass if item is not None]
    except TypeError:
        raise aux.PyteomicsError('Cannot handle parameter: PEPMASS = {}'.format(pepmass))
    return prefix + ' '.join(fields)


def _charge_repr(k, charge):
    """Format a CHARGE line, trying a single charge first and a charge list second."""
    try:
        formatted = aux.Charge(charge)
    except (TypeError, aux.PyteomicsError):
        formatted = aux.ChargeList(charge)
    return '{}={}'.format(k.upper(), formatted)


def _default_repr(key, val):
    """Format any other parameter as ``KEY=value`` with the key uppercased."""
    name = key.upper()
    return '{}={}'.format(name, val)


# Per-parameter formatters; every other key falls back to `_default_repr`.
_default_value_formatters = {
    'pepmass': _pepmass_repr,
    'charge': _charge_repr,
}
@aux._file_writer()
def write(spectra, output=None, header='', key_order=_default_key_order, fragment_format=None,
          write_charges=True, write_ions=False, use_numpy=None, param_formatters=_default_value_formatters):
    """
    Create a file in MGF format.

    Parameters
    ----------

    spectra : iterable
        A **sequence** of dictionaries with keys 'm/z array', 'intensity array',
        and 'params'. 'm/z array' and 'intensity array' should be sequences of
        :py:class:`int`, :py:class:`float`, or :py:class:`str`. Strings will
        be written 'as is'. The sequences should be of equal length, otherwise
        excessive values will be ignored.

        'params' should be a :py:class:`dict` with keys corresponding to MGF format.
        Keys must be strings, they will be uppercased and used as is, without
        any format consistency tests. Values can be of any type allowing string
        representation.

        'charge array' or 'ion array' can also be specified.

        .. note ::
            Passing a single spectrum will work, but will trigger a warning. This usage pattern is discouraged.
            To ensure correct output when writing multiple spectra,
            it is recommended to construct a sequence of spectra first and then call :py:func:`write` once.

        .. seealso ::
            This discussion of usage patterns of :py:func:`write`:
            https://github.com/levitsky/pyteomics/discussions/109

    output : str or file or None, optional
        Path or a file-like object open for writing. If an existing file is
        specified by file name, it will be opened for writing.
        Default value is :py:const:`None`, which means using standard output.

        .. note::
            The default mode for output files specified by name has been changed
            from `a` to `w` in *pyteomics 4.6*. See `file_mode` to override the mode.

    header : dict or (multiline) str or list of str, optional
        In case of a single string or a list of strings, the header will be
        written 'as is'. In case of dict, the keys (must be strings) will be
        uppercased.

    write_charges : bool, optional
        If :py:const:`False`, fragment charges from 'charge array' will not be written.
        Default is :py:const:`True`.

    write_ions : bool, optional
        If :py:const:`False`, fragment ions from 'ion array' will not be written.
        If :py:const:`True`, then `write_charges` is set to :py:const:`False`.
        Default is :py:const:`False`.

    fragment_format : str, optional
        Format string for m/z, intensity and charge (or ion annotation) of a fragment. Useful to set
        the number of decimal places, e.g.:
        ``fragment_format='%.4f %.0f'``. Default is ``'{} {} {}'``.

        .. note::
            The supported format syntax differs depending on other parameters.
            If `use_numpy` is :py:const:`True` and :py:mod:`numpy` is available,
            fragment peaks will be written using :py:func:`numpy.savetxt`. Then,
            `fragment_format` must be recognized by that function.

            Otherwise, plain Python string formatting is done.
            See `the docs
            <https://docs.python.org/library/string.html#format-specification-mini-language>`_
            for details on writing the format string.
            If some or all charges are missing, an empty string is substituted
            instead, so formatting as :py:class:`!float` or :py:class:`!int` will raise an exception.
            Hence it is safer to just use ``{}`` for charges.

    key_order : list, optional
        A list of strings specifying the order in which params will be written in
        the spectrum header. Unlisted keys will be in arbitrary order.
        Default is :py:data:`_default_key_order`.

        .. note:: This does not affect the order of lines in the global header.

    param_formatters : dict, optional
        A dict mapping parameter names to functions. Each function must accept
        two arguments (key and value) and return a string.
        Default is :py:data:`_default_value_formatters`.

    use_numpy : bool, optional
        Controls whether fragment peak arrays are written using :py:func:`numpy.savetxt`.
        Using :py:func:`numpy.savetxt` is faster, but cannot handle sparse arrays of fragment charges.
        You may want to disable this if you need to save spectra with 'charge arrays' with missing values.

        If not specified, will be set to the opposite of `write_charges`.
        If :py:mod:`numpy` is not available, this parameter has no effect.

    file_mode : str, keyword only, optional
        If `output` is a file name, defines the mode the file will be opened in.
        Otherwise will be ignored. Default is `'w'`.

        .. note ::
            The default changed from `'a'` in *pyteomics 4.6*.

    encoding : str, keyword only, optional
        Output file encoding (if `output` is specified by name).

    Returns
    -------

    output : file
    """
    def key_value_line(key, val):
        # One "KEY=value" line, newline included, using the formatter
        # registered for `key` or the default one.
        return param_formatters.get(key, _default_repr)(key, val) + '\n'

    # Values treated as "missing" in the third column of the plain-Python path.
    nones = (None, np.nan, np.ma.masked) if np is not None else (None,)

    if fragment_format is None:
        fragment_format = '{} {} {}'
        # Defaults for numpy.savetxt: two columns, three with an integer
        # charge, three with a string ion annotation.
        np_format_2 = '%.5f %.1f'
        np_format_3 = '%.5f %.1f %d'
        np_format_i = '%.5f %.1f %s'
    else:
        # A user-supplied format is used as is in every branch.
        np_format_2 = np_format_3 = np_format_i = fragment_format
    format_str = fragment_format + '\n'

    if write_ions:
        write_charges = False
    if use_numpy is None:
        use_numpy = not write_charges

    if isinstance(header, dict):
        head_dict = header.copy()
        # NOTE(review): each line already ends with '\n' and the lines are
        # joined with '\n' again, so header entries end up separated by
        # blank lines -- confirm whether this is intended.
        head_lines = [key_value_line(k, v) for k, v in header.items()]
        head_str = '\n'.join(head_lines)
    else:
        if isinstance(header, str):
            head_str = header
            head_lines = header.split('\n')
        else:
            head_lines = list(header)
            head_str = '\n'.join(header)
        # Recover key/value pairs from the textual header so that spectrum
        # parameters equal to a header value can be skipped below.
        head_dict = {}
        for line in head_lines:
            if not line.strip() or any(line.startswith(c) for c in MGF._comments):
                continue
            l = line.split('=')
            if len(l) == 2:
                head_dict[l[0].lower()] = l[1].strip()
    if head_str:
        output.write(head_str + '\n\n')

    # A dict that looks like a single spectrum is wrapped into a 1-tuple.
    if isinstance(spectra, dict) and 'm/z array' in spectra:
        spectra = (spectra, )
        warnings.warn("Passing a single spectrum to `write()` is discouraged. "
                      "To write a set of spectra, pass them to `write()` all at once. "
                      "For more info, see: https://github.com/levitsky/pyteomics/discussions/109.")

    for spectrum in spectra:
        output.write('BEGIN IONS\n')
        found = set()
        # Keys from `key_order` come first, then the remaining params;
        # `found` prevents writing a key twice.
        for key in it.chain(key_order, spectrum['params']):
            if key not in found and key in spectrum['params']:
                found.add(key)
                val = spectrum['params'][key]
                # Parameters identical to the header value are not repeated.
                if val != head_dict.get(key):
                    output.write(key_value_line(key, val))

        try:
            # `success` stays True only if one of the numpy.savetxt branches
            # handled the peaks; otherwise the plain-Python loop below runs.
            success = True
            if np is not None and use_numpy:
                if (not write_charges or 'charge array' not in spectrum) and (not write_ions or 'ion array' not in spectrum):
                    # Nothing to put in a third column: m/z and intensity only.
                    X = np.empty((len(spectrum['m/z array']), 2))
                    X[:, 0] = spectrum['m/z array']
                    X[:, 1] = spectrum['intensity array']
                    np.savetxt(output, X, fmt=np_format_2)
                elif isinstance(spectrum.get('charge array'), np.ndarray):
                    X = np.empty((len(spectrum['m/z array']), 3))
                    X[:, 0] = spectrum['m/z array']
                    X[:, 1] = spectrum['intensity array']
                    X[:, 2] = spectrum['charge array']
                    np.savetxt(output, X, fmt=np_format_3)
                elif isinstance(spectrum.get('ion array'), np.ndarray):
                    # Object dtype so that string annotations fit next to floats.
                    X = np.empty((len(spectrum['m/z array']), 3), dtype=object)
                    X[:, 0] = spectrum['m/z array']
                    X[:, 1] = spectrum['intensity array']
                    X[:, 2] = spectrum['ion array']
                    np.savetxt(output, X, fmt=np_format_i)
                else:
                    success = False
            else:
                success = False

            if not success:
                # Third column: charges if requested, else ions if requested,
                # else an endless stream of None (written as '').
                for m, i, c in zip(spectrum['m/z array'],
                                   spectrum['intensity array'],
                                   spectrum.get('charge array', it.cycle((None,))) if write_charges else
                                   spectrum.get('ion array', it.cycle((None,))) if write_ions else
                                   it.cycle((None,))):
                    output.write(format_str.format(
                        m, i,
                        (c if c not in nones else '')))
        except KeyError:
            raise aux.PyteomicsError("'m/z array' and 'intensity array' must be present in all spectra.")
        output.write('END IONS\n\n')
    return output
# `chain` reads multiple files at once; it is built from :py:func:`read`
# by `aux._make_chain` (see the module docstring for `chain.from_iterable`).
chain = aux._make_chain(read, 'read')

Contents