Skip to content

Implementation of SugarSelection #4790

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
@@ -27,6 +27,7 @@ Fixes
the function to prevent shared state. (Issue #4655)

Enhancements
* Addition of 'sugar' token for GLYCAM, PDB and CHARMM sugar selection (PR #4790)
* Addition of 'water' token for water selection (Issue #4839)
* Enables parallelization for analysis.density.DensityAnalysis (Issue #4677, PR #4729)
* Enables parallelization for analysis.contacts.Contacts (Issue #4660)
252 changes: 252 additions & 0 deletions package/MDAnalysis/core/selection.py
Original file line number Diff line number Diff line change
@@ -1056,6 +1056,258 @@ def _apply(self, group):
return group[np.isin(nmidx, matches)]


class SugarSelection(Selection):
    """Select atoms belonging to sugar residues with recognized names.

    A residue counts as a sugar when its residue name is contained in
    :attr:`SugarSelection.sugar_res`; the comparison is an exact string
    match against the names in that set.

    The recognized residue names are

    * the PDB, CHARMM and GLYCAM abbreviations listed by the glycam.org
      server at
      https://glycam.org/docs/othertoolsservice/2016/06/09/3d-snfg-list-of-residue-names/index.html
    * the aglycons 'ROH', 'OME' and 'TBT', added manually from
      GLYCAM-Web generated files.


    .. versionadded:: 2.9.0
    """
    token = 'sugar'

    # Residue names recognized by the 'sugar' token, grouped by the
    # nomenclature (PDB, CHARMM, GLYCAM) they were taken from.
    sugar_res = {
        # https://glycam.org/docs/othertoolsservice/2016/06/09/3d-snfg-list-of-residue-names/index.html
        # Full PDB Abbreviations Nomenclature
        'GLC', 'MAL', 'BGC', 'NAG', '4YS', 'SGN', 'BGLN', 'NDG', 'GCS',
        'GCU', 'QUI', 'OLI', 'MAN', 'BMA', 'MAV', 'BEM', 'RAM', 'TYV', 'ARA',
        'AHR', 'GAL', 'GLA', 'NGA', 'ADA', 'GUL', 'GUP', 'GL0', 'LGU', 'ALT',
        'ALL', 'WOO', 'TAL', 'IDO', 'IDS', 'FUC', 'FUL', 'LYX', 'ABE', 'XYL',
        'XYS', 'LXC', 'XYP', 'PAR', 'RIB', 'DIG', 'COL', 'BAC', 'API', 'FRU',
        'TAG', 'SOR', 'PSI', 'DHA', 'KDN', 'KDO', 'NEU', 'SIA', 'MUR', 'GMH',
        # Full CHARMM Abbreviations Nomenclature
        'AGLC', 'BGLC', 'AGLCNA', 'BGLCNA', 'BGLCN0', 'AGLCA', 'BGLCA',
        'BGLCA0', 'AMAN', 'BMAN', 'ARHM', 'BRHM', 'AARB', 'BARB', 'AGAL',
        'BGAL', 'AGALNA', 'BGALNA', 'AGUL', 'BGUL', 'AALT', 'BALT', 'AALL',
        'ATAL', 'BTAL', 'AIDO', 'BIDO', 'AIDOA', 'BIDOA', 'AFUC', 'BFUC',
        'ALYF', 'BLYF', 'AXYL', 'BXYL', 'AXYF', 'BXYF', 'ARIB', 'BRIB',
        'AFRU', 'BFRU', 'ANE5AC', 'BNE5AC',
        # GLYCAM Abbreviations
        # Glucose Nomenclature
        '0GA', '0GB', '1GA', '1GB', '2GA', '2GB', '3GA', '3GB', '4GA', '4GB',
        '6GA', '6GB', 'ZGA', 'ZGB', 'YGA', 'YGB', 'XGA', 'XGB', 'WGA', 'WGB',
        'VGA', 'VGB', 'UGA', 'UGB', 'TGA', 'TGB', 'SGA', 'SGB', 'RGA', 'RGB',
        'QGA', 'QGB', 'PGA', 'PGB', '0gA', '0gB', '1gA', '1gB', '2gA', '2gB',
        '3gA', '3gB', '4gA', '4gB', '6gA', '6gB', 'ZgA', 'ZgB', 'YgA', 'YgB',
        'XgA', 'XgB', 'WgA', 'WgB', 'VgA', 'VgB', 'UgA', 'UgB', 'TgA', 'TgB',
        'SgA', 'SgB', 'RgA', 'RgB', 'QgA', 'QgB', 'PgA', 'PgB',
        # N-Acetyl Glucosamine Nomenclature
        '0YA', '0YB', '1YA', '1YB', '3YA', '3YB', '4YA', '4YB', '6YA', '6YB',
        'WYA', 'WYB', 'VYA', 'VYB', 'UYA', 'UYB', 'QYA', 'QYB', '0yA', '0yB',
        '1yA', '1yB', '3yA', '3yB', '4yA', '4yB', '6yA', '6yB', 'WyA', 'WyB',
        'VyA', 'VyB', 'UyA', 'UyB', 'QyA', 'QyB',
        # Glucosamine Nomenclature
        # ('4YS' also belongs to this group; it is already listed with the
        # PDB abbreviations above and is not repeated in the set literal)
        '0YN', '0Yn', '0YNP', '0YnP', '0YS', '0Ys', '3YS', '3Ys',
        '4Ys', '6YS', '6Ys', 'QYS', 'QYs', 'UYS', 'UYs', 'VYS', 'VYs',
        'WYS', 'WYs', '0yS', '0ys', '3yS', '3ys', '4yS', '4ys',
        # Glucuronic Acid Nomenclature
        '0ZA', '0ZB', '1ZA', '1ZB', '2ZA', '2ZB', '3ZA', '3ZB', '4ZA',
        '4ZB', 'ZZA', 'ZZB', 'YZA', 'YZB', 'WZA', 'WZB', 'TZA', 'TZB',
        '0zA', '0zB', '1zA', '1zB', '2zA', '2zB', '3zA', '3zB', '4zA',
        '4zB', 'ZzA', 'ZzB', 'YzA', 'YzB', 'WzA', 'WzB', 'TzA',
        'TzB', '0ZBP',
        # Quinovose Nomenclature
        '0QA', '0QB', '1QA', '1QB', '2QA', '2QB', '3QA', '3QB', '4QA',
        '4QB', 'ZQA', 'ZQB', 'YQA', 'YQB', 'WQA', 'WQB', 'TQA', 'TQB',
        '0qA', '0qB', '1qA', '1qB', '2qA', '2qB', '3qA', '3qB', '4qA',
        '4qB', 'ZqA', 'ZqB', 'YqA', 'YqB', 'WqA', 'WqB', 'TqA', 'TqB',
        # Mannose Nomenclature
        '0MA', '0MB', '1MA', '1MB', '2MA', '2MB', '3MA', '3MB', '4MA',
        '4MB', '6MA', '6MB', 'ZMA', 'ZMB', 'YMA', 'YMB', 'XMA', 'XMB',
        'WMA', 'WMB', 'VMA', 'VMB', 'UMA', 'UMB', 'TMA', 'TMB', 'SMA',
        'SMB', 'RMA', 'RMB', 'QMA', 'QMB', 'PMA', 'PMB', '0mA', '0mB',
        '1mA', '1mB', '2mA', '2mB', '3mA', '3mB', '4mA', '4mB', '6mA',
        '6mB', 'ZmA', 'ZmB', 'YmA', 'YmB', 'XmA', 'XmB', 'WmA', 'WmB',
        'VmA', 'VmB', 'UmA', 'UmB', 'TmA', 'TmB', 'SmA', 'SmB', 'RmA',
        'RmB', 'QmA', 'QmB', 'PmA', 'PmB',
        # N-Acetyl Mannosamine Nomenclature
        '0WA', '0WB', '1WA', '1WB', '3WA', '3WB', '4WA', '4WB', '6WA',
        '6WB', 'WWA', 'WWB', 'VWA', 'VWB', 'UWA', 'UWB', 'QWA', 'QWB',
        '0wA', '0wB', '1wA', '1wB', '3wA', '3wB', '4wA', '4wB', '6wA',
        '6wB', 'WwA', 'WwB', 'VwA', 'VwB', 'UwA', 'UwB', 'QwA', 'QwB',
        # Rhamnose Nomenclature
        '0HA', '0HB', '1HA', '1HB', '2HA', '2HB', '3HA', '3HB', '4HA',
        '4HB', 'ZHA', 'ZHB', 'YHA', 'YHB', 'WHA', 'WHB', 'THA', 'THB',
        '0hA', '0hB', '1hA', '1hB', '2hA', '2hB', '3hA', '3hB', '4hA',
        '4hB', 'ZhA', 'ZhB', 'YhA', 'YhB', 'WhA', 'WhB', 'ThA', 'ThB',
        # Tyvelose Nomenclature
        '0TV', '0Tv', '1TV', '1Tv', '2TV', '2Tv', '4TV', '4Tv', 'YTV',
        'YTv', '0tV', '0tv', '1tV', '1tv', '2tV', '2tv', '4tV', '4tv',
        'YtV', 'Ytv',
        # Arabinose Nomenclature
        '0AA', '0AB', '1AA', '1AB', '2AA', '2AB', '3AA', '3AB', '4AA',
        '4AB', 'ZAA', 'ZAB', 'YAA', 'YAB', 'WAA', 'WAB', 'TAA', 'TAB',
        '0AD', '0AU', '1AD', '1AU', '2AD', '2AU', '3AD', '3AU', '5AD',
        '5AU', 'ZAD', 'ZAU', '0aA', '0aB', '1aA', '1aB', '2aA', '2aB',
        '3aA', '3aB', '4aA', '4aB', 'ZaA', 'ZaB', 'YaA', 'YaB', 'WaA',
        'WaB', 'TaA', 'TaB', '0aD', '0aU', '1aD', '1aU', '2aD', '2aU',
        '3aD', '3aU', '5aD', '5aU', 'ZaD', 'ZaU',
        # Galactose Nomenclature
        '0LA', '0LB', '1LA', '1LB', '2LA', '2LB', '3LA', '3LB', '4LA',
        '4LB', '6LA', '6LB', 'ZLA', 'ZLB', 'YLA', 'YLB', 'XLA', 'XLB',
        'WLA', 'WLB', 'VLA', 'VLB', 'ULA', 'ULB', 'TLA', 'TLB', 'SLA',
        'SLB', 'RLA', 'RLB', 'QLA', 'QLB', 'PLA', 'PLB', '0lA', '0lB',
        '1lA', '1lB', '2lA', '2lB', '3lA', '3lB', '4lA', '4lB', '6lA',
        '6lB', 'ZlA', 'ZlB', 'YlA', 'YlB', 'XlA', 'XlB', 'WlA', 'WlB',
        'VlA', 'VlB', 'UlA', 'UlB', 'TlA', 'TlB', 'SlA', 'SlB', 'RlA',
        'RlB', 'QlA', 'QlB', 'PlA', 'PlB',
        # N-Acetyl Galactosamine Nomenclature
        '0VA', '0VB', '1VA', '1VB', '3VA', '3VB', '4VA', '4VB', '6VA',
        '6VB', 'WVA', 'WVB', 'VVA', 'VVB', 'UVA', 'UVB', 'QVA', 'QVB',
        '0vA', '0vB', '1vA', '1vB', '3vA', '3vB', '4vA', '4vB', '6vA',
        '6vB', 'WvA', 'WvB', 'VvA', 'VvB', 'UvA', 'UvB', 'QvA', 'QvB',
        # Galacturonic Acid Nomenclature
        '0OA', '0OB', '1OA', '1OB', '2OA', '2OB', '3OA', '3OB', '4OA',
        '4OB', 'ZOA', 'ZOB', 'YOA', 'YOB', 'WOA', 'WOB', 'TOA', 'TOB',
        '0oA', '0oB', '1oA', '1oB', '2oA', '2oB', '3oA', '3oB', '4oA',
        '4oB', 'ZoA', 'ZoB', 'YoA', 'YoB', 'WoA', 'WoB', 'ToA', 'ToB',
        # Gulose Nomenclature
        '0KA', '0KB', '1KA', '1KB', '2KA', '2KB', '3KA', '3KB', '4KA',
        '4KB', '6KA', '6KB', 'ZKA', 'ZKB', 'YKA', 'YKB', 'XKA', 'XKB',
        'WKA', 'WKB', 'VKA', 'VKB', 'UKA', 'UKB', 'TKA', 'TKB', 'SKA',
        'SKB', 'RKA', 'RKB', 'QKA', 'QKB', 'PKA', 'PKB', '0kA', '0kB',
        '1kA', '1kB', '2kA', '2kB', '3kA', '3kB', '4kA', '4kB', '6kA',
        '6kB', 'ZkA', 'ZkB', 'YkA', 'YkB', 'XkA', 'XkB', 'WkA', 'WkB',
        'VkA', 'VkB', 'UkA', 'UkB', 'TkA', 'TkB', 'SkA', 'SkB', 'RkA',
        'RkB', 'QkA', 'QkB', 'PkA', 'PkB',
        # Altrose Nomenclature
        '0EA', '0EB', '1EA', '1EB', '2EA', '2EB', '3EA', '3EB', '4EA',
        '4EB', '6EA', '6EB', 'ZEA', 'ZEB', 'YEA', 'YEB', 'XEA', 'XEB',
        'WEA', 'WEB', 'VEA', 'VEB', 'UEA', 'UEB', 'TEA', 'TEB', 'SEA',
        'SEB', 'REA', 'REB', 'QEA', 'QEB', 'PEA', 'PEB', '0eA', '0eB',
        '1eA', '1eB', '2eA', '2eB', '3eA', '3eB', '4eA', '4eB', '6eA',
        '6eB', 'ZeA', 'ZeB', 'YeA', 'YeB', 'XeA', 'XeB', 'WeA', 'WeB',
        'VeA', 'VeB', 'UeA', 'UeB', 'TeA', 'TeB', 'SeA', 'SeB', 'ReA',
        'ReB', 'QeA', 'QeB', 'PeA', 'PeB',
        # Allose Nomenclature
        '0NA', '0NB', '1NA', '1NB', '2NA', '2NB', '3NA', '3NB', '4NA',
        '4NB', '6NA', '6NB', 'ZNA', 'ZNB', 'YNA', 'YNB', 'XNA', 'XNB',
        'WNA', 'WNB', 'VNA', 'VNB', 'UNA', 'UNB', 'TNA', 'TNB', 'SNA',
        'SNB', 'RNA', 'RNB', 'QNA', 'QNB', 'PNA', 'PNB', '0nA', '0nB',
        '1nA', '1nB', '2nA', '2nB', '3nA', '3nB', '4nA', '4nB', '6nA',
        '6nB', 'ZnA', 'ZnB', 'YnA', 'YnB', 'XnA', 'XnB', 'WnA', 'WnB',
        'VnA', 'VnB', 'UnA', 'UnB', 'TnA', 'TnB', 'SnA', 'SnB', 'RnA',
        'RnB', 'QnA', 'QnB', 'PnA', 'PnB',
        # Talose Nomenclature
        '0TA', '0TB', '1TA', '1TB', '2TA', '2TB', '3TA', '3TB', '4TA',
        '4TB', '6TA', '6TB', 'ZTA', 'ZTB', 'YTA', 'YTB', 'XTA', 'XTB',
        'WTA', 'WTB', 'VTA', 'VTB', 'UTA', 'UTB', 'TTA', 'TTB', 'STA',
        'STB', 'RTA', 'RTB', 'QTA', 'QTB', 'PTA', 'PTB', '0tA', '0tB',
        '1tA', '1tB', '2tA', '2tB', '3tA', '3tB', '4tA', '4tB', '6tA',
        '6tB', 'ZtA', 'ZtB', 'YtA', 'YtB', 'XtA', 'XtB', 'WtA', 'WtB',
        'VtA', 'VtB', 'UtA', 'UtB', 'TtA', 'TtB', 'StA', 'StB', 'RtA',
        'RtB', 'QtA', 'QtB', 'PtA', 'PtB',
        # Iduronic Acid Nomenclature
        '0UA', '0UB', '1UA', '1UB', '2UA', '2UB', '3UA', '3UB', '4UA',
        '4UB', 'ZUA', 'ZUB', 'YUA', 'YUB', 'WUA', 'WUB', 'TUA', 'TUB',
        '0uA', '0uB', '1uA', '1uB', '2uA', '2uB', '3uA', '3uB', '4uA',
        '4uB', 'ZuA', 'ZuB', 'YuA', 'YuB', 'WuA', 'WuB', 'TuA', 'TuB',
        'YuAP',
        # Fucose Nomenclature
        '0FA', '0FB', '1FA', '1FB', '2FA', '2FB', '3FA', '3FB', '4FA',
        '4FB', 'ZFA', 'ZFB', 'YFA', 'YFB', 'WFA', 'WFB', 'TFA', 'TFB',
        '0fA', '0fB', '1fA', '1fB', '2fA', '2fB', '3fA', '3fB', '4fA',
        '4fB', 'ZfA', 'ZfB', 'YfA', 'YfB', 'WfA', 'WfB', 'TfA', 'TfB',
        # Lyxose Nomenclature
        '0DA', '0DB', '1DA', '1DB', '2DA', '2DB', '3DA', '3DB', '4DA',
        '4DB', 'ZDA', 'ZDB', 'YDA', 'YDB', 'WDA', 'WDB', 'TDA', 'TDB',
        '0DD', '0DU', '1DD', '1DU', '2DD', '2DU', '3DD', '3DU', '5DD',
        '5DU', 'ZDD', 'ZDU', '0dA', '0dB', '1dA', '1dB', '2dA', '2dB',
        '3dA', '3dB', '4dA', '4dB', 'ZdA', 'ZdB', 'YdA', 'YdB', 'WdA',
        'WdB', 'TdA', 'TdB', '0dD', '0dU', '1dD', '1dU', '2dD', '2dU',
        '3dD', '3dU', '5dD', '5dU', 'ZdD', 'ZdU',
        # Abequose Nomenclature
        '0AE', '2AE', '4AE', 'YGa', '0AF', '2AF', '4AF', 'YAF',
        # Xylose Nomenclature
        '0XA', '0XB', '1XA', '1XB', '2XA', '2XB', '3XA', '3XB', '4XA',
        '4XB', 'ZXA', 'ZXB', 'YXA', 'YXB', 'WXA', 'WXB', 'TXA', 'TXB',
        '0XD', '0XU', '1XD', '1XU', '2XD', '2XU', '3XD', '3XU', '5XD',
        '5XU', 'ZXD', 'ZXU', '0xA', '0xB', '1xA', '1xB', '2xA', '2xB',
        '3xA', '3xB', '4xA', '4xB', 'ZxA', 'ZxB', 'YxA', 'YxB', 'WxA',
        'WxB', 'TxA', 'TxB', '0xD', '0xU', '1xD', '1xU', '2xD', '2xU',
        '3xD', '3xU', '5xD', '5xU', 'ZxD', 'ZxU',
        # Ribose Nomenclature
        '0RA', '0RB', '1RA', '1RB', '2RA', '2RB', '3RA', '3RB', '4RA',
        '4RB', 'ZRA', 'ZRB', 'YRA', 'YRB', 'WRA', 'WRB', 'TRA', 'TRB',
        '0RD', '0RU', '1RD', '1RU', '2RD', '2RU', '3RD', '3RU', '5RD',
        '5RU', 'ZRD', 'ZRU', '0rA', '0rB', '1rA', '1rB', '2rA', '2rB',
        '3rA', '3rB', '4rA', '4rB', 'ZrA', 'ZrB', 'YrA', 'YrB', 'WrA',
        'WrB', 'TrA', 'TrB', '0rD', '0rU', '1rD', '1rU', '2rD', '2rU',
        '3rD', '3rU', '5rD', '5rU', 'ZrD', 'ZrU',
        # Bacillosamine Nomenclature
        '0BC', '3BC', '0bC', '3bC',
        # Fructose Nomenclature
        '0CA', '0CB', '1CA', '1CB', '2CA', '2CB', '3CA', '3CB', '4CA',
        '4CB', '5CA', '5CB', 'WCA', 'WCB', '0CD', '0CU', '1CD', '1CU',
        '2CD', '2CU', '3CD', '3CU', '4CD', '4CU', '6CD', '6CU', 'WCD',
        'WCU', 'VCD', 'VCU', 'UCD', 'UCU', 'QCD', 'QCU', '0cA', '0cB',
        '1cA', '1cB', '2cA', '2cB', '3cA', '3cB', '4cA', '4cB', '5cA',
        '5cB', 'WcA', 'WcB', '0cD', '0cU', '1cD', '1cU', '2cD', '2cU',
        '3cD', '3cU', '4cD', '4cU', '6cD', '6cU', 'WcD', 'WcU', 'VcD',
        'VcU', 'UcD', 'UcU', 'QcD', 'QcU',
        # Tagatose Nomenclature
        '0JA', '0JB', '1JA', '1JB', '2JA', '2JB', '3JA', '3JB', '4JA',
        '4JB', '5JA', '5JB', 'WJA', 'WJB', '0JD', '0JU', '1JD', '1JU',
        '2JD', '2JU', '3JD', '3JU', '4JD', '4JU', '6JD', '6JU', 'WJD',
        'WJU', 'VJD', 'VJU', 'UJD', 'UJU', 'QJD', 'QJU', '0jA', '0jB',
        '1jA', '1jB', '2jA', '2jB', '3jA', '3jB', '4jA', '4jB', '5jA',
        '5jB', 'WjA', 'WjB', '0jD', '0jU', '1jD', '1jU', '2jD', '2jU',
        '3jD', '3jU', '4jD', '4jU', '6jD', '6jU', 'WjD', 'WjU', 'VjD',
        'VjU', 'UjD', 'UjU', 'QjD', 'QjU',
        # Sorbose Nomenclature
        '0BA', '0BB', '1BA', '1BB', '2BA', '2BB', '3BA', '3BB', '4BA',
        '4BB', '5BA', '5BB', 'WBA', 'WBB', '0BD', '0BU', '1BD', '1BU',
        '2BD', '2BU', '3BD', '3BU', '4BD', '4BU', '6BD', '6BU', 'WBD',
        'WBU', 'VBD', 'VBU', 'UBD', 'UBU', 'QBD', 'QBU', '0bA', '0bB',
        '1bA', '1bB', '2bA', '2bB', '3bA', '3bB', '4bA', '4bB', '5bA',
        '5bB', 'WbA', 'WbB', '0bD', '0bU', '1bD', '1bU', '2bD', '2bU',
        '3bD', '3bU', '4bD', '4bU', '6bD', '6bU', 'WbD', 'WbU', 'VbD',
        'VbU', 'UbD', 'UbU', 'QbD', 'QbU',
        # Psicose Nomenclature
        '0PA', '0PB', '1PA', '1PB', '2PA', '2PB', '3PA', '3PB', '4PA',
        '4PB', '5PA', '5PB', 'WPA', 'WPB', '0PD', '0PU', '1PD', '1PU',
        '2PD', '2PU', '3PD', '3PU', '4PD', '4PU', '6PD', '6PU', 'WPD',
        'WPU', 'VPD', 'VPU', 'UPD', 'UPU', 'QPD', 'QPU', '0pA', '0pB',
        '1pA', '1pB', '2pA', '2pB', '3pA', '3pB', '4pA', '4pB', '5pA',
        '5pB', 'WpA', 'WpB', '0pD', '0pU', '1pD', '1pU', '2pD', '2pU',
        '3pD', '3pU', '4pD', '4pU', '6pD', '6pU', 'WpD', 'WpU', 'VpD',
        'VpU', 'UpD', 'UpU', 'QpD', 'QpU',
        # N-Acetyl Neuraminic Acid Nomenclature
        '0SA', '0SB', '4SA', '4SB', '7SA', '7SB', '8SA', '8SB', '9SA',
        '9SB', 'ASA', 'ASB', 'BSA', 'BSB', 'CSA', 'CSB', 'DSA', 'DSB',
        'ESA', 'ESB', 'FSA', 'FSB', 'GSA', 'GSB', 'HSA', 'HSB', 'ISA',
        'ISB', 'JSA', 'JSB', 'KSA', 'KSB', '0sA', '0sB', '4sA', '4sB',
        '7sA', '7sB', '8sA', '8sB', '9sA', '9sB', 'AsA', 'AsB', 'BsA',
        'BsB', 'CsA', 'CsB', 'DsA', 'DsB', 'EsA', 'EsB', 'FsA', 'FsB',
        'GsA', 'GsB', 'HsA', 'HsB', 'IsA', 'IsB', 'JsA', 'JsB', 'KsA',
        'KsB',
        # N-Glycolyl Neuraminic Acid Nomenclature
        '0GL', '4GL', '7GL', '8GL', '9GL', 'CGL', 'DGL', 'EGL', 'FGL',
        'GGL', 'HGL', 'IGL', 'JGL', 'KGL', '0gL', '4gL', '7gL', '8gL',
        '9gL', 'AgL', 'BgL', 'CgL', 'DgL', 'EgL', 'FgL', 'GgL', 'HgL',
        'IgL', 'JgL', 'KgL',
        # Aglycon Nomenclature
        'ROH', 'OME', 'TBT',
    }

    def _apply(self, group):
        """Return the atoms of `group` that sit in a recognized sugar residue.

        Parameters
        ----------
        group : AtomGroup
            Atoms to filter; its universe's topology supplies the residue
            names.

        Returns
        -------
        AtomGroup
            The subset of `group` whose residue name is in
            :attr:`sugar_res`.
        """
        resname_attr = group.universe._topology.resnames
        # indices that ``namedict`` assigns to the residue names which are
        # recognized sugars
        matches = [ix for (nm, ix) in resname_attr.namedict.items()
                   if nm in self.sugar_res]
        # residue-name index of each atom in the group
        nmidx = resname_attr.nmidx[group.resindices]
        # keep the atoms whose residue-name index is one of the sugar matches
        return group[np.isin(nmidx, matches)]



class NucleicSelection(Selection):
"""All atoms in nucleic acid residues with recognized residue names.

20 changes: 20 additions & 0 deletions testsuite/MDAnalysisTests/core/test_atomselections.py
Original file line number Diff line number Diff line change
@@ -46,6 +46,8 @@
XTC,
NUCLsel,
PDB_charges,
GLYCAM,
SUGAR_PDB,
PDB_elements,
PDB_full,
PDB_helix,
@@ -1773,3 +1775,21 @@ def test_formal_charge_selection(sel, size, name):

assert len(ag) == size
assert ag.atoms[0].name == name


def test_sugar_glycam_selection():
    """'sugar' must select the same atoms as the explicit GLYCAM resnames.

    The GLYCAM test structure is made of the residues ROH, 3RA and 0MB.
    """
    universe = mda.Universe(GLYCAM)

    # reference built from explicit residue names
    expected = universe.select_atoms(
        "resname ROH or resname 3RA or resname 0MB"
    )
    selected = universe.select_atoms("sugar")

    assert selected == expected


def test_sugar_pdb_selection():
    """'sugar' must select the same atoms as the NAG, BMA and MAN resnames.

    Uses the SUGAR_PDB structure, whose sugars carry PDB abbreviations.
    """
    universe = mda.Universe(SUGAR_PDB)

    # reference built from explicit residue names
    expected = universe.select_atoms(
        "resname NAG or resname BMA or resname MAN"
    )
    selected = universe.select_atoms("sugar")

    assert selected == expected
13,734 changes: 13,734 additions & 0 deletions testsuite/MDAnalysisTests/data/6kya.pdb

Large diffs are not rendered by default.

48 changes: 48 additions & 0 deletions testsuite/MDAnalysisTests/data/GLYCAM_sugars.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
ATOM 1 HO1 ROH 1 -3.767 9.090 -1.562 1.00 0.00 H
ATOM 2 O1 ROH 1 -4.172 9.583 -2.279 1.00 0.00 O
TER
ATOM 3 C1 3RA 2 -3.447 8.385 -2.772 1.00 0.00 C
ATOM 4 H1 3RA 2 -2.393 8.442 -2.502 1.00 0.00 H
ATOM 5 C2 3RA 2 -3.566 8.292 -4.312 1.00 0.00 C
ATOM 6 H2 3RA 2 -2.891 7.494 -4.628 1.00 0.00 H
ATOM 7 C3 3RA 2 -4.967 7.926 -4.841 1.00 0.00 C
ATOM 8 H3 3RA 2 -4.930 7.759 -5.918 1.00 0.00 H
ATOM 9 C4 3RA 2 -5.437 6.645 -4.123 1.00 0.00 C
ATOM 10 H4 3RA 2 -4.792 5.821 -4.431 1.00 0.00 H
ATOM 11 C5 3RA 2 -5.383 6.751 -2.590 1.00 0.00 C
ATOM 12 H5E 3RA 2 -5.627 5.791 -2.134 1.00 0.00 H
ATOM 13 H5A 3RA 2 -6.099 7.491 -2.230 1.00 0.00 H
ATOM 14 O5 3RA 2 -4.017 7.152 -2.176 1.00 0.00 O
ATOM 15 O4 3RA 2 -6.780 6.321 -4.518 1.00 0.00 O
ATOM 16 H4O 3RA 2 -7.373 7.024 -4.240 1.00 0.00 H
ATOM 17 H2O 3RA 2 -3.657 10.231 -4.683 1.00 0.00 H
ATOM 18 O2 3RA 2 -3.088 9.499 -4.932 1.00 0.00 O
ATOM 19 O3 3RA 2 -5.887 9.005 -4.566 1.00 0.00 O
TER
ATOM 20 C1 0MB 3 -6.532 9.610 -5.698 1.00 0.00 C
ATOM 21 H1 0MB 3 -7.112 8.861 -6.238 1.00 0.00 H
ATOM 22 O5 0MB 3 -5.515 10.163 -6.603 1.00 0.00 O
ATOM 23 C5 0MB 3 -6.036 10.922 -7.785 1.00 0.00 C
ATOM 24 H5 0MB 3 -6.549 10.213 -8.438 1.00 0.00 H
ATOM 25 C6 0MB 3 -4.846 11.525 -8.554 1.00 0.00 C
ATOM 26 H62 0MB 3 -4.311 12.238 -7.925 1.00 0.00 H
ATOM 27 H61 0MB 3 -5.193 12.048 -9.446 1.00 0.00 H
ATOM 28 O6 0MB 3 -3.941 10.479 -8.952 1.00 0.00 O
ATOM 29 H6O 0MB 3 -3.676 9.992 -8.166 1.00 0.00 H
ATOM 30 C4 0MB 3 -7.047 12.010 -7.341 1.00 0.00 C
ATOM 31 H4 0MB 3 -6.521 12.815 -6.825 1.00 0.00 H
ATOM 32 O4 0MB 3 -7.681 12.556 -8.527 1.00 0.00 O
ATOM 33 H4O 0MB 3 -8.051 13.417 -8.304 1.00 0.00 H
ATOM 34 C3 0MB 3 -8.126 11.397 -6.407 1.00 0.00 C
ATOM 35 H3 0MB 3 -8.663 10.622 -6.958 1.00 0.00 H
ATOM 36 O3 0MB 3 -9.116 12.384 -5.997 1.00 0.00 O
ATOM 37 H3O 0MB 3 -9.774 12.490 -6.691 1.00 0.00 H
ATOM 38 C2 0MB 3 -7.467 10.735 -5.174 1.00 0.00 C
ATOM 39 H2 0MB 3 -8.233 10.308 -4.525 1.00 0.00 H
ATOM 40 O2 0MB 3 -6.690 11.710 -4.417 1.00 0.00 O
ATOM 41 H2O 0MB 3 -6.224 11.233 -3.721 1.00 0.00 H
TER
CONECT 3 2
CONECT 2 3
CONECT 20 19
CONECT 19 20
5 changes: 5 additions & 0 deletions testsuite/MDAnalysisTests/datafiles.py
Original file line number Diff line number Diff line change
@@ -379,6 +379,8 @@
"SURFACE_PDB", # 111 FCC lattice topology for NSGrid bug #2345
"SURFACE_TRR", # full precision coordinates for NSGrid bug #2345
"DSSP", # DSSP test suite
"GLYCAM", # PDB file with GLYCAM sugars
"SUGAR_PDB", # PDB file with PDB abbreviation sugars
]

from importlib import resources
@@ -879,5 +881,8 @@
# DSSP testing: from https://github.com/ShintaroMinami/PyDSSP
DSSP = (_data_ref / "dssp").as_posix()

# Sugar selection testing: PDB files with GLYCAM and PDB abbreviation sugars
GLYCAM = (_data_ref / "GLYCAM_sugars.pdb").as_posix()
SUGAR_PDB = (_data_ref / "6kya.pdb").as_posix()

# This should be the last line: clean up namespace
del resources