License: Creative Commons Attribution 3.0 Unported license (CC BY 3.0)
When quoting this document, please refer to the following
DOI: 10.4230/OASIcs.GCB.2013.80
URN: urn:nbn:de:0030-drops-42324
URL: http://dagstuhl.sunsite.rwth-aachen.de/volltexte/2013/4232/
Klingenberg, Heiner ;
Martinjak, Robin ;
Glöckner, Frank Oliver ;
Daniel, Rolf ;
Lingner, Thomas ;
Meinicke, Peter
Dinucleotide distance histograms for fast detection of rRNA in metatranscriptomic sequences
Abstract
With the advent of metatranscriptomics it has now become possible to study the dynamics of microbial communities. The analysis of environmental RNA-Seq data implies several challenges for the development of efficient tools in bioinformatics. One of the first steps in the computational analysis of metatranscriptomic sequencing reads requires the separation of rRNA and mRNA fragments to ensure that only protein coding sequences are actually used in a subsequent functional analysis. In the context of the rRNA filtering task it is desirable to have a broad spectrum of different methods in order to find a suitable trade-off between speed and accuracy for a particular dataset. We introduce a machine learning approach for the detection of rRNA in metatranscriptomic sequencing reads that is based on support vector machines in combination with dinucleotide distance histograms for feature representation. The results show that our SVM-based approach is at least one order of magnitude faster than any of the existing tools with only a slight degradation of the detection performance when compared to state-of-the-art alignment-based methods.
BibTeX - Entry
@comment{Proceedings paper from the German Conference on Bioinformatics 2013, OASIcs volume 34. Names are stored in Last, First form. The title stays in sentence case and only rRNA is brace-protected, so styles that recase titles keep its capitalisation.}
@inproceedings{klingenberg_et_al:OASIcs:2013:4232,
  author    = {Klingenberg, Heiner and Martinjak, Robin and Gl{\"o}ckner, Frank Oliver and Daniel, Rolf and Lingner, Thomas and Meinicke, Peter},
  title     = {Dinucleotide distance histograms for fast detection of {rRNA} in metatranscriptomic sequences},
  booktitle = {German Conference on Bioinformatics 2013},
  editor    = {Bei{\ss}barth, Tim and Kollmar, Martin and Leha, Andreas and Morgenstern, Burkhard and Schultz, Anne-Kathrin and Waack, Stephan and Wingender, Edgar},
  series    = {OpenAccess Series in Informatics (OASIcs)},
  volume    = {34},
  pages     = {80--89},
  year      = {2013},
  publisher = {Schloss Dagstuhl--Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-939897-59-0},
  issn      = {2190-6807},
  doi       = {10.4230/OASIcs.GCB.2013.80},
  url       = {http://drops.dagstuhl.de/opus/volltexte/2013/4232},
  urn       = {urn:nbn:de:0030-drops-42324},
  annote    = {Keywords: Metatranscriptomics, metagenomics, rRNA detection, distance histograms},
}
Keywords: Metatranscriptomics, metagenomics, rRNA detection, distance histograms
Collection: German Conference on Bioinformatics 2013
Issue Date: 2013
Date of publication: 09.09.2013