diff --git a/Cargo.lock b/Cargo.lock index d84574f0..946f46a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6346,9 +6346,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.138" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "indexmap", "itoa", diff --git a/docs/notebooks/base_sequence_quality.ipynb b/docs/notebooks/base_sequence_quality.ipynb new file mode 100644 index 00000000..ac49dd6f --- /dev/null +++ b/docs/notebooks/base_sequence_quality.ipynb @@ -0,0 +1,455 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "83d7ccd7", + "metadata": {}, + "source": [ + "### Import dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "58b40aa6", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/user/.pyenv/versions/3.12.9/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "INFO:polars_bio:Creating BioSessionContext\n" + ] + } + ], + "source": [ + "import polars_bio as pb\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "2e29cd0a", + "metadata": {}, + "source": [ + "### Usage examples" + ] + }, + { + "cell_type": "markdown", + "id": "b238193d", + "metadata": {}, + "source": [ + "#### Usage example - .fastq file" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0420c240", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " pos avg q1 median q3 lower upper\n", + "0 47 37.665 37.535714 39.921053 41.060185 32.249008 46.346892\n", + "1 38 37.640 37.964286 40.067308 41.024038 33.374657 45.613668\n", + "2 65 35.995 35.190000 37.433333 39.810000 28.260000 46.740000\n", + "3 3 35.690 35.483333 37.208661 37.600394 32.307743 40.775984\n", + "4 41 37.870 37.678571 40.116071 41.004902 32.689076 45.994398\n", + ".. ... ... ... ... ... ... ...\n", + "96 42 37.780 37.583333 40.139344 40.954918 32.525956 46.012295\n", + "97 43 37.775 38.114583 40.126866 40.869403 33.982354 45.001632\n", + "98 15 38.725 38.226190 40.352459 41.168033 33.813427 45.580796\n", + "99 46 37.790 37.479167 39.975000 41.042453 32.134237 46.387382\n", + "100 24 38.265 38.397059 40.095745 41.125000 34.305147 45.216912\n", + "\n", + "[101 rows x 7 columns]\n" + ] + } + ], + "source": [ + "result = pb.base_sequence_quality(\"example.fastq\", output_type=\"pandas.DataFrame\", target_partitions=2)\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "id": "9886c394", + "metadata": {}, + "source": [ + "#### Usage example - .csv file" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "66c3af24", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (101, 7)
posavgq1medianq3lowerupper
u64f64f64f64f64f64f64
4737.66537.53571439.92105341.06018532.24900846.346892
9032.27533.18333334.98684235.6396129.49891839.324026
2637.85538.02884640.07894740.95175433.64448445.336117
735.435.46590936.9062537.48737432.43371240.519571
335.6935.48333337.20866137.60039432.30774340.775984
1938.42538.237540.23846241.00490234.08639745.156005
9430.77532.05769234.65151535.5126.87923140.688462
1738.50538.12540.12745141.0937533.67187545.546875
7932.4631.9687535.20535736.437525.26562543.140625
4137.8737.67857140.11607141.00490232.68907645.994398
" + ], + "text/plain": [ + "shape: (101, 7)\n", + "┌─────┬────────┬───────────┬───────────┬───────────┬───────────┬───────────┐\n", + "│ pos ┆ avg ┆ q1 ┆ median ┆ q3 ┆ lower ┆ upper │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞═════╪════════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════╡\n", + "│ 47 ┆ 37.665 ┆ 37.535714 ┆ 39.921053 ┆ 41.060185 ┆ 32.249008 ┆ 46.346892 │\n", + "│ 90 ┆ 32.275 ┆ 33.183333 ┆ 34.986842 ┆ 35.63961 ┆ 29.498918 ┆ 39.324026 │\n", + "│ 26 ┆ 37.855 ┆ 38.028846 ┆ 40.078947 ┆ 40.951754 ┆ 33.644484 ┆ 45.336117 │\n", + "│ 7 ┆ 35.4 ┆ 35.465909 ┆ 36.90625 ┆ 37.487374 ┆ 32.433712 ┆ 40.519571 │\n", + "│ 3 ┆ 35.69 ┆ 35.483333 ┆ 37.208661 ┆ 37.600394 ┆ 32.307743 ┆ 40.775984 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 19 ┆ 38.425 ┆ 38.2375 ┆ 40.238462 ┆ 41.004902 ┆ 34.086397 ┆ 45.156005 │\n", + "│ 94 ┆ 30.775 ┆ 32.057692 ┆ 34.651515 ┆ 35.51 ┆ 26.879231 ┆ 40.688462 │\n", + "│ 17 ┆ 38.505 ┆ 38.125 ┆ 40.127451 ┆ 41.09375 ┆ 33.671875 ┆ 45.546875 │\n", + "│ 79 ┆ 32.46 ┆ 31.96875 ┆ 35.205357 ┆ 36.4375 ┆ 25.265625 ┆ 43.140625 │\n", + "│ 41 ┆ 37.87 ┆ 37.678571 ┆ 40.116071 ┆ 41.004902 ┆ 32.689076 ┆ 45.994398 │\n", + "└─────┴────────┴───────────┴───────────┴───────────┴───────────┴───────────┘" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pb.base_sequence_quality(\"example.csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "5ab1da41", + "metadata": {}, + "source": [ + "#### Usage example - .parquet file" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a2cb9c97", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (101, 7)
posavgq1medianq3lowerupper
u64f64f64f64f64f64f64
4437.56537.7187540.13392941.02403832.76081745.981971
2738.4438.23437540.2741.20703133.77539145.666016
7233.26533.86111135.687537.92391327.76690844.018116
8631.81533.36538535.13291135.76265829.76947439.358569
9032.27533.18333334.98684235.6396129.49891839.324026
3438.20537.97916740.15957441.16803333.19586745.951332
8432.41533.2535.18243235.8547329.34290539.761824
4937.2136.67857139.64583340.76388930.55059546.891865
6336.2535.21590938.02631640.27527.62727347.863636
8032.6132.62535.28947436.42045526.93181842.113636
" + ], + "text/plain": [ + "shape: (101, 7)\n", + "┌─────┬────────┬───────────┬───────────┬───────────┬───────────┬───────────┐\n", + "│ pos ┆ avg ┆ q1 ┆ median ┆ q3 ┆ lower ┆ upper │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞═════╪════════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════╡\n", + "│ 44 ┆ 37.565 ┆ 37.71875 ┆ 40.133929 ┆ 41.024038 ┆ 32.760817 ┆ 45.981971 │\n", + "│ 27 ┆ 38.44 ┆ 38.234375 ┆ 40.27 ┆ 41.207031 ┆ 33.775391 ┆ 45.666016 │\n", + "│ 72 ┆ 33.265 ┆ 33.861111 ┆ 35.6875 ┆ 37.923913 ┆ 27.766908 ┆ 44.018116 │\n", + "│ 86 ┆ 31.815 ┆ 33.365385 ┆ 35.132911 ┆ 35.762658 ┆ 29.769474 ┆ 39.358569 │\n", + "│ 90 ┆ 32.275 ┆ 33.183333 ┆ 34.986842 ┆ 35.63961 ┆ 29.498918 ┆ 39.324026 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 34 ┆ 38.205 ┆ 37.979167 ┆ 40.159574 ┆ 41.168033 ┆ 33.195867 ┆ 45.951332 │\n", + "│ 84 ┆ 32.415 ┆ 33.25 ┆ 35.182432 ┆ 35.85473 ┆ 29.342905 ┆ 39.761824 │\n", + "│ 49 ┆ 37.21 ┆ 36.678571 ┆ 39.645833 ┆ 40.763889 ┆ 30.550595 ┆ 46.891865 │\n", + "│ 63 ┆ 36.25 ┆ 35.215909 ┆ 38.026316 ┆ 40.275 ┆ 27.627273 ┆ 47.863636 │\n", + "│ 80 ┆ 32.61 ┆ 32.625 ┆ 35.289474 ┆ 36.420455 ┆ 26.931818 ┆ 42.113636 │\n", + "└─────┴────────┴───────────┴───────────┴───────────┴───────────┴───────────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pb.base_sequence_quality(\"example.parquet\")" + ] + }, + { + "cell_type": "markdown", + "id": "b519abbd", + "metadata": {}, + "source": [ + "#### Usage example - `polars.lazyframe.frame.LazyFrame` object" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1899ca01", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:polars_bio:Table: example registered for path: ./example.fastq\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "200rows [00:00, 62156.25rows/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "shape: (101, 7)
posavgq1medianq3lowerupper
u64f64f64f64f64f64f64
6436.09535.06481537.740.14583327.44328747.767361
435.6835.55288537.20866137.60039432.48162140.671657
6835.9134.94047636.93181839.53947428.0419846.43797
7233.26533.86111135.687537.92391327.76690844.018116
5836.7735.33823538.9062540.71710527.2699348.78541
2637.85538.02884640.07894740.95175433.64448445.336117
937.3637.3437539.01470639.50245134.10569942.740502
7133.00533.79166735.85135138.58333326.60416745.770833
1137.7137.78448339.09459539.54279335.14701842.180258
2438.26538.39705940.09574541.12534.30514745.216912
" + ], + "text/plain": [ + "shape: (101, 7)\n", + "┌─────┬────────┬───────────┬───────────┬───────────┬───────────┬───────────┐\n", + "│ pos ┆ avg ┆ q1 ┆ median ┆ q3 ┆ lower ┆ upper │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞═════╪════════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════╡\n", + "│ 64 ┆ 36.095 ┆ 35.064815 ┆ 37.7 ┆ 40.145833 ┆ 27.443287 ┆ 47.767361 │\n", + "│ 4 ┆ 35.68 ┆ 35.552885 ┆ 37.208661 ┆ 37.600394 ┆ 32.481621 ┆ 40.671657 │\n", + "│ 68 ┆ 35.91 ┆ 34.940476 ┆ 36.931818 ┆ 39.539474 ┆ 28.04198 ┆ 46.43797 │\n", + "│ 72 ┆ 33.265 ┆ 33.861111 ┆ 35.6875 ┆ 37.923913 ┆ 27.766908 ┆ 44.018116 │\n", + "│ 58 ┆ 36.77 ┆ 35.338235 ┆ 38.90625 ┆ 40.717105 ┆ 27.26993 ┆ 48.78541 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 26 ┆ 37.855 ┆ 38.028846 ┆ 40.078947 ┆ 40.951754 ┆ 33.644484 ┆ 45.336117 │\n", + "│ 9 ┆ 37.36 ┆ 37.34375 ┆ 39.014706 ┆ 39.502451 ┆ 34.105699 ┆ 42.740502 │\n", + "│ 71 ┆ 33.005 ┆ 33.791667 ┆ 35.851351 ┆ 38.583333 ┆ 26.604167 ┆ 45.770833 │\n", + "│ 11 ┆ 37.71 ┆ 37.784483 ┆ 39.094595 ┆ 39.542793 ┆ 35.147018 ┆ 42.180258 │\n", + "│ 24 ┆ 38.265 ┆ 38.397059 ┆ 40.095745 ┆ 41.125 ┆ 34.305147 ┆ 45.216912 │\n", + "└─────┴────────┴───────────┴───────────┴───────────┴───────────┴───────────┘" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a_lazyframe = pb.read_fastq(\"./example.fastq\")\n", + "print(type(a_lazyframe))\n", + "pb.base_sequence_quality(a_lazyframe)" + ] + }, + { + "cell_type": "markdown", + "id": "cdb4aad6", + "metadata": {}, + "source": [ + "#### Usage example - `polars.dataframe.frame.DataFrame` object" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7830b8aa", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:polars_bio:Table: example registered for path: ./example.fastq\n", + "200rows [00:00, 66234.57rows/s]" + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "shape: (101, 7)
posavgq1medianq3lowerupper
u64f64f64f64f64f64f64
335.6935.48333337.20866137.60039432.30774340.775984
2938.59538.3540.25833341.0937534.23437545.209375
4437.56537.7187540.13392941.02403832.76081745.981971
7630.26530.91666735.41489437.07352921.68137346.308824
6735.9634.72368436.97222239.8127.09421147.439474
1538.72538.2261940.35245941.16803333.81342745.580796
2637.85538.02884640.07894740.95175433.64448445.336117
3937.89538.02777840.12540.85661833.78451845.099877
9730.6731.57534.89062535.55722925.60165741.530572
5137.5336.7187539.44736841.02403830.26081747.481971
" + ], + "text/plain": [ + "shape: (101, 7)\n", + "┌─────┬────────┬───────────┬───────────┬───────────┬───────────┬───────────┐\n", + "│ pos ┆ avg ┆ q1 ┆ median ┆ q3 ┆ lower ┆ upper │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞═════╪════════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════╡\n", + "│ 3 ┆ 35.69 ┆ 35.483333 ┆ 37.208661 ┆ 37.600394 ┆ 32.307743 ┆ 40.775984 │\n", + "│ 29 ┆ 38.595 ┆ 38.35 ┆ 40.258333 ┆ 41.09375 ┆ 34.234375 ┆ 45.209375 │\n", + "│ 44 ┆ 37.565 ┆ 37.71875 ┆ 40.133929 ┆ 41.024038 ┆ 32.760817 ┆ 45.981971 │\n", + "│ 76 ┆ 30.265 ┆ 30.916667 ┆ 35.414894 ┆ 37.073529 ┆ 21.681373 ┆ 46.308824 │\n", + "│ 67 ┆ 35.96 ┆ 34.723684 ┆ 36.972222 ┆ 39.81 ┆ 27.094211 ┆ 47.439474 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 15 ┆ 38.725 ┆ 38.22619 ┆ 40.352459 ┆ 41.168033 ┆ 33.813427 ┆ 45.580796 │\n", + "│ 26 ┆ 37.855 ┆ 38.028846 ┆ 40.078947 ┆ 40.951754 ┆ 33.644484 ┆ 45.336117 │\n", + "│ 39 ┆ 37.895 ┆ 38.027778 ┆ 40.125 ┆ 40.856618 ┆ 33.784518 ┆ 45.099877 │\n", + "│ 97 ┆ 30.67 ┆ 31.575 ┆ 34.890625 ┆ 35.557229 ┆ 25.601657 ┆ 41.530572 │\n", + "│ 51 ┆ 37.53 ┆ 36.71875 ┆ 39.447368 ┆ 41.024038 ┆ 30.260817 ┆ 47.481971 │\n", + "└─────┴────────┴───────────┴───────────┴───────────┴───────────┴───────────┘" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a_lazyframe = pb.read_fastq(\"./example.fastq\")\n", + "a_dataframe = a_lazyframe.collect()\n", + "print(type(a_dataframe))\n", + "pb.base_sequence_quality(a_dataframe)" + ] + }, + { + "cell_type": "markdown", + "id": "ddf5da9d", + "metadata": {}, + "source": [ + "#### Usage example - `pandas.core.frame.DataFrame` object" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "56817174", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:polars_bio:Table: example registered for path: ./example.fastq\n", + "200rows 
[00:00, 91799.17rows/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "shape: (101, 7)
posavgq1medianq3lowerupper
u64f64f64f64f64f64f64
7133.00533.79166735.85135138.58333326.60416745.770833
7430.8332.7535.53260937.48529425.64705944.588235
4537.4536.5937540.07142941.07727329.86846647.802557
4937.2136.67857139.64583340.76388930.55059546.891865
7531.0630.95833335.41836737.28333321.47083346.770833
5037.42535.97539.77083341.06018528.34722248.687963
335.6935.48333337.20866137.60039432.30774340.775984
030.13531.21363633.72222234.48214326.31087739.384903
3738.038.21527840.08035740.9687534.08506945.098958
6535.99535.1937.43333339.8128.2646.74
" + ], + "text/plain": [ + "shape: (101, 7)\n", + "┌─────┬────────┬───────────┬───────────┬───────────┬───────────┬───────────┐\n", + "│ pos ┆ avg ┆ q1 ┆ median ┆ q3 ┆ lower ┆ upper │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ u64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", + "╞═════╪════════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════╡\n", + "│ 71 ┆ 33.005 ┆ 33.791667 ┆ 35.851351 ┆ 38.583333 ┆ 26.604167 ┆ 45.770833 │\n", + "│ 74 ┆ 30.83 ┆ 32.75 ┆ 35.532609 ┆ 37.485294 ┆ 25.647059 ┆ 44.588235 │\n", + "│ 45 ┆ 37.45 ┆ 36.59375 ┆ 40.071429 ┆ 41.077273 ┆ 29.868466 ┆ 47.802557 │\n", + "│ 49 ┆ 37.21 ┆ 36.678571 ┆ 39.645833 ┆ 40.763889 ┆ 30.550595 ┆ 46.891865 │\n", + "│ 75 ┆ 31.06 ┆ 30.958333 ┆ 35.418367 ┆ 37.283333 ┆ 21.470833 ┆ 46.770833 │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 50 ┆ 37.425 ┆ 35.975 ┆ 39.770833 ┆ 41.060185 ┆ 28.347222 ┆ 48.687963 │\n", + "│ 3 ┆ 35.69 ┆ 35.483333 ┆ 37.208661 ┆ 37.600394 ┆ 32.307743 ┆ 40.775984 │\n", + "│ 0 ┆ 30.135 ┆ 31.213636 ┆ 33.722222 ┆ 34.482143 ┆ 26.310877 ┆ 39.384903 │\n", + "│ 37 ┆ 38.0 ┆ 38.215278 ┆ 40.080357 ┆ 40.96875 ┆ 34.085069 ┆ 45.098958 │\n", + "│ 65 ┆ 35.995 ┆ 35.19 ┆ 37.433333 ┆ 39.81 ┆ 28.26 ┆ 46.74 │\n", + "└─────┴────────┴───────────┴───────────┴───────────┴───────────┴───────────┘" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a_lazyframe = pb.read_fastq(\"./example.fastq\")\n", + "a_pandas_dataframe = a_lazyframe.collect().to_pandas()\n", + "print(type(a_pandas_dataframe))\n", + "pb.base_sequence_quality(a_pandas_dataframe)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "3.12.9", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 
4, + "nbformat_minor": 5 +} diff --git a/docs/notebooks/example.fastq b/docs/notebooks/example.fastq new file mode 100644 index 00000000..4b357e52 --- /dev/null +++ b/docs/notebooks/example.fastq @@ -0,0 +1,800 @@ +@SRR9130495.1 D00236:723:HG32CBCX2:1:1108:1330:1935/1 +NCAATACAAAAGCAATATGGGAGAAGCTACCTACCATGCTTAAAAACGCCAATGAGCAGNGATTTGTCANCNNNNNNNNCNNNNNNNNTNNTANNANNCTC ++ +#4BDFDFFHGHGGJJJHIIIIGGIIJGJJGIIIIBHIJJJIIJIJJIJDHIGGGIJJJI#-@AEHGEFF#,########,########+##++##+##+2< +@SRR9130495.2 D00236:723:HG32CBCX2:1:1108:1472:1938/1 +NGTCAAAGATAAGATCAAAAGGCACTGGCTTACCTGATTAAGAAATTGTGTAGTCCAACATCAAAATACNTNTNNNNNAGAGNCANGNCAAGCNNANNAAT ++ +#1=DDDDD>DHFH@EFHHGHGGFGIIIGIGGGGIIGIIDDCHIIIIIID@FEGGGIIIIICHIIIIIIG#-#-#####,,;;#,5#,#,,85@A:AB@8>@:@A@9(:((+(834 +@SRR9130495.6 D00236:723:HG32CBCX2:1:1108:2392:1965/1 +CGATAAAGGACTTTCAGTCAACCAACTAGATAATGACCACTGGGCACCCATTCATTATGCATGCTGGTAAATAAATTATTCTGTTCAGGAACATTGAACTC ++ +CC@DDDBDFFHHHJJIJJIIJIJJJIIIHGIGGHCGGIGHHAACC??A< +@SRR9130495.11 D00236:723:HG32CBCX2:1:1108:4089:1977/1 +CATTCCAACCAGCCGCTTAAAGTTTCTAAAAGAAGCTGGTCATGGAACCCAGAAGGAGGAGATACCTGAGGAGGAATTAGCAGAGGATGTTGAAGAGATTG ++ +CCCFFFDFHHHHHJIJJJJIIJJJJJJJJJJJIIGJJJIGHGIJJIJJIJJIIFHGHIJJGHEHHFCEFFDEDDBDDDDDDDDDDDBACDDDDDDDDDDDD +@SRR9130495.12 D00236:723:HG32CBCX2:1:1108:6197:1936/1 +NCTTAAAGGCAAGGTGCTCGGCTTCCGCTATCAAGACCTCCGACAGAAAATCCGGCCTGNGGCTAAAGANCNNNNNNNNANNNNNNNNCNNGGNNCNNGGC ++ +#1BDFDEFGHHHHIJJJJJJIJJJJJJJJJJIJJIGIJJIJJJIJJJJHIJJHHHFFDC#,;?BBDDDD#,########+########+##++##+##++8 +@SRR9130495.13 D00236:723:HG32CBCX2:1:1108:6415:1939/1 +NTGTGTATGGGGATGAGGAAGGATATTAATATGTTCTATTTGAGATTTAGGGATTACATTTGTTTTTGCNCNCNNNNNTTTTNTCNTCATTTGNNGTNAAT ++ +#1:ADDDFHGGHFGGBHIGGIIJJEIIIJJIJIJJJFGJIIHGHGDGHJGHIHIIIIJJIIHFHIIJJJ#-#-#####,,;?#,;#,8?DDEE##,+#+2< +@SRR9130495.14 D00236:723:HG32CBCX2:1:1108:6361:1952/1 +TCAGATCTTATTTTAATAGTTGACTTTACCTCTTCTTTGACTTCCTCTTCCTCGGTCTCAGTAGATATAGATGGTACCTTGGGCTTATGCCATGAGATCTG ++ 
+CCCFFFFFHHHHDHIIIIJJJJJJJJJJHIIIJJJJJJJIHJHJII>GHIEGHIIIIJJJIJJIHHIJJJIJIGHIJJGJJHFHHFFDCFFEDCEDCCDEC +@SRR9130495.15 D00236:723:HG32CBCX2:1:1108:6263:1960/1 +CAATATCTGACTGAATGGGCCCATTTTCATAATATTCTGAAACTGTTCATACATGTCTCGCAATGTAAACTGACCTGAAATGCAATACAAAAAAATTCAGA ++ +CCCFFFFFHHGHGGGIIIIJJIJIJJIIIIIIIIEIHGGIEGGHGIJJIJJJCECGHIIIJJJJJIFGHIIHGIIJJHHGEFHDFFFFFCCCDDBBCCDCA +@SRR9130495.16 D00236:723:HG32CBCX2:1:1108:6338:1988/1 +AGACACTAAAATGCCATGTATGAGACTACATAGACATACCAATTTACAACACAAACACATGAAATATACATGAGAAAACATTAACTTACTTCCAGTTGGGA ++ +C@CFFDDDHGHHHIIIIHJJIIHIHGIJJJJJIJJJIJIIHEHIIJJJJJJIJJIIIIIJJDHIJIJGIJIJJIJJHHGGHFFFFFFFEEEDCCC@ACCBA +@SRR9130495.17 D00236:723:HG32CBCX2:1:1108:6742:1944/1 +NGAAACACTCTTTTTCTGGAATTTGCAAGTGGAGATTTCAGCCGCTTTGTGGTCAATGGTAGAAAAGGAAATATNTNCATATAAAAACTAGACAGAATGAT ++ +#1BDFDFFHGHGHJJJJJJJJHIJHHIJIIJJIJJGIJJIJIIJJGIJIHIJJJIIJJJHIEIIJJJJIHDEFH#,#,5=ADEEEDCDDDDDDDCDD5@CD +@SRR9130495.18 D00236:723:HG32CBCX2:1:1108:7076:1942/1 +NGTCTAAGAATGAAGTGCTTATGGTCAACATAGGCTCCCTGTCGACAGGAGGAAGAGTTAGTGCAGTCAAGGCTNANTTGGGCAAAATTGTTTNNACCAAT ++ +#1:B:BDDHHFFHIEGGIGIDEHGEEGIGHIIIFACHIGIGCFGEHIGGHHGHGH@DCGGIDEIIIIFFHHAEE#,#,5=@BBC@BCCCCCCC##,+8?BC +@SRR9130495.19 D00236:723:HG32CBCX2:1:1108:7440:1957/1 +CACCTGATGTCCCACAGTCCTCATAGACACTAGCACTGACTGCTGGCCATCGTCTCAGCCAGATGATGTTGACCTGCTAGCTTTTCAATTAAATTATTAAA ++ ++=144=DD>D4CCD?E@AECFFIIIIEI?E+??;3EBDECEIBEECDIEIIADDDDEIDCDCA;=A@CECE7ACD=(;;A@DA@A@A:ADAAAD>AADBB> +@SRR9130495.20 D00236:723:HG32CBCX2:1:1108:7363:1977/1 +ACTCATAGAGTTGAAGATTCCCTTTCATAGAGCAGGTTTGAAACACTCTTTCTGGAGTATCTGGATGTGGACATTTGGAGCGCTTTGATGCCTACGGTGGA ++ +CCCFFDDDFHHHHJJJIJJJJHHIIHGIJIJJJIGGHGIJIJJIIIJJJJJIJIGBFDHIIJIJGIJIGGCHGGIJHIIHHHFECDECEEDCDDCBD9?B? 
+@SRR9130495.21 D00236:723:HG32CBCX2:1:1108:7298:1979/1 +AACCGTCGCCAGGTACCATCCCAGAGAACTCTGTCTTCCTTACTTATAGCCAAGTTGCCGGCAGATCACAGCTGCATGCTTATCGGTCCATCCGTCATCGC ++ +BCCDDDFFGGFGFJIBBHHIJJIIHHIIIGJIIHIIJJJIIEIGHJIIJJJJIIHHGIJFGHGFDDCEECDDDDDDDDDDEDDDDDDDBDEDDDCCC +@SRR9130495.23 D00236:723:HG32CBCX2:1:1108:7307:1995/1 +TAATTTGGTATATGTCTTTTTAAAGGCATTTTTATTAGATATTTCCTTAATTTACATTTCAAATGTTATCCCCAAAGCCCCCTATAATCTACCCCTGCCTT ++ +;?7DD?;DBFHHFIA;A@AABBB@B?2<58+:?<<9<@: +@SRR9130495.25 D00236:723:HG32CBCX2:1:1108:7870:1955/1 +CTATCCCGTCGGGTGACTGTTTCCTGCTTTGCAGTTATTCAGTGGCAGAGCGTGGCGCTCTAATTTCTGCTTTCCTCTTTCCTGCAGATTGTGTGCTACAT ++ +CCCFFFDFFHHFHBHGJIIIHGIIIJJIJIIIIJHIIIIEIJJIIJJJJIJIJIJDFEDDDCEEEEEEDDDDDDDDCDDDCDDDDDDDCDDDCDDDDDDDD +@SRR9130495.26 D00236:723:HG32CBCX2:1:1108:8157:1994/1 +GAAGTGCTCTTCGTTACTACTTAAATCCCCCTGGGCATGTTTCATTATTTTACAATTTGTGCAGAACCCTATCCAAACACACATGGAGTACAAATGACTTC ++ +CCCFFDDDDHHDHHIGIIBIHHHJJJ@FGGA?@CCCCCC@CCCDD +@SRR9130495.27 D00236:723:HG32CBCX2:1:1108:8703:1937/1 +NATAAAAAAATAACATCCTTTCCTCCTAATAGCTTAATTATTTGAAAAAAAATATTTTCNAATCACATGNANNNNNNNNTNNNNNNNNTCTTTNNTNNCCT ++ +#1=DFDDDHHHHHIIJJJJJFIJJJIGIEGIIJJJHHIJJJJJIIJIIIIIJEHEHHHA#,;??AEDDC#,########,########++8??##+##(+2 +@SRR9130495.28 D00236:723:HG32CBCX2:1:1108:8702:1991/1 +CTCAGAGATTAAAAATGAATAACGCCTGCCGGCCAATGAGCGGACTCACAGTCCCTGTTTGTTTGTAAGCTAGGTGATTTTCAATCCACAGGGCAGGCTGA ++ +@@@DFDFFGGDHFGIIBGEHJIIIBGIIIJJIIJIJIIFGGEGIJJJHGGHHHDCFFDDDEDEDDDDDCDDDCBDDACDEDEDDDDCCCCDGII@GIFGFE???BFBBEHIGGIEEHIHIGGHHGIHA3?CH;?@CB@DCCEDEDD3 +@SRR9130495.34 D00236:723:HG32CBCX2:1:1108:10535:1962/1 +AATACAGAAAAGTTAAGAGCCAGCCCCAGGCGGATTGGATGAATAGGTTGCATCTCTTTCTTGCTTATATCAAATGCCTCTTGGCAGGCTCCTTGGGAATT ++ +???BD:A:3=?B:;?;;CA;;;A3-5>5-5:(::@8/2?8?9>>3(83(+ +@SRR9130495.35 D00236:723:HG32CBCX2:1:1108:11147:1968/1 +GCTGCCTTCTCCCCTCAAGGATGCAGTGGAAGTGTCAACCTGGAGAAGATGCTACACGATGCAGGAGGTGAACTCGGCCCTCAGTAAAATCCAGCTGGTGG ++ +CCCFFFDFHHHGHJFEGIHEGHIGIIGCCGGGIIJJIJJGGGGHCGIF>DGHIIIEHHGIIJJDBGHFHFCCFFFDDCDDDDBDDDCCDEDA:CACD@CDD 
+@SRR9130495.36 D00236:723:HG32CBCX2:1:1108:11124:1986/1 +GACAGGGTTTCACCATGTTAGCCAGGACGGTCTTGATCTCCTGACCTCGTGATCCGCCTGTCTCGGCCTCCCAAAGTCCTGGGATTACAGGCGTGAGCCAC ++ +CCCFFFFBFFFHGGHGHIHIIGIJFJJJIJHIIIFGHGGIIGEGHIH@@FGGIJIIGGIHIEEHEEFFDDCCCCBDDACCDCDBAACCDDDBDDDDBDDCC +@SRR9130495.37 D00236:723:HG32CBCX2:1:1108:11773:1947/1 +TACCTGCCTCTGCCTCTCGAGTGCTGGGATGAAAGATGTGCACACCCCCACCACCACCACCACCACTGCCTGGCNCNGTTTTTGATTTCTTATTCTCCAGA ++ +CCCFFFFFHGHHHJIJIJIIEHIJJIJJIJJIHIHJJIGFIIIIJBHHGFF>HGIJIEHHHFGDEEEEEECC?B#,#,,5?@HF1CGBGHCDDFF7=DEG.?AAC?3@EED;6@;>(-;@AC?31(,5?(4::A>AC +@SRR9130495.46 D00236:723:HG32CBCX2:1:1108:14226:1948/1 +AACCTGCACCCAGAATGGCAGGAGGTCCTGGTGGCCCAGGGGGTCCTGGTGGTCCAGGAACACCAGGTCTCCCANANCCAGGTGGCCCAGGCAGGCCTGGA ++ +??1D1BDDHFHHBBHGGGAFG)AF1:?CEGD@BGAFDGIGGB'5;CE77?=???>>CC3#+#++228>>28((2(>ACC?CCDDCCD@ +@SRR9130495.48 D00236:723:HG32CBCX2:1:1108:15049:1993/1 +TGACCAAGAACTCACAGAGATCCCCCCCCCCAGGGCTAAGATTAAAGGCATGTGCCACTGCCACTGGATAGATATTATCTTTTATTTTACCTGACTGGTTG ++ +@CCFFDDDHDFHGJJJJIJGIHHHIIJJIJFE=(5;?@>@A@;@C>CAB=@<:@C@CA:>CC3:>CB<@@?344>>@@A>CDC>ACDD:>4>>>ACD?CC( +@SRR9130495.49 D00236:723:HG32CBCX2:1:1108:15487:1951/1 +ACGGAGGGTGGGGCTGGGTGATTGTGGTTGTCTCCTTCTTCACCCAGTTCCTCTCTTACGGATCCCCGTTAGCTNTNGGGGTCTTGTATGTAGAATGGCTG ++ +BCCFD@DFDFHHGDGIIICGHIIJJIJGHIIJIIJJJIJIJIJEHIGHHHHHHHFFFFFFDDCDDCD@DDDDD:#+#+2<@8GHIIIEHGACFHIIGGIBHIIHHC??BDC@BBCECDCE63>@@CACCDD@ +@SRR9130495.51 D00236:723:HG32CBCX2:1:1108:15923:1957/1 +CCGCAACTGCCATGGAGCCACAGCCTGGTCCGTAATAGATGCAAAGCTTCTCAATAGTCAGGGGCGTGGTTTCGCGCAGCTTGGAGGCCAGCAACAGGCAG ++ +CCCFFFFFHFHHHIIFIIIGIIFIIGJIIJJIFHGHIIIIIJIIFIJIIIJJJJJJJGGIGJFFH@?BBBDBCDDDDDDDDDDDDDDDDDDDDDDDDDDD? 
+@SRR9130495.52 D00236:723:HG32CBCX2:1:1108:15793:1968/1 +GTTTTTCCACAGACCTCTGATCTCTTACATTCGAAAGTTCTACTACTATGATCCTCAGGAAGAGGTGTACCTGTCCCTAAAGGAAGCGCAGCTCATTTCCA ++ +BB@FFFFFFFHGDGFHJJJJIFIIGGG@FIIIGIBGGDHIIIJCHIIJJDHIIJJJIIIGIIJJFHHIJCHGDHIIHHHHHFFFFFDDDDDDCDDDDEDDE +@SRR9130495.53 D00236:723:HG32CBCX2:1:1108:16082:1968/1 +GAAACTTGTTTGTGACGTGTGTATTCAACTAACAGAGTTGAACCTTTCTTTTTACAGAGCAGCTTTGAAACACGCTTTTTGTAGAATCAGATCGGAAGAGC ++ +@<@ADDDECBFFHIFEGDG;EEHHB@FHGHIIGGEHGGHGGEGEGICGCEFFDB@D>AE>ADC@CBBFGIIJJJJIHHIJJGH?DGGHCFHHIJJEEIIJAHIJJFG=FGGGIFHHIIDHGEHIHHHHGHGEB;?CDCECEEDD +@SRR9130495.55 D00236:723:HG32CBCX2:1:1108:16048:2000/1 +AGGACAGGAAGGACGCTTTGAGATATGATTTCACAGGCGACAGTGAGAGAAAACCAATGTCTTTAATGCATTTCTCTGCAGCATGTGACAAACTTTCAACA ++ +CCCFFFFDFHGHHIIJJJJIFGGIJJIIIIJIJIHJIIJIHGIJJIJGGIIJIJJHHHHEDCDFFFEEEDDEDEEDDDDDDDDDDCDEDDCDDDDDDCCD@ +@SRR9130495.56 D00236:723:HG32CBCX2:1:1108:16580:1970/1 +GCCTGTACTCCCAGCTACTTGGGAGGCTGAGACAGGAGAATCACTTGAACCCAGGAGGTGGAGGTTGCAGTGAGCCAAGACCGGGCTATTGCACTAGATCG ++ +@@@FFD?DBFFHHIIEHCIJIFIJHEDCH@GGEBFHHIJJIIIIIJJJJJIFHFHF@G-6@AAEDFFF>AEC@@A?;=?BCD6/<@>BAAC>:@C:CDDA? 
+@SRR9130495.57 D00236:723:HG32CBCX2:1:1108:16594:1971/1 +TCCTCTGACTTTGACACTAGTGTTGACCTTGCATGAGGAGATGTTCTCCATTTGGACTAACCTGATGTACACAGACGTTACACTTATCACAGAATACCATA ++ +CCCFFDDFFHFHFHIJIJJIHIIIJJJJJIJJJJIIIIHIGIIJJJIGIIJJIIJEHIIJIJJJIGIJJJEIHJJIIHHEFFFFFFCCEEEEDCDDDDDDC +@SRR9130495.58 D00236:723:HG32CBCX2:1:1108:16808:1998/1 +ACCAATTTTCCCCTCCCCTTCCTCCCTCCCTCCCAGCCCCCTTCCTCTCTCTACCTCCTGTTATTGTTTTGTTCCTTGTTCTATGTAGGATTGAAGCATCT ++ +@@@FFDDFHDFHGIJJJJIIGGDFHI>DGC;DHI9??FHIJIIHG>>3>;ACCCC9:@>@:>AA +@SRR9130495.59 D00236:723:HG32CBCX2:1:1108:17428:1967/1 +TGCATGGTGCTGAAAGCTTTGTTGCAGCTTTTCTTGGGATTGCTTAGCTGCTCCGGGTCGATCCACTTGCAGATGAGCTCTTGCTTGATGCACTGCTGCCG ++ +?<A3>A>>AAAA?3 +@SRR9130495.60 D00236:723:HG32CBCX2:1:1108:17508:1988/1 +TTGTTCAGAAAAAAGTATCTTGAAACCAAAAGAACTGGGATCTTGTTAAATGCAGATTCTGTTCATTAGGTATAGGTATGCAGTCTTACAAAATGAGGTAG ++ +CCCFFEFFHHHHHJJJJJJJJJJJJJJJJJGIJJJJJJJIIJJJJJJJJIJJIJJJJJJJJJJJIJJIIIEHHBHGHHEFFFFFEEEEDEEDDCDCDDCDC +@SRR9130495.61 D00236:723:HG32CBCX2:1:1108:18425:1951/1 +CCACTTAATAAATCACCTATCAAGTTGAATTATTTGTGCAAAGGCACTAGGCTGAATAGAGACCACTCAGTAGCNTNTTTTTAATCTTGCTAAGAAAGAAT ++ +CCCFFDDFHFHHGJIIJJJJJJHGHEHHHIIJIJJIJGJIHIJHEFDHGHIFIJJGHIJJJFIGGIHIIIJIII#-#-5@DFFEEEDEEEDEDDACDCCBC +@SRR9130495.62 D00236:723:HG32CBCX2:1:1108:18468:1964/1 +CTATTGACTTTTATTAGAAAGGGTCTTGTTGCATAGGTAGGTCTTTAACAACCATCTCTTAAAGGGCTGGGATTGCCAGAGTAGGCCAACACGCCCAGCTA ++ +CCCFFFFFGGHHGIGIJIJJIIIJJJJIJJIJJJJIIJDGEHGIIJJJIJJJHIJJIGFHGIJGEGIJJJIIHHHHHHFFFFFEDEEDDDDDDDBDDBBD@ +@SRR9130495.63 D00236:723:HG32CBCX2:1:1108:18615:1941/1 +NAGCCGAGAGGCGCCGGCTCACCTGCCTGGGTCCCGGCCTTTCTCCTGCAGTGCCAGGGATTCACCTGANGNCNNNNNNTCTNCTAGGCAAGCNNATNCTT ++ +#1:DDFDDHHHHHIIJIIFGJJIEFHHIHHIIIJJIEHFFEEEEEEE?DFF;5:AAB9>29(#+#++8++4>>:@>AA:@1<@@A(:4? 
+@SRR9130495.72 D00236:723:HG32CBCX2:1:1108:1440:2047/1 +GCTGGTGCAGGACACCAGAATCCGCTCGATCATGCTCCCTAGAGAGGAGGGGCACAGTGAGTACACATAAGCACATGTACACACACACCCAGGACCCAAAG ++ +CCCFFFDEHHGHHHIIIIIIIIJJIJJJGEGHIJIJJJJIIJGHIIJJJIDFEDFFFFEEEEDEDDDDDDDDDDDDDEEEEDDDDDDDDDDDDDDDDDDDA +@SRR9130495.73 D00236:723:HG32CBCX2:1:1108:1468:2080/1 +ACTGTCTTTTTTTTAAAACAGGTGATTGCCCGTTGATTGTTCAGTTTGCTGCTAATGATGCAAGACTTTTATCTGATGCTGCCCTGCTAGTCTGTCCCTAT ++ +CCCFFDDEHFFDHIEIIJHGCGFHCIIHIIIHH@FGGIHIGHIJJIJGIIIJBHIGEIHGHHGGHFFFFFFEEEEDEDDDDDDDDDDDCDDDEDDDDDDDD +@SRR9130495.74 D00236:723:HG32CBCX2:1:1108:1333:2084/1 +ATGAGCACACAAGGGATGATCAGATTGATGGTGTAGAAGAGTGGCTTGCGCTTGATGATGAAGTCATAGGTCACGTCCACATAGCTGGGGTCCTGTGGGTT ++ +CCCFFFFFHHHHHJIJHDIIHHHIIJIIJJJJJIJJIIJIJJJIBDGIJJJJIJJJJGIIIJIAHEEHEFFFFF>EDCDDDDDCDDCDDDDDDDDDCDDDD +@SRR9130495.75 D00236:723:HG32CBCX2:1:1108:1447:2137/1 +TCCACTTGTACAAAAAATTACAAAAATTAGCTGGGCATGGTGGCACACACCTGTAGTCCCAGCTACTCGGGAGGCTGAAGTGGCAGGATCACTTGAGGCAG ++ +CCCFFEFFHFHGHJJJJIJJJJIIDIEHJIIJJJIGJJJHGJIIDHIDGIJIJJIGHIJJJJJHHHFEEBDCDBBDDDDDDDDDDDDDDDDDDDDDCDDD@ +@SRR9130495.76 D00236:723:HG32CBCX2:1:1108:1499:2151/1 +GAGAAAAAGCATCCCTTTAATAAGGCCGCCCCGGTTCCAAATCAATCCTGGCATTGCAGGAGGCAAGGGGGAAACACAGCCACGAAATTGGATTAGCTCTT ++ +CCCFFFFFGGHHHJIIJHIEIJJIIIIJIIGJJIHJIJJIGIJJJIIJJHHHHHFFFFFFDCDBDDDDDDDDDDDCDDDBDBDDBBDDDDDCDDDDCDDCD +@SRR9130495.77 D00236:723:HG32CBCX2:1:1108:1280:2166/1 +GCCTTCTTCCCAGCAGCAATATGGCTCTTTCTTCAGCTCTTATCAGTCACATCCATCAACGAGTGGCTTTTAAAAGGGTATGTTTAAACCTTTTGACGGGA ++ +CCCFFDEFHHHGGJIJJIJIJEIJJJJJIFGIIIIIIIJIIGIJJGIIBHIJJJJIJIEH>CG;CHCHGHICHFFFFFFDDC;@CCEEDDCDDCCDDDDDB +@SRR9130495.78 D00236:723:HG32CBCX2:1:1108:1458:2216/1 +TTTCTTTCCACACATCCCACCTAACACCCAAACTAAGCACTCAGTGCTTGGAATCTCCCCACCCATTCCCTCACCCCTGCTCTTCCATCATTTCCTCCAGC ++ +CCCFFFFFHHHHHIDHIIJJJJHIIJFHIJIJIGIJHGIIIIGJEHIGIIJCGEEHJJJJJJJDHEHHGFFFFFDCDDDDDDDDDDDECCDDEEEDDDDDD +@SRR9130495.79 D00236:723:HG32CBCX2:1:1108:1634:2001/1 
+TGTGCATTTCTCATTTTTCACGATTTTCAGTGATTTCGTCATTTTTCAAGTCGTCAAGTGGATGTTTATGATTTTCCATGATTTTCAGTTTTCTTGCCATA ++ +CCCFFFFFHHHHGJJJJJJJJIJHGIJJJJIIIIJJIGIIJJJIJJJIIFIIJJJJJIJJJIJIIIIIIJIJGJJJJFHHGGHGFFFFCEFFDEEDEEDDD +@SRR9130495.80 D00236:723:HG32CBCX2:1:1108:1566:2120/1 +GGACGAAGTAAGGGAGGAGCAACTGACAACATTCATCTTGTCTGTCTCCTCCACGTCCCGAGGTACAAGGCGGATGTCATTCTTACTAATTTTTTTCTTCT ++ +CCCFFFFFHHHHHIIJJJJIJJJIJJJJJJJIFIJJJIJIJIJIIJJIJIJJJJIJJIIJHHFFEEEEEDDDDDDDCDEDEDEEDDDEDDEECDDDDDDDD +@SRR9130495.81 D00236:723:HG32CBCX2:1:1108:1863:2047/1 +AAATTCGGACCCCTTGGGTGGAATATTCCTTACGAATTCAATGAGACAGATCTAAGAATCAGTGTGCAGCAACTCCACATGTTCCTGGACCAGTATGAGGT ++ +@BCFFFFFHHFHHHIJIJGIIIJIJJJJIIJJIIIIJJJJGIJJJIJJIEGIIGIJIJJJEGHHHGEEHFFFFDEEEDDDDEEEDDDDDDDBBCDDCCCCC +@SRR9130495.82 D00236:723:HG32CBCX2:1:1108:1844:2145/1 +TAACTCTCTGCCTGCGATGTCCCTACCTTCCAGAATGGTGCCATGACAACGGTGTCAACTACAAGATCGGAGAGAAGTGGGATCGGCAGGGAGAAAATGGC ++ +@CCFFFFFHHGHHJJJJJJJIJJJJJIJIIIHIIIJIIJJJJIJIIJIJJJJJJJJJIJJIIJEHHGHHFDFDDDDDCDCDDDDDDDDDDD?>BDDDDDDD +@SRR9130495.83 D00236:723:HG32CBCX2:1:1108:1772:2188/1 +GAGGTAGGGGTGTGTGTGAATGGGTGAGTGTGTGCCTATGCTTGTATGCCATATGAGAGAAAATGCAGCATTTAAAATCAGTGGTTAACGGCCAGCACAGT ++ +B@BFDFDDHHDDHHGIGIJIJGIJ:CFHHIGGIJJIEHGIIIJIIIFHGIJBHIJJJJJJCHHIHHHHHGFDFFFFEEEECEEDDDDDDDDDDBDDDDDCA +@SRR9130495.84 D00236:723:HG32CBCX2:1:1108:2103:2085/1 +TACAAATGTGCCAGGCACTCTTCTAAGTCCTCACATGCATGAAGTTATACAACTCTACAACAAACCTAGGAATATAAACTGAGGGCAGGGACCCCCAGCAA ++ +CCCFFFFFHHHHHJIJIJJJJJJJJJJJJIJJJJJJIJJJIJJJFHIJJJJIIJJJJJIJIIHIICHHIJJJIIIJJHHHFHHFDDDDDBDDDDDBDDDDD +@SRR9130495.85 D00236:723:HG32CBCX2:1:1108:2067:2091/1 +ACCAGCCCTGCTGCCACCCAGCCCACGTCCCGCGCGCCACCCATGCTGCTGCCTCGGAGCTGCAGGGAGCCGGGGAGCCAGGGCCACACGCAGGTGCAGCT ++ +?@@D?A:BF8DDFFFFFFFFAECBF@GFECAEFIIIIIIFBE?DBBD;@CCCCBBBBBB@B::AABBBBBB7>BBB>@BB?B>B>BB?/?A?BCCCBDDDD>@ +@SRR9130495.87 D00236:723:HG32CBCX2:1:1108:2387:2038/1 +GGCTAACCACTGCCTTGTCAAGTTGTGTAGAGTGAGATTCAGGGGTGTTGAAGTAATGTCCTTGTTACTTGCTGTAGGGCATCTGTTTTCTGTGTATCCCA ++ 
+CCCFFDDDDHGHGJEIGHHIJIHGGIIHGIIIIIEGBEHGGHIGGAFHIJJJJJJJJIDGHIIGIJJJIIHGFHEHFDFEDCEECDCDDACCCAACDFCCC +@SRR9130495.88 D00236:723:HG32CBCX2:1:1108:2285:2075/1 +CTGAAAGCTGAGCGTGAGCGTGGTATCACTATTGACATCTCCCTGTGGAAATTCGAGACCAGCAAATACTATGTGACCATCATTGATGCCCCAGGACACAG ++ +?;@DDBDDDFFD>ACGED@D8@):E*::??FFC@;FEF>E;CC=CC=@DDD>?;>A>A>A;AB3;A(;@:??DFBCB4<CCD@=BB@-(4812>>> +@SRR9130495.93 D00236:723:HG32CBCX2:1:1108:2748:2098/1 +CATCATCTTTTTTTTTTTTTTCTCCTGAAAACTGTCTAGTAGTTTGATATATTTTGTCCGAGGTTATTTCAAGTGTTTTTTTTTTTTTTTTTAAAACGGTG ++ +@@@DDDDDHHHHHIIFEHIIH8))7)7CEF9).)7;;>B@>9BD;;(6(55>DDDCBCC@/8-084@CC>C(((+4>?CBBBBBBBBBBB>&23:A(5?(( +@SRR9130495.94 D00236:723:HG32CBCX2:1:1108:2733:2156/1 +GACTGAGAAGAACAGAAAGGGAGAGAGAGGCCAATGGAAATACATGAGAAGGGAGAGAGGGAGAGAGAGGGAGGGAGGGAGGGAAGGAGGGGGAGAGGGAG ++ +CCCFFFEFGHHHHJJGIIIJJHIIHHIIJJJJJIJIIHGJJJGIJJJJJJJJJHGGHHIHHHFDFCDCDDD>BDDBDDDDDDD>BDD?BDDDDBDBDBD@D?ABD@BD?BBDDDC +@SRR9130495.96 D00236:723:HG32CBCX2:1:1108:2818:2076/1 +ACACTTCATGGCAACCTGGCTTAGATTCTTCAAAATTTCTGATCCTATACCAAAGCCTCTGTAATCACTCATCACGAAGAAGTCTTCAAGATACAGTAACT ++ +CCCFFFDDHGHHHJJJJIIJIIHIJHGCHIFEGGJJJJIJIGIIJJJIIJCEHIIIIJIBGHGGIIJIIBFGGGGHHGHFFFFDEEDECEDDDDCD>CCDE +@SRR9130495.97 D00236:723:HG32CBCX2:1:1108:2848:2112/1 +AACCTCTTCTCTTTGTCTTTCTCTTTATCCTTCTCCCTCTTGCCAGGACTGGACTCGCTGGTGATGGTGACGACGCTGGTGGGTAAGGTCTGCGCCCGACT ++ +@@BFFFFEHGHHGIJDEHIJIIIIIIJIFHIIIJEIDHIIIIJJIIIGIEIIJJIIIJFEHFHIIIGIIJIFBEDDBD>=?BB@CACCCAACDDBDBB5;6@;ACBCCCC?@AA>C>?<<<9)?FHFCAG=GFFG>FGGEGHIEEBEDEFC>C@::=BB@BCACCC@CA3:(8@C<8?CC +@SRR9130495.100 D00236:723:HG32CBCX2:1:1108:3014:2117/1 +CCCTCCTGAAAAGGTCCAGCTCCAAAGCCTGACCCGTAGCTGCAGAGAAGAAAGCTTTTCCTCTAAAGGCTGAGGAAAAGATGAAAAATCACTGCTAGAAC ++ +CCCFFFFFHHHHGIJIJJJJJJJJJJJJJJJIJJJIJJIJJJJIJGIDHHIIIJJIJJJJJIJJJHHHHHFFFFDEEEEDDDDDDDDDDDDDDDDDDDDDD +@SRR9130495.101 D00236:723:HG32CBCX2:1:1108:3316:2011/1 +GCAGAGCTGAATGGGCAAGCCCAGGACCCTTTTCAGACATTCTGCTGGCCTTTGGAAAGTGTACTCCTGTTGTATTTGATTACTTTTAGAGGACAGTACAT ++ 
+CCCFFDDFHHHGHJJJJIJFGIGHJGIJIIIIIHIIGIIJIIIIJJJIIIIJIIIGIJGIJJJJIIJJJEEEE?ECFFFFFFCCEDEEEDDDDDDDDEDCD +@SRR9130495.102 D00236:723:HG32CBCX2:1:1108:3264:2036/1 +GGGTGCTGGAGATAGCCCACGTACACTCCTTCTTGCTGGGGTACTTGTCAGGCCAGTTGGGGCTGGTGATGGTGCCACTGGTGGATGTCACCTTGTGTTCA ++ +=@<=B+AD>BFDFIIDEDGEIGFIIIIIICDFGFFGIII;D?F>?*/9BF>DAF;CFFGI>/:=?>7@BAA:;@5=A5>@=,98?:>@(;:4>ABAB?ABD +@SRR9130495.103 D00236:723:HG32CBCX2:1:1108:3400:2065/1 +TCTGTCTGTCACCAGGTTGGAGTGCAGTGGTAGGATCATGGCTCACTGCAGCCTCGTCCTCTTGGGTTCAAGCAATCCTCCTGCCTCAGCCTCCCAGGTAG ++ +@@BDFFFFHHHGHIJJFHEG@GHHIIAFD@HGGGEHIJJJGIJJGGGIHFDAHHHHJFCGGGHGJI;CHFCEDDFFFCEDEEDDDDDDDD<C +@SRR9130495.104 D00236:723:HG32CBCX2:1:1108:3468:2219/1 +TGCACTTCGTTCTCTTAATGAAACCCTTTGACTTAACCATGACTCCGCTCTGCTCTTGAGTTTGCAAGTGTGTGCGAGTGCCCGAGAGACAGTTTTTTTTT ++ +CCCFFEFFHHHHHJIJIIJJIIJJJIIJJIHIHJJJJJFHFHIIJIJJJJIJIJJJJJGGIIJJJGDIJJJIHHHGFFDDEEEDDDDDDDCCDCEDDDDDD +@SRR9130495.105 D00236:723:HG32CBCX2:1:1108:3722:2006/1 +TCCATAGTTTCGCAGAAGACTTGGAAGGATGTTGATGTATATGCAGGTCCATTATCAGTTTTTAAATTAGATGGTTTTCCCCAAGCTGCCCATGCGTCTAA ++ +CCCFFFDDHHHHHJJIJJIIHGHIGIIDFHHIIIHHIJJIJJIJIJJJJJJIIJJIJG=DHHJJIJIIIJJJJJIGHGHHFFBFDDEEEDDDDDDBBDDDD +@SRR9130495.106 D00236:723:HG32CBCX2:1:1108:3517:2148/1 +CTCTGTTCTGTTCCATTGATCTATATCTCTGTTTTGGTACCAGTACCATGCTGTTTTGGTTACTGTAGCCTTGTAGTATAGTTTGAAGTCAGGTAACGTGA ++ +CCCFFEBFHHHHFGGGIEEHIIJIJJIJJIIJIJJHIIJIIJJIIFHGIIIIJJJJJJFIJIJJJJGIIJIJCHIJIJHGJIJHHHHHHFFFFFFEEDECA +@SRR9130495.107 D00236:723:HG32CBCX2:1:1108:3927:2234/1 +CTGTGCTCTATGTACACGCCCATCTGTTTGCCTGACTACCACAAGCCGCTACCACCGTGCCGTTCCGTGTGCGAGCGCGCCAAGGCCGGCTGCTCGCCGCT ++ +@C@FFFFFHHHHHJJJJGIIJIJJIJEHIIGHGJJIJJJJJJIIGHHIJIJIGHJIIJIHHGFFDEDE?BBDDBDCDDDBDDDDBDD>BDBDDDDBDDDB< +@SRR9130495.108 D00236:723:HG32CBCX2:1:1108:4124:2011/1 +GACTCAGAGCCAGGGCCCGGGAACAGAGATGACTCGAAGGCTAGGGCTCCAGCCAGACTTACCGGCACACGTACACCTCTAGGGGTGGCAGGGTGCTGGGT ++ +CCCFFDDEGGGHHIJIIIJIIIIJIHHJJIJIIJIJIGGIIGIIHIGGIGHBHGHFEFFEECEDDDDDDDDBDDDDDCBACCDDDDDDBDDDDDDDDCD?9 +@SRR9130495.109 
D00236:723:HG32CBCX2:1:1108:4130:2090/1 +TTCTATTTCTATAAACTGGCCTATTTTGGGTATTTCATATATATGGAAATATATAATTTGATTTTTTTGTTCTCTTAGCTGTATGTTTTCAGGATTCTTTC ++ +@BBFDFFFHHGHHIJJJJJJJIIIJJJJIJJJIJJJJIJJIIIJJJIJJJJIJIIJJJJJJJJJIHJIJJHHHHHHFFFFFFEEEDECEEDDDDDCDDEDD +@SRR9130495.110 D00236:723:HG32CBCX2:1:1108:4176:2091/1 +AAATTGAAAGTAAATGTATACTGTAGTCCCACGCACGAGTGAATAAAGGGGTGTCTAAAAGGAGTGTGTTCTCTTCCAGGCTGCATCTCTCGGTACTCAGC ++ +;8;ABD?+AA=ADBHIGBHE?ACCCCC +@SRR9130495.111 D00236:723:HG32CBCX2:1:1108:4108:2121/1 +ATGCGGAAGTAGGCAAAAATGATGTGCTAGACTACAAGAATTCCTTTTACAGAAAGTAACAAATACAGAGCCAAGAAAGTTTTTGTTAATTATCACGGTGT ++ +@@@ADADA@AD>FIIBBBFGIBGHJDCIGEGGGHHHIJIIJJJJGHIIEHHEGHJIHGGGIFAGGGGIIG>=CHHFD?@;CCEDDDDCDD>>ACDCB@8<5 +@SRR9130495.112 D00236:723:HG32CBCX2:1:1108:4384:2110/1 +ACACAGGCAGCAATGATGTCTTTACTTCTTTATTTTTTTCGACTTCATCTACAGAGCTTAGCACAGCCATTGGAACAAAATTGGAGCTCAGTGCACAGTTA ++ +@@@FDEFDDFF3CCF?FHCH@DEEGEFHIIFGBGGGDGIHAFBGDDHHIBAGGDGHE@CHAHBFFFFBDD +@SRR9130495.115 D00236:723:HG32CBCX2:1:1108:4445:2247/1 +TCTGTATTCTGTGTCATCTGCCATTCCTTGACTCCCTGCGCCCTTCAGCCCACAGGAAACGTGTGGATGACACACGAGGAGATGGAGTCTCTGACGGCAGC ++ +CCCFFDDDHHHHGJJJJIJJJJBHHJJIIGIIJJJIIGIFIJJIIIIGIJJIIIJIJCHIIJJJIHHFHEFFFFDDDDDBDDCDDCDCDDDDCCCDBBDDB +@SRR9130495.116 D00236:723:HG32CBCX2:1:1108:4698:2005/1 +GAGGGAAGGAGGGAGGGAAAGAAGAAGGGAGAGAGGGAGGAAGGCAGGACTGTCGATGCAAGTACCTCGCTTCCTTGTTCTTAACTCATTTGATTCTTGCT ++ +C@BFFFFFGHHGHIIIJJIBGGDHC@FEGDHIIIHGEHHEGCGIHHHFFFFDEEDDDEDDDDCDCCCDDDDDDDDDDDDDEDCCCDDDCDDDED:CCDDDD +@SRR9130495.117 D00236:723:HG32CBCX2:1:1108:4588:2182/1 +CTGGGGTGCAGTGGTGCAATCATAGCTCACTGCAGCCTCAATTTCCTAGGCTTAAGCATTTCTTCCACCTCAACTTCCCAAGTAGCCAGGATTACAAGCAC ++ +CCCFFADDFFHHGGHHGIIJIJJJJJJHIIJIJJIFIJJEIIJIJIGIJIJJJJJJIJJJIIHIIJHIIFFHGHHFFFFFEDEECCCCBDBDDDDDDCDBC +@SRR9130495.118 D00236:723:HG32CBCX2:1:1108:4964:2029/1 +CCCCGTCTCTACTGAAACACACACACACACACACACACACACACACACACAATTAGCCAGGCGTGGCAGCGTCTGCCTGTAGTCCCAGCTACTCAGGAGGC ++ 
+;8=:DDDDFFFAFIIFFBEIIEFIFIFIEFFFIIBEGEF?BF<4;A@EE/?;AB>7;7;>@?>B?''5<@;@?;0((4:@>34@@>:4<@>BAB@(948&+ +@SRR9130495.119 D00236:723:HG32CBCX2:1:1108:4831:2078/1 +GCGAAGAAAACTGAAAAAGGTGGAAAATTTAGAAATGTCCACTGTAGGACGTGGAATATGGCAAGAAAACTGAAAATCATGGAAAATGAGAAACATCCACT ++ +CCCFFFFFGFHHFFHGIHDICFHIGGIDHIIJJJJIIIIIGIGHGIJJJJJJJJJJJIJJJIJJIHHHHHFFFFFEEEEEEEDDDDDDDDDDDDDDDDCDD +@SRR9130495.120 D00236:723:HG32CBCX2:1:1108:4877:2117/1 +GCATAATGTTGCCACTGCACTCCAGCTGGGACGACAAAGACTGTCTCTAAAAAAGTAATAAATAAATAAAAGTTTGAAATGCATTGTCCTAGGTTTTAGTC ++ +CCCFFDEFHHHHHIIJJJJJIJIIIGIJJJJJJIIJIJJJIJJJJJJJJJJJJJIIHHGHHHHHFDFFFFFEEEEEEDDDDDCEEDEEDDDDDCDDDDDDD +@SRR9130495.121 D00236:723:HG32CBCX2:1:1108:4918:2158/1 +AAACATGTCAATGGCCAAAAAAAACAGACAATCAAAAAATGGACAAATATATGAACAGACATTTCTCACAAGAGGACATACAAATGGCCAGCAAATATATA ++ +CCCFFFFDHHGHHIIJIJJJIIIJJJJJJJIJJIJJJIIJJJJIJJIJJIHGFHHHHFFFFFEFEEEEEDDDDDDDDCDDDDDDDCCBCDDDDDDDDEEEE +@SRR9130495.122 D00236:723:HG32CBCX2:1:1108:4939:2211/1 +CCTGGTCTCAGCATTCCTCACACGTCATAGCGAGGCCCATGGCTGTAGAAATCCCACCATTCTCTTCTCCCCAGGCCTGGCATCCGTAGAAGCCTACAGCT ++ +@CCFFFFFHHHHDHIDIHIJIJJJJJIGGEGIJIIGIIIJJJGGIJJJJIGGHGIIIJJGHHHHHFFFFEFDEDDDDDDBDDDDDDDDD?CDDDDDDDDAC +@SRR9130495.123 D00236:723:HG32CBCX2:1:1108:5169:2188/1 +TCTGACCCCATGTCCTCAGGCCAGAACCCGGGAGCCTGTCAGAAAAGGTCTCTCACCTAGAGTCCATGCTCTGGAAGCTCCAGGAGGACCTGCAGAGGGTG ++ +??@DD?DFHDFHFIIIIGFHEGGDFFHIBHGIAFIGGIIIFI@CHE@FGH@CDGGIFHEEFCCED@DEEEECCCCCCCCCCCCCCBB8ABC?ACAAABBBB +@SRR9130495.124 D00236:723:HG32CBCX2:1:1108:5192:2231/1 +ACTCTCCTGGCCCACGAGAGAGTCCACACAGGAGAGAAACCTTACCAGTGTCATGAGTGCGGCAAGAACTTTAGTCAGAAATCCTACCTTCAAAGCCATCA ++ +CCCFFFFFHFHHGJJGEFGHGIIEIIFJJJIHGIJIIJIIIJIIGIEHHIGIHIJIIJIIBDFDDDDDDDDDCDDDCCDDDDDDDCDDCDDDDDCDDDDDA +@SRR9130495.125 D00236:723:HG32CBCX2:1:1108:5408:2041/1 +TGTGTGCATCCTCATGTGTCCTTGATAAGTGGTGTGATAAATGAAGGCTTTGCCACATTCCTTACACATGTAGGGCTTCTCTCCAGTGTGAGTCCTCTCAT ++ +@@@DDDEBFHHHHIIIHIFHIDHIIIJHHJIHEGFIIHIJJIIJJEIJGHHIEIEGIIJIJJJGIGHGJJIIGIGIHJEHHHHHFFEFFFCC@CEEDDDDE +@SRR9130495.126 
D00236:723:HG32CBCX2:1:1108:5351:2057/1 +CTCTATATATTTTAACAAATGCATAATGTCATGTGTTTACCATTACAGTAGGATAAAGAACAGTCTCATTGCCTTAAAAAGTTCCCTAACATTTTAATTGT ++ +CCCFFDEDFHHHHJIJIIJJJIJJJHJJJJIJIIIHHIJIJIJJJJIIHIJJIIJJJJJIJJJJJJIJIJIJIIJJJJJJJGGIGHHFHFFFFFFFEEEEE +@SRR9130495.127 D00236:723:HG32CBCX2:1:1108:5475:2108/1 +AGCCCAGAAGGCTGGACACACCTCCCCCTCACCCCATCCCGCTCCCCAATCAACCCAGTCCTCAAGAAGCACACTGTGGCTGCTTGCTCTCTTGCCCCCCT ++ +CCCFFDFFHGHGHJJIJJJIGIIIHIGIIJIIJJJIJJJHGIJIJIIIJHHGHHFFEDEEEECCDDDDDDDDDDDDDDDDDDDDDDDDDDCCDCDDDDDDB +@SRR9130495.128 D00236:723:HG32CBCX2:1:1108:5542:2138/1 +TGGCTAGCTACTGCTGCTGCTGCATCAAAGCCCAAATATTCACTGGCATCAGCTGTTTTGTTCTTTAGCATATTAGTAAAGTGCTCATTTAGAGACATCTT ++ +@CCFFFFFHHHHGJIJJIJJJIJJJJJJJJJJJJJJIGGIJJJJJHJJJJJJJJJHIEIIJJJIIJJJIJIIJJJJHHHHHHHF@DFFFFEDEEEEDDCDD +@SRR9130495.129 D00236:723:HG32CBCX2:1:1108:5707:2147/1 +CCAGCATCACTCATGGAACCGGAGGCACTAAGGCCCCTCGGGAGACGCTGAGCAGGTGGGTAGAGGCATACTTCTGGGAGATGGCATCAAGAGCCAGTCAA ++ +CC@FFDDFFDHG>FFFHGGIJIEIIIHIGIGHBHCEHGGGFH@FHGIHHFBFDEEDDEDDDDBDC?BBDCDDDDDC:?A?BDDDCCDC>ACDDDBDDACCC +@SRR9130495.130 D00236:723:HG32CBCX2:1:1108:5614:2168/1 +AAACCATGTCTCTACTAAAACTACAAAAATTAGCTGGGCAACATGGTGGGTGCCTGTATCCCAGCTACCTGGGAGGGTGAGGCACGAGAATCACTTGAACC ++ +CCCFFFFFHHHHHIJJJJIJGJIJJJIJJJJIJJJJJIJJJJIJJHIIHJIJJJJJJJJJJJJJJJJGHFHHF@DDDDDDDDDDDDDDDBDDDDDDDDDDD +@SRR9130495.131 D00236:723:HG32CBCX2:1:1108:5985:2027/1 +GCGGCAGCGGCCGCGATGGAAGAACTTACGGCGTTCGTCTCCAAGTCTTTTGACCAGAAAGTGAAGGAGAAGAAGGAGGCCATCACGTACCGGGAGGTGCT ++ +CCCFFFDDHHDGHFIJFHFFHHBFGHGEIJJJFFDDDDDDDDDDD@CCDCDDDDDAAACACDC@CCDBBBDBD@ +@SRR9130495.132 D00236:723:HG32CBCX2:1:1108:5816:2071/1 +TTTACATATAAGAACCTGATGACCTTTTGTTTTTGTCCAGGAGAGTCCTTCTTGTCTACGAAATGCAGCTATCACAGCAGCTGGACTTGTTTCCTGAATGC ++ +C@CFFDDAFFFHHHHHHGFIHJIJFHHHCAAEHIGHJGCCGHIHEHJJJEHGIIIFIGBBAEHGIJIGIIHHEHHHFFFFEECCABCDCCCCACDDACDA: +@SRR9130495.133 D00236:723:HG32CBCX2:1:1108:5835:2081/1 +CCAGGGCTCCAAGGGGCTGGTTACGAAGTGTCTCCTGCTGCATGAGGTCCCCACGGGAGAGATTGTGGTCCGCCTTGACCTGCAGTTGTTTGATGAGCCGT ++ 
+@CCDDDFFHHHGHGJGJFHIEHIGCGHIGJGIIIJJJJJJJJJIIIEIJFGIJJIJGCCDDBDDCDDDBDDDDCDDDDDDDD>BACDDDEDDCBD +@SRR9130495.134 D00236:723:HG32CBCX2:1:1108:5841:2101/1 +GGGCATGGTGGCATGCGCCTGTAGTCCCAGCTATTCGGGAAGCTGAGGCAGGAAAATCTCTTGAACCCAGGAGGCGGAGGTTGCAGTGAGCCAAGCTTGCA ++ +@CCF?BDDHFHGHIIIIIIIIIGHGHHIIIFIIIIIGGFGGIIIIEFGGEGHCD>CGGFHHHGGHFFFCDDADDDDDDDBDBCDCCCCA@CCCCBDDDDDD +@SRR9130495.135 D00236:723:HG32CBCX2:1:1108:6165:2044/1 +TTTATACCATTTTTTTTTTTAGCATATATCCTTGTACTTTATAGGAATTATTTGCTTTATTCTCTTGTGACTTGTAAATTGATGTACTTAATTAAATCTTT ++ +@@CBAB;DHFFHHIGIIGGHGFF@?BGB@8=FHEGHIGD=@CGHIA;@EHFHGHBFFFCBD;>(>@A@C>>;>CA;;35>@3;>5>,;3>:;(:@:>:@@C +@SRR9130495.136 D00236:723:HG32CBCX2:1:1108:6059:2069/1 +CTATGACCGCTATGTTGCCATCTGTAGCCCACTGCTTTATAACACTGTAATGTCCCACAAGGTCTGTTCCATAATGATGGCTGTGGTATACTCACTGGGCT ++ +CCBFFDBEHHHHFEGIIGHGIIIJIIHIJIFIJJJJJJJJJJIIJJIJJJJIIHHIJIIIFIHIJFIGIIIHGGHHHHFFFFDEEEEEDEFEDDDCDDDBC +@SRR9130495.137 D00236:723:HG32CBCX2:1:1108:6161:2181/1 +ATGAAGCAACAACCTTATAGGCATTTTAACTCATAGGTTTTAAAACTTAAGGTTATTTTCATAGGAGTCCCTTTTAGCAGAAATGCTCACCACAGGACCAG ++ +@@CGGDD@4???BDHCH@GH<=FHGEGGGIGCEG@E7ACH@:77?C@A@CBAA???2FFFIGIIGIIIIFDADBDDDDDDDBBBBBBCDD@CDDDD?BDDDDDDCDDBDDDBDDDCCCCCBBCCCDCCCCDDAC>ABDDDCDDDDA@C +@SRR9130495.144 D00236:723:HG32CBCX2:1:1108:6837:2146/1 +CCTGTTATTTTAGTTGTTAAAGGTGGCATTCTGTTCTTGTGGCTGTCTTCTTTTAGGTTTGTTGAGGGATTACCTTCTTGTTTTTTCTAGGGCATTGTTCC ++ +BCCFDDFDHFDHFGIIIIJIGIIJJIDGHIJIIJJJJJJGFIIIHFHHIJJJIGDGIGHGHIJICHGGEHGCHGFEHFHHFDDDDDDDDDDDC?CDDDDCC +@SRR9130495.145 D00236:723:HG32CBCX2:1:1108:6804:2189/1 +TGAATCTCTCTTGGCCTCCTCCCCTCTCATGTCCCCTCCTCCCTCCTCTCCACTTACTCCTCCTCCTCCCCTCCCTCCTCCCAGATGGTTCTGTGTCTTTT ++ +CCCFFEFFHHGHHJJJIJJIJJJJIJJJJJIJIFIJIGIIGGJIJJIIIIIGIIJIJIJGIIIJIJJHHHFDFFDCDCCDDDCDBCCDCDDD@ACCCDDDD +@SRR9130495.146 D00236:723:HG32CBCX2:1:1108:6940:2229/1 +CTTAATGCCACTATCACCACTTCCTTCAAGAGTGAGGGAGAGGAAGAGGAGGAAGAGGAGGAGGAAGAAGAGGAGGAGGAGGAAGAGGAGGGTGAAGGGGA ++ 
+@@@DBDDEFHHHHGIJIEGGIGECHIFIIJGFHHACCF?D:DFF;?FHH9DFCGGHFG@CA?AB?DD>?A@BDDBB=?B5@BBFCAA@A;;5?BB9;=>;BB?1>A?ABDCC +@SRR9130495.149 D00236:723:HG32CBCX2:1:1108:7167:2101/1 +TTTCATGTTTTAGGTCTTGTAAGCAAGATTTTTCCTGTTGAAAAACTGGTTGAAGAAGCCATCCAATGTGCAGAAAAAATTGCCAGCAATTCTAAAATCGT ++ +?@?DB?DDHHGDHIHHGBIECFDHHE>C7?BB7;@A(5==;88323>CDC9B +@SRR9130495.152 D00236:723:HG32CBCX2:1:1108:7449:2110/1 +GGCTTCAGGAGCTTCAGAAGTTAAGAGCTGCAAAAAAGAAGAAAAAGGATCGGCCAAGTAAAGACTGTTCCAAGTTGGACATGCTTGCTAGAAATTTCCAG ++ +CCCFFDFEHGHHGJIIBEEHHIIIHHIGGJIIJIIGIIGGHIIIJIGGGGGGGIIJJHHFHHGFFFFFEEEEEEEDDDDDDDCDDEDDDDDDDDDDDDEDC +@SRR9130495.153 D00236:723:HG32CBCX2:1:1108:7499:2197/1 +TTCTCATAGTTCAGCTTCCACTTGCGGTAGCTTGTTCCACTTGCGGAACATGTGGTGTTTGGTTTTTTGTACCTGCACTAGTTTGCTGAGAAAGATCGGAA ++ +@@@DDDDEHHFFHABECCAFHHHFDGIHJIIEFGHIIGIGEHEBGG0AEDHGFFFGIDG@EH=ADEBADDE@CCCBCACDDDEEDDCCCBCCC@CC>AB@@ +@SRR9130495.154 D00236:723:HG32CBCX2:1:1108:7309:2205/1 +GGAGGCTGAGGCAGGAGAATCGCTTGGACCCGGGAGGTGTAGGTTGCAGTGAGCCAAGATTGCGCCACTGCACTCCAGCCTGGGTGACAAAGTGAGATATT ++ +BC@FFFDFDHHGFIGIIIJIJJJJJIJJJIIJJJIIIHHIJIJJJIJHHHHGHFFFEBAEECEDDB:@CDDDDCDDDDDDDDDBCBDCDCDDDCDBDCC?ACDD34>C@BD<>>:CB<>@BA8 +@SRR9130495.156 D00236:723:HG32CBCX2:1:1108:7518:2119/1 +TTATCAAAGAGGCCCAAGAGAAACCACTTGTCTGACTTCTACCATATGAGTTTAGAATAAGATGATGGCTGCCTATGAGGAAGCAGGCCCTCAACAGATAC ++ +@@@DDBD4CFFAAHII=G9FDHG;?F@;EEBEFCF>BGBGDBECC@BBBBBBBCCABC@CC +@SRR9130495.157 D00236:723:HG32CBCX2:1:1108:7577:2169/1 +AGTTACTTAATATACCTTAGCCGAAACTTCTGCACTGATTTCCTCCTGTGTTTCAGCCAGCCGCTTTTTGGCAAGTTCGGTTCTCCGATCACACTCTGCAA ++ +@@@DDDFFFFHGHIICGHFH@FGGHGHHJGIHJIJIJGIIIBDHCBGGHEHIJJIGIIIBGIEIGGHHCEBDFEDECD?@DDDDDDDDDDBCCDDDDDDC> +@SRR9130495.158 D00236:723:HG32CBCX2:1:1108:7659:2196/1 +TTCTGATTTTTGCTGCAGCTTCTGCTTATAATCATATGGCCAGTTGTGCTTGTCAGAGTAATGGTGAAGTCCACAAAACAAATTTCCACATCGGCAGTCAA ++ +CCCFDDDDHDFHFGIFCHIII9FHHIGGHHIJJJJIJJEHGEGIIJJJJJJJJDHGI:DFGIIGHGJGCGHGIJJGIIHGHGFFECB@CEC@B@BDD?CCD +@SRR9130495.159 D00236:723:HG32CBCX2:1:1108:7733:2213/1 
+GGCCAGATGTTTCTGTAAAGATTGAATTAGATCCCCAGGGAGAGGCAGCACAAAGTGCAAATGAATCAAAAACTGAGTAGAATATTGTAGAGTGCCAATTA ++ +@<@DDA+AFFHHHIIFBHC@@F>@>CC: +@SRR9130495.160 D00236:723:HG32CBCX2:1:1108:7590:2217/1 +GGGGCTGGGCCCACCTGGGACAGAGGGCCACATGTAGAGGCAGCGCTCCCCCGTCTTGAGCTGATCTTTGCAGTCGAATAGCATGAGGTTGGCCCAAGCGA ++ +CC@FFDDDGHHDHIJDGIIACBDDBBBDDD@ +@SRR9130495.161 D00236:723:HG32CBCX2:1:1108:7735:2228/1 +GCAGCACTGTCTGAGTATGGGAGCAAAGCCTAATCTGGCTTGCCCGGCCTCTCACCTCTGTGGCGCTCTGCATCATGGTGCTTCTTGTCATCTTTTATTGC ++ +?@;DDDDDFCFFHIHHGBHIBH?FHIBDEHB@GEIHIEHGHAFFHGEEH<@CC@C@CCA +@SRR9130495.162 D00236:723:HG32CBCX2:1:1108:7898:2065/1 +GTTGGCTTCCCCCTCCCCTCTCCCGTGAGCTGAAAAGCAACAAGGGCTCCACCAGCCTGCAAAATAAGACTTGGGGGGGGGGGGGCAGGGATTGCTTTTTT ++ +@@@FDDDDHHFHFIJIFHIIJJJDHFGIGIGJAFHIDG>@GGFHGGHJIGCHIIGEEHFGF@CFEECC>;>CCABBD<99B@BD99&)&&+9(3(4>(+:0 +@SRR9130495.163 D00236:723:HG32CBCX2:1:1108:7872:2066/1 +CACGCTGGATGAGTTCCTGTTCAGCGACCTGCAGGCGCTGGAAGTGCTGTTGCTCTACAATAACCACATTGTGGTGGTGGACCGGAATGCCTTTGAGGACA ++ +CCCFFFFFGHGGHJJJJJJJJJJIJJJJJJHJJJJJIJJIJJJIGIJJIJJJJIHHHHHHFFDFFEDDEEEEDDDDBDDDDDDDDDDDDDDDDDDDDDDDD +@SRR9130495.164 D00236:723:HG32CBCX2:1:1108:7826:2191/1 +ATCTCTGGACCCAAACTGGAGGGTGACATTAAAGTTCCCAGGGTGGATTTGAAGGGCCCAGAAGTGGACATTTCTGCTCCCAAGGTCAATATTGATGGGAA ++ +CCCFFEFFGHHGHIHHIIJIIIJJIJIGIIJIHIGIJJJJIJJBFGGIJJIJJJJJIJIHFHHFFF@EEDEEFEEDDDDDDDDDBACDDF@CDEDEDDBDD +@SRR9130495.165 D00236:723:HG32CBCX2:1:1108:7791:2195/1 +GGAGAACAGCGTGTAGAGCACTCACAGTCTGCTGCCTTCAGGTGTGGGAGGCACTGCTCACACTGATCTTCTTCCCGGTGTGTGTGGTGTTTGCCTGGATG ++ +CCCFFDEFFGHHHJDIIJJIFJJJJJIJJJGIJJIIJIIIJIJIIGEGHHJJJJJIIFIJJJHHHHFHFFDDFFEDC>9;?BBDDD?CDDDDDDDCC?BDC +@SRR9130495.166 D00236:723:HG32CBCX2:1:1108:7767:2199/1 +GACTAGCCTGGCCAACATGGCAAAACCCAGTCTCTATTAAAAATACAAAAATTAGCTGGGCATGGTGGTGCACGCCTGTAGTCCCAGCTATTCAAGAGGCT ++ +@@@DDDFFFHHFBHBHBDEHHGGGHIIJIIHIHJIEEEIGHEIHIIGHIIHHGJIEHGI?G@CDHI=CA?BDFFACDCCDFCC32??A +@SRR9130495.167 D00236:723:HG32CBCX2:1:1108:7824:2210/1 
+GCACCACCGTGCCTGGCTAATTTTTATATATTTAGTAGAGATTGGGTTTCACTGTGTTGGCCAGGCTGATCTTGAACTCTGGACCTCAGGTGATCCTCCCG ++ +@@@FFDDFDFAHHJJIJIIGHHHHHIIJIIDGG>GHHIGGCGHGIEHGGH>FFHJJJHDGBGHJCGGGHFEHHHHFFFFFECDECEDDDCDDDDCCDDCDD +@SRR9130495.168 D00236:723:HG32CBCX2:1:1108:8205:2084/1 +CTTAGCCGCTGGTGATGCTAAGGGCATGGTCAAAGTGTGGCAGCTGAGCACAGCCTTCACAGAACAAGGGCCCCGGGAGGTGGAGGACTTGGATCAGCTAG ++ +CCCFFDEDFFHHHJEGIDHHIIIIJJJIJGIJJJGHJIJIJJJIIIJIGGIIGIJJIGHIIJGEFHGBEFDDDBDDD;>B2<@BBDBBCCCCDDDDDDDDD +@SRR9130495.169 D00236:723:HG32CBCX2:1:1108:8202:2124/1 +GAGACTCTTGCACACATACCGGGGAGCTGGCTCACCCTGGCCCCTCCATCCTGTCAGACTGAAGAGAACAAGTGTCTTAATTTGGGTTTTTCTTATTATTA ++ +CCCFFEFDHFHGHGJIIIJJIJJJIIIGIJJJJFIJJHGIJGHIIJJEEEHFGFFEFECCEEEEDDDDCCAD>CCDDDDCCD?@@A=?8=?=BA93>CA??B@A????8CD(8&8?()(+224@?@>35 +@SRR9130495.179 D00236:723:HG32CBCX2:1:1108:8868:2131/1 +GTACATTGTATCTTTGTTCTCATTAGTTTCAGAGAAATTATTGATTTCTGCCTTTATTTCATTATTTACCCAAGAGTGATTTGGAAGCAGGTTGTTCAGTT ++ +<;;B?D>DFC:DBF@AEDHHAHHGH:A:AC4?BFFEDA?ABD@ACCD:> +@SRR9130495.183 D00236:723:HG32CBCX2:1:1108:9106:2031/1 +CTAGAAATCCTGGATTTTCAGCACAATAACTTAGCCAGGCTCTGGAAACGCGCAAACCCCGGTGGTCCCGTTAATTTCCTGAAGGGGCTGTCTCACCTCCA ++ +C@CFFDFFGGHHFJHIIJGIHCCEHHIGIFIIHIJJFIGJIGIJJIIJHIFGIJIJFFHHFDAD@BCCBDDDDBCC@CDDCCDDAEFC'8;&+)+((&28&&&+((&)&&++((++8((2(((25(&&&(++(0&&&(+((+4(+ +@SRR9130495.186 D00236:723:HG32CBCX2:1:1108:9230:2213/1 +ACTTAGTGCAGTACCCACTATTCCCGCTCAGGCTCCGAATAGTAGATAGAGGGTTCCGATATCTTTGTGATTGGTTGAGAATAATCAACGATTAATGAACA ++ +CCCFFFFEHGFFHFIICIIFIJHHGBHHGIGJJIJGGHIGFHIGIIJJJIIGJCFIJI@EFDFEDFFCCEEEEDDDBDDDDDDDECCDD@BDDDCDDDCDC +@SRR9130495.187 D00236:723:HG32CBCX2:1:1108:9264:2024/1 +AACATAAGGTTTCTCATAAAACAAAGAAAAATGTCAATTCAGTTGTGAATTCATATTGATACCTGGAACTCTCCTGCTAGACCACCTCTAAAGGCCCAGGG ++ +CCCFFFFFHHHGHIJJIDJIJJJDHIIJJJJIJJJJJJJIJJGCHIIJJJJJJJJJJIHJJJJJJGIJJIJIJIIIIJJHHHHGFFFFFECEEDDDDBBBB +@SRR9130495.188 D00236:723:HG32CBCX2:1:1108:9293:2034/1 +GTGGGGAGGTTTGGGAGTGAGCAGCACACCCCAGTTAGACTCCTGTTGGGTTTCATAGGAGCTGGCTGCTGAATGTAAGAGTGCAGGCTACCCCGGGACTT ++ 
+@@@FDA;1DAFHFIIFBGIGHGADHIGIIIIIIIIHBFEHIIIIIIIIHIACEEEHHBD@CDECECCBBCCACCCCEECCCCCCCCBBBCCCB?9>>>>BC +@SRR9130495.189 D00236:723:HG32CBCX2:1:1108:9484:2048/1 +ATGTAGAGAGAGGGAAAAAAGGAGAGAGAGAAGGATAAAGAGAAGGATGCACAAGAAGACCAAAATACCTGATCATGTAGGGGAGAGCCTCTGGGAGAAGG ++ +@@@DFDDDCDDFHICFHHDGIIHHCHCFFDGHHDCGIJIH@FFHFHGIIGIJIGIGF@CEBDFF@EECDCCCCCCDDEEEDDDDDDDDDDCDDCD<@BDCB +@SRR9130495.190 D00236:723:HG32CBCX2:1:1108:9388:2219/1 +AGCTGCTGCGAGATGGTGGCTTGCATCTCCTTGGACGGCCGCTTGTTCTCCTTGAAGATGGCAATCAGCGTGCGGCGCTGCAGGTCTGTGAACACGAGGCG ++ +B?>3=?>;@DF@>CACC>;>@A>',,88>>-09599(+8>C95>>>00 +@SRR9130495.191 D00236:723:HG32CBCX2:1:1108:9404:2245/1 +TCCGGCTGGTACCTTCATAACTACAGTAATAGAAGACATTGAGTGCCTCCACCGCAGCTGGCCCTCTCTGTTTGTAGCCAAAGATCAGATCTATCCATTCA ++ +CCCFFFFFHHHHHJJIJIIJEIIJJJJJIJJJIJJJIJIIIIIJJJIJJIIJJJJEIJJIHIGHHHFFFFFFDEEEEEDDDDDDDDDDDDDEDCDDDCDCC +@SRR9130495.192 D00236:723:HG32CBCX2:1:1108:9632:2134/1 +ATAGTGGCTGCTGATGGATGTGCTCTATGCAAGGGAGGTGCTCACTATTTCTGTTCGTCAATTTGTAACCCACGGGAGGAAAGGGAACAAAGAGTGAACAA ++ +CCCFFFFFHHHGAEFFHIICHGIJHGFIIIHIJDGIJJIIIEGIJJIIFHEGHGIICHIIFIJJJJIIJJHHHFFDBDD?BDDDDBDBDDBCDD:@CCCCD +@SRR9130495.193 D00236:723:HG32CBCX2:1:1108:9647:2175/1 +AATACCAGCCCAAGACTTTGGGAGAAGGGAAGAAAACAAAGTAAAATAACTTACCACTTTGGCCCAGTCCGAGAACAAGTGAAAATACCCAGGCTGCCCCA ++ +CCCFFFDFGHHFHIIJJGIIIJDIIJIJJJJIIHIIIIJJJHCFHHFHIJJJJJJGIHJJJIJJHGHHHFFFDDDDDCD>CCDDDDDDCDDDBDDDDDDDD +@SRR9130495.194 D00236:723:HG32CBCX2:1:1108:9552:2194/1 +CCATGCCGACACAGGTAGATGGTACGGGGCTGCACGTGGATGTTCATCAGGTAGTATACAATTCGGCTCTGGATGTGGTCCTGCACTCTGTTCACCAAGAA ++ +@@CFFFFFGHGHHJJJIJJJJIJJJIGHFEGGIFIJIJIIIIIJJIIJJIHHHHHHHFFFFFFFDCDDDC@@ACDDCDDDDDDDDDDDDCDDD@CCDDDDD +@SRR9130495.195 D00236:723:HG32CBCX2:1:1108:9620:2235/1 +CCAATGATGGCCAACTAGGCCATCTTCTACTATGTACGCAGCTAGAGGCACGAGCGCTGGGGGTACCGATTAGTTCATATTGGTGTTCCACCTATAGGGTT ++ +===BD?DBAABD8AC3?F?D9C6'5;:>7<<>8:(',&22>(((:>3>+:(+28(43>@B(>:>A((32 +@SRR9130495.196 D00236:723:HG32CBCX2:1:1108:9908:2124/1 
+TCTTGTGAAGAAGATGCTGTTGGAAGCCTCTAAGAAGCCCGAACTGAATGCTCTTATAAACAATACCAGAGGAATTATTTTTTACAGTGTCCCTCACCATG ++ +CCCFFFFFHGDFGIIJJIJJIGGJJIIJJIIJHIIIIJJJJI>HHIFIJIJIJJIIHIHIJJJJIHIHHHGFDEFFEEEEEEDDDDDFDEDCCDCDDDDDD +@SRR9130495.197 D00236:723:HG32CBCX2:1:1108:9923:2206/1 +TGGGGCTGTGAACCGAAGTCTGCTCCTTTGCGTGAGCCACCCCTGCAGCCCCTCCCACAGTTCCTGAGGAGCCTTTAGTCCTCGTCCTTTCTCAGCTGTAT ++ +@BCFFDAFHHHHHIIIIHIIJIIJIIJJJIIIJJIGGIIIJJFGGGGGEIIIGFHHEFFFEECCCC@CB@BDDDCDDCCDCCDDABBBCDDCC@CCDCCCD +@SRR9130495.198 D00236:723:HG32CBCX2:1:1108:10131:2036/1 +ATGTGCTCAAAGGCTGGGTGGACCTTACCTCCAGTAAACCCCACGTTGTGAAGAAATCCATCAAGTACCTGGAACAAGGAACTCAAGACACCAAAGATGTG ++ +@CCFFFDDFFBHDHIIIBFEGFGHGHGDFHFGIJJIIHDGEGHIJHFHIIIIJJJJJIIIFHHHFHFFFFFFBAEAACBBCADDDDDDDDADBABCCC>AA +@SRR9130495.199 D00236:723:HG32CBCX2:1:1108:10246:2089/1 +GCCCTGGGATTGTCCCTCTGGGCACAGGGAGTCCTGGGGTTGTCCCTCTGAGTAGTTCTGTTGGGAGAGGAGGCCCTGGGATTGTCCCTCTGGGTACAGGG ++ +CCCFFBDDHHHFHJIIIJJJJIIIIIFIIEHGIGJIJDHJIJIIJJFJIJ@FFCHJJJBEGEHHHDFDCD@DBB;=?A?@BDDDCACDDDDD@?CCCCDCB +@SRR9130495.200 D00236:723:HG32CBCX2:1:1108:10051:2156/1 +ATGGAGGATGGCACCCTGCAGGCTGGCCCAGGAGGTGCCAGTGGGCCTCGTGCCCTGGAAATAAATAAAATGATTTCTTTTTGGAGGAATGCTCATAAACG ++ +@@CDFFDFGFHFFIJGGIICCDDADDB diff --git a/docs/notebooks/example.parquet b/docs/notebooks/example.parquet new file mode 100644 index 00000000..017772b7 Binary files /dev/null and b/docs/notebooks/example.parquet differ diff --git a/docs/notebooks/report.html b/docs/notebooks/report.html new file mode 100644 index 00000000..bbdb1bf1 --- /dev/null +++ b/docs/notebooks/report.html @@ -0,0 +1,250 @@ + + + + + fastqc-rs report + + + + + + + + + + + + + +
+ +
+
+ +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ +
+
+ +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ average GC content + + 46.15999984741211 +
+ average read length + + 101 +
+ canonical + + True +
+ file name + + example.fastq +
+ k + + 5 +
+ total reads + + 200 +
+
+
+ + \ No newline at end of file diff --git a/polars_bio/__init__.py b/polars_bio/__init__.py index 01d66d58..91eb9b0b 100644 --- a/polars_bio/__init__.py +++ b/polars_bio/__init__.py @@ -16,6 +16,7 @@ from .polars_ext import PolarsRangesOperations as LazyFrame from .range_op import FilterOp, count_overlaps, coverage, merge, nearest, overlap from .range_viz import visualize_intervals +from .quality_stats import base_sequence_quality POLARS_BIO_MAX_THREADS = "datafusion.execution.target_partitions" @@ -45,4 +46,5 @@ "ReadOptions", "VcfReadOptions", "set_option", + "base_sequence_quality", ] diff --git a/polars_bio/quality_stats.py b/polars_bio/quality_stats.py new file mode 100644 index 00000000..501bd44a --- /dev/null +++ b/polars_bio/quality_stats.py @@ -0,0 +1,63 @@ +from pathlib import Path +from typing import Union +import datafusion +import polars as pl +import pandas as pd +import pyarrow as pa +from .context import ctx +from polars_bio.polars_bio import ( + base_sequance_quality_scan, + base_sequance_quality_frame, +) + + +def base_sequence_quality( + df: Union[str, pl.DataFrame, pl.LazyFrame, pd.DataFrame], + quality_scores_column: str = "quality_scores", + output_type: str = "polars.DataFrame", + target_partitions: int = 8, +) -> Union[pl.DataFrame, pd.DataFrame]: + """ + Compute base sequence quality statistics from various dataframe/file types. + + Args: + df: Input data as a file path or dataframe. + quality_scores_column: Name of the column with quality scores. + output_type: Output type, either "polars.DataFrame" or "pandas.DataFrame". + + Returns: + DataFrame with base sequence quality statistics. + """ + ctx.set_option( + "datafusion.execution.target_partitions", str(target_partitions), False + ) + + if isinstance(df, str): + supported_exts = {".parquet", ".csv", ".bed", ".vcf", ".fastq"} + ext = set(Path(df).suffixes) + if not (supported_exts & ext or not ext): + raise ValueError( + "Input file must be a Parquet, CSV, BED, VCF, or FASTQ file." 
+ ) + result: datafusion.DataFrame = base_sequance_quality_scan( + ctx, df, quality_scores_column + ) + else: + if isinstance(df, pl.LazyFrame): + arrow_table = df.collect().to_arrow() + elif isinstance(df, pl.DataFrame): + arrow_table = df.to_arrow() + elif isinstance(df, pd.DataFrame): + arrow_table = pa.Table.from_pandas(df) + else: + raise TypeError("Unsupported dataframe type.") + result: datafusion.DataFrame = base_sequance_quality_frame( + ctx, arrow_table, quality_scores_column + ) + + if output_type == "polars.DataFrame": + return result.to_polars() + elif output_type == "pandas.DataFrame": + return result.to_pandas() + else: + raise ValueError("output_type must be 'polars.DataFrame' or 'pandas.DataFrame'") diff --git a/src/context.rs b/src/context.rs index 5f47f30e..1525ef05 100644 --- a/src/context.rs +++ b/src/context.rs @@ -25,7 +25,6 @@ impl PyBioSessionContext { pub fn new(seed: String, catalog_dir: String) -> PyResult { let ctx = create_context().unwrap(); let session_config: HashMap = HashMap::new(); - Ok(PyBioSessionContext { ctx, session_config, diff --git a/src/lib.rs b/src/lib.rs index c8890a8d..152b0736 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,8 @@ +mod sequence_quality_histogram; mod context; mod operation; mod option; +mod quantile_stats; mod query; mod scan; mod streaming; @@ -16,6 +18,7 @@ use datafusion::datasource::MemTable; use datafusion_python::dataframe::PyDataFrame; use datafusion_vcf::storage::VcfReader; use log::{debug, error, info}; +use operation::do_base_sequence_quality; use polars_lazy::prelude::{LazyFrame, ScanArgsAnonymous}; use polars_python::error::PyPolarsErr; use polars_python::lazyframe::PyLazyFrame; @@ -33,6 +36,7 @@ use crate::utils::convert_arrow_rb_schema_to_polars_df_schema; const LEFT_TABLE: &str = "s1"; const RIGHT_TABLE: &str = "s2"; +const DEFAULT_TABLE_NAME: &str = "unnamed_table"; const DEFAULT_COLUMN_NAMES: [&str; 3] = ["contig", "start", "end"]; #[pyfunction] @@ -403,6 +407,48 @@ fn 
py_from_polars( }) } +#[pyfunction] +#[pyo3(signature = (py_ctx, path, column))] +fn base_sequance_quality_scan( + py: Python<'_>, + py_ctx: &PyBioSessionContext, + path: String, + column: String, +) -> PyResult { + py.allow_threads(|| { + let ctx = &py_ctx.ctx; + let rt = Runtime::new().unwrap(); + maybe_register_table(path, &DEFAULT_TABLE_NAME.to_string(), None, ctx, &rt); + let data_frame = rt.block_on(do_base_sequence_quality( + ctx, + DEFAULT_TABLE_NAME.to_string(), + column.to_string(), + )); + Ok(PyDataFrame::new(data_frame)) + }) +} + +#[pyfunction] +#[pyo3(signature = (py_ctx, df, column))] +fn base_sequance_quality_frame( + py: Python<'_>, + py_ctx: &PyBioSessionContext, + df: PyArrowType, + column: String, +) -> PyResult { + py.allow_threads(|| { + let ctx = &py_ctx.ctx; + let rt = Runtime::new().unwrap(); + register_frame(py_ctx, df, DEFAULT_TABLE_NAME.to_string()); + let data_frame = rt.block_on(do_base_sequence_quality( + ctx, + DEFAULT_TABLE_NAME.to_string(), + column.to_string(), + )); + Ok(PyDataFrame::new(data_frame)) + }) +} + #[pymodule] fn polars_bio(_py: Python, m: &Bound) -> PyResult<()> { pyo3_log::init(); @@ -417,7 +463,8 @@ fn polars_bio(_py: Python, m: &Bound) -> PyResult<()> { m.add_function(wrap_pyfunction!(py_describe_vcf, m)?)?; m.add_function(wrap_pyfunction!(py_register_view, m)?)?; m.add_function(wrap_pyfunction!(py_from_polars, m)?)?; - // m.add_function(wrap_pyfunction!(unary_operation_scan, m)?)?; + m.add_function(wrap_pyfunction!(base_sequance_quality_frame, m)?)?; + m.add_function(wrap_pyfunction!(base_sequance_quality_scan, m)?)?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/operation.rs b/src/operation.rs index 13ff545f..bd50e182 100644 --- a/src/operation.rs +++ b/src/operation.rs @@ -6,8 +6,10 @@ use log::{debug, info}; use sequila_core::session_context::{Algorithm, SequilaConfig}; use tokio::runtime::Runtime; +use crate::sequence_quality_histogram::SequenceQualityHistogramProvider; use 
crate::context::set_option_internal; use crate::option::{FilterOp, RangeOp, RangeOptions}; +use crate::quantile_stats::QuantileStatsTableProvider; use crate::query::{count_overlaps_query, nearest_query, overlap_query}; use crate::udtf::CountOverlapsProvider; use crate::utils::default_cols_to_string; @@ -191,6 +193,45 @@ async fn do_count_overlaps_coverage_naive( ctx.sql(&query).await.unwrap() } +pub(crate) async fn do_base_sequence_quality( + ctx: &ExonSession, + table: String, + column: String, +) -> datafusion::dataframe::DataFrame { + let session = Arc::new(ctx.session.clone()); + let base_provider = Arc::new(SequenceQualityHistogramProvider::new( + session.clone(), + table.clone(), + column, + )); + + let base_table_name = format!("{}_decoded", table); + session.deregister_table(base_table_name.clone()).unwrap(); + session + .register_table(&base_table_name, base_provider) + .unwrap(); + + let query = format!( + "SELECT pos, score, SUM(count) as count FROM {} GROUP BY pos, score", + base_table_name + ); + let base_df = ctx.sql(&query).await.unwrap(); + let base_plan = base_df.create_physical_plan().await.unwrap(); + + let quantile_provider = Arc::new(QuantileStatsTableProvider::new(base_plan)); + + let quantile_table_name = format!("{}_quantiles", table); + session + .deregister_table(quantile_table_name.clone()) + .unwrap(); + session + .register_table(&quantile_table_name, quantile_provider) + .unwrap(); + + let query = format!("SELECT * FROM {}", quantile_table_name); + ctx.sql(&query).await.unwrap() +} + async fn get_non_join_columns( table_name: String, join_columns: Vec, diff --git a/src/quantile_stats.rs b/src/quantile_stats.rs new file mode 100644 index 00000000..36bf01b2 --- /dev/null +++ b/src/quantile_stats.rs @@ -0,0 +1,266 @@ +use std::any::Any; +use std::collections::HashMap; +use std::fmt::{Debug, Formatter}; +use std::sync::Arc; + +use arrow::array::{Array, Float64Builder, UInt64Array, UInt64Builder, UInt8Array}; +use 
arrow::compute::concat_batches; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow::record_batch::RecordBatch; +use async_trait::async_trait; +use datafusion::catalog::{Session, TableProvider}; +use datafusion::datasource::TableType; +use datafusion::error::Result; +use datafusion::execution::context::TaskContext; +use datafusion::physical_expr::EquivalenceProperties; +use datafusion::physical_plan::memory::MemoryStream; +use datafusion::physical_plan::{ + collect, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, + PlanProperties, SendableRecordBatchStream, +}; + +pub struct QuantileStatsTableProvider { + input: Arc, + schema: SchemaRef, +} + +impl QuantileStatsTableProvider { + pub fn new(input: Arc) -> Self { + let schema = Arc::new(Schema::new(vec![ + Field::new("pos", DataType::UInt64, false), + Field::new("avg", DataType::Float64, true), + Field::new("q1", DataType::Float64, true), + Field::new("median", DataType::Float64, true), + Field::new("q3", DataType::Float64, true), + Field::new("lower", DataType::Float64, true), + Field::new("upper", DataType::Float64, true), + ])); + Self { + input, + schema, + } + } +} + +impl Debug for QuantileStatsTableProvider { + fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result { + Ok(()) + } +} + +#[async_trait] +impl TableProvider for QuantileStatsTableProvider { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + async fn scan( + &self, + _state: &dyn Session, + _projection: Option<&Vec>, + _filters: &[datafusion::logical_expr::Expr], + _limit: Option, + ) -> datafusion::error::Result> { + Ok(Arc::new(QuantileStatsExec::new(self.input.clone()))) + } +} + +#[derive(Debug)] +pub struct QuantileStatsExec { + input: Arc, + schema: SchemaRef, + properties: PlanProperties, +} + +impl QuantileStatsExec { + pub fn new(input: Arc) -> Self { + let schema 
= Arc::new(Schema::new(vec![ + Field::new("pos", DataType::UInt64, false), + Field::new("avg", DataType::Float64, true), + Field::new("q1", DataType::Float64, true), + Field::new("median", DataType::Float64, true), + Field::new("q3", DataType::Float64, true), + Field::new("lower", DataType::Float64, true), + Field::new("upper", DataType::Float64, true), + ])); + + let schema_clone = schema.clone(); + + Self { + input, + schema, + properties: PlanProperties::new( + EquivalenceProperties::new(schema_clone), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ), + } + } +} + +impl ExecutionPlan for QuantileStatsExec { + fn name(&self) -> &str { + "QuantileAggregateExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + Arc::clone(&self.schema) + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Ok(self) + } + + fn execute( + &self, + _partition: usize, + context: Arc, + ) -> Result { + let input = self.input.clone(); + let schema = self.schema.clone(); + + let batches = futures::executor::block_on(collect(input, context.clone()))?; + let combined = concat_batches(&self.input.schema(), &batches)?; + let pos_array = combined + .column_by_name("pos") + .expect("Column 'pos' not found") + .as_any() + .downcast_ref::() + .expect("Expected UInt64 for pos"); + + let score_array = combined + .column_by_name("score") + .expect("Column 'score' not found") + .as_any() + .downcast_ref::() + .expect("Expected UInt8 for score"); + + let count_array = combined + .column_by_name("count") + .expect("Column 'count' not found") + .as_any() + .downcast_ref::() + .expect("Expected UInt64 for count"); + + let mut groups: HashMap> = HashMap::new(); + for i in 0..combined.num_rows() { + if pos_array.is_valid(i) && score_array.is_valid(i) && count_array.is_valid(i) { + let pos = 
/// Summarises a per-score count histogram as box-plot statistics.
///
/// `hist[score]` is the number of observations that had the value `score`.
///
/// Returns `None` when the histogram holds no observations (an empty slice or
/// all-zero counts). Otherwise returns
/// `Some((average, q1, median, q3, lower, upper))`, where `lower` and `upper`
/// are the whisker bounds `q1 - 1.5 * IQR` and `q3 + 1.5 * IQR`.
///
/// All integer accumulation is done in `u128`, so very large counts cannot
/// overflow (a `u64` accumulator panics in debug builds and silently wraps in
/// release builds).
fn calculate_histogram_stats(hist: &[u64]) -> Option<(f64, f64, f64, f64, f64, f64)> {
    /// Locates the requested quantile inside the histogram.
    ///
    /// The target rank is `fraction * (total - 1)`. Once the bin containing
    /// that rank is found, the result is the bin index plus the fractional
    /// offset of the rank inside the bin (`offset / count`), so a bin is
    /// treated as spreading its observations evenly over `[idx, idx + 1)`.
    fn quantile(hist: &[u64], fraction: f64, total: u128) -> f64 {
        let target = fraction * (total - 1) as f64;
        let mut seen = 0u128;
        let mut last_non_empty = 0usize;

        for (score, &count) in hist.iter().enumerate() {
            if count == 0 {
                continue;
            }
            let next = seen + u128::from(count);
            if (seen as f64) <= target && (next as f64) > target {
                let offset = target - seen as f64;
                return if count > 1 && offset > 0.0 {
                    score as f64 + offset / count as f64
                } else {
                    score as f64
                };
            }
            seen = next;
            last_non_empty = score;
        }

        // Fallback for floating-point edge cases where no bin matched:
        // report the last populated bin.
        last_non_empty as f64
    }

    let total_count: u128 = hist.iter().map(|&count| u128::from(count)).sum();
    if total_count == 0 {
        return None;
    }

    let weighted_sum: u128 = hist
        .iter()
        .enumerate()
        .map(|(score, &count)| score as u128 * u128::from(count))
        .sum();
    let average = weighted_sum as f64 / total_count as f64;

    let q1 = quantile(hist, 0.25, total_count);
    let median = quantile(hist, 0.5, total_count);
    let q3 = quantile(hist, 0.75, total_count);

    let iqr = q3 - q1;
    let lower = q1 - 1.5 * iqr;
    let upper = q3 + 1.5 * iqr;

    Some((average, q1, median, q3, lower, upper))
}
datafusion::physical_plan::repartition::RepartitionExec; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties, + SendableRecordBatchStream, +}; +use datafusion::prelude::{col, SessionContext}; +use futures::stream::BoxStream; +use futures::{StreamExt, TryStreamExt}; +pub struct SequenceQualityHistogramProvider { + session: Arc, + table_name: String, + column_name: String, + schema: SchemaRef, +} + +impl SequenceQualityHistogramProvider { + pub fn new(session: Arc, table_name: String, column_name: String) -> Self { + let schema = Arc::new(Schema::new(vec![ + Field::new("pos", DataType::UInt64, false), + Field::new("score", DataType::UInt8, false), + Field::new("count", DataType::UInt64, false), + ])); + Self { + session, + table_name, + column_name, + schema, + } + } +} + +impl Debug for SequenceQualityHistogramProvider { + fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result { + Ok(()) + } +} + +#[async_trait] +impl TableProvider for SequenceQualityHistogramProvider { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn table_type(&self) -> TableType { + todo!() + } + + async fn scan( + &self, + _state: &dyn Session, + _projection: Option<&Vec>, + _filters: &[datafusion::prelude::Expr], + _limit: Option, + ) -> Result> { + let target_partitions = self.session.state().config().target_partitions(); + Ok(Arc::new(SequenceQualityHistogramExec { + schema: self.schema.clone(), + session: self.session.clone(), + table_name: self.table_name.clone(), + column_name: self.column_name.clone(), + properties: PlanProperties::new( + EquivalenceProperties::new(self.schema.clone()), + Partitioning::UnknownPartitioning(target_partitions), + ExecutionMode::Bounded, + ), + })) + } +} + +pub struct SequenceQualityHistogramExec { + schema: SchemaRef, + session: Arc, + 
table_name: String, + column_name: String, + properties: PlanProperties, +} + +impl Debug for SequenceQualityHistogramExec { + fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result { + Ok(()) + } +} + +impl DisplayAs for SequenceQualityHistogramExec { + fn fmt_as(&self, _t: DisplayFormatType, _f: &mut Formatter) -> std::fmt::Result { + Ok(()) + } +} + +impl ExecutionPlan for SequenceQualityHistogramExec { + fn name(&self) -> &str { + "BaseSequenceQualityExec" + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn properties(&self) -> &PlanProperties { + &self.properties + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Ok(self) + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> Result { + let fut = get_stream( + self.session.clone(), + self.table_name.clone(), + self.column_name.clone(), + self.properties.partitioning.partition_count(), + partition, + context, + self.schema.clone(), + ); + let stream = futures::stream::once(fut).try_flatten(); + let schema = self.schema.clone(); + Ok(Box::pin(RecordBatchStreamAdapter::new(schema, stream))) + } +} + +fn decode_score(c: char) -> Option { + let ascii = c as u8; + if ascii >= 33 { + Some(ascii - 33) + } else { + None + } +} + +async fn get_stream( + session: Arc, + table_name: String, + column_name: String, + target_partitions: usize, + partition: usize, + context: Arc, + new_schema: SchemaRef, +) -> Result { + let df = session + .table(table_name.clone()) + .await? 
+ .select(vec![col(&column_name)])?; + + let plan = df.create_physical_plan().await?; + + let repartition_stream = + RepartitionExec::try_new(plan, Partitioning::RoundRobinBatch(target_partitions))?; + + let mut partition_stream = repartition_stream.execute(partition, context)?; + + let mut pos_map: HashMap> = HashMap::new(); + + while let Some(batch_result) = partition_stream.next().await { + let batch = batch_result?; + let col = batch.column(0); // tylko jedna kolumna + + let col = arrow::compute::cast(col, &DataType::Utf8) + .map_err(|e| DataFusionError::Internal(format!("Cast error: {e}")))?; + + let col = col + .as_any() + .downcast_ref::() + .ok_or_else(|| DataFusionError::Internal("Expected StringArray".into()))?; + + for row in 0..col.len() { + if col.is_null(row) { + continue; + } + let s = col.value(row); + for (pos, byte) in s.bytes().enumerate() { + if let Some(score) = decode_score(byte as char) { + let entry = pos_map.entry(pos).or_insert_with(|| vec![0u64; 94]); + if (score as usize) < entry.len() { + entry[score as usize] += 1; + } + } + } + } + } + + let mut positions = Vec::new(); + let mut scores = Vec::new(); + let mut counts = Vec::new(); + + for (pos, counts_vec) in pos_map { + for (score, &count) in counts_vec.iter().enumerate() { + if count > 0 { + positions.push(pos as u64); + scores.push(score as u8); + counts.push(count as u64); + } + } + } + let pos_array = Arc::new(arrow_array::UInt64Array::from(positions)); + let score_array = Arc::new(arrow_array::UInt8Array::from(scores)); + let count_array = Arc::new(arrow_array::UInt64Array::from(counts)); + let new_batch = RecordBatch::try_new( + new_schema.clone(), + vec![pos_array, score_array, count_array], + ) + .unwrap(); + + let iter = futures::stream::once(async move { Ok(new_batch) }); + + let adapted_stream = + RecordBatchStreamAdapter::new(new_schema.clone(), Box::pin(iter) as BoxStream<_>); + + Ok(Box::pin(adapted_stream)) +}