Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit edd72cf

Browse files
committed
consensus calculation using HPCell function
1 parent f3b59b4 commit edd72cf

File tree

1 file changed

+55
-0
lines changed

1 file changed

+55
-0
lines changed

dev/HCA_cell_type_consensus.R

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
library(arrow)
2+
library(dplyr)
3+
library(duckdb)
4+
library(HPCell)
5+
6+
# Open an in-memory DuckDB session and expose the input Parquet file as a
# lazy dbplyr table: `tbl()` wraps the SQL query, the rows stay in DuckDB
# until something downstream collects them.
con <- dbConnect(duckdb::duckdb(), dbdir = ":memory:")
parquet_file <- "/vast/projects/cellxgene_curated/census_samples/concensus_input.parquet"

data_tbl <- tbl(
  con,
  sql(sprintf("SELECT * FROM read_parquet('%s')", parquet_file))
)
11+
12+
# Narrow the lazy table to the seven columns used below: the per-row
# key/label columns plus the three reference annotation columns
# (azimuth, monaco, blueprint) that serve as inputs to the consensus step.
annotation_combination <- data_tbl |>
  select(
    cell_,
    dataset_id,
    cell_type,
    cell_type_ontology_term_id,
    azimuth_predicted.celltype.l2,
    monaco_first.labels.fine,
    blueprint_first.labels.fine
  )
17+
18+
19+
20+
21+
22+
# Compute the consensus label once per distinct combination of the three
# reference annotations rather than once per row. The distinct combinations
# are pulled into R with as_tibble() so that
# reference_annotation_to_consensus() is evaluated in R on plain vectors
# instead of being handed to the SQL translator.
annotation_consensus <- annotation_combination |>
  distinct(
    azimuth_predicted.celltype.l2,
    monaco_first.labels.fine,
    blueprint_first.labels.fine
  ) |>
  as_tibble() |>
  mutate(
    reannotation_consensus = reference_annotation_to_consensus(
      azimuth_input = azimuth_predicted.celltype.l2,
      monaco_input = monaco_first.labels.fine,
      blueprint_input = blueprint_first.labels.fine
    )
  )
27+
28+
29+
# Attach the consensus label to every row of the lazy table.
#
# * `by` is spelled out so the join keys are exactly the three reference
#   annotation columns, instead of relying on an implicit natural join over
#   whatever column names the two tables happen to share.
# * `na_matches = "na"` makes NULL keys match each other. With the SQL default
#   ("never"), any row with a missing value in one of the three annotation
#   columns would not match its combination in `annotation_consensus` and
#   would silently end up with a NULL `reannotation_consensus`.
# * `copy = TRUE` uploads the in-R `annotation_consensus` tibble to the DuckDB
#   connection so the join can run inside the database.
annotation_combination <- annotation_combination |>
  left_join(
    annotation_consensus,
    by = c(
      "azimuth_predicted.celltype.l2",
      "monaco_first.labels.fine",
      "blueprint_first.labels.fine"
    ),
    copy = TRUE,
    na_matches = "na"
  )
32+
33+
output_parquet <- "/vast/projects/mangiola_immune_map/PostDoc/CuratedAtlasQueryR/dev/consensus_output.parquet"

# Write the joined table to Parquet without pulling it into R: render the
# lazy dplyr pipeline to SQL and wrap it in a DuckDB `COPY (...) TO` statement
# that is executed on the same connection.
rendered_sql <- dbplyr::sql_render(annotation_combination)

copy_query <- paste0(
  "\nCOPY (\nSELECT *\nFROM (\n",
  rendered_sql,
  "\n)\n) TO '", output_parquet, "' (FORMAT PARQUET);\n"
)

# Run the COPY statement
dbExecute(con, copy_query)
48+
49+
# Close the session that produced the output file and shut the database down
dbDisconnect(con, shutdown = TRUE)

# Re-open a fresh in-memory session and expose the freshly written Parquet
# file as a lazy table for inspection
con <- dbConnect(duckdb::duckdb(), dbdir = ":memory:")
data_consensus <- tbl(
  con,
  sql(sprintf("SELECT * FROM read_parquet('%s')", output_parquet))
)
55+

0 commit comments

Comments
 (0)