Source code for querynator.report_scripts.combine_cgi_civic

""" Combine CIViC-VEP with CGI-VEP """

import pandas as pd


[docs]def merge_civic_cgi(alterations_vep, civic_vep): """ merge CIViC and CGI alterations annotations for each variant based on the similar variant VEP annotation :param alterations_vep: DataFrame of variants and their VEP and CGI alterations annotations :type alterations_vep: pandas DataFrame :param civic_vep: DataFrame of variants and their VEP and CIViC alterations annotations :type civic_vep: pandas DataFrame :return: merged DataFrame of variants and their VEP & CIViC & CGI alterations annotations :rtype: pandas DataFrame """ # drop cols created for earlier steps alterations_vep = alterations_vep.drop(["chr_merge_VEP", "pos_merge_VEP", "ref_merge_VEP", "alt_merge_VEP"], axis=1) # CIViC (merge VEP in CIViC) might be all nan for cols in which alterations does have items (CGI in VEP). # so, all _VEP cols on which the merging is performed must be of the same type for i in ["chr_VEP", "pos_VEP", "ref_VEP", "alt_VEP"]: civic_vep[i] = civic_vep[i].astype(str) alterations_vep[i] = alterations_vep[i].astype(str) # merge vep_civic_cgi_merge = alterations_vep.merge( civic_vep, on=["chr_VEP", "pos_VEP", "ref_VEP", "alt_VEP"], suffixes=("_cgi", "_civic"), how="left" ) # remove all unnecessary VEP cols vep_civic_cgi_merge = vep_civic_cgi_merge[ [col for col in vep_civic_cgi_merge.columns if not col.endswith("_VEP_civic")] ] # rename VEP columns vep_civic_cgi_merge.columns = [ col.replace("_VEP_cgi", "_VEP") if col.endswith("_VEP_cgi") else col for col in vep_civic_cgi_merge.columns ] return vep_civic_cgi_merge
[docs]def combine_cgi_civic(outdir, logger): """ Combine cgi-vep table with the civic-vep table :param outdir: Path to report directory :type outdir: str :return: None :rtype: None """ logger.info("Combining CIViC-VEP & CGI-VEP") # read in the files alterations_vep = pd.read_csv(f"{outdir}/combined_files/alterations_vep.tsv", sep="\t") civic_vep = pd.read_csv(f"{outdir}/combined_files/civic_vep.tsv", sep="\t") vep_civic_cgi_merge = merge_civic_cgi(alterations_vep, civic_vep) # write combined results to result dir vep_civic_cgi_merge.to_csv(f"{outdir}/combined_files/civic_cgi_vep.tsv", sep="\t", index=False) logger.info("Files combined")