def qc_charr_score(self, df: pd.DataFrame) -> float: """ calculate charr score Args: df(pd.DataFrame): dataframe of parser vcf Returns: float, charr score """ def charr_score(row: pd.Series) -> float: """ charr score for one site Args: row(pd.Series): one site Returns: float, charr score """ ad = [int(i) for i in row["ad"].split(",")] # allele depth freq = float(row["freq"]) if (ad[0] + ad[1]) == 0 or freq == 0: return 0 # The denominator cannot be zero return ad[0] / (freq * (ad[0] + ad[1])) # autosome data = df[df["chrom"].isin([f"chr{i}" for i in range(1, 23)])] data = data[data["gt"].isin(["1/1", "1|1"])] # Homogenous variant sites data = data[(data["dp"] > 30) & (data["gq"] > 20)] # filter def get_freq(row: pd.Series) -> float: """ get reference freq for one site in population Args: row(pd.Series): one site Returns: float, reference freq """ key = (row["chrom"], row["position"]) return self.charr_required_freq.get(key, 0.0001) # If not, fill in the minimum value to avoid division by 0 data["freq"] = data.apply(get_freq, axis=1) if data.shape[0] == 0: return 0.0 # no site after filter score = round(data.apply(charr_score, axis=1).sum() / data.shape[0], 4) # mean and retain four decimal places return score