添加 share.py

This commit is contained in:
2025-12-16 11:19:26 +08:00
commit bd8e9f0b2d

46
share.py Normal file
View File

@@ -0,0 +1,46 @@
def qc_charr_score(self, df: pd.DataFrame) -> float:
    """
    Calculate the sample-level charr score.

    The score is the mean, over all retained sites, of
    ``ref_depth / (ref_freq * (ref_depth + alt_depth))``, rounded to four
    decimal places. A site is retained when it lies on chr1..chr22, its
    genotype is "1/1" or "1|1", its dp is greater than 30 and its gq is
    greater than 20. Sites whose depth sum or reference freq is zero
    contribute 0 to the sum but are still counted in the mean.

    Args:
        df(pd.DataFrame): dataframe of parsed vcf. The columns "chrom",
            "position", "gt", "dp", "gq" and "ad" are read; "ad" is a
            comma-separated string whose first two fields are used as the
            reference and alternate allele depth. df is not modified.
    Returns:
        float, charr score; 0.0 when no site passes the filters
    Raises:
        ValueError: if one of the first two "ad" fields is not an integer
        IndexError: if "ad" has fewer than two fields
    """
    # autosome
    autosomes = [f"chr{i}" for i in range(1, 23)]
    sites = df[df["chrom"].isin(autosomes)]
    # The filters are applied one after another (not as one combined mask) so
    # the dp/gq comparison only ever sees rows that survived the earlier ones.
    sites = sites[sites["gt"].isin(["1/1", "1|1"])]  # homozygous variant sites
    sites = sites[(sites["dp"] > 30) & (sites["gq"] > 20)]  # depth / quality filter

    n_sites = sites.shape[0]
    if n_sites == 0:
        return 0.0  # no site after filter; bail out before any per-site work

    # Reference freq per (chrom, position), read from self.charr_required_freq.
    # Sites missing from the table fall back to 0.0001 so the division below
    # never sees a missing value.
    # NOTE(review): a fallback this small gives such sites a very large weight
    # whenever they carry reference reads - confirm this is intended.
    freq_of = self.charr_required_freq.get

    # One pass over the three needed columns instead of two row-wise
    # DataFrame.apply calls and a temporary "freq" column.
    ratios = []
    for chrom, position, ad_field in zip(sites["chrom"], sites["position"], sites["ad"]):
        ref_freq = float(freq_of((chrom, position), 0.0001))
        depths = ad_field.split(",")  # allele depth
        ref_depth = int(depths[0])
        alt_depth = int(depths[1])
        total_depth = ref_depth + alt_depth
        if total_depth == 0 or ref_freq == 0:
            ratios.append(0.0)  # The denominator cannot be zero
        else:
            ratios.append(ref_depth / (ref_freq * total_depth))

    # Series.sum() skips NaN, matching the summation the row-wise version used.
    mean_ratio = pd.Series(ratios, dtype="float64").sum() / n_sites
    # mean and retain four decimal places; float() yields a builtin float so
    # both return paths have the same type
    return float(round(mean_ratio, 4))