添加 share.py
This commit is contained in:
46
share.py
Normal file
46
share.py
Normal file
@@ -0,0 +1,46 @@
|
||||
def qc_charr_score(self, df: pd.DataFrame) -> float:
    """
    Calculate the CHARR score from a parsed VCF table.

    Only autosomal sites (``chr1`` .. ``chr22``) with a homozygous-alternate
    genotype (``1/1`` or ``1|1``), ``dp > 30`` and ``gq > 20`` are used.
    For each such site the score is::

        ad[0] / (freq * (ad[0] + ad[1]))

    where ``ad`` is the comma-separated allele-depth field and ``freq`` is
    looked up in ``self.charr_required_freq`` by ``(chrom, position)``.
    The result is the mean over all retained sites, rounded to four
    decimal places.

    Args:
        df(pd.DataFrame): dataframe of the parsed vcf; the columns
            ``chrom``, ``position``, ``gt``, ``dp``, ``gq`` and ``ad``
            are read.

    Returns:
        float, charr score; ``0.0`` when no site passes the filters.

    Raises:
        ValueError / IndexError: if an ``ad`` value is not a
            comma-separated list of at least two integers.
    """
    autosomes = [f"chr{i}" for i in range(1, 23)]
    hom_alt_genotypes = ["1/1", "1|1"]
    # Fallback for sites missing from the frequency table; a small non-zero
    # value keeps the denominator away from zero.
    default_freq = 0.0001

    keep = (
        df["chrom"].isin(autosomes)
        & df["gt"].isin(hom_alt_genotypes)
        & (df["dp"] > 30)
        & (df["gq"] > 20)
    )
    sites = df.loc[keep, ["chrom", "position", "ad"]]

    # Bail out before any per-row work: a row-wise apply on an empty frame
    # yields a DataFrame rather than a Series, which cannot be stored in a
    # single column.
    if sites.empty:
        return 0.0

    # NOTE(review): assumes self.charr_required_freq is a dict-like keyed by
    # (chrom, position) tuples -- confirm against where it is populated.
    freq_by_site = self.charr_required_freq

    total = 0.0
    for chrom, position, ad_field in zip(
        sites["chrom"], sites["position"], sites["ad"]
    ):
        ad = [int(count) for count in ad_field.split(",")]  # allele depth
        depth = ad[0] + ad[1]
        freq = float(freq_by_site.get((chrom, position), default_freq))
        if depth == 0 or freq == 0:
            # The denominator cannot be zero; the site contributes 0 but
            # still counts towards the mean.
            continue
        total += ad[0] / (freq * depth)

    # mean over retained sites, keeping four decimal places
    return round(total / len(sites), 4)
|
||||
Reference in New Issue
Block a user