% Robinson et al. 2017, PLoS Genetics 13(6): variation in HLA class I (HLA-A, -B, -C) allele sequences.
% The numeric citation key is kept unchanged so that existing citations keep resolving.
@article{315540,
  author   = {Robinson, J. and Guethlein, L. A. and Cereb, N. and Yang, S. Y. and Norman, P. J. and Marsh, S. G. E. and Parham, P.},
  title    = {Distinguishing functional polymorphism from random variation in the sequences of {$>$}10,000 {HLA-A}, {-B} and {-C} alleles},
  journal  = {PLoS Genetics},
  volume   = {13},
  number   = {6},
  pages    = {e1006862},
  year     = {2017},
  month    = jun,
  issn     = {1553-7390},
  doi      = {10.1371/journal.pgen.1006862},
  url      = {https://www.ncbi.nlm.nih.gov/pubmed/28650991},
  abstract = {HLA class I glycoproteins contain the functional sites that bind peptide antigens and engage lymphocyte receptors. Recently, clinical application of sequence-based HLA typing has uncovered an unprecedented number of novel HLA class I alleles. Here we define the nature and extent of the variation in 3,489 HLA-A, 4,356 HLA-B and 3,111 HLA-C alleles. This analysis required development of suites of methods, having general applicability, for comparing and analyzing large numbers of homologous sequences. At least three amino-acid substitutions are present at every position in the polymorphic $\alpha$1 and $\alpha$2 domains of HLA-A, -B and -C. A minority of positions have an incidence $>$1\% for the {\textquoteleft}second{\textquoteright} most frequent nucleotide, comprising 70 positions in HLA-A, 85 in HLA-B and 54 in HLA-C. The majority of these positions have three or four alternative nucleotides. These positions were subject to positive selection and correspond to binding sites for peptides and receptors. Most alleles of HLA class I ($>$80\%) are very rare, often identified in one person or family, and they differ by point mutation from older, more common alleles. These alleles with single nucleotide polymorphisms reflect the germ-line mutation rate. Their frequency predicts the human population harbors 8--9 million HLA class I variants. The common alleles of human populations comprise 42 core alleles, which represent all selected polymorphism, and recombinants that have assorted this polymorphism.},
}