% PubMed 39261641. The export's empty volume/number/pages placeholders are
% omitted; ampersands in the grant list are escaped for LaTeX.
@article{pmid39261641,
  author   = {Sirén, J. and Eskandar, P. and Ungaro, M. T. and Hickey, G. and Eizenga, J. M. and Novak, A. M. and Chang, X. and Chang, P. C. and Kolmogorov, M. and Carroll, A. and Monlong, J. and Paten, B.},
  title    = {Personalized pangenome references},
  journal  = {Nature Methods},
  year     = {2024},
  pmid     = {39261641},
  issn     = {1548-7105},
  support  = {R01HG010485//U.S. Department of Health \& Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; U24HG010262//U.S. Department of Health \& Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; U24HG011853//U.S. Department of Health \& Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; U01HG010961//U.S. Department of Health \& Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; OT2OD033761//U.S. Department of Health \& Human Services | National Institutes of Health (NIH)/ ; OT3HL142481//U.S. Department of Health \& Human Services | NIH | National Heart, Lung, and Blood Institute (NHLBI)/ ; },
  abstract = {Pangenomes reduce reference bias by representing genetic diversity better than a single reference sequence. Yet when comparing a sample to a pangenome, variants in the pangenome that are not part of the sample can be misleading, for example, causing false read mappings. These irrelevant variants are generally rarer in terms of allele frequency, and have previously been dealt with by filtering rare variants. However, this blunt heuristic both fails to remove some irrelevant variants and removes many relevant variants. We propose a new approach that imputes a personalized pangenome subgraph by sampling local haplotypes according to k-mer counts in the reads. We implement the approach in the vg toolkit (https://github.com/vgteam/vg) for the Giraffe short-read aligner and compare its accuracy to state-of-the-art methods using human pangenome graphs from the Human Pangenome Reference Consortium.
This reduces small variant genotyping errors by four times relative to the Genome Analysis Toolkit and makes short-read structural variant genotyping of known variants competitive with long-read variant discovery methods.},
}
% PubMed 39259908. The export's empty volume/number/pages placeholders are omitted.
@article{pmid39259908,
  author   = {Thorgersen, M. P. and Goff, J. L. and Trotter, V. V. and Poole, F. L. and Arkin, A. P. and Deutschbauer, A. M. and Adams, M. W. W.},
  title    = {Fitness factors impacting survival of a subsurface bacterium in contaminated groundwater},
  journal  = {The ISME Journal},
  year     = {2024},
  doi      = {10.1093/ismejo/wrae176},
  pmid     = {39259908},
  issn     = {1751-7370},
  abstract = {Many factors contribute to the ability of a microbial species to persist when encountering complexly contaminated environments including time of exposure, the nature and concentration of contaminants, availability of nutritional resources, and possession of a combination of appropriate molecular mechanisms needed for survival. Herein we sought to identify genes that are most important for survival of Gram-negative Enterobacteriaceae in contaminated groundwater environments containing high concentrations of nitrate and metals using the metal-tolerant Oak Ridge Reservation (ORR) isolate, Pantoea sp. MT58 (MT58). Survival fitness experiments in which a randomly barcoded transposon insertion (RB-TnSeq) library of MT58 was exposed directly to contaminated ORR groundwater samples from across a nitrate and mixed metal contamination plume were used to identify genes important for survival with increasing exposure times and concentrations of contaminants, and availability of a carbon source. Genes involved in controlling and using carbon, encoding transcriptional regulators, and related to Gram-negative outer membrane processes were among those found to be important for survival in contaminated ORR groundwater. A comparative genomics analysis of 75 Pantoea genus strains allowed us to further separate the survival determinants into core and non-core genes in the Pantoea pangenome, revealing insights into the survival of subsurface microorganisms during contaminant plume intrusion.},
}
% PubMed 39257004. `pages` holds the article number. Bracketed tokens such as
% D[v] are PubMed's plain-text rendering of superscripts and are kept verbatim.
@article{pmid39257004,
  author   = {Liu, Z. and Yang, F. and Wan, H. and Deng, C. and Hu, W. and Fan, X. and Wang, J. and Yang, M. and Feng, J. and Wang, Q. and Yang, N. and Cai, L. and Liu, Y. and Tang, H. and Li, S. and Luo, J. and Zheng, J. and Wu, L. and Yang, E. and Pu, Z. and Jia, J. and Li, J. and Yang, W.},
  title    = {Genome architecture of the allotetraploid wild grass {Aegilops} ventricosa reveals its evolutionary history and contributions to wheat improvement},
  journal  = {Plant Communications},
  year     = {2024},
  pages    = {101131},
  doi      = {10.1016/j.xplc.2024.101131},
  pmid     = {39257004},
  issn     = {2590-3462},
  abstract = {The allotetraploid wild grass Aegilops ventricosa (2n=4X=28, genome D[v]D[v]N[v]N[v]) has been recognized as an important germplasm resource for wheat improvement due to its ability to tolerate biotic stresses. Especially 2N[v]S segment from Aegilops ventricosa, as a stable and effective resistance source, has greatly contributed to wheat improvement. The 2N[v]S/2AS translocation is a prevalent chromosomal translocation between common wheat and wild relatives, ranking just behind the 1B/1R translocation in importance for modern wheat breeding. Here, we assembled a high-quality chromosome-level reference genome of Ae. ventricosa RM271 with a total length of 8.67 Gb. Phylogenomic analyses revealed that the progenitor of the D[v] subgenome of Ae. ventricosa was Ae. tauschii ssp. tauschii (genome DD); in contrast, the progenitor of the D subgenome of bread wheat (Triticum aestivum L.) was Ae. tauschii ssp. strangulata (genome DD). The oldest polyploidization time of Ae. ventricosa occurred ∼0.7 million years ago. The D[v] subgenome of Ae. ventricosa was less conserved than the D subgenome of bread wheat. Construction of a graph-based pangenome of 2AS/6N[v]L (originally known as 2N[v]S) segments from Ae. ventricosa and other genomes in the Triticeae enables us identifying candidate resistance genes sourced from Ae. ventricosa.
We identified 12 nonredundant introgressed segments from the D[v] and N[v] subgenomes using a large winter wheat collection representing the full diversity of the wheat European genetic pool, and 29.40\% of European wheat varieties inherited at least one of these segments. The high-quality RM271 reference genome will provide a basis for cloning key genes, including the Yr17-Lr37-Sr38-Cre5 resistance gene cluster in Ae. ventricosa, and facilitate the full use of elite wild genetic resources to accelerate wheat improvement.},
}
% PubMed 39256695. `pages` holds the article number. Underscores in the gene
% identifier are escaped so the abstract can be typeset.
@article{pmid39256695,
  author   = {Li, X. and Huo, L. and Li, X. and Zhang, C. and Gu, M. and Fan, J. and Xu, C. and Gong, J. and Hu, X. and Zheng, Y. and Sun, X.},
  title    = {Genomes of diverse {Actinidia} species provide insights into cis-regulatory motifs and genes associated with critical traits},
  journal  = {BMC Biology},
  year     = {2024},
  volume   = {22},
  number   = {1},
  pages    = {200},
  pmid     = {39256695},
  issn     = {1741-7007},
  support  = {LR23C150001//Zhejiang Provincial Natural Science Foundation of China/ ; },
  abstract = {BACKGROUND: Kiwifruit, belonging to the genus Actinidia, represents a unique fruit crop characterized by its modern cultivars being genetically diverse and exhibiting remarkable variations in morphological traits and adaptability to harsh environments. However, the genetic mechanisms underlying such morphological diversity remain largely elusive.
RESULTS: We report the high-quality genomes of five Actinidia species, including Actinidia longicarpa, A. macrosperma, A. polygama, A. reticulata, and A. rufa. Through comparative genomics analyses, we identified three whole genome duplication events shared by the Actinidia genus and uncovered rapidly evolving gene families implicated in the development of characteristic kiwifruit traits, including vitamin C (VC) content and fruit hairiness. A range of structural variations were identified, potentially contributing to the phenotypic diversity in kiwifruit. Notably, phylogenomic analyses revealed 76 cis-regulatory elements within the Actinidia genus, predominantly associated with stress responses, metabolic processes, and development. Among these, five motifs did not exhibit similarity to known plant motifs, suggesting the presence of possible novel cis-regulatory elements in kiwifruit. Construction of a pan-genome encompassing the nine Actinidia species facilitated the identification of gene DTZ79\_23g14810 specific to species exhibiting extraordinarily high VC content. Expression of DTZ79\_23g14810 is significantly correlated with the dynamics of VC concentration, and its overexpression in the transgenic roots of kiwifruit plants resulted in increased VC content.
CONCLUSIONS: Collectively, the genomes and pan-genome of diverse Actinidia species not only enhance our understanding of fruit development but also provide a valuable genomic resource for facilitating the genome-based breeding of kiwifruit.},
}
% PubMed 39253572. `pages` holds the article number; the export's empty
% `number` placeholder is omitted.
@article{pmid39253572,
  author   = {Duan, S. and Yan, L. and Shen, Z. and Li, X. and Chen, B. and Li, D. and Qin, H. and Meegahakumbura, M. K. and Wambulwa, M. C. and Gao, L. and Chen, W. and Dong, Y. and Sheng, J.},
  title    = {Genomic analyses of agronomic traits in tea plants and related {Camellia} species},
  journal  = {Frontiers in Plant Science},
  year     = {2024},
  volume   = {15},
  pages    = {1449006},
  doi      = {10.3389/fpls.2024.1449006},
  pmid     = {39253572},
  issn     = {1664-462X},
  abstract = {The genus Camellia contains three types of domesticates that meet various needs of ancient humans: the ornamental C. japonica, the edible oil-producing C. oleifera, and the beverage-purposed tea plant C. sinensis. The genomic drivers of the functional diversification of Camellia domesticates remain unknown. Here, we present the genomic variations of 625 Camellia accessions based on a new genome assembly of C. sinensis var. assamica (`YK10'), which consists of 15 pseudo-chromosomes with a total length of 3.35 Gb and a contig N50 of 816,948 bp. These accessions were mainly distributed in East Asia, South Asia, Southeast Asia, and Africa. We profiled the population and subpopulation structure in tea tree Camellia to find new evidence for the parallel domestication of C. sinensis var. assamica (CSA) and C. sinensis var. sinensis (CSS). We also identified candidate genes associated with traits differentiating CSA, CSS, oilseed Camellia, and ornamental Camellia cultivars. Our results provide a unique global view of the genetic diversification of Camellia domesticates and provide valuable resources for ongoing functional and molecular breeding research.},
}
% PubMed 39252931. medRxiv preprint: the server name is kept in `journal` so
% @article styles still render it, and the preprint status is recorded in `note`.
@article{pmid39252931,
  author   = {Stanley, S. and Silva-Costa, C. and Gomes-Silva, J. and Melo-Cristino, J. and Malley, R. and Ramirez, M.},
  title    = {{CC180} clade dynamics does not universally explain {Streptococcus} pneumoniae serotype 3 persistence post-vaccine: a global comparative population genomics study},
  journal  = {medRxiv},
  year     = {2024},
  note     = {Preprint},
  doi      = {10.1101/2024.08.29.24312665},
  pmid     = {39252931},
  abstract = {BACKGROUND: Clonal complex 180 (CC180) is currently the major clone of serotype 3 Streptococcus pneumoniae (Spn). The 13-valent pneumococcal conjugate vaccine (PCV13) does not have significant efficacy against serotype 3 despite polysaccharide inclusion in the vaccine. It was hypothesized that PCV13 may effectively control Clade I of CC180 but that Clades III and IV are resistant, provoking a population shift that enables serotype 3 persistence. This has been observed in the United States, England, and Wales but not Spain. We tested this hypothesis further utilizing a dataset from Portugal.
METHODS: We whole-genome sequenced (WGS) 501 serotype 3 strains from Portugal isolated from patients with pneumococcal infections between 1999-2020. The draft genomes underwent phylogenetic analyses, pangenome profiling, and a genome-wide association study (GWAS). We also completed antibiotic susceptibility testing and compiled over 2,600 serotype 3 multilocus sequence type 180 (MLST180) WGSs to perform global comparative genomics.
FINDINGS: CC180 Clades I, II, III, IV, and VI distributions were similar when comparing non-invasive pneumonia isolates and invasive disease isolates (Fisher's exact test, P=0.29), and adult and pediatric cases (Fisher's exact test, P=0.074). The serotype 3 CCs shifted post-PCV13 (Fisher's exact test, P<0.0001) and Clade I became dominant. Clade I is largely antibiotic-sensitive and carries the ΦOXC141 prophage but the pangenome is heterogenous. Strains from Portugal and Spain, where Clade I remains dominant post-PCV13, have larger pangenomes and are associated with the presence of two genes encoding hypothetical proteins.
INTERPRETATION: Clade I became dominant in Portugal post-PCV13, despite the burden of the prophage and antibiotic sensitivity. The accessory genome content may mitigate these fitness costs. Regional differences in Clade I prevalence and pangenome heterogeneity suggest that clade dynamics is not a generalizable approach to understanding serotype 3 vaccine escape.
FUNDING: National Institute of Child Health and Human Development, Pfizer, and Merck Sharp \& Dohme.
RESEARCH IN CONTEXT: Evidence before this study: We conducted this study because of the mounting interest surrounding the changing prevalence of serotype 3 Streptococcus pneumoniae (Spn) genetic lineages and the potential association with escape from 13-valent pneumococcal conjugate vaccine (PCV13) control. To inform our investigation, we searched the PubMed database using different combinations of the following keywords: ``Streptococcus pneumoniae'', ``serotype 3'', ``CC180'', ``PCV13'', ``Clade Iα'', ``Clade Iβ'', and ``Clade II''. The search included all English language primary research articles published before July 1st, 2024; this language limitation may bias the results of our assessment. Most ST3 isolates belong to clonal complex 180 (CC180), and one study identified three major lineages within CC180: Clade Iα, Clade Iβ, and Clade II. This study observed a global trend of increasing Clade II prevalence with a concomitant decrease in Clade I prevalence over time, which was associated with the introduction of PCV13 in the United States. A report from England and Wales made a similar observation. It was therefore hypothesized that PCV13 may be effective at controlling Clade Iα and that Clade II is driving vaccine escape. Later work refined the clade classification system as follows: Clade I (Clade Iα), Clades II and VI (Clade Iβ), Clades III and IV (Clade II), and Clade V. Clade I strains are marked by a significantly lower recombination rate partly due to the presence of a lineage-specific prophage interfering with competence development, which is a potential mechanism explaining the possible reduced fitness of Clade I. Clade I is also noted to be mostly antibiotic-susceptible. However, a recent study found that Clade I persists as a dominant serotype 3 lineage in Spain, so the generalizability and implications of clade dynamics remain unclear.
Added value of this study: Early work assessing the association between changes in serotype 3 clade prevalence and PCV13 was limited by small sample sizes. In addition, studies investigating differences in clade dynamics did not comprehensively consider patient age or disease manifestations such as non-invasive pneumonia and invasive infections. In this study, we evaluated 501 serotype 3 strains from Portugal to investigate clade dynamics. This must be explored in different geographic contexts for a more robust understanding of changing serotype 3 population genomics. We also sought to define genetic determinants linked to strains from regions in which Clade I remains dominant. This is an important step towards a more mechanistic understanding of the serotype 3 CC180 lineage fitness landscape. Implications of all the available evidence: Unlike other serotypes covered by PCV13, serotype 3 has evaded vaccine control. It has been suggested that Clade I prevalence has decreased due to PCV13, which has created an expanded niche for strains from other clades and ultimately renders PCV13 less effective against serotype 3. This postulation has important implications for the future design of an improved vaccine, so this hypothesis must be thoroughly tested in diverse contexts. We find that Clade I remains the dominant lineage in Portugal even after the introduction of PCV13. We delineate Clade I pangenome heterogeneity and show that strains from Portugal and Spain share similar pangenome features in contrast to Clade I strains from regions where Clade I decreased in prevalence, which should motivate future studies to elucidate more generalizable population genomics trends that may better inform strategies for the design of an improved vaccine.},
}
% PubMed 39251928. `pages` holds the article number.
@article{pmid39251928,
  author   = {Zorigt, T. and Furuta, Y. and Paudel, A. and Kamboyi, H. K. and Shawa, M. and Chuluun, M. and Sugawara, M. and Enkhtsetseg, N. and Enkhtuya, J. and Battsetseg, B. and Munyeme, M. and Hang'ombe, B. M. and Higashi, H.},
  title    = {Pan-genome analysis reveals novel chromosomal markers for multiplex {PCR}-based specific detection of {Bacillus} anthracis},
  journal  = {BMC Infectious Diseases},
  year     = {2024},
  volume   = {24},
  number   = {1},
  pages    = {942},
  pmid     = {39251928},
  issn     = {1471-2334},
  support  = {23K19460//Japan Society for the Promotion of Science (JSPS) under Grants-in-Aid for Scientific Research (KAKENHI)/ ; 21K15430//Japan Society for the Promotion of Science (JSPS) under Grants-in-Aid for Scientific Research (KAKENHI)/ ; 18K19436//Japan Society for the Promotion of Science (JSPS) under Grants-in-Aid for Scientific Research (KAKENHI)/ ; JP23wm0125008//The Japan Program for Infectious Diseases Research and Infrastructure (JIDRI) from the Japan Agency for Medical Research and Development (AMED)/ ; },
  abstract = {BACKGROUND: Bacillus anthracis is a highly pathogenic bacterium that can cause lethal infection in animals and humans, making it a significant concern as a pathogen and biological agent. Consequently, accurate diagnosis of B. anthracis is critically important for public health. However, the identification of specific marker genes encoded in the B. anthracis chromosome is challenging due to the genetic similarity it shares with B. cereus and B. thuringiensis.
METHODS: The complete genomes of B. anthracis, B. cereus, B. thuringiensis, and B. weihenstephanensis were de novo annotated with Prokka, and these annotations were used by Roary to produce the pan-genome. B. anthracis exclusive genes were identified by Perl script, and their specificity was examined by nucleotide BLAST search. A local BLAST alignment was performed to confirm the presence of the identified genes across various B. anthracis strains. Multiplex polymerase chain reactions (PCR) were established based on the identified genes.
RESULT: The distribution of genes among 151 whole-genome sequences exhibited three distinct major patterns, depending on the bacterial species and strains. Further comparative analysis between the three groups uncovered thirty chromosome-encoded genes exclusively present in B. anthracis strains. Of these, twenty were found in known lambda prophage regions, and ten were in previously undefined region of the chromosome. We established three distinct multiplex PCRs for the specific detection of B. anthracis by utilizing three of the identified genes, BA1698, BA5354, and BA5361.
CONCLUSION: The study identified thirty chromosome-encoded genes specific to B. anthracis, encompassing previously described genes in known lambda prophage regions and nine newly discovered genes from an undefined gene region to the best of our knowledge. Three multiplex PCR assays offer an accurate and reliable alternative method for detecting B. anthracis. Furthermore, these genetic markers have value in anthrax vaccine development, and understanding the pathogenicity of B. anthracis.},
}
% PubMed 39251347. The export's empty volume/number/pages placeholders are
% omitted; percent signs in the abstract are escaped for LaTeX.
@article{pmid39251347,
  author   = {Ou, S. and Scheben, A. and Collins, T. and Qiu, Y. and Seetharam, A. S. and Menard, C. C. and Manchanda, N. and Gent, J. I. and Schatz, M. C. and Anderson, S. N. and Hufford, M. B. and Hirsch, C. N.},
  title    = {Differences in activity and stability drive transposable element variation in tropical and temperate maize},
  journal  = {Genome Research},
  year     = {2024},
  doi      = {10.1101/gr.278131.123},
  pmid     = {39251347},
  issn     = {1549-5469},
  abstract = {Much of the profound interspecific variation in genome content has been attributed to transposable elements (TEs). To explore the extent of TE variation within species, we developed an optimized open-source algorithm, panEDTA, to de novo annotate TEs in a pangenome context. We then generated a unified TE annotation for a maize pangenome derived from 26 reference-quality genomes, which reveals an excess of 35.1 Mb of TE sequences per genome in tropical maize relative to temperate maize. A small number (n = 216) of TE families, mainly LTR retrotransposons, drive these differences. Evidence from the methylome, transcriptome, LTR age distribution, and LTR insertional polymorphisms reveals that 64.7\% of the variability is contributed by LTR families that are young, less methylated, and more expressed in tropical maize, whereas 18.5\% is driven by LTR families with removal or loss in temperate maize. Additionally, we find enrichment for Young LTR families adjacent to nucleotide-binding and leucine-rich repeat (NLR) clusters of varying copy number across lines, suggesting TE activity may be associated with disease resistance in maize.},
}
% PubMed 39251346. The export's empty volume/number/pages placeholders are
% omitted; the percent sign in the abstract is escaped for LaTeX.
@article{pmid39251346,
  author   = {Hung, T. K. and Liu, W. C. and Lai, S. K. and Chuang, H. W. and Lee, Y. C. and Lin, H. Y. and Hsu, C. L. and Chen, C. Y. and Yang, Y. C. and Hsu, J. S. and Chen, P. L.},
  title    = {Genetic complexity of killer-cell immunoglobulin-like receptor genes in human pangenome assemblies},
  journal  = {Genome Research},
  year     = {2024},
  doi      = {10.1101/gr.278358.123},
  pmid     = {39251346},
  issn     = {1549-5469},
  abstract = {The killer-cell immunoglobulin-like receptor (KIR) gene complex, a highly polymorphic region of the human genome that encodes proteins involved in immune responses, poses strong challenges in genotyping owing to its remarkable genetic diversity and structural intricacy. Accurate analysis of KIR alleles, including their structural variations, is crucial for understanding their roles in various immune responses. Leveraging the high-quality genome assemblies from the Human Pangenome Reference Consortium (HPRC), we present a novel bioinformatic tool, the structural KIR annoTator (SKIRT), to investigate gene diversity and facilitate precise KIR allele analysis. In 47 HPRC-phased assemblies, SKIRT identifies a recurrent novel KIR2DS4/3DL1 fusion gene in the paternal haplotype of HG02630 and maternal haplotype of NA19240. Additionally, SKIRT accurately identifies eight structural variants and 15 novel nonsynonymous alleles, all of which are independently validated using short-read data or quantitative polymerase chain reaction. Our study has discovered a total of 570 novel alleles, among which eight haplotypes harbor at least one KIR gene duplication, six haplotypes have lost at least one framework gene, and 75 out of 94 haplotypes (79.8\%) carry at least five novel alleles, thus confirming KIR genetic diversity. These findings are pivotal in providing insights into KIR gene diversity and serve as a solid foundation for understanding the functional consequences of KIR structural variations.
High-resolution genome assemblies offer unprecedented opportunities to explore polymorphic regions that are challenging to investigate using short-read sequencing methods. The SKIRT pipeline emerges as a highly efficient tool, enabling the comprehensive detection of the complete spectrum of KIR alleles within human genome assemblies.},
}
% PubMed 39245770. `pages` holds the article number. The capitalisation
% "E. Coli" is kept exactly as the title appears in the source record.
@article{pmid39245770,
  author   = {Kenneally, C. and Murphy, C. P. and Sleator, R. D. and Culligan, E. P.},
  title    = {Genotypic and phenotypic characterisation of asymptomatic bacteriuria ({ABU}) isolates displaying bacterial interference against multi-drug resistant uropathogenic {E. Coli}},
  journal  = {Archives of Microbiology},
  year     = {2024},
  volume   = {206},
  number   = {10},
  pages    = {394},
  pmid     = {39245770},
  issn     = {1432-072X},
  mesh     = {Humans ; *Bacteriuria/microbiology ; *Uropathogenic Escherichia coli/genetics/drug effects/isolation \& purification/classification ; *Escherichia coli Infections/microbiology ; *Drug Resistance, Multiple, Bacterial/genetics ; *Genotype ; *Phenotype ; *Urinary Tract Infections/microbiology ; *Anti-Bacterial Agents/pharmacology ; Virulence/genetics ; Phylogeny ; Adult ; Virulence Factors/genetics ; Genome, Bacterial ; Microbial Sensitivity Tests ; },
  abstract = {Escherichia coli can colonise the urogenital tract of individuals without causing symptoms of infection, in a condition referred to as asymptomatic bacteriuria (ABU). ABU isolates can protect the host against symptomatic urinary tract infections (UTIs) by bacterial interference against uropathogenic E. coli (UPEC). The aim of this study was to investigate the genotypic and phenotypic characteristics of five ABU isolates from midstream urine samples of adults. Comparative genomic and phenotypic analysis was conducted including an antibiotic resistance profile, pangenome analysis, and a putative virulence profile. Based on the genome analysis, the isolates consisted of one from phylogroup A, three from phylogroup B2, and one from phylogroup D. Two of the isolates, PUTS 58 and SK-106-1, were noted for their lack of antibiotic resistance and virulence genes compared to the prototypic ABU strain E. coli 83,972.
This study provides insights into the genotypic and phenotypic profiles of uncharacterised ABU isolates, and how relevant fitness and virulence traits can impact their potential suitability for therapeutic bacterial interference.},
}
% PubMed 39244587. `pages` holds the article number; ampersands in the MeSH
% list are escaped for LaTeX.
@article{pmid39244587,
  author   = {Campbell, A. M. and Gavilan, R. G. and Abanto Marin, M. and Yang, C. and Hauton, C. and van Aerle, R. and Martinez-Urtaza, J.},
  title    = {Evolutionary dynamics of the successful expansion of pandemic {Vibrio} parahaemolyticus {ST3} in {Latin America}},
  journal  = {Nature Communications},
  year     = {2024},
  volume   = {15},
  number   = {1},
  pages    = {7828},
  pmid     = {39244587},
  issn     = {2041-1723},
  support  = {2021 SGR 00526//Generalitat de Catalunya (Government of Catalonia)/ ; NE/S007210/1//RCUK | Natural Environment Research Council (NERC)/ ; },
  mesh     = {*Vibrio parahaemolyticus/genetics/isolation \& purification/classification ; Latin America/epidemiology ; *Evolution, Molecular ; *Vibrio Infections/epidemiology/microbiology ; Humans ; *Phylogeny ; Genome, Bacterial/genetics ; Pandemics ; Mutation ; },
  abstract = {The underlying evolutionary mechanisms driving global expansions of pathogen strains are poorly understood. Vibrio parahaemolyticus is one of only two marine pathogens where variants have emerged in distinct climates globally. The success of a Vibrio parahaemolyticus clone (VpST3) in Latin America- the first spread identified outside its endemic region of tropical Asia- provided an invaluable opportunity to investigate mechanisms of VpST3 expansion into a distinct marine climate. A global collection of VpST3 isolates and novel Latin American isolates were used for evolutionary population genomics, pangenome analysis and combined with oceanic climate data. We found a VpST3 population (LatAm-VpST3) introduced in Latin America well before the emergence of this clone in India, previously considered the onset of the VpST3 epidemic. LatAm-VpST3 underwent successful adaptation to local conditions over its evolutionary divergence from Asian VpST3 isolates, to become dominant in Latin America. Selection signatures were found in genes providing resilience to the distinct marine climate.
Core genome mutations and accessory gene presences that promoted survival over long dispersals or increased environmental fitness were associated with environmental conditions. These results provide novel insights into the global expansion of this successful V. parahaemolyticus clone into regions with different climate scenarios.},
}
% PubMed 39243017. `pages` holds the article number.
% NOTE(review): the export's "Portwood Ii, JL" is read here as surname Portwood
% with generational suffix II and entered in "Last, Jr, First" form; confirm
% against the publisher record.
@article{pmid39243017,
  author   = {Kim, H. S. and Haley, O. C. and Portwood, II, J. L. and Harding, S. and Proctor, R. H. and Woodhouse, M. R. and Sen, T. Z. and Andorf, C. M.},
  title    = {{Fusarium Protein Toolkit}: a web-based resource for structural and variant analysis of {Fusarium} species},
  journal  = {BMC Microbiology},
  year     = {2024},
  volume   = {24},
  number   = {1},
  pages    = {326},
  pmid     = {39243017},
  issn     = {1471-2180},
  support  = {5010-11420-001-000-D and 5010-42000-053-000-D//USDA, Agricultural Research Service, United States/ ; 0201-88888-003-000D and 0201-88888-002-000D//USDA, Agricultural Research Service, United States/ ; 5030-21000-072-00-D//USDA, Agricultural Research Service, United States/ ; 5010-11420-001-000-D and 5010-42000-053-000-D//USDA, Agricultural Research Service, United States/ ; 5010-11420-001-000-D and 5010-42000-053-000-D//USDA, Agricultural Research Service, United States/ ; 5030-21000-072-00-D//USDA, Agricultural Research Service, United States/ ; 2030-21000-056-000-D//USDA, Agricultural Research Service, United States/ ; 5030-21000-072-00-D//USDA, Agricultural Research Service, United States/ ; },
  mesh     = {*Fusarium/genetics/metabolism/classification ; *Fungal Proteins/genetics/chemistry/metabolism ; *Internet ; Genome, Fungal/genetics ; Genetic Variation ; Models, Molecular ; Software ; Protein Conformation ; },
  abstract = {BACKGROUND: The genus Fusarium poses significant threats to food security and safety worldwide because numerous species of the fungus cause destructive diseases and/or mycotoxin contamination in crops. The adverse effects of climate change are exacerbating some existing threats and causing new problems. These challenges highlight the need for innovative solutions, including the development of advanced tools to identify targets for control strategies.
DESCRIPTION: In response to these challenges, we developed the Fusarium Protein Toolkit (FPT), a web-based tool that allows users to interrogate the structural and variant landscape within the Fusarium pan-genome. The tool displays both AlphaFold and ESMFold-generated protein structure models from six Fusarium species. The structures are accessible through a user-friendly web portal and facilitate comparative analysis, functional annotation inference, and identification of related protein structures. Using a protein language model, FPT predicts the impact of over 270 million coding variants in two of the most agriculturally important species, Fusarium graminearum and F. verticillioides. To facilitate the assessment of naturally occurring genetic variation, FPT provides variant effect scores for proteins in a Fusarium pan-genome based on 22 diverse species. The scores indicate potential functional consequences of amino acid substitutions and are displayed as intuitive heatmaps using the PanEffect framework.
CONCLUSION: FPT fills a knowledge gap by providing previously unavailable tools to assess structural and missense variation in proteins produced by Fusarium. FPT has the potential to deepen our understanding of pathogenic mechanisms in Fusarium, and aid the identification of genetic targets for control strategies that reduce crop diseases and mycotoxin contamination. Such targets are vital to solving the agricultural problems incited by Fusarium, particularly evolving threats resulting from climate change. Thus, FPT has the potential to contribute to improving food security and safety worldwide.},
}
% PubMed 39242972. No abstract in the source record; the export's empty
% volume/number/pages placeholders are omitted.
@article{pmid39242972,
  author  = {Masignani, V. and Rappuoli, R. and Pizza, M.},
  title   = {Next generation of ``magic bullets'', solutions from the microbial pangenome},
  journal = {EMBO Molecular Medicine},
  year    = {2024},
  pmid    = {39242972},
  issn    = {1757-4684},
}
@article{pmid39238887,
  author        = {Najjari, A and Jabberi, M and Chérif, SF and Cherif, A and Ouzari, HI and Linares-Pastén, JA and Sghaier, H},
  title         = {Genome and pan-genome analysis of a new exopolysaccharide-producing bacterium {Pyschrobacillus} sp. isolated from iron ores deposit and insights into iron uptake.},
  journal       = {Frontiers in microbiology},
  year          = {2024},
  volume        = {15},
  pages         = {1440081},
  pmid          = {39238887},
  issn          = {1664-302X},
  internal-note = {NOTE(review): the title spells the genus "Pyschrobacillus" while the abstract uses "Psychrobacillus"; the title spelling is kept as exported -- confirm against the publisher record before changing it.},
  abstract      = {Bacterial exopolysaccharides (EPS) have emerged as one of the key players in the field of heavy metal-contaminated environmental bioremediation. This study aimed to characterize and evaluate the metal biosorption potential of EPS produced by a novel Psychrobacillus strain, NEAU-3TGS, isolated from an iron ore deposit at Tamra iron mine, northern Tunisia. Genomic and pan-genomic analysis of NEAU-3TGS bacterium with nine validated published Psychrobacillus species was also performed. The results showed that the NEAU-3TGS genome (4.48 Mb) had a mean GC content of 36%, 4,243 coding sequences and 14 RNA genes. Phylogenomic analysis and calculation of nucleotide identity (ANI) values (less than 95% for new species with all strains) confirmed that NEAU-3TGS represents a potential new species. Pangenomic analysis revealed that Psychrobacillus genomic diversity represents an "open" pangenome model with 33,091 homologous genes, including 65 core, 3,738 shell, and 29,288 cloud genes. Structural EPS characterization by attenuated total reflectance-Fourier transform infrared (ATR-FTIR) spectroscopy showed uronic acid and α-1,4-glycosidic bonds as dominant components of the EPS. X-ray diffraction (XRD) analysis revealed the presence of chitin, chitosan, and calcite CaCO3 and confirmed the amorphous nature of the EPS. Heavy metal bioabsorption assessment showed that iron and lead were more adsorbed than copper and cadmium.
Notably, the optimum activity was observed at 37°C, pH=7 and after 3 h contact of EPS with each metal. Genomic insights on iron acquisition and metabolism in Psychrobacillus sp. NEAU-3TGS suggested that no genes involved in siderophore biosynthesis were found, and only the gene cluster FeuABCD and trilactone hydrolase genes involved in the uptake of siderophores, iron transporter and exporter are present. Molecular modelling and docking of FeuA (protein peptidoglycan siderophore-binding protein) and siderophores ferrienterobactine [Fe[+3] (ENT)][-3] and ferribacillibactine [Fe[+3] (BB)][-3] ligand revealed that [Fe[+3] (ENT)][-3] binds to Phe122, Lys127, Ile100, Gln314, Arg215, Arg217, and Gln252. Almost the same for [Fe[+3] (ENT)][-3] in addition to Cys222 and Tyr229, but not Ile100. To the best of our knowledge, this is the first report on the characterization of EPS and the adsorption of heavy metals by Psychrobacillus species. The heavy metal removal capabilities may be advantageous for using these organisms in metal remediation.},
}
@article{pmid39237905,
  author   = {Cheng, R and Zhao, Z and Tang, Y and Gu, Y and Chen, G and Sun, Y and Wang, X},
  title    = {Genome-wide survey of {KT/HAK/KUP} genes in the genus {Citrullus} and analysis of their involvement in {K$^{+}$}-deficiency and drought stress responses in between {C.} lanatus and {C.} amarus.},
  journal  = {BMC genomics},
  year     = {2024},
  volume   = {25},
  number   = {1},
  pages    = {836},
  pmid     = {39237905},
  issn     = {1471-2164},
  support  = {HABL202228//Huai'an Natural Science Research Project/ ; JBGS[2021]072//Seed Industry Vitalization Research Projects of Jiangsu Province/ ; CARS-25//China Agriculture Research System of MOF and MARA/ ; },
  mesh     = {*Citrullus/genetics/metabolism/growth & development ; *Stress, Physiological/genetics ; *Plant Proteins/genetics/metabolism ; *Droughts ; *Phylogeny ; Potassium/metabolism ; Gene Expression Regulation, Plant ; Genome, Plant ; Multigene Family ; Cation Transport Proteins/genetics/metabolism ; Potassium Deficiency/genetics/metabolism ; Promoter Regions, Genetic ; },
  abstract = {BACKGROUND: The KT/HAK/KUP is the largest K[+] transporter family in plants, playing crucial roles in K[+] absorption, transport, and defense against environmental stress. Sweet watermelon is an economically significant horticultural crop belonging to the genus Citrullus, with a high demand for K[+] during its growth process. However, a comprehensive analysis of the KT/HAK/KUP gene family in watermelon has not been reported.
RESULTS: 14 KT/HAK/KUP genes were identified in the genomes of each of seven Citrullus species. These KT/HAK/KUPs in watermelon were unevenly distributed across seven chromosomes. Segmental duplication is the primary driving force behind the expansion of the KT/HAK/KUP family, subjected to purifying selection during domestication (Ka/Ks < 1), and all KT/HAK/KUPs exhibit conserved motifs and could be phylogenetically classified into four groups. The promoters of KT/HAK/KUPs contain numerous cis-regulatory elements related to plant growth and development, phytohormone response, and stress response. Under K[+] deficiency, the growth of watermelon seedlings was significantly inhibited, with cultivated watermelon experiencing greater impacts (canopy width, redox enzyme activity) compared to the wild type. All KT/HAK/KUPs in C. lanatus and C. amarus exhibit specific expression responses to K[+]-deficiency and drought stress by qRT-PCR. Notably, ClG42_07g0120700/CaPI482276_07g014010 were predominantly expressed in roots and were further induced by K[+]-deficiency and drought stress. Additionally, the K[+] transport capacity of ClG42_07g0120700 under low K[+] stress was confirmed by yeast functional complementation assay.
CONCLUSIONS: KT/HAK/KUP genes in watermelon were systematically identified and analyzed at the pangenome level and provide a foundation for understanding the classification and functions of the KT/HAK/KUPs in watermelon plants.},
}
@article{pmid39235714,
  author   = {de Oliva, BHD and do Nascimento, AB and de Oliveira, JP and Guidone, GHM and Schoeps, BL and Silva, LC and Barbosa, MGL and Montini, VH and de Oliveira Junior, AG and Rocha, SPD},
  title    = {Genomic insights into a {Proteus} mirabilis strain inducing avian cellulitis.},
  journal  = {Brazilian journal of microbiology},
  year     = {2024},
  pmid     = {39235714},
  issn     = {1678-4405},
  support  = {Finance Code 001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; },
  abstract = {Proteus mirabilis, a microorganism distributed in soil, water, and animals, is clinically known for causing urinary tract infections in humans. However, recent studies have linked it to skin infections in broiler chickens, termed avian cellulitis, which poses a threat to animal welfare. While Avian Pathogenic Escherichia coli (APEC) is the primary cause of avian cellulitis, few cases of P. mirabilis involvement are reported, raising questions about the factors facilitating such occurrences. This study employed a pan-genomic approach to investigate whether unique genes exist in P. mirabilis strains causing avian cellulitis. The genome of LBUEL-A33, a P. mirabilis strain known to cause this infection, was assembled, and compared with other P. mirabilis strains isolated from poultry and other sources. Additionally, in silico serogroup analysis was conducted. Results revealed numerous genes unique to the LBUEL-A33 strain. No function in cellulitis was identified for these genes, and in silico investigation of the virulence potential of LBUEL-A33's exclusive proteins proved inconclusive. These findings support that multiple factors are necessary for P. mirabilis to cause avian cellulitis. Furthermore, this species likely employs its own unique arsenal of virulence factors, as many identified mechanisms are analogous to those of E. coli.
While antigenic gene clusters responsible for serogroups were identified, no clear trend was observed, and the gene cluster of LBUEL-A33 did not show homology with any sequenced Proteus serogroups. These results reinforce the understanding that this disease is multifactorial, necessitating further research to unravel the mechanisms and underpin the development of control and prevention strategies.},
}
@article{pmid38736416,
  author   = {Brandenburg, JM and Stapleton, GS and Kline, KE and Khoury, J and Mallory, K and Machesky, KD and Ladd-Wilson, SG and Scholz, R and Freiman, J and Schwensohn, C and Palacios, A and Gieraltowski, L and Ellison, Z and Tolar, B and Webb, HE and Tagg, KA and Salah, Z and Nichols, M},
  title    = {{Salmonella Hadar} linked to two distinct transmission vehicles highlights challenges to enteric disease outbreak investigations.},
  journal  = {Epidemiology and infection},
  year     = {2024},
  volume   = {152},
  pages    = {e86},
  doi      = {10.1017/S0950268824000682},
  pmid     = {38736416},
  issn     = {1469-4409},
  mesh     = {*Disease Outbreaks ; Animals ; *Salmonella/genetics/classification/isolation & purification ; *Turkeys/microbiology ; *Poultry Diseases/epidemiology/microbiology/transmission ; Humans ; Salmonella Infections, Animal/epidemiology/transmission/microbiology ; Chickens/microbiology ; Multilocus Sequence Typing ; Ducks/microbiology ; Poultry/microbiology ; Salmonella Infections/epidemiology/transmission/microbiology ; },
  abstract = {In 2020, an outbreak of Salmonella Hadar illnesses was linked to contact with non-commercial, privately owned (backyard) poultry including live chickens, turkeys, and ducks, resulting in 848 illnesses. From late 2020 to 2021, this Salmonella Hadar strain caused an outbreak that was linked to ground turkey consumption. Core genome multilocus sequence typing (cgMLST) analysis determined that the Salmonella Hadar isolates detected during the outbreak linked to backyard poultry and the outbreak linked to ground turkey were closely related genetically (within 0-16 alleles). Epidemiological and traceback investigations were unable to determine how Salmonella Hadar detected in backyard poultry and ground turkey were linked, despite this genetic relatedness.
Enhanced molecular characterization methods, such as analysis of the pangenome of Salmonella isolates, might be necessary to understand the relationship between these two outbreaks. Similarly, enhanced data collection during outbreak investigations and further research could potentially aid in determining whether these transmission vehicles are truly linked by a common source and what reservoirs exist across the poultry industries that allow Salmonella Hadar to persist. Further work combining epidemiological data collection, more detailed traceback information, and genomic analysis tools will be important for monitoring and investigating future enteric disease outbreaks.},
}
@article{pmid39232174,
  author   = {Bolognini, D and Halgren, A and Lou, RN and Raveane, A and Rocha, JL and Guarracino, A and Soranzo, N and Chin, CS and Garrison, E and Sudmant, PH},
  title    = {Recurrent evolution and selection shape structural diversity at the amylase locus.},
  journal  = {Nature},
  year     = {2024},
  pmid     = {39232174},
  issn     = {1476-4687},
  abstract = {The adoption of agriculture triggered a rapid shift towards starch-rich diets in human populations[1]. Amylase genes facilitate starch digestion, and increased amylase copy number has been observed in some modern human populations with high-starch intake[2], although evidence of recent selection is lacking[3,4]. Here, using 94 long-read haplotype-resolved assemblies and short-read data from approximately 5,600 contemporary and ancient humans, we resolve the diversity and evolutionary history of structural variation at the amylase locus. We find that amylase genes have higher copy numbers in agricultural populations than in fishing, hunting and pastoral populations. We identify 28 distinct amylase structural architectures and demonstrate that nearly identical structures have arisen recurrently on different haplotype backgrounds throughout recent human history. AMY1 and AMY2A genes each underwent multiple duplication/deletion events with mutation rates up to more than 10,000-fold the single-nucleotide polymorphism mutation rate, whereas AMY2B gene duplications share a single origin. Using a pangenome-based approach, we infer structural haplotypes across thousands of humans identifying extensively duplicated haplotypes at higher frequency in modern agricultural populations. Leveraging 533 ancient human genomes, we find that duplication-containing haplotypes (with more gene copies than the ancestral haplotype) have rapidly increased in frequency over the past 12,000 years in West Eurasians, suggestive of positive selection.
Together, our study highlights the potential effects of the agricultural revolution on human genomes and the importance of structural variation in human adaptation.},
}
@article{pmid39232082,
  author   = {Rinker, DC and Sauters, TJC and Steffen, K and Gumilang, A and Raja, HA and Rangel-Grimaldo, M and Pinzan, CF and de Castro, PA and Dos Reis, TF and Delbaje, E and Houbraken, J and Goldman, GH and Oberlies, NH and Rokas, A},
  title    = {Strain heterogeneity in a non-pathogenic {Aspergillus} fungus highlights factors associated with virulence.},
  journal  = {Communications biology},
  year     = {2024},
  volume   = {7},
  number   = {1},
  pages    = {1082},
  pmid     = {39232082},
  issn     = {2399-3642},
  support  = {R01 AI153356/AI/NIAID NIH HHS/United States ; DEB-2110404//National Science Foundation (NSF)/ ; },
  mesh     = {Animals ; Virulence ; *Aspergillus/pathogenicity/genetics/metabolism ; Mice ; Gliotoxin/metabolism ; Disease Models, Animal ; Pulmonary Aspergillosis/microbiology ; Female ; Genome, Fungal ; },
  abstract = {Fungal pathogens exhibit extensive strain heterogeneity, including variation in virulence. Whether closely related non-pathogenic species also exhibit strain heterogeneity remains unknown. Here, we comprehensively characterized the pathogenic potentials (i.e., the ability to cause morbidity and mortality) of 16 diverse strains of Aspergillus fischeri, a non-pathogenic close relative of the major pathogen Aspergillus fumigatus. In vitro immune response assays and in vivo virulence assays using a mouse model of pulmonary aspergillosis showed that A. fischeri strains varied widely in their pathogenic potential. Furthermore, pangenome analyses suggest that A. fischeri genomic and phenotypic diversity is even greater. Genomic, transcriptomic, and metabolic profiling identified several pathways and secondary metabolites associated with variation in virulence. Notably, strain virulence was associated with the simultaneous presence of the secondary metabolites hexadehydroastechrome and gliotoxin. We submit that examining the pathogenic potentials of non-pathogenic close relatives is key for understanding the origins of fungal pathogenicity.},
}
@article{pmid39232008,
  author   = {Veseli, I and DeMers, MA and Cooper, ZS and Schechter, MS and Miller, S and Weber, L and Smith, CB and Rodriguez, LT and Schroer, WF and McIlvin, MR and Lopez, PZ and Saito, M and Dyhrman, S and Eren, AM and Moran, MA and Braakman, R},
  title    = {{Digital Microbe}: a genome-informed data integration framework for team science on emerging model organisms.},
  journal  = {Scientific data},
  year     = {2024},
  volume   = {11},
  number   = {1},
  pages    = {967},
  pmid     = {39232008},
  issn     = {2052-4463},
  support  = {1746045//National Science Foundation (NSF)/ ; OCE-2019589//National Science Foundation (NSF)/ ; 542391//Simons Foundation/ ; },
  mesh     = {*Genome, Bacterial ; Genomics ; Software ; Flavobacteriaceae/genetics ; },
  abstract = {The remarkable pace of genomic data generation is rapidly transforming our understanding of life at the micron scale. Yet this data stream also creates challenges for team science. A single microbe can have multiple versions of genome architecture, functional gene annotations, and gene identifiers; additionally, the lack of mechanisms for collating and preserving advances in this knowledge raises barriers to community coalescence around shared datasets.
"Digital Microbes" are frameworks for interoperable and reproducible collaborative science through open source, community-curated data packages built on a (pan)genomic foundation. Housed within an integrative software environment, Digital Microbes ensure real-time alignment of research efforts for collaborative teams and facilitate novel scientific insights as new layers of data are added. Here we describe two Digital Microbes: 1) the heterotrophic marine bacterium Ruegeria pomeroyi DSS-3 with > 100 transcriptomic datasets from lab and field studies, and 2) the pangenome of the cosmopolitan marine heterotroph Alteromonas containing 339 genomes. Examples demonstrate how an integrated framework collating public (pan)genome-informed data can generate novel and reproducible findings.},
}
@article{pmid39228856,
  author   = {Fan, X and Chen, L and Chen, M and Zhang, N and Chang, H and He, M and Shen, Z and Zhang, L and Ding, H and Xie, Y and Huang, Y and Ke, W and Xiao, M and Zang, X and Xu, H and Fang, W and Li, S and Cao, C and Xu, Y and Shan, S and Wu, W and Chen, C and Xue, X and Wang, L},
  title    = {Pan-omics-based characterization and prediction of highly multidrug-adapted strains from an outbreak fungal species complex.},
  journal  = {Innovation (Cambridge (Mass.))},
  year     = {2024},
  volume   = {5},
  number   = {5},
  pages    = {100681},
  pmid     = {39228856},
  issn     = {2666-6758},
  abstract = {Strains from the Cryptococcus gattii species complex (CGSC) have caused the Pacific Northwest cryptococcosis outbreak, the largest cluster of life-threatening fungal infections in otherwise healthy human hosts known to date. In this study, we utilized a pan-phenome-based method to assess the fitness outcomes of CGSC strains under 31 stress conditions, providing a comprehensive overview of 2,821 phenotype-strain associations within this pathogenic clade. Phenotypic clustering analysis revealed a strong correlation between distinct types of stress phenotypes in a subset of CGSC strains, suggesting that shared determinants coordinate their adaptations to various stresses. Notably, a specific group of strains, including the outbreak isolates, exhibited a remarkable ability to adapt to all three of the most commonly used antifungal drugs for treating cryptococcosis (amphotericin B, 5-fluorocytosine, and fluconazole). By integrating pan-genomic and pan-transcriptomic analyses, we identified previously unrecognized genes that play crucial roles in conferring multidrug resistance in an outbreak strain with high multidrug adaptation. From these genes, we identified biomarkers that enable the accurate prediction of highly multidrug-adapted CGSC strains, achieving maximum accuracy and area under the curve (AUC) of 0.79 and 0.86, respectively, using machine learning algorithms.
Overall, we developed a pan-omic approach to identify cryptococcal multidrug resistance determinants and predict highly multidrug-adapted CGSC strains that may pose significant clinical concern.},
}
@article{pmid39228791,
  author   = {Do, VH and Nguyen, VS and Nguyen, SH and Le, DQ and Nguyen, TT and Nguyen, CH and Ho, TH and Vo, NS and Nguyen, T and Nguyen, HA and Cao, MD},
  title    = {{PanKA}: Leveraging population pangenome to predict antibiotic resistance.},
  journal  = {iScience},
  year     = {2024},
  volume   = {27},
  number   = {9},
  pages    = {110623},
  pmid     = {39228791},
  issn     = {2589-0042},
  abstract = {Machine learning has the potential to be a powerful tool in the fight against antimicrobial resistance (AMR), a critical global health issue. Machine learning can identify resistance mechanisms from DNA sequence data without prior knowledge. The first step in building a machine learning model is a feature extraction from sequencing data. Traditional methods like single nucleotide polymorphism (SNP) calling and k-mer counting yield numerous, often redundant features, complicating prediction and analysis. In this paper, we propose PanKA, a method using the pangenome to extract a concise set of relevant features for predicting AMR. PanKA not only enables fast model training and prediction but also improves accuracy. Applied to the Escherichia coli and Klebsiella pneumoniae bacterial species, our model is more accurate than conventional and state-of-the-art methods in predicting AMR.},
}
@article{pmid39227987,
  author   = {Bonnici, V and Chicco, D},
  title    = {Seven quick tips for gene-focused computational pangenomic analysis.},
  journal  = {BioData mining},
  year     = {2024},
  volume   = {17},
  number   = {1},
  pages    = {28},
  pmid     = {39227987},
  issn     = {1756-0381},
  support  = {InfoLife//CINI (Consorzio Interuniversitario Nazionale per l'Informatica)/ ; Project Age-It (Ageing Well in an Ageing Society)//European Union - Next Generation EU programme/ ; ReGAInS//Ministero dell'Università e della Ricerca of Italy/ ; },
  abstract = {Pangenomics is a relatively new scientific field which investigates the union of all the genomes of a clade. The word pan means everything in ancient Greek; the term pangenomics originally regarded genomes of bacteria and was later intended to refer to human genomes as well. Modern bioinformatics offers several tools to analyze pangenomics data, paving the way to an emerging field that we can call computational pangenomics. Current computational power available for the bioinformatics community has made computational pangenomic analyses easy to perform, but this higher accessibility to pangenomics analysis also increases the chances to make mistakes and to produce misleading or inflated results, especially by beginners. To handle this problem, we present here a few quick tips for efficient and correct computational pangenomic analyses with a focus on bacterial pangenomics, by describing common mistakes to avoid and experienced best practices to follow in this field. We believe our recommendations can help the readers perform more robust and sound pangenomic analyses and to generate more reliable results.},
}
@article{pmid39221271,
  author   = {Trisakul, K and Hinwan, Y and Eisiri, J and Salao, K and Chaiprasert, A and Kamolwat, P and Tongsima, S and Campino, S and Phelan, J and Clark, TG and Faksri, K},
  title    = {Comparisons of genome assembly tools for characterization of {Mycobacterium} tuberculosis genomes using hybrid sequencing technologies.},
  journal  = {PeerJ},
  year     = {2024},
  volume   = {12},
  pages    = {e17964},
  pmid     = {39221271},
  issn     = {2167-8359},
  mesh     = {*Mycobacterium tuberculosis/genetics ; *Genome, Bacterial/genetics ; *High-Throughput Nucleotide Sequencing/methods ; Humans ; Polymorphism, Single Nucleotide/genetics ; Sequence Analysis, DNA/methods ; },
  abstract = {BACKGROUND: Next-generation sequencing of Mycobacterium tuberculosis, the infectious agent causing tuberculosis, is improving the understanding of genomic diversity of circulating lineages and strain-types, and informing knowledge of drug resistance mutations. An increasingly popular approach to characterizing M. tuberculosis genomes (size: 4.4 Mbp) and variants (e.g., single nucleotide polymorphisms (SNPs)) involves the de novo assembly of sequence data.
METHODS: We compared the performance of genome assembly tools (Unicycler, RagOut, and RagTag) on sequence data from nine drug resistant M. tuberculosis isolates (multi-drug (MDR) n = 1; pre-extensively-drug (pre-XDR) n = 8) generated using Illumina HiSeq, Oxford Nanopore Technology (ONT) PromethION, and PacBio platforms.
RESULTS: Our investigation found that Unicycler-based assemblies had significantly higher genome completeness (~98.7%; p values = 0.01) compared to other assembler tools (RagOut = 98.6%, and RagTag = 98.6%). The genome assembly sizes (bp) across isolates and sequencers based on RagOut was significantly longer (p values < 0.001) (4,418,574 ± 8,824 bp) than Unicycler and RagTag assemblies (Unicycler = 4,377,642 ± 55,257 bp, and RagTag = 4,380,711 ± 51,164 bp). RagOut-based assemblies had the fewest contigs (~32) and the longest genome size (4,418,574 bp; vs. H37Rv reference size 4,411,532 bp) and therefore were chosen for downstream analysis. Pan-genome analysis of Illumina and PacBio hybrid assemblies revealed the greatest number of detected genes (4,639 genes; H37Rv reference contains 3,976 genes), while Illumina and ONT hybrid assemblies produced the highest number of SNPs. The number of genes from hybrid assemblies with ONT and PacBio long-reads (mean: 4,620 genes) was greater than short-read assembly alone (4,478 genes). All nine RagOut hybrid genome assemblies detected known mutations in genes associated with MDR-TB and pre-XDR-TB.
CONCLUSIONS: Unicycler software performed the best in terms of achieving contiguous genomes, whereas RagOut improved the quality of Unicycler's genome assemblies by providing a longer genome size. Overall, our approach has demonstrated that short-read and long-read hybrid assembly can provide a more complete genome assembly than short-read assembly alone by detecting pan-genomes and more genes, including IS6110, and SNPs.},
}
@article{pmid39218842,
  author   = {Mane, RS and Prasad, BD and Sahni, S and Quaiyum, Z and Sharma, VK},
  title    = {Biotechnological studies towards improvement of finger millet using multi-omics approaches.},
  journal  = {Functional \& integrative genomics},
  year     = {2024},
  volume   = {24},
  number   = {5},
  pages    = {148},
  pmid     = {39218842},
  issn     = {1438-7948},
  mesh     = {*Plant Breeding/methods ; *Eleusine/genetics ; *Genomics/methods ; Gene Editing/methods ; Crops, Agricultural/genetics ; Genome, Plant ; Biotechnology ; Multiomics ; },
  abstract = {A plethora of studies have uncovered numerous important genes with agricultural significance in staple crops. However, when it comes to orphan crops like minor millet, genomic research lags significantly behind that of major crops. This situation has promoted a focus on exploring research opportunities in minor millets, particularly in finger millet, using cutting-edge methods. Finger millet, a coarse cereal known for its exceptional nutritional content and ability to withstand environmental stresses represents a promising climate-smart and nutritional crop in the battle against escalating environmental challenges. The existing traditional improvement programs for finger millet are insufficient to address global hunger effectively. The lack of utilization of high-throughput platforms, genome editing, haplotype breeding, and advanced breeding approaches hinders the systematic multi-omics studies on finger millet, which are essential for pinpointing crucial genes related to agronomically important and various stress responses. The growing environmental uncertainties have widened the gap between the anticipated and real progress in crop improvement.
To overcome these challenges a combination of cutting-edge multi-omics techniques such as high-throughput sequencing, speed breeding, mutational breeding, haplotype-based breeding, genomic selection, high-throughput phenotyping, pangenomics, genome editing, and more along with integration of deep learning and artificial intelligence technologies are essential to accelerate research efforts in finger millet. The scarcity of multi-omics approaches in finger millet leaves breeders with limited modern tools for crop enhancement. Therefore, leveraging datasets from previous studies could prove effective in implementing the necessary multi-omics interventions to enrich the genetic resource in finger millet.},
}
@article{pmid39215522,
  author   = {Gao, J and Xu, Y},
  title    = {{DNA} sequences alignment method using sparse index on pan-genome graph.},
  journal  = {Journal of bioinformatics and computational biology},
  year     = {2024},
  pages    = {2450019},
  doi      = {10.1142/S0219720024500197},
  pmid     = {39215522},
  issn     = {1757-6334},
  abstract = {The graph of sequences represents the genetic variations of pan-genome concisely and space-efficiently than multiple linear reference genome. In order to accelerate aligning reads to the graph, an index of graph-based reference genomes is used to obtain candidate locations. However, the potential combinatorial explosion of nodes on the sequence graph leads to increasing the index space and maximum memory usage of alignment process considerably, especially for large-scale datasets. For this, existing methods typically attempt to prune complex regions, or extend the length of seeds, which sacrifices the recall of alignment algorithm despite reducing space usage slightly. We present the Sparse-index of Graph (SIG) and alignment algorithm SIG-Aligner, capable of indexing and aligning at the lower memory cost. SIG builds the non-overlapping minimizers index inside nodes of sequence graph and SIG-Aligner filters out most of the false positive matches by the method based on the pigeonhole principle. Compared to Giraffe, the results of computational experiments show that SIG achieves a significant reduction in index memory space ranging from 50% to 75% for the human pan-genome graphs, while still preserving superior or comparable accuracy of alignment and the faster alignment time.},
}
@article{pmid39213169,
  author   = {Andrews, KR and Besser, TE and Stalder, T and Top, EM and Baker, KN and Fagnan, MW and New, DD and Schneider, GM and Gal, A and Andrews-Dickert, R and Hunter, SS and Beckmen, KB and Christensen, L and Justice-Allen, A and Konetchy, D and Lehman, CP and Manlove, K and Miyasaki, H and Nordeen, T and Roug, A and Cassirer, EF},
  title    = {Comparative genomic analysis identifies potential adaptive variation in {Mycoplasma} ovipneumoniae.},
  journal  = {Microbial genomics},
  year     = {2024},
  volume   = {10},
  number   = {8},
  doi      = {10.1099/mgen.0.001279},
  pmid     = {39213169},
  issn     = {2057-5858},
  mesh     = {Animals ; *Mycoplasma ovipneumoniae/genetics ; *Goats/microbiology ; *Phylogeny ; Sheep/microbiology ; *Genome, Bacterial ; Genomics ; Reindeer/microbiology ; China ; Sheep Diseases/microbiology ; Adaptation, Physiological/genetics ; Australia ; Pneumonia, Mycoplasma/microbiology/veterinary ; },
  abstract = {Mycoplasma ovipneumoniae is associated with respiratory disease in wild and domestic Caprinae globally, with wide variation in disease outcomes within and between host species. To gain insight into phylogenetic structure and mechanisms of pathogenicity for this bacterial species, we compared M. ovipneumoniae genomes for 99 samples from 6 countries (Australia, Bosnia and Herzegovina, Brazil, China, France and USA) and 4 host species (domestic sheep, domestic goats, bighorn sheep and caribou). Core genome sequences of M. ovipneumoniae assemblies from domestic sheep and goats fell into two well-supported phylogenetic clades that are divergent enough to be considered different bacterial species, consistent with each of these two clades having an evolutionary origin in separate host species. Genome assemblies from bighorn sheep and caribou also fell within these two clades, indicating multiple spillover events, most commonly from domestic sheep. Pangenome analysis indicated a high percentage (91.4 %) of accessory genes (i.e.
genes found only in a subset of assemblies) compared to core genes (i.e. genes found in all assemblies), potentially indicating a propensity for this pathogen to adapt to within-host conditions. In addition, many genes related to carbon metabolism, which is a virulence factor for Mycoplasmas, showed evidence for homologous recombination, a potential signature of adaptation. The presence or absence of annotated genes was very similar between sheep and goat clades, with only two annotated genes significantly clade-associated. However, three M. ovipneumoniae genome assemblies from asymptomatic caribou in Alaska formed a highly divergent subclade within the sheep clade that lacked 23 annotated genes compared to other assemblies, and many of these genes had functions related to carbon metabolism. Overall, our results suggest that adaptation of M. ovipneumoniae has involved evolution of carbon metabolism pathways and virulence mechanisms related to those pathways. The genes involved in these pathways, along with other genes identified as potentially involved in virulence in this study, are potential targets for future investigation into a possible genomic basis for the high variation observed in disease outcomes within and between wild and domestic host species.},
}
@article {pmid39212644, year = {2024}, author = {Askenasy, I and Swain, JEV and Ho, PM and Nazeer, RR and Welch, A and Bényei, ÉB and Mancini, L and Nir, S and Liao, P and Welch, M}, title = {'{Wild} {Type}'.}, journal = {Microbiology (Reading, England)}, volume = {170}, number = {8}, doi = {10.1099/mic.0.001495}, pmid = {39212644}, issn = {1465-2080}, mesh = {*Genome, Bacterial ; Bacteria/genetics/classification/isolation & purification ; Evolution, Molecular ; Genetic Variation ; Genomics ; }, abstract = {In this opinion piece, we consider the meaning of the term 'wild type' in the context of microbiology. This is especially pertinent in the post-genomic era, where we have a greater awareness of species diversity than ever before. Genomic heterogeneity, in vitro evolution/selection pressures, definition of 'the wild', the size and importance of the pan-genome, gene-gene interactions (epistasis), and the nature of the 'wild-type gene' are all discussed. We conclude that wild type is an outdated and even misleading phrase that should be gradually phased out.}, }
@article {pmid39212029, year = {2024}, author = {de Block, T and De Baetselier, I and Van den Bossche, D and Abdellati, S and Gestels, Z and Laumen, JGE and Van Dijck, C and Vanbaelen, T and Claes, N and Vandelannoote, K and Kenyon, C and Harrison, O and Santhini Manoharan-Basil, S}, title = {Genomic oropharyngeal {Neisseria} surveillance detects {MALDI-TOF} {MS} species misidentifications and reveals a novel {Neisseria} cinerea clade.}, journal = {Journal of medical microbiology}, volume = {73}, number = {8}, doi = {10.1099/jmm.0.001871}, pmid = {39212029}, issn = {1473-5644}, mesh = {*Spectrometry, Mass, Matrix-Assisted Laser Desorption-Ionization/methods ; *Oropharynx/microbiology ; Humans ; *Whole Genome Sequencing ; *Multilocus Sequence Typing/methods ; *Genome, Bacterial ; Neisseria cinerea/genetics ; Phylogeny ; Neisseria/classification/genetics/isolation & purification ; Belgium ; Neisseria meningitidis/genetics/classification/isolation & purification ; Neisseriaceae Infections/microbiology/diagnosis ; }, abstract = {Introduction. Commensal Neisseria spp. are highly prevalent in the oropharynx as part of the healthy microbiome. N. meningitidis can colonise the oropharynx too from where it can cause invasive meningococcal disease. To identify N. meningitidis, clinical microbiology laboratories often rely on Matrix Assisted Laser Desorption/Ionisation Time of Flight Mass Spectrometry (MALDI-TOF MS). Hypothesis/Gap statement. N. meningitidis may be misidentified by MALDI-TOF MS. Aim. To conduct genomic surveillance of oropharyngeal Neisseria spp. in order to: (i) verify MALDI-TOF MS species identification, and (ii) characterize commensal Neisseria spp. genomes. Methodology. We analysed whole genome sequence (WGS) data from 119 Neisseria spp. isolates from a surveillance programme for oropharyngeal Neisseria spp. in Belgium.
Different species identification methods were compared: (i) MALDI-TOF MS, (ii) Ribosomal Multilocus Sequence Typing (rMLST) and (iii) rplF gene species identification. WGS data were used to further characterize Neisseria species found with supplementary analyses of Neisseria cinerea genomes. Results. Based on genomic species identification, isolates from the oropharyngeal Neisseria surveillance study were composed of the following species: N. meningitidis (n=23), N. subflava (n=61), N. mucosa (n=15), N. oralis (n=8), N. cinerea (n=5), N. elongata (n=3), N. lactamica (n=2), N. bacilliformis (n=1) and N. polysaccharea (n=1). Of these 119 isolates, four isolates identified as N. meningitidis (n=3) and N. subflava (n=1) by MALDI-TOF MS, were determined to be N. polysaccharea (n=1), N. cinerea (n=2) and N. mucosa (n=1) by rMLST. Phylogenetic analyses revealed that N. cinerea isolates from the general population (n=3, cluster one) were distinct from those obtained from men who have sex with men (MSM, n=2, cluster two). The latter contained genomes misidentified as N. meningitidis using MALDI-TOF MS. These two N. cinerea clusters persisted after the inclusion of published N. cinerea WGS (n=42). Both N. cinerea clusters were further defined through pangenome and Average Nucleotide Identity (ANI) analyses. Conclusion. This study provides insights into the importance of genomic genus-wide Neisseria surveillance studies to improve the characterization and identification of the Neisseria genus.}, }
@article {pmid39211246, year = {2024}, author = {Hughes Lago, C and Blackburn, D and Kinder Pavlicek, M and Threadgill, DS}, title = {Comparative Genomic Analysis of {Campylobacter} rectus and Closely Related Species.}, journal = {bioRxiv : the preprint server for biology}, doi = {10.1101/2024.07.26.605372}, pmid = {39211246}, issn = {2692-8205}, abstract = {Campylobacter rectus is a gram-negative, anaerobic bacterium strongly associated with periodontitis. It also causes various extraoral infections and is linked to adverse pregnancy outcomes in humans and murine models. C. rectus and related oral Campylobacters have been termed "emerging Campylobacter species" because infections by these organisms are likely underreported. Previously, no comparative methods have been used to analyze more than single C. rectus strains and until recently, very few C. rectus genomes have been publicly available. More sequenced genomes and comparative analyses are needed to study the genomic features and pathogenicity of this species. We sequenced eight new C. rectus strains and used comparative methods to identify regions of interest. An emphasis was put on the type III flagellar secretion system (T3SS), type IV secretion system (T4SS), and type VI secretion system (T6SS) because these protein complexes are important for pathogenesis in other Campylobacter species. RAST, BV-BRC, and other bioinformatics tools were used to assemble, annotate, and compare these regions in the genomes. The pan-genome of C. rectus consists of 2670 genes with core and accessory genomes of 1429 and 1241 genes, respectively. All isolates analyzed in this study have T3SS and T6SS hallmark proteins, while five of the isolates are missing a T4SS system. Twenty-one prophage clusters were identified across the panel of isolates, including four that appear intact.
Overall, significant genomic islands were found, suggesting regions in the genomes that underwent horizontal gene transfer. Additionally, the high frequency of CRISPR arrays and other repetitive elements has led to genome rearrangements across the strains, including in areas adjacent to secretion system gene clusters. This study describes the substantial diversity present among C. rectus isolates and highlights tools/assays that have been developed to permit functional genomic studies. Additionally, we have expanded the studies on C. showae T4SS since we have two new C. showae genomes to report. We also demonstrate that unlike C. rectus, C. showae does not demonstrate evidence of intact T6SS except for the strain CAM. The only strain of sequenced C. massilensis has neither T4SS or T6SS.}, }
@article {pmid39203545, year = {2024}, author = {Gheorghe-Barbu, I and Dragomir, RI and Gradisteanu Pircalabioru, G and Surleac, M and Dinu, IA and Gaboreanu, MD and Czobor Barbu, I}, title = {Tracing {Acinetobacter} baumannii's Journey from Hospitals to Aquatic Ecosystems.}, journal = {Microorganisms}, volume = {12}, number = {8}, doi = {10.3390/microorganisms12081703}, pmid = {39203545}, issn = {2076-2607}, support = {ERANET 243/2021//ERANET/ ; PN-III-P4-PCE-2021-1797 (PCE 96/2022)//Unitatea Executiva Pentru Finantarea Invatamantului Superior a Cercetarii Dezvoltarii si Inovarii/ ; PN-III-P1-1.1-TE-2021-1515 (TE112/2022)//Unitatea Executiva Pentru Finantarea Invatamantului Superior a Cercetarii Dezvoltarii si Inovarii/ ; PN-III-P1-1.1-PD-2021-0540 (PD102/2022)//Unitatea Executiva Pentru Finantarea Invatamantului Superior a Cercetarii Dezvoltarii si Inovarii/ ; }, abstract = {BACKGROUND: This study provides a comprehensive analysis of Acinetobacter baumannii in aquatic environments and fish microbiota by integrating culture-dependent methods, 16S metagenomics, and antibiotic resistance profiling.
METHODS: A total of 83 A. baumannii isolates were recovered using culture-dependent methods from intra-hospital infections (IHI) and wastewater (WW) and surface water (SW) samples from two southern Romanian cities in August 2022. The antibiotic susceptibility was screened using disc diffusion, microdilution, PCR, and Whole Genome Sequencing assays.
RESULTS: The highest microbial load in the analyzed samples was found in Glina, Bucharest, for both WW and SW samples across all investigated phenotypes. For Bucharest isolates, the resistance levels corresponded to fluoroquinolones > aminoglycosides > β-lactam antibiotics. In contrast, A. baumannii from upstream SW samples in Târgoviște showed the highest resistance to aminoglycosides. The blaOXA-23 gene was frequently detected in IHI, WW, and SW isolates in Bucharest, but was absent in Târgoviște. Molecular phylogeny revealed the presence of ST10 in Târgoviște isolates and ST2 in Bucharest isolates, while other minor STs were not specifically correlated with a sampling point. Using 16S rRNA sequencing, significant differences in microbial populations between the two locations were identified. The low abundance of Alphaproteobacteria and Actinobacteria in both locations suggests environmental pressures or contamination events.
CONCLUSIONS: These findings indicate significant fecal contamination and potential public health risks, emphasizing the need for improved water quality monitoring and management.}, }
@article {pmid39203478, year = {2024}, author = {Zhang, L and Kulyar, MF and Niu, T and Yang, S and Chen, W}, title = {Comparative Genomics of {Limosilactobacillus} reuteri {YLR001} Reveals Genetic Diversity and Probiotic Properties.}, journal = {Microorganisms}, volume = {12}, number = {8}, doi = {10.3390/microorganisms12081636}, pmid = {39203478}, issn = {2076-2607}, support = {32202873//National Natural Science Foundation of China/ ; 22JR5RA885//Youth Science and Technology Fund Project of Gansu Province/ ; GAU-KYQD-2021-09//Scientific Research Start-up Funds for Openly recruited Doctors of Gansu Agricultural University/ ; }, abstract = {To gain deeper insights into the genomic characteristics of Limosilactobacillus reuteri (L. reuteri) YLR001 and uncover its probiotic properties, in the current study, a comprehensive analysis of its whole genome was conducted, explicitly exploring the genetic variations associated with different host organisms. The genome of YLR001 consisted of a circular 2,242,943 bp chromosome with a GC content of 38.84%, along with three circular plasmids (24,864, 38,926, and 132,625 bp). Among the 2183 protein-coding sequences (CDSs), the specific genes associated with genetic adaptation and stress resistance were identified. We predicted the function of COG protein genes and analyzed the KEGG pathways. Comparative genome analysis revealed that the pan-genome contained 5207 gene families, including 475 core gene families and 941 strain-specific genes. Phylogenetic analysis revealed distinct host specificity among 20 strains of L. reuteri, highlighting substantial genetic diversity across different hosts. This study enhanced our comprehension of the genetic diversity of L. reuteri YLR001, demonstrated its potential probiotic characteristics, and established more solid groundwork for future applications.}, }
@article {pmid39201777, year = {2024}, author = {Gureeva, MV and Muntyan, MS and Ravin, NV and Grabovich, MY}, title = {Wastewater Treatment with Bacterial Representatives of the {Thiothrix} Morphotype.}, journal = {International journal of molecular sciences}, volume = {25}, number = {16}, doi = {10.3390/ijms25169093}, pmid = {39201777}, issn = {1422-0067}, support = {20-14-00137//Russian Science Foundation/ ; }, mesh = {*Wastewater/microbiology ; *Thiothrix/metabolism/genetics ; Water Purification/methods ; Sewage/microbiology ; Sulfides/metabolism ; Waste Disposal, Fluid/methods ; }, abstract = {Bacteria of the Thiothrix morphotype, comprising the genera Thiothrix, Thiolinea and Thiofilum, are frequently encountered in domestic and industrial wastewater treatment systems, but they are usually not clearly differentiated due to the marked similarity in their morphologies. Methods ranging from light microscopy, FISH and PCR to modern high-throughput sequencing are used to identify them. The development of these bacteria in wastewater treatment systems has both advantages and disadvantages. On the one hand, the explosive growth of these bacteria can lead to activated sludge bulking or clogging of the treatment system's membranes, with a consequent decrease in the water treatment efficiency. On the other hand, members of the Thiothrix morphotype can improve the quality of granular sludge and increase the water treatment efficiency. This may be due to their capacity for sulfide oxidation, denitrification combined with the oxidation of reduced sulfur compounds, enhanced biological phosphate removal and possibly denitrifying phosphate removal. The recently obtained pangenome of the genus Thiothrix allows the explanation, at the genomic level, of the experimental results of various studies. Moreover, this review summarizes the data on the factors affecting the proliferation of representatives of the Thiothrix morphotype.}, }
@article {pmid39201547, year = {2024}, author = {Heo, S and Jung, EJ and Park, MK and Sung, MH and Jeong, DW}, title = {Evolution and Competitive Struggles of {Lactiplantibacillus} plantarum under Different Oxygen Contents.}, journal = {International journal of molecular sciences}, volume = {25}, number = {16}, doi = {10.3390/ijms25168861}, pmid = {39201547}, issn = {1422-0067}, support = {RS-2022-IP322014//Korea Institute of Planning and Evaluation for Technology in Food, Agriculture and Forestry (IPET)/ ; }, mesh = {*Oxygen/metabolism ; *Evolution, Molecular ; Phylogeny ; Lactobacillus plantarum/genetics/metabolism ; Genome, Bacterial ; Anaerobiosis ; Animals ; Humans ; }, abstract = {Lactiplantibacillus (Lb.) plantarum is known as a benign bacterium found in various habitats, including the intestines of animals and fermented foods. Since animal intestines lack oxygen, while fermented foods provide a limited or more oxygen environment, this study aimed to investigate whether there were genetic differences in the growth of Lb. plantarum under aerobic vs. anaerobic conditions. Genomic analysis of Lb. plantarum obtained from five sources-animals, dairy products, fermented meat, fermented vegetables, and humans-was conducted. The analysis included not only an examination of oxygen-utilizing genes but also a comparative pan-genomic analysis to investigate evolutionary relationships between genomes. The ancestral gene analysis of the evolutionary pathway classified Lb. plantarum into groups A and B, with group A further subdivided into A1 and A2. It was confirmed that group A1 does not possess the narGHIJ operon, which is necessary for energy production under limited oxygen conditions. Additionally, it was found that group A1 has experienced more gene acquisition and loss compared to groups A2 and B.
Despite an initial assumption that there would be genetic distinctions based on the origin (aerobic or anaerobic conditions), it was observed that such differentiation could not be attributed to the origin. However, the evolutionary process indicated that the loss of genes related to nitrate metabolism was essential in anaerobic or limited oxygen conditions, contrary to the initial hypothesis.}, }
@article {pmid39200041, year = {2024}, author = {González-Fernández, A and Mencía-Ares, O and García-Iglesias, MJ and Petrocchi-Rilo, M and Miguélez-Pérez, R and Gutiérrez-Martín, CB and Martínez-Martínez, S}, title = {Virulence and Antimicrobial Resistance Characterization of {Glaesserella} parasuis Isolates Recovered from {Spanish} Swine Farms.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {13}, number = {8}, doi = {10.3390/antibiotics13080741}, pmid = {39200041}, issn = {2079-6382}, support = {PID2019-105125RB-I00//Ministerio de Ciencia, Innovación y Universidades/ ; }, abstract = {Glaesserella (Haemophilus) parasuis, the causative agent of Glässer's disease, is present in most pig farms as an early colonizer of the upper respiratory tract. It exhibits remarkable variability in virulence and antimicrobial resistance (AMR), with virulent strains capable of inducing respiratory or systemic disease. This study aimed to characterize the virulence and the AMR profiles in 65 G. parasuis isolates recovered from Spanish swine farms. Virulence was assessed using multiplex leader sequence (LS)-PCR targeting vtaA genes, with all isolates identified as clinical (presumed virulent). Pathotyping based on ten pangenome genes revealed the virulent HPS_22970 as the most frequent (83.1%). Diverse pathotype profiles were observed, with 29 unique gene combinations and two isolates carrying only potentially non-virulent pangenome genes. AMR phenotyping showed widespread resistance, with 63.3% classified as multidrug resistant, and high resistance to clindamycin (98.3%) and tylosin (93.3%). A very strong association was found between certain pathotype genes and AMR phenotypes, notably between the virulent HPS_22970 and tetracycline resistance (p < 0.001; Φ = 0.58). This study reveals the wide diversity and complexity of G.
parasuis pathogenicity and AMR phenotype, emphasizing the need for the targeted characterization of clinical isolates to ensure appropriate antimicrobial treatments and the implementation of prophylactic measures against virulent strains.}, }
@article {pmid39200037, year = {2024}, author = {Machado, MAM and Panzenhagen, P and Lázaro, C and Rojas, M and Figueiredo, EES and Conte-Junior, CA}, title = {Unveiling the High Diversity of Clones and Antimicrobial Resistance Genes in {Escherichia} coli Originating from {ST10} across Different Ecological Niches.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {13}, number = {8}, doi = {10.3390/antibiotics13080737}, pmid = {39200037}, issn = {2079-6382}, support = {313119/2020-1, 140016/2021-0, and 310181/2021-6//National Council for Scientific and Technological Development/ ; E-26/200.891/2021, E-26/204.145/2022 and E-26/201.638/2024//Fundação Carlos Chagas Filho de Amparo à Pesquisa do Estado do Rio de Janeiro/ ; }, abstract = {In this pioneering in silico study in Peru, we aimed to analyze Escherichia coli (E. coli) genomes for antimicrobial resistance genes (ARGs) diversity and virulence and for its mobilome. For this purpose, 469 assemblies from human, domestic, and wild animal hosts were investigated. Of these genomes, three were E. coli strains (pv05, pv06, and sf25) isolated from chickens in our previous study, characterized for antimicrobial susceptibility profile, and sequenced in this study. Three other genomes were included in our repertoire for having rare cgMLSTs. The phenotypic analysis for antimicrobial resistance revealed that pv05, pv06, and sf25 strains presented multidrug resistance to antibiotics belonging to at least three classes. Our in silico analysis indicated that many Peruvian genomes included resistance genes, mainly to the aminoglycoside class, ESBL-producing E. coli, sulfonamides, and tetracyclines. In addition, through Multi-locus Sequence Typing, we found more than 180 different STs, with ST10 being the most prevalent among the genomes. Pan-genome mapping revealed that, with new lineages, the repertoire of accessory genes in E.
coli increased, especially genes related to resistance and persistence, which may be carried by plasmids. The results also demonstrated several genes related to adhesion, virulence, and pathogenesis, especially genes belonging to the high pathogenicity island (HPI) from Yersinia pestis, with a prevalence of 42.2% among the genomes. The complexity of the genetic profiles of resistance and virulence in our study highlights the adaptability of the pathogen to different environments and hosts. Therefore, our in silico analysis through genome sequencing enables tracking the epidemiology of E. coli from Peru and the future development of strategies to mitigate its survival.}, }
@article {pmid39196267, year = {2024}, author = {King, AC and Kumar, N and Mellor, KC and Hawkins, PA and McGee, L and Croucher, NJ and Bentley, SD and Lees, JA and Lo, SW}, title = {Comparison of gene-by-gene and genome-wide short nucleotide sequence-based approaches to define the global population structure of {Streptococcus} pneumoniae.}, journal = {Microbial genomics}, volume = {10}, number = {8}, doi = {10.1099/mgen.0.001278}, pmid = {39196267}, issn = {2057-5858}, mesh = {*Streptococcus pneumoniae/genetics/classification ; *Multilocus Sequence Typing/methods ; *Genome, Bacterial ; *Phylogeny ; Cluster Analysis ; Humans ; Genomics/methods ; }, abstract = {Defining the population structure of a pathogen is a key part of epidemiology, as genomically related isolates are likely to share key clinical features such as antimicrobial resistance profiles and invasiveness. Multiple different methods are currently used to cluster together closely related genomes, potentially leading to inconsistency between studies. Here, we use a global dataset of 26 306 Streptococcus pneumoniae genomes to compare four clustering methods: gene-by-gene seven-locus MLST, core genome MLST (cgMLST)-based hierarchical clustering (HierCC) assignments, life identification number (LIN) barcoding and k-mer-based PopPUNK clustering (known as GPSCs in this species). We compare the clustering results with phylogenetic and pan-genome analyses to assess their relationship with genome diversity and evolution, as we would expect a good clustering method to form a single monophyletic cluster that has high within-cluster similarity of genomic content. We show that the four methods are generally able to accurately reflect the population structure based on these metrics and that the methods were broadly consistent with each other. We investigated further to study the discrepancies in clusters.
The greatest concordance was seen between LIN barcoding and HierCC (adjusted mutual information score=0.950), which was expected given that both methods utilize cgMLST, but have different methods for defining an individual cluster and different core genome schema. However, the existence of differences between the two methods shows that the selection of a core genome schema can introduce inconsistencies between studies. GPSC and HierCC assignments were also highly concordant (AMI=0.946), showing that k-mer-based methods which use the whole genome and do not require the careful selection of a core genome schema are just as effective at representing the population structure. Additionally, where there were differences in clustering between these methods, this could be explained by differences in the accessory genome that were not identified in cgMLST. We conclude that for S. pneumoniae, standardized and stable nomenclature is important as the number of genomes available expands. Furthermore, the research community should transition away from seven-locus MLST, whilst cgMLST, GPSC and LIN assignments should be used more widely. However, to allow for easy comparison between studies and to make previous literature relevant, the reporting of multiple clustering names should be standardized within the research.}, }
@article {pmid39194902, year = {2024}, author = {Chen, G and Shi, G and Dai, Y and Zhao, R and Wu, Q}, title = {Graph-Based Pan-Genome Reveals the Pattern of Deleterious Mutations during the Domestication of {Saccharomyces} cerevisiae.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {10}, number = {8}, doi = {10.3390/jof10080575}, pmid = {39194902}, issn = {2309-608X}, support = {32170015//National Natural Science Foundation of China/ ; }, abstract = {The "cost of domestication" hypothesis suggests that the domestication of wild species increases the number, frequency, and/or proportion of deleterious genetic variants, potentially reducing their fitness in the wild. While extensively studied in domesticated species, this phenomenon remains understudied in fungi. Here, we used Saccharomyces cerevisiae, the world's oldest domesticated fungus, as a model to investigate the genomic characteristics of deleterious variants arising from fungal domestication. Employing a graph-based pan-genome approach, we identified 1,297,761 single nucleotide polymorphisms (SNPs), 278,147 insertion/deletion events (indels; <30 bp), and 19,967 non-redundant structural variants (SVs; ≥30 bp) across 687 S. cerevisiae isolates. Comparing these variants with synonymous SNPs (sSNPs) as neutral controls, we found that the majority of the derived nonsynonymous SNPs (nSNPs), indels, and SVs were deleterious. Heterozygosity was positively correlated with the impact of deleterious SNPs, suggesting a role of genetic diversity in mitigating their effects. The domesticated isolates exhibited a higher additive burden of deleterious SNPs (dSNPs) than the wild isolates, but a lower burden of indels and SVs. Moreover, the domesticated S. cerevisiae showed reduced rates of adaptive evolution relative to the wild S. cerevisiae. In summary, deleterious variants tend to be heterozygous, which may mitigate their harmful effects, but they also constrain breeding potential.
Addressing deleterious alleles and minimizing the genetic load are crucial considerations for future S. cerevisiae breeding efforts.}, }
@article {pmid39192886, year = {2024}, author = {Gagie, T}, title = {How to Find Long Maximal Exact Matches and Ignore Short Ones.}, journal = {Developments in language theory. Conference on Developments in Language Theory}, volume = {14791}, pages = {131--140}, doi = {10.1007/978-3-031-66159-4_10}, pmid = {39192886}, abstract = {Finding maximal exact matches (MEMs) between strings is an important task in bioinformatics, but it is becoming increasingly challenging as geneticists switch to pangenomic references. Fortunately, we are usually interested only in the relatively few MEMs that are longer than we would expect by chance. In this paper we show that under reasonable assumptions we can find all MEMs of length at least L between a pattern of length m and a text of length n in O(m) time plus extra O(log n) time only for each MEM of length at least nearly L using a compact index for the text, suitable for pangenomics.}, }
@article {pmid39192052, year = {2024}, author = {Huang, B and Fan, C and Chen, K and Rao, J and Ou, P and Tian, C and Yang, Y and Cooper, DN and Zhao, H}, title = {{VCAT}: an integrated variant function annotation tools.}, journal = {Human genetics}, pmid = {39192052}, issn = {1432-1203}, support = {2023YFF1204900//National Key Research and Development Program of China/ ; 81971190//Natural Science Foundation of China/ ; 2021A1515010256//Guangdong Key Field Research and Development Plan/ ; 202007030010//Guangzhou Science and Technology Research Plan/ ; }, abstract = {The development of sequencing technology has promoted discovery of variants in the human genome. Identifying functions of these variants is important for us to link genotype to phenotype, and to diagnose diseases. However, it usually requires researchers to visit multiple databases. Here, we presented a one-stop webserver for variant function annotation tools (VCAT, https://biomed.nscc-gz.cn/zhaolab/VCAT/) that is the first one connecting variant to functions via the epigenome, protein, drug and RNA. VCAT is also the first one to make all annotations visualized in interactive charts or molecular structures. VCAT allows users to upload data in VCF format, and download results via a URL. Moreover, VCAT has annotated a huge number (1,262,041,068) of variants collected from dbSNP, 1000 Genomes projects, gnomAD, ICGC, TCGA, and HPRC Pangenome project. For these variants, users are able to search their functions, related diseases and drugs from VCAT. In summary, VCAT provides a one-stop webserver to explore the potential functions of human genomic variants including their relationship with diseases and drugs.}, }
@article {pmid39191555, year = {2024}, author = {Tiwari, VK and Saripalli, G and Sharma, PK and Poland, J}, title = {Wheat genomics: genomes, pangenomes, and beyond.}, journal = {Trends in genetics : TIG}, doi = {10.1016/j.tig.2024.07.004}, pmid = {39191555}, issn = {0168-9525}, abstract = {There is an urgent need to improve wheat for upcoming challenges, including biotic and abiotic stresses. Sustainable wheat improvement requires the introduction of new genes and alleles in high-yielding wheat cultivars. Using new approaches, tools, and technologies to identify and introduce new genes in wheat cultivars is critical. High-quality genomes, transcriptomes, and pangenomes provide essential resources and tools to examine wheat closely to identify and manipulate new and targeted genes and alleles. Wheat genomics has improved excellently in the past 5 years, generating multiple genomes, pangenomes, and transcriptomes. Leveraging these resources allows us to accelerate our crop improvement pipelines. This review summarizes the progress made in wheat genomics and trait discovery in the past 5 years.}, }
@article {pmid39191402, year = {2024}, author = {Aoun, N and Georgoulis, SJ and Avalos, JK and Grulla, KJ and Miqueo, K and Tom, C and Lowe-Power, TM}, title = {A pangenomic atlas reveals eco-evolutionary dynamics that shape type {VI} secretion systems in plant-pathogenic {Ralstonia}.}, journal = {mBio}, pages = {e0032324}, doi = {10.1128/mbio.00323-24}, pmid = {39191402}, issn = {2150-7511}, abstract = {Soilborne Ralstonia solanacearum species complex (RSSC) pathogens disrupt microbial communities as they invade roots and fatally wilt plants. RSSC pathogens secrete antimicrobial toxins using a type VI secretion system (T6SS). To investigate how evolution and ecology have shaped the T6SS of these bacterial pathogens, we analyzed the T6SS gene content and architecture across the RSSC and their evolutionary relatives. Our analysis reveals that two ecologically similar Burkholderiaceae taxa, xylem-pathogenic RSSC and Paracidovorax, have convergently evolved to wield large arsenals of T6SS toxins. To understand the mechanisms underlying genomic enrichment of T6SS toxins, we compiled an atlas of 1,066 auxiliary T6SS toxin clusters ("aux" clusters) across 99 high-quality RSSC genomes. We classified 25 types of aux clusters with toxins that predominantly target lipids, nucleic acids, or unknown cellular substrates. The aux clusters were located in diverse genetic neighborhoods and had complex phylogenetic distributions, suggesting frequent horizontal gene flow. Phages and other mobile genetic elements account for most of the aux cluster acquisition on the chromosome but very little on the megaplasmid. Nevertheless, RSSC genomes were more enriched in aux clusters on the megaplasmid. Although the single, ancestral T6SS was broadly conserved in the RSSC, the T6SS has been convergently lost in atypical, non-soilborne lineages.
Overall, our data suggest dynamic interplay between the lifestyle of RSSC lineages and the evolution of T6SSes with robust arsenals of toxins. This pangenomic atlas poises the RSSC as an emerging, tractable model to understand the role of the T6SS in shaping pathogen populations. IMPORTANCE: We explored the eco-evolutionary dynamics that shape the inter-microbial warfare mechanisms of a globally significant plant pathogen, the Ralstonia solanacearum species complex. We discovered that most Ralstonia wilt pathogens have evolved extensive and diverse repertoires of type VI secretion system-associated antimicrobial toxins. These expansive toxin arsenals potentially enhance the ability of Ralstonia pathogens to invade plant microbiomes, enabling them to rapidly colonize and kill their host plants. We devised a classification system to categorize the Ralstonia toxins. Interestingly, many of the toxin gene clusters are encoded on mobile genetic elements, including prophages, which may be mutualistic symbionts that enhance the inter-microbial competitiveness of Ralstonia wilt pathogens. Moreover, our findings suggest that the convergent loss of this multi-gene trait contributes to genome reduction in two vector-transmitted lineages of Ralstonia pathogens. Our findings demonstrate that the interplay between microbial ecology and pathogen lifestyle shapes the evolution of a genetically complex antimicrobial weapon.}, }
@article{pmid39189818,
  author   = {Olawoye, I. B. and Waglechner, N. and McIntosh, F. and Akochy, P. M. and Cloutier, N. and Grandjean Lapierre, S. and Tannir, B. and Greenaway, C. and Matouk, E. and Poirier, L. and Levesque, R. C. and Boyle, B. and Quach, C. and Soualhine, H. and Batt, J. and Behr, M. A. and Lee, R. S. and Guthrie, J. L.},
  title    = {Genomic Epidemiology of {Mycobacterium} abscessus on the {Island of Montréal} Not Suggestive of Healthcare-associated Person-to-Person Transmission},
  journal  = {The Journal of infectious diseases},
  year     = {2024},
  doi      = {10.1093/infdis/jiae407},
  pmid     = {39189818},
  issn     = {1537-6613},
  abstract = {BACKGROUND: Mycobacterium abscessus complex (MABC), an opportunistic nontuberculous mycobacteria (NTM), can lead to poor clinical outcomes in pulmonary infections. Conflicting data exist on person-to-person transmission of MABC within and across healthcare facilities. To investigate further, a comprehensive retrospective study across five healthcare institutions on the Island of Montréal was undertaken.
METHODS: We analyzed the genomes of 221 MABC isolates obtained from 115 individuals (2010-2018) to identify possible links. Genetic similarity, defined as ≤25 single-nucleotide polymorphisms (SNPs), was investigated through a blinded epidemiological inquiry.
RESULTS: Bioinformatics analyses identified 28 sequence types (STs), including globally observed dominant circulating clones (DCCs). Further analysis revealed 210 isolate pairs within the SNP threshold. Among these pairs, there was one possible lab contamination where isolates from different patients processed in the same lab differed by only 2 SNPs. There were 37 isolate pairs from patients who had provided specimens from the same hospital; however, epidemiological analysis found no evidence of healthcare-associated person-to-person transmission between these patients. Additionally, pan-genome analysis showed higher discriminatory power than core genome analysis for examining genomic similarity.
CONCLUSIONS: Genomics alone is insufficient to establish MABC transmission, particularly considering the genetic similarity and wide distribution of DCCs, although pan-genome analysis has the potential to add further insight. Our findings indicate that MABC infections in Montréal are unlikely attributable to healthcare-associated person-to-person transmission.},
}
@article{pmid39185728,
  author   = {Švara, A. and Sun, H. and Fei, Z. and Khan, A.},
  title    = {Advancing apple genetics research: {Malus} coronaria and {Malus} ioensis genomes and a gene family-based pangenome of native {North American} apples},
  journal  = {DNA research : an international journal for rapid publication of reports on genes and genomes},
  year     = {2024},
  doi      = {10.1093/dnares/dsae026},
  pmid     = {39185728},
  issn     = {1756-1663},
  abstract = {Wild Malus species flourished in North America long before Europeans introduced domesticated apples. Malus coronaria and M. ioensis are native to the mid-western and eastern USA, while M. angustifolia and M. fusca grow in the southeast and west, respectively. They offer disease resistance, climate and soil adaptability, and horticultural traits for apple breeding. However, their utilization remains limited due to insufficient genomic resources and specific genetics. We report high-quality phased chromosome-scale assemblies of M. coronaria and M. ioensis, generated using long-read and conformation capture sequencing. Phylogenetic and synteny analysis indicated high relatedness between these two genomes and previously-published genome of M. angustifolia, and lower relatedness with M. fusca. Gene family-based pangenome of North American Malus identified 60,211 orthogroups containing 340,087 genes. Genes involved in basic cellular and metabolic processes, growth, and development were core to the existence of these species, whereas genes involved in secondary metabolism, stress response, and interactions with other organisms were accessory and are likely associated with adaptation to specific environments. Structural variation hotspots were mostly overlapping with high gene density. This study offers novel native North American Malus genome resources that can be used to identify genes for apple breeding and understand their evolution and adaptation.},
}
@article{pmid39182659,
  author   = {Arjun, O. K. and Sethi, M. and Parida, D. and Dash, J. and Kumar Das, S. and Prakash, T. and Senapati, S.},
  title    = {Comprehensive physiological and genomic characterization of a potential probiotic strain, {Lactiplantibacillus} plantarum {ILSF15}, isolated from the gut of tribes of {Odisha}, {India}},
  journal  = {Gene},
  pages    = {148882},
  year     = {2024},
  doi      = {10.1016/j.gene.2024.148882},
  pmid     = {39182659},
  issn     = {1879-0038},
  abstract = {Characterizing probiotic features of organisms isolated from diverse environments can lead to the discovery of novel strains with promising functional features and health attributes. The present study attempts to characterize a novel probiotic strain isolated from the gut of the tribal population of Odisha, India. Based on 16S rRNA-based phylogeny, the strain was identified as a species of the Lactiplantibacillus genus and was named Lactiplantibacillus plantarum strain ILSF15. The current investigation focuses on elucidating this strain's genetic and physiological properties associated with probiotic attributes such as biosafety risk, host adaptation/survival traits, and beneficial functional features. The novel strain was observed, in vitro, exhibiting features such as acid/bile tolerance, adhesion to the host enteric epithelial cells, cholesterol assimilation, and pathogen exclusion, indicating its ability to survive the harsh environment of the human GIT and resist the growth of harmful microorganisms. Additionally, the L. plantarum ILSF15 strain was found to harbor genes associated with the metabolism and synthesis of various bioactive molecules, including amino acids, carbohydrates, lipids, and vitamins, highlighting the organism's ability to efficiently utilize diverse resources and contribute to the host's nutrition and health. Several genes involved in host adaptation/survival strategies and host-microbe interactions were also identified from the ILSF15 genome. Moreover, L. plantarum strains, in general, were found to have an open pangenome characterized by high genetic diversity and the absence of specific lineages associated with particular habitats, signifying its versatile nature and potential applications in probiotic and functional food industries.},
}
@article{pmid39181885,
  author   = {He, H. and Leng, Y. and Cao, X. and Zhu, Y. and Li, X. and Yuan, Q. and Zhang, B. and He, W. and Wei, H. and Liu, X. and Xu, Q. and Guo, M. and Zhang, H. and Yang, L. and Lv, Y. and Wang, X. and Shi, C. and Zhang, Z. and Chen, W. and Zhang, B. and Wang, T. and Yu, X. and Qian, H. and Zhang, Q. and Dai, X. and Liu, C. and Cui, Y. and Wang, Y. and Zheng, X. and Xiong, G. and Zhou, Y. and Qian, Q. and Shang, L.},
  title    = {The pan-tandem repeat map highlights multiallelic variants underlying gene expression and agronomic traits in rice},
  journal  = {Nature communications},
  volume   = {15},
  number   = {1},
  pages    = {7291},
  year     = {2024},
  pmid     = {39181885},
  issn     = {2041-1723},
  support  = {Y2023QC36//Youth Innovation Promotion Association of the Chinese Academy of Sciences (Youth Innovation Promotion Association CAS)/ ; },
  mesh     = {*Oryza/genetics/growth & development/metabolism ; *Quantitative Trait Loci ; *Alleles ; *Gene Expression Regulation, Plant ; *Genome, Plant ; *Tandem Repeat Sequences/genetics ; Chromosome Mapping ; Polymorphism, Single Nucleotide ; Phenotype ; Genetic Variation ; },
  abstract = {Tandem repeats (TRs) are genomic regions that tandemly change in repeat number, which are often multiallelic. Their characteristics and contributions to gene expression and quantitative traits in rice are largely unknown. Here, we survey rice TR variations based on 231 genome assemblies and the rice pan-genome graph. We identify 227,391 multiallelic TR loci, including 54,416 TR variations that are absent from the Nipponbare reference genome. Only 1/3 TR variations show strong linkage with nearby bi-allelic variants (SNPs, Indels and PAVs). Using 193 panicle and 202 leaf transcriptomic data, we reveal 485 and 511 TRs act as QTLs independently of other bi-allelic variations to nearby gene expression, respectively. Using plant height and grain width as examples, we identify and validate TRs contributions to rice agronomic trait variations. These findings would enhance our understanding of the functions of multiallelic variants and facilitate rice molecular breeding.},
}
@article{pmid39179660,
  author   = {Wang, S. and Sun, S. and Wang, Q. and Chen, H. and Guo, Y. and Cai, M. and Yin, Y. and Ma, S. and Wang, H.},
  title    = {{PathoTracker}: an online analytical metagenomic platform for {Klebsiella} pneumoniae feature identification and outbreak alerting},
  journal  = {Communications biology},
  volume   = {7},
  number   = {1},
  pages    = {1038},
  year     = {2024},
  pmid     = {39179660},
  issn     = {2399-3642},
  support  = {32141001//National Natural Science Foundation of China (National Science Foundation of China)/ ; 81991533//National Natural Science Foundation of China (National Science Foundation of China)/ ; },
  mesh     = {*Klebsiella pneumoniae/genetics/isolation & purification ; *Metagenomics/methods ; Humans ; *Disease Outbreaks ; *Klebsiella Infections/microbiology/epidemiology/diagnosis ; *Phylogeny ; China/epidemiology ; Nanopore Sequencing/methods ; Databases, Genetic ; Genome, Bacterial ; },
  abstract = {Clinical metagenomics (CMg) Nanopore sequencing can facilitate infectious disease diagnosis. In China, sub-lineages ST11-KL64 and ST11-KL47 Carbapenem-resistant Klebsiella pneumoniae (CRKP) are widely prevalent. We propose PathoTracker, a specially compiled database and arranged method for strain feature identification in CMg samples and CRKP traceability. A database targeting high-prevalence horizontal gene transfer in CRKP strains and a ST11-only database for distinguishing two sub-lineages in China were created. To make the database user-friendly, facilitate immediate downstream strain feature identification from raw Nanopore metagenomic data, and avoid the need for phylogenetic analysis from scratch, we developed data analysis methods. The methods included pre-performed phylogenetic analysis, gene-isolate-cluster index and multilevel pan-genome database and reduced storage space by 10-fold and random-access memory by 52-fold compared with normal methods. PathoTracker can provide accurate and fast strain-level analysis for CMg data after 1 h Nanopore sequencing, allowing early warning of outbreaks. A user-friendly page (http://PathoTracker.pku.edu.cn/) was developed to facilitate online analysis, including strain-level feature, species identifications and phylogenetic analyses. PathoTracker proposed in this study will aid in the downstream analysis of CMg.},
}
@article{pmid39174505,
  author   = {Fang, Y. and Xiao, X. and Lin, J. and Lin, Q. and Wang, J. and Liu, K. and Li, Z. and Xing, J. and Liu, Z. and Wang, B. and Qi, Y. and Long, X. and Zeng, X. and Hu, Y. and Qi, J. and Qin, Y. and Yang, J. and Zhang, Y. and Zhang, S. and Ye, D. and Zhang, J. and Liu, J. and Tang, C.},
  title    = {Pan-genome and phylogenomic analyses highlight {Hevea} species delineation and rubber trait evolution},
  journal  = {Nature communications},
  volume   = {15},
  number   = {1},
  pages    = {7232},
  year     = {2024},
  pmid     = {39174505},
  issn     = {2041-1723},
  mesh     = {*Hevea/genetics ; *Genome, Plant ; *Phylogeny ; *Rubber/metabolism ; Plant Breeding ; Genetic Variation ; Evolution, Molecular ; Multigene Family ; },
  abstract = {The para rubber tree (Hevea brasiliensis) is the world's sole commercial source of natural rubber, a vital industrial raw material. However, the narrow genetic diversity of this crop poses challenges for rubber breeding. Here, we generate high-quality de novo genome assemblies for three H. brasiliensis cultivars, two H. brasiliensis wild accessions, and three other Hevea species (H. nitida, H. pauciflora, and H. benthamiana). Through analyzing genomes of 94 Hevea accessions, we identify five distinct lineages that do not align with their previous species delineations. We discover multiple accessions with hybrid origins between these lineages, indicating incomplete reproductive isolation between them. Only two out of four wild lineages have been introduced to commercial rubber cultivars. Furthermore, we reveal that the rubber production traits emerged following the development of a large REF/SRPP gene cluster and its functional specialization in rubber-producing laticifers within this genus. These findings would enhance rubber breeding and benefit research communities.},
}
@article{pmid39170454,
  author   = {Mangal, V. and Verma, L. K. and Singh, S. K. and Saxena, K. and Roy, A. and Karn, A. and Rohit, R. and Kashyap, S. and Bhatt, A. and Sood, S.},
  title    = {Triumphs of genomic-assisted breeding in crop improvement},
  journal  = {Heliyon},
  volume   = {10},
  number   = {15},
  pages    = {e35513},
  year     = {2024},
  pmid     = {39170454},
  issn     = {2405-8440},
  abstract = {Conventional breeding approaches have played a significant role in meeting the food demand remarkably well until now. However, the increasing population, yield plateaus in certain crops, and limited recombination necessitate using genomic resources for genomics-assisted crop improvement programs. As a result of advancements in the next-generation sequence technology, GABs have developed dramatically to characterize allelic variants and facilitate their rapid and efficient incorporation in crop improvement programs. Genomics-assisted breeding (GAB) has played an important role in harnessing the potential of modern genomic tools, exploiting allelic variation from genetic resources and developing cultivars over the past decade. The availability of pangenomes for major crops has been a significant development, albeit with varying degrees of completeness. Even though adopting these technologies is essentially determined on economic grounds and cost-effective assays, which create a wealth of information that can be successfully used to exploit the latent potential of crops. GAB has been instrumental in harnessing the potential of modern genomic resources and exploiting allelic variation for genetic enhancement and cultivar development. GAB strategies will be indispensable for designing future crops and are expected to play a crucial role in breeding climate-smart crop cultivars with higher nutritional value.},
}
@article{pmid39166875,
  author   = {Chan, D. T. C. and Bernstein, H. C.},
  title    = {Pangenomic landscapes shape performances of a synthetic genetic circuit across {Stutzerimonas} species},
  journal  = {mSystems},
  pages    = {e0084924},
  year     = {2024},
  doi      = {10.1128/msystems.00849-24},
  pmid     = {39166875},
  issn     = {2379-5077},
  abstract = {Engineering identical genetic circuits into different species typically results in large differences in performance due to the unique cellular environmental context of each host, a phenomenon known as the "chassis-effect" or "context-dependency". A better understanding of how genomic and physiological contexts underpin the chassis-effect will improve biodesign strategies across diverse microorganisms. Here, we combined a pangenomic-based gene expression analysis with quantitative measurements of performance from an engineered genetic inverter device to uncover how genome structure and function relate to the observed chassis-effect across six closely related Stutzerimonas hosts. Our results reveal that genome architecture underpins divergent responses between our chosen non-model bacterial hosts to the engineered device. Specifically, differential expression of the core genome, gene clusters shared between all hosts, was found to be the main source of significant concordance to the observed differential genetic device performance, whereas specialty genes from respective accessory genomes were not significant. A data-driven investigation revealed that genes involved in denitrification and components of trans-membrane transporter proteins were among the most differentially expressed gene clusters between hosts in response to the genetic device. Our results show that the chassis-effect can be traced along differences among the most conserved genome-encoded functions and that these differences create a unique biodesign space among closely related species. IMPORTANCE: Contemporary synthetic biology endeavors often default to a handful of model organisms to host their engineered systems. Model organisms such as Escherichia coli serve as attractive hosts due to their tractability but do not necessarily provide the ideal environment to optimize performance. As more novel microbes are domesticated for use as biotechnology platforms, synthetic biologists are urged to explore the chassis-design space to optimize their systems and deliver on the promises of synthetic biology. The consequences of the chassis-effect will therefore only become more relevant as the field of biodesign grows. In our work, we demonstrate that the performance of a genetic device is highly dependent on the host environment it operates within, promoting the notion that the chassis can be considered a design variable to tune circuit function. Importantly, our results unveil that the chassis-effect can be traced along similarities in genome architecture, specifically the shared core genome. Our study advocates for the exploration of the chassis-design space and is a step forward to empowering synthetic biologists with knowledge for more efficient exploration of the chassis-design space to enable the next generation of broad-host-range synthetic biology.},
}
@article{pmid39166872,
  author   = {Wang, L. and Cheng, X. and Guo, Y. and Cao, J. and Sun, M. and Hwang, J.-S. and Liu, R. and Fang, J.},
  title    = {Novel isolates of hydrogen-oxidizing chemolithoautotrophic {Sulfurospirillum} provide insight to the functions and adaptation mechanisms of {Campylobacteria} in shallow-water hydrothermal vents},
  journal  = {mSystems},
  pages    = {e0014824},
  year     = {2024},
  doi      = {10.1128/msystems.00148-24},
  pmid     = {39166872},
  issn     = {2379-5077},
  abstract = {Enhancing the availability of representative isolates from hydrothermal vents (HTVs) is imperative for comprehending the microbial processes that propel the vent ecosystem. In recent years, Campylobacteria have emerged as the predominant and ubiquitous taxon across both shallow and deep-sea vent systems. Nevertheless, only a few isolates have been cultured, primarily originating from deep-sea HTVs. Presently, no cultivable isolates of Campylobacteria are accessible in shallow water vent systems (<200 m), which exhibit markedly distinct environmental conditions from their deep-sea counterparts. In this study, we enriched a novel isolate (genus Sulfurospirillum, Campylobacteria) from shallow-water HTVs of Kueishan Island. Genomic and physiological analysis revealed that this novel Campylobacteria species grows on a variety of substrate and carbon/energy sources. The pan-genome and phenotypic comparisons with 12 previously isolated Sulfurospirillum species from different environments supported the identification of functional features in Sulfurospirillum genomes crucial for adaptation to vent environments, such as sulfur oxidation, carbon fixation, biofilm formation, and benzoate/toluene degradation, as well as diverse genes related with signal transportation. To conclude, the metabolic characteristics of this novel Campylobacteria augment our understanding of Campylobacteria spanning from deep-sea to shallow-water vent systems. IMPORTANCE: Campylobacteria emerge as the dominant and ubiquitous taxa within vent systems, playing important roles in the vent ecosystems. However, isolated representatives of Campylobacteria have been mainly from the deep-sea hydrothermal fields, leaving a significant knowledge gap regarding the functions, activities, and adaptation strategies of the vent microorganisms in shallow-water hydrothermal vents (HTVs). This study bridges this gap by providing insights into the phenomics and genomic diversity of genus Sulfurospirillum (order Campylobacterales, class Campylobacteria) based on data derived from a novel isolate obtained from shallow-water HTVs. Our mesophilic isolate of Sulfurospirillum not only augments the genus diversity of Campylobacteria pure cultures derived from vent systems but also serves as the inaugural reference isolate for Campylobacteria in shallow-water environments.},
}
@article{pmid39165128,
  author   = {Trost, K. and Knopp, M. R. and Wimmer, J. L. E. and Tria, F. D. K. and Martin, W. F.},
  title    = {A universal and constant rate of gene content change traces pangenome flux to {LUCA}},
  journal  = {FEMS microbiology letters},
  year     = {2024},
  doi      = {10.1093/femsle/fnae068},
  pmid     = {39165128},
  issn     = {1574-6968},
  abstract = {Prokaryotic genomes constantly undergo gene flux via lateral gene transfer, generating a pangenome structure consisting of a conserved core genome surrounded by a more variable accessory genome shell. Over time, flux generates change in genome content. Here we measure and compare the rate of genome flux for 5 655 prokaryotic genomes as a function of amino acid sequence divergence in 36 universally distributed proteins of the informational core (IC). We find a clock of gene content change. The long-term average rate of gene content flux is remarkably constant across all higher prokaryotic taxa sampled, whereby the size of the accessory genome-the proportion of the genome harboring gene content difference for genome pairs-varies across taxa. The proportion of species-level accessory genes per genome, varies from 0\% (Chlamydia) to 30-33\% (Alphaproteobacteria, Gammaproteobacteria, Clostridia). A clock-like rate of gene content change across all prokaryotic taxa sampled suggest that pangenome structure is a general feature of prokaryotic genomes and that it has been in existence since the divergence of bacteria and archaea.},
}
@article{pmid39162515,
  author   = {Wang, Z. and Hülpüsch, C. and Foesel, B. and Traidl-Hoffmann, C. and Reiger, M. and Schloter, M.},
  title    = {Genomic and functional divergence of {Staphylococcus} aureus strains from atopic dermatitis patients and healthy individuals: insights from global and local scales},
  journal  = {Microbiology spectrum},
  pages    = {e0057124},
  year     = {2024},
  doi      = {10.1128/spectrum.00571-24},
  pmid     = {39162515},
  issn     = {2165-0497},
  abstract = {Atopic dermatitis (AD) is the most common chronic inflammatory skin disease worldwide and is characterized by a complex interplay with skin microbiota, with Staphylococcus aureus often abnormally more abundant in AD patients than in healthy individuals (HE). S. aureus harbors diverse strains with varied genetic compositions and functionalities, which exhibit differential connections with the severity of AD. However, the differences in S. aureus strains between AD and HE remain unclear, with most variations seen at a specific geographic level, implying spontaneous adaptations rather than systematic distinctions. This study presents genomic and functional differences between these S. aureus strains from AD and HE on both global and local levels. We observed reduced gene content diversity but increased functional variation in the global AD-associated strains. Two additional AD-dominant clusters emerged, with Cluster 1 enriched in transposases and Cluster 2 showcasing genes linked to adaptability and antibiotic resistance. Particularly, robust evidence illustrates that the lantibiotic operon of S. aureus, involved in the biosynthesis of lantibiotics, was acquired via horizontal gene transfer from environmental bacteria. Comparisons of the gene abundance profiles in functional categories also indicate limited zoonotic potential between human and animal isolates. Local analysis mirrored global gene diversity but showed distinct functional variations between AD and HE strains. Overall, this research provides foundational insights into the genomic evolution, adaptability, and antibiotic resistance of S. aureus, with significant implications for clinical microbiology. IMPORTANCE: Our study uncovers significant genomic variations in Staphylococcus aureus strains associated with atopic dermatitis. We observed adaptive evolution tailored to the disease microenvironment, characterized by a smaller pan-genome than strains from healthy skin both on the global and local levels. Key functional categories driving strain diversification include "replication and repair" and "transporters," with transposases being pivotal. Interestingly, the local strains predominantly featured metal-related genes, whereas global ones emphasized antimicrobial resistances, signifying scale-dependent diversification nuances. We also pinpointed horizontal gene transfer events, indicating interactions between human-associated and environmental bacteria. These insights expand our comprehension of S. aureus's genetic adaptation in atopic dermatitis, yielding valuable implications for clinical approaches.},
}
@article{pmid39160127,
  author   = {Song, Y. and Long, C. and Wang, Y. and An, Y. and Lu, Y.},
  title    = {Advancements in multi-omics for nutraceutical enhancement and traits improvement in buckwheat},
  journal  = {Critical reviews in biotechnology},
  pages    = {1--26},
  year     = {2024},
  doi      = {10.1080/07388551.2024.2373282},
  pmid     = {39160127},
  issn     = {1549-7801},
  abstract = {Buckwheat (Fagopyrum spp.) is a typical pseudocereal, valued for its extensive nutraceutical potential as well as its centuries-old cultivation. Tartary buckwheat and common buckwheat have been used globally and become well-known nutritious foods due to their high quantities of: proteins, flavonoids, and minerals. Moreover, its increasing demand makes it critical to improve nutraceutical, traits and yield. In this review, bioactive compounds accumulated in buckwheat were comprehensively evaluated according to their chemical structure, properties, and physiological function. Biosynthetic pathways of flavonoids, phenolic acids, and fagopyrin were methodically summarized, with the regulation of flavonoid biosynthesis. Although there are classic synthesis pathways presented in the previous research, the metabolic flow of how these certain compounds are being synthesized in buckwheat still remains uncovered. The functional genes involved in the biosynthesis of flavonols, stress response, and plant development were identified based on multi-omics research. Furthermore, it delves into the applications of multi-omics in improving buckwheat's agronomic traits, including: yield, nutritional content, stress resilience, and bioactive compounds biosynthesis. While pangenomics combined with other omics to mine elite genes, the regulatory network and mechanism of specific agronomic traits and biosynthetic of bioactive components, and developing a more efficient genetic transformation system for genetic engineering require further investigation for the execution of breeding designs aimed at enhancing desirable traits in buckwheat. This critical review will provide a comprehensive understanding of multi-omics for nutraceutical enhancement and traits improvement in buckwheat.},
}
@article{pmid39158344,
  author   = {Klingström, T. and Zonabend König, E. and Zwane, A. A.},
  title    = {Beyond the hype: using {AI}, big data, wearable devices, and the internet of things for high-throughput livestock phenotyping},
  journal  = {Briefings in functional genomics},
  year     = {2024},
  doi      = {10.1093/bfgp/elae032},
  pmid     = {39158344},
  issn     = {2041-2657},
  support  = {//Livestock Genetics Flagship of the Livestock CGIAR Research Program/ ; },
  abstract = {Phenotyping of animals is a routine task in agriculture which can provide large datasets for the functional annotation of genomes. Using the livestock farming sector to study complex traits enables genetics researchers to fully benefit from the digital transformation of society as economies of scale substantially reduces the cost of phenotyping animals on farms. In the agricultural sector genomics has transitioned towards a model of 'Genomics without the genes' as a large proportion of the genetic variation in animals can be modelled using the infinitesimal model for genomic breeding valuations. Combined with third generation sequencing creating pan-genomes for livestock the digital infrastructure for trait collection and precision farming provides a unique opportunity for high-throughput phenotyping and the study of complex traits in a controlled environment. The emphasis on cost efficient data collection mean that mobile phones and computers have become ubiquitous for cost-efficient large-scale data collection but that the majority of the recorded traits can still be recorded manually with limited training or tools. This is especially valuable in low- and middle income countries and in settings where indigenous breeds are kept at farms preserving more traditional farming methods. Digitalization is therefore an important enabler for high-throughput phenotyping for smaller livestock herds with limited technology investments as well as large-scale commercial operations. It is demanding and challenging for individual researchers to keep up with the opportunities created by the rapid advances in digitalization for livestock farming and how it can be used by researchers with or without a specialization in livestock. This review provides an overview of the current status of key enabling technologies for precision livestock farming applicable for the functional annotation of genomes.},
}
@article{pmid39158313,
  author   = {Rana, R. and Nayak, P. K. and Madhavan, V. N. and Sonti, R. V. and Patel, H. K. and Patil, P. B.},
  title    = {Comparative genomics-based insights into {Xanthomonas} indica, a non-pathogenic species of healthy rice microbiome with bioprotection function},
  journal  = {Applied and environmental microbiology},
  pages    = {e0084824},
  year     = {2024},
  doi      = {10.1128/aem.00848-24},
  pmid     = {39158313},
  issn     = {1098-5336},
  abstract = {Xanthomonas species are major pathogens of plants and have been studied extensively. There is increasing recognition of the importance of non-pathogenic species within the same genus. With this came the need to understand the genomic and functional diversity of non-pathogenic Xanthomonas (NPX) at the species and strain level. This study reports isolation and investigation into the genomic diversity and variation in NPX isolates, chiefly Xanthomonas indica, a newly discovered NPX species from rice. The study establishes the relationship of X. indica strains within clade I of Xanthomonads with another NPX species, X. sontii, also associated with rice seeds. Identification of highly diverse strains, open-pan genome, and systematic hyper-variation at the lipopolysaccharide biosynthetic locus when compared to pathogenic Xanthomonas indicates the acquisition of new functions for adaptation. Furthermore, comparative genomics studies established the absence of major virulence genes such as type III secretion system and effectors, which are present in the pathogens, and the presence of a known bacterial-killing type IV secretion system (X-T4SS). The diverse non-pathogenic strains of X. indica and X. sontii were found to protect rice from bacterial leaf blight pathogen, X. oryzae pv. oryzae (Xoo). The absence of phenotype of an X-T4SS mutant suggests redundancy in the genetic basis of the mechanisms involved in the bioprotection function, which may include multiple genetic loci, such as putative bacteriocin-encoding gene clusters and involvement of other factors such as nutrient and niche competition apart from induction of innate immunity through shared microbial-associated molecular patterns. The rice-NPX community and its pathogenic counterpart can be a promising model for understanding plant-microbe-microbiome interaction studies. IMPORTANCE: The Xanthomonas group of bacteria is known for its characteristic lifestyle as a phytopathogen. However, the discovery of non-pathogenic Xanthomonas (NPX) species is a major shift in understanding this group of bacteria. Multi-strain, in-depth genomic, evolutionary and functional studies on each of these NPX species are still lacking. This study on diverse non-pathogenic strains provides novel insights into genome diversity, dynamics, and evolutionary trends of NPX species from rice microbiome apart from its relationship with other relatives that form a sub-clade. Interestingly, we also uncovered that NPX species protect rice from pathogenic Xanthomonas species. The plant protection property shows their importance as a part of a healthy plant microbiome. Furthermore, finding an open pan-genome and large-scale variation at lipopolysaccharide biosynthetic locus indicates a significant role of the NPX community in host adaptation. The findings and high-quality genomic resources of NPX species and the strains will allow further systematic molecular and host-associated microbial community studies for plant health.},
}
@article{pmid39155697,
  author   = {Mederos, M. A. and Court, C. M. and Dipardo, B. J. and Pisegna, J. R. and Dawson, D. W. and Hines, O. Joe and Donahue, T. R. and Graeber, T. G. and Girgis, M. D. and Tomlinson, J. S.},
  title    = {Oncogenic pathway signatures predict the risk of progression and recurrence in well-differentiated pancreatic neuroendocrine tumors},
  journal  = {Journal of surgical oncology},
  year     = {2024},
  doi      = {10.1002/jso.27830},
  pmid     = {39155697},
  issn     = {1096-9098},
  support  = {P01CA168585//NCI/NIH/ ; R01CA222877//NCI/NIH/ ; R01CA227089//NCI/NIH/ ; P50CA092131//NCI/NIH/ ; //W.M. Keck Foundation/ ; //Hirshberg Foundation for Pancreatic Research/ ; },
  abstract = {BACKGROUND: Pancreatic neuroendocrine tumors (pNETs) are genomically diverse tumors. The management of newly diagnosed well-differentiated pNETs is limited by a lack of sensitivity of existing biomarkers for prognostication. Our goal was to investigate the potential utility of genetic markers as a predictor of progression-free survival (PFS) and recurrence-free survival (RFS).
METHODS: Whole-exome sequencing of resected well-differentiated, low and intermediate-grade (G1 and G2) pNETs and normal adjacent tissue from patients who underwent resection from 2005 to 2015 was performed. Genetic alterations were classified using pan-genomic and oncogenic pathway classifications. Additional samples with genetic and clinicopathologic data available were obtained from the publicly available International Cancer Genome Consortium (ICGC) database and included in the analysis. The prognostic relevance of these genomic signatures on PFS and RFS was analyzed.
RESULTS: Thirty-one patients who underwent resection for pNET were identified. Genomic analysis of mutational, copy number, cytogenetic, and complex phenomena revealed similar patterns to prior studies of pNETs with relatively few somatic gene mutations but numerous instances of copy number changes. Analysis of genomic and clinicopathologic outcomes using the combined data from our study as well as the ICGC pNET cohort (n = 124 patients) revealed that the recurrent pattern of whole chromosome loss (RPCL) and metastatic disease were independently associated with disease progression. When evaluating patients with local disease at the time of resection, RPCL and alterations in the TGFβ oncogenic pathway were independently associated with the risk of recurrence.
CONCLUSIONS: Well-differentiated pNETs are genomically diverse tumors. Pathway signatures may be prognostic for predicting disease progression and recurrence.},
}
@article{pmid39151939,
  author   = {Woodhouse, M. R. and Cannon, E. K. and Portwood, J. L. and Gardiner, J. M. and Hayford, R. K. and Haley, O. and Andorf, C. M.},
  title    = {Tools and Resources at the {Maize Genetics and Genomics Database} ({MaizeGDB})},
  journal  = {Cold Spring Harbor protocols},
  year     = {2024},
  doi      = {10.1101/pdb.over108430},
  pmid     = {39151939},
  issn     = {1559-6095},
  abstract = {The Maize Genetics and Genomics Database (MaizeGDB) is the community resource for maize researchers, offering a suite of tools, informatics resources, and curated data sets to support maize genetics, genomics, and breeding research. Here, we provide an overview of the key resources available at MaizeGDB, including maize genomes, comparative genomics, and pan-genomics tools. This review aims to familiarize users with the range of options available for maize research and highlights the importance of MaizeGDB as a central hub for the maize research community. By providing a detailed snapshot of the database's capabilities, we hope to enable researchers to make use of MaizeGDB's resources, ultimately assisting them to better study the evolution and diversity of maize.},
}
@article{pmid39149335,
  author   = {Ma, W. and Chaisson, M. J.},
  title    = {High-resolution global diversity copy number variation maps and association with ctyper},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2024},
  doi      = {10.1101/2024.08.11.607269},
  pmid     = {39149335},
  issn     = {2692-8205},
  abstract = {Genetic analysis of copy number variations (CNVs), especially in complex regions, is challenging due to reference bias and ambiguous alignment of Next-Generation Sequencing (NGS) reads to repetitive DNA. Consequently, aggregate copy numbers are typically analyzed, overlooking variation between gene copies. Pangenomes contain diverse sequences of gene copies and enable the study of sequence-resolved CNVs. We developed a method, ctyper, to discover sequence-resolved CNVs in NGS data by leveraging CNV genes from pangenomes. From 118 public assemblies, we constructed a database of 3,351 CNV genes, distinguishing each gene copy as a resolved allele. We used phylogenetic trees to organize alleles into highly similar allele-types that revealed events of linked small variants due to stratification, structural variation, conversion, and duplication. Saturation analysis showed that new samples share an average of 97.8% CNV alleles with the database. The ctyper method traces individual gene copies in NGS data to their nearest alleles in the database and identifies allele-specific copy numbers using multivariate linear regression on k-mer counts and phylogenetic clustering. Applying ctyper to 1000 Genomes Project (1kgp) samples showed Hardy-Weinberg Equilibrium on 99.3% of alleles and a 97.6% F1 score on genotypes based on 641 1kgp trios. Leave-one-out analysis on 39 assemblies matched to 1kgp samples showed that 96.5% of variants in query sequences match the genotyped allele.
Genotyping 1kgp data revealed 226 population-specific CNVs, including a conversion on SMN2 to SMN1, potentially impacting Spinal Muscular Atrophy diagnosis in Africans. Our results revealed two models of CNV: recent CNVs due to ongoing duplications and polymorphic CNVs from ancient paralogs missing from the reference. To measure the functional impact of CNVs, after merging allele-types, we conducted genome-wide Quantitative Trait Locus analysis on 451 1kgp samples with Geuvadis rRNA-seqs. Using a linear mixed model, our genotyping enables the inference of relative expression levels of paralogs within a gene family. In a global evolutionary context, 150 out of 1,890 paralogs (7.94%) and 546 out of 16,628 orthologs (3.28%) had significantly different expression levels, suggesting divergent expression from original genes. Specific examples include lower expression on the converted SMN and increased expression on translocated AMY2B (GTEx pancreas data). Our method enables large cohort studies on complex CNVs to uncover hidden health impacts and overcome reference bias.},
}
@article{pmid39147372,
  author   = {Jara-Servin, A. and Mejia, G. and Romero, M. F. and Peimbert, M. and Alcaraz, L. D.},
  title    = {Unravelling the genomic and environmental diversity of the ubiquitous {Solirubrobacter}},
  journal  = {Environmental microbiology},
  year     = {2024},
  volume   = {26},
  number   = {8},
  pages    = {e16685},
  doi      = {10.1111/1462-2920.16685},
  pmid     = {39147372},
  issn     = {1462-2920},
  support  = {CVU 725278//Consejo Nacional de Ciencia y Tecnología/ ; IN206824//Universidad Nacional Autónoma de México, DGAPA-PAPIIT-UNAM/ ; },
  mesh     = {*Soil Microbiology ; *Phylogeny ; *Genome, Bacterial ; *RNA, Ribosomal, 16S/genetics ; Rhizosphere ; Genomics ; Metagenomics ; Genetic Variation ; },
  abstract = {Solirubrobacter, though widespread in soils and rhizospheres, has been relatively unexplored despite its ubiquity. Previously acknowledged as a common soil bacterium, our research explores its phylogenomics, pangenomics, environmental diversity, and interactions within bacterial communities. By analysing seven genomic sequences, we have identified a pangenome consisting of 19,645 protein families, of which 2644 are shared across all studied genomes, forming the core genome. Interestingly, despite the non-motility of reported isolates, we discovered genes for flagellin and a partial flagellum assembly pathway. Examining the 16S ribosomal RNA genes of Solirubrobacter revealed substantial diversity, with 3166 operational taxonomic units identified in Mexican soils. Co-occurrence network analysis further demonstrated its significant integration within bacterial communities. Through phylogenomic scrutiny, we conclusively excluded the NCBI's GCA_009993245.1 genome from being classified as a Solirubrobacter. Our research into the metagenomic diversity of Solirubrobacter across various environments confirmed its presence in rhizospheres and certain soils, underscoring its adaptability.
The geographical ubiquity of Solirubrobacter in rhizospheres raises intriguing questions regarding its potential interactions with plant hosts and the biotic and abiotic factors influencing its presence in soil. Given its ecological significance and genetic diversity, Solirubrobacter warrants further investigation as a potentially crucial yet underappreciated keystone species.},
}
@article{pmid39144212,
  author   = {Gtari, M. and Maaoui, R. and Ghodhbane-Gtari, F. and Ben Slama, K. and Sbissi, I.},
  title    = {{MAGs}-centric crack: how long will, spore-positive {Frankia} and most {Protofrankia}, microsymbionts remain recalcitrant to axenic growth?},
  journal  = {Frontiers in microbiology},
  year     = {2024},
  volume   = {15},
  pages    = {1367490},
  pmid     = {39144212},
  issn     = {1664-302X},
  abstract = {Nearly 50 years after the ground-breaking isolation of the primary Comptonia peregrina microsymbiont under axenic conditions, efforts to isolate a substantial number of Protofrankia and Frankia strains continue with enduring challenges and complexities. This study aimed to streamline genomic insights through comparative and predictive tools to extract traits crucial for isolating specific Frankia in axenic conditions. Pangenome analysis unveiled significant genetic diversity, suggesting untapped potential for cultivation strategies. Shared metabolic strategies in cellular components, central metabolic pathways, and resource acquisition traits offered promising avenues for cultivation. Ecological trait extraction indicated that most uncultured strains exhibit no apparent barriers to axenic growth. Despite ongoing challenges, potential caveats, and errors that could bias predictive analyses, this study provides a nuanced perspective. It highlights potential breakthroughs and guides refined cultivation strategies for these yet-uncultured strains. We advocate for tailored media formulations enriched with simple carbon sources in aerobic environments, with atmospheric nitrogen optionally sufficient to minimize contamination risks. Temperature adjustments should align with strain preferences—28-29°C for Frankia and 32-35°C for Protofrankia—while maintaining an alkaline pH.
Given potential extended incubation periods (predicted doubling times ranging from 3.26 to 9.60 days, possibly up to 21.98 days), patience and rigorous contamination monitoring are crucial for optimizing cultivation conditions.},
}
@article{pmid39141833,
  author   = {Fortin, S. G. and Sun, X. and Jayakumar, A. and Ward, B. B.},
  title    = {Nitrite-oxidizing bacteria adapted to low oxygen conditions dominate nitrite oxidation in marine oxygen minimum zones},
  journal  = {The ISME journal},
  year     = {2024},
  doi      = {10.1093/ismejo/wrae160},
  pmid     = {39141833},
  issn     = {1751-7370},
  abstract = {Nitrite is a central molecule in the nitrogen cycle because nitrite oxidation to nitrate (an aerobic process) retains fixed nitrogen in a system and its reduction to dinitrogen gas (anaerobic) reduces the fixed nitrogen inventory. Despite its acknowledged requirement for oxygen, nitrite oxidation is observed in oxygen-depleted layers of the ocean's oxygen minimum zones (OMZs), challenging the current understanding of OMZ nitrogen cycling. Previous attempts to determine whether nitrite-oxidizing bacteria in the anoxic layer differ from known nitrite oxidizers in the open ocean were limited by cultivation difficulties and sequencing depth. Here, we construct 31 draft genomes of nitrite-oxidizing bacteria from global OMZs. The distribution of nitrite oxidation rates, abundance and expression of nitrite oxidoreductase genes, and relative abundance of nitrite-oxidizing bacterial draft genomes from the same samples all show peaks in the core of the oxygen-depleted zone (ODZ) and are all highly correlated in depth profiles within the major ocean oxygen minimum zones. The ODZ nitrite oxidizers are not found in the Tara Oceans global dataset (the most complete oxic ocean dataset), and the major nitrite oxidizers found in the oxygenated ocean do not occur in ODZ waters. A pangenomic analysis shows the ODZ nitrite oxidizers have distinct gene clusters compared to oxic nitrite oxidizers and are microaerophilic. These findings all indicate the existence of nitrite oxidizers whose niche is oxygen-deficient seawater.
Thus, specialist nitrite-oxidizing bacteria are responsible for fixed nitrogen retention in marine oxygen minimum zones, with implications for control of the ocean's fixed nitrogen inventory.},
}
@article{pmid39141228,
  author   = {Jouffe, C. and Dyar, K. A. and Uhlenhaut, N. H.},
  title    = {Chromatin Immunoprecipitation in Adipose Tissue and Adipocytes: How to Proceed and Optimize the Protocol for Transcription Factor {DNA} Binding},
  journal  = {Methods in molecular biology (Clifton, N.J.)},
  year     = {2024},
  volume   = {2846},
  pages    = {35--45},
  pmid     = {39141228},
  issn     = {1940-6029},
  mesh     = {*Adipocytes/metabolism/cytology ; *Adipose Tissue/metabolism/cytology ; *Chromatin Immunoprecipitation/methods ; *DNA/metabolism/genetics ; *Transcription Factors/metabolism ; Humans ; Animals ; Protein Binding ; Chromatin/metabolism/genetics ; },
  abstract = {Chromatin immunoprecipitation (ChIP) coupled to qPCR or sequencing is a crucial experiment to determine direct transcriptional regulation under the control of specific transcriptional factors or co-regulators at loci-specific or pan-genomic levels. Here we provide a reliable method for processing ChIP from adipocytes or frozen adipose tissue collection, isolation of nuclei, cross-linking of protein-DNA complexes, chromatin shearing, immunoprecipitation, and DNA purification. We also discuss critical steps for optimizing the experiment to perform a successful ChIP in lipid-rich cells/tissues.},
}
@article{pmid39140725,
  author   = {Lemieux, J. E.},
  title    = {Analysis of the {Borreliaceae} Pangenome Reveals a Distinct Genomic Architecture Conserved Across Phylogenetic Scales},
  journal  = {The Journal of infectious diseases},
  year     = {2024},
  volume   = {230},
  number   = {Supplement\_1},
  pages    = {S51--S61},
  doi      = {10.1093/infdis/jiae256},
  pmid     = {39140725},
  issn     = {1537-6613},
  support  = {K99/R00AI148604/NH/NIH HHS/United States ; },
  mesh     = {*Genome, Bacterial ; *Phylogeny ; Humans ; Borrelia/genetics/classification ; Genomics ; Lyme Disease/microbiology ; },
  abstract = {The family Borreliaceae contains arthropod-borne spirochetes that cause two widespread human diseases, Lyme disease and relapsing fever. Lyme disease is a subacute, progressive illness with variable stage and tissue manifestations. Relapsing fever is an acute febrile illness with prominent bacteremia that may recur and disseminate, particularly to the nervous system. Clinical heterogeneity is a hallmark of both diseases. While human clinical manifestations are influenced by a wide variety of factors, including immune status and host genetic susceptibility, there is evidence that Borreliaceae microbial factors influence the clinical manifestations of human disease caused by this family of spirochetes. Despite these associations, the spirochete genes that influence the severity and manifestations of human disease are, for the most part, unknown. Recent work has identified lineage-specific expansions of lipoproteome-rich accessory genome elements in virulent clones of Borrelia burgdorferi. Using publicly available genome assemblies, it is shown that all Borreliaceae lineages for which sufficient sequence data are available harbor a similar pattern of strongly structured, lineage-specific expansions in their accessory genomes, particularly among lipoproteins, and that this pattern holds across phylogenetic scales including genera, species, and genotypes.
The relationships among pangenome elements suggest that infrequent episodes of marked genomic change followed by clonal expansion in geographically and enzootically structured populations may account for the unique lineage structure of Borreliaceae. This analysis informs future genotype-phenotype studies among Borreliaceae and lays a foundation for studies of individual gene function guided by phylogenetic patterns of conservation, diversification, gain, and/or loss.},
}
@article{pmid39138795,
  author   = {Perrin, C. and Coutts, M. and Dadone-Montaudié, B.},
  title    = {Subungual melanoma: molecular analysis of 31 cases from early stage to invasive melanoma},
  journal  = {Histopathology},
  year     = {2024},
  doi      = {10.1111/his.15297},
  pmid     = {39138795},
  issn     = {1365-2559},
  abstract = {AIMS: The distinction between the benign subungual melanocytic lesions and an early lesion of subungual melanoma (SUM) remains a diagnostic challenge. We evaluated the routine diagnostic utility of array Comparative Genomic Hybridization (aCGH) to detect whole-genome copy number variations (CNV) as well as targeted next-generation sequencing (NGS) in SUM.
METHODS AND RESULTS: This retrospective study included 20 cases of in situ SUM and 11 cases of invasive SUM. Analysis by aCGH detected common oncogene amplifications in all but one case of invasive SUM (n = 10) and in all cases of in situ SUM with a melanocyte count (MC) >45/mm (n = 4 true positive) and the average number of CNV was 8.5. Thirteen remaining cases of in situ SUM gave false negative results (n = 13), owing to a lack of sufficient melanocytes to analyse (median MC of 35.35; range: 10.16-39.5). Molecular analysis failed in four cases (three in situ SUM and one invasive SUM) due to insufficient amounts of DNA. Across the whole cohort, the sensitivity of aCGH was 52%, but when adjusting the cutoff to MC >45/mm, the sensitivity was 93%. Targeted NGS was less informative than aCGH analyses in our series of SUM.
CONCLUSION: To distinguish malignant from benign lesions, especially in situ SUM versus atypical lentiginous melanocytic proliferations, aCGH analysis should be performed when the MC is above 45 melanocytes per linear millimetre. This pangenomic method can detect oncogene amplifications, as well as a number of CNV >3, which strongly support the diagnosis of malignancy.},
}
@article{pmid39137112,
  author   = {Feng, Y. and Yang, Y. and Hu, Y. and Xiao, Y. and Xie, Y. and Wei, L. and Wen, H. and Zhang, L. and McNally, A. and Zong, Z.},
  title    = {Population genomics uncovers global distribution, antimicrobial resistance, and virulence genes of the opportunistic pathogen {Klebsiella} aerogenes},
  journal  = {Cell reports},
  year     = {2024},
  volume   = {43},
  number   = {8},
  pages    = {114602},
  doi      = {10.1016/j.celrep.2024.114602},
  pmid     = {39137112},
  issn     = {2211-1247},
  abstract = {Klebsiella aerogenes is an understudied and clinically important pathogen. We therefore investigate its population structure by genome analysis aligned with metadata. We sequence 130 non-duplicated K. aerogenes clinical isolates and identify two inter-patient transmission events. We then retrieve all publicly available K. aerogenes genomes (n = 1,026, accessed by January 1, 2023) and analyze them with our 130 genomes. We develop a core-genome multi-locus sequence-typing scheme. We find that K. aerogenes is a species complex comprising four phylogroups undergoing evolutionary divergence, likely forming three species. We delineate remarkable clonal diversity and identify three worldwide-distributed carbapenemase-encoding clonal clusters, representing high-risk lineages. We uncover that K. aerogenes has an open genome equipped by a large arsenal of antimicrobial resistance genes. We identify two genetic regions specific for K. aerogenes, encoding a type VI secretion system and flagella/chemotaxis for motility, respectively, both contributing to the virulence. These results provide much-needed insights into the population structure and pan-genomes of K. aerogenes.},
}
@article{pmid39134411,
  author   = {Kuronen, J. and Horsfield, S. T. and Pöntinen, A. K. and Mallawaarachchi, S. and Arredondo-Alonso, S. and Thorpe, H. and Gladstone, R. A. and Willems, R. J. L. and Bentley, S. D. and Croucher, N. J. and Pensar, J. and Lees, J. A. and Tonkin-Hill, G. and Corander, J.},
  title    = {Pangenome-spanning epistasis and coselection analysis via de {Bruijn} graphs},
  journal  = {Genome research},
  year     = {2024},
  doi      = {10.1101/gr.278485.123},
  pmid     = {39134411},
  issn     = {1549-5469},
  abstract = {Studies of bacterial adaptation and evolution are hampered by the difficulty of measuring traits such as virulence, drug resistance, and transmissibility in large populations. In contrast, it is now feasible to obtain high-quality complete assemblies of many bacterial genomes thanks to scalable high-accuracy long-read sequencing technologies. To exploit this opportunity, we introduce a phenotype- and alignment-free method for discovering coselected and epistatically interacting genomic variation from genome assemblies covering both core and accessory parts of genomes. Our approach uses a compact colored de Bruijn graph to approximate the intragenome distances between pairs of loci for a collection of bacterial genomes to account for the impacts of linkage disequilibrium (LD). We demonstrate the versatility of our approach to efficiently identify associations between loci linked with drug resistance and adaptation to the hospital niche in the major human bacterial pathogens Streptococcus pneumoniae and Enterococcus faecalis.},
}
@article{pmid39133351,
  author   = {Yin, J. and He, M. and Liu, X. X. and Ren, C. B. and Liu, H. H. and Luo, H. and Chen, G. and Wang, Z. F. and Debnath, S. C. and Wang, P. M. and Chen, H. X. and Zheng, D. Q.},
  title    = {{Peteryoungia} algae sp. nov. isolated from seaweeds of {Gouqi Island}, {China}, and its unique genetic features among {Peteryoungia} strains},
  journal  = {Antonie van Leeuwenhoek},
  year     = {2024},
  volume   = {117},
  number   = {1},
  pages    = {112},
  pmid     = {39133351},
  issn     = {1572-9699},
  support  = {LDT23D06022D06//National Natural Science Foundation of Zhejiang Province/ ; ZDYF2024SHFZ046//Key Research and Development Program of Hainan Province/ ; 226-2024-00019//Fundamental Research Funds for the Central Universities/ ; SKJC-2024-02-003//Project of Sanya Yazhou Bay Science and Technology City/ ; },
  mesh     = {*Phylogeny ; China ; *RNA, Ribosomal, 16S/genetics ; *Seaweed/microbiology ; *DNA, Bacterial/genetics ; *Base Composition ; *Fatty Acids/analysis/chemistry ; Bacterial Typing Techniques ; Genome, Bacterial ; Sequence Analysis, DNA ; Islands ; Nucleic Acid Hybridization ; },
  abstract = {A Gram-stain-negative, light khaki, strictly aerobic, rod-shaped, motile via multiple flagella, and catalase- and oxidase-positive bacterium, designated as SSM4.3[T], was isolated from the seaweed of Gouqi Island in the East China Sea. The novel isolate grows at 0-5.0% NaCl concentrations (w/v) (optimum 1%), pH 5.0-9.0 (optimum pH 7.0), and 15-37 °C (optimum 30 °C). The 16S rRNA gene sequences-based phylogeny indicates that the novel marine isolate belongs to the family Rhizobiaceae and that it shared the greatest sequence similarity (98.9%) with Peteryoungia rhizophila CGMCC 1.15691[T]. This classification was also supported by phylogenetic analysis using core genes. The predominant fatty acids (≥ 10%) of the strain were identified as C18:1 ω7c/C18:1 ω6c. Q-10 was identified as the major isoprenoid quinone, with trace levels of Q-9 present. The major polar lipids were identified as diphosphatidylglycerol, phosphatidylethanolamine and phosphatidylglycerol. The complete genome size of strain SSM4.3[T] is 4.39 Mb with a DNA G+C content of 61.3%. The average nucleotide identity, digital DNA-DNA hybridization, and average amino acid identity values between the genomes of strain SSM4.3[T] and its closely related representatives were 74.80-86.93%, 20.00-32.30%, and 70.30-91.52%, respectively. Phylogenetic analysis, grounded on the core genes, reveals the evolutionary relationship between SSM4.3[T] and other Peteryoungia strains. Pan-genomics analysis of 8 previously classified Peteryoungia species and SSM4.3[T] revealed their unique genetic features and functions. Overall, strain SSM4.3[T] was considered to be a new species of the Peteryoungia genus; the name Peteryoungia algae sp. nov. has been proposed, with type strain SSM4.3[T] (= LMG 32561 = MCCC 1K07170).},
}
@article{pmid39132840,
  author   = {Vale, F. F. and Roberts, R. J. and Kobayashi, I. and Camargo, M. C. and Rabkin, C. S.},
  title    = {Gene content, phage cycle regulation model and prophage inactivation disclosed by prophage genomics in the {Helicobacter pylori Genome Project}},
  journal  = {Gut microbes},
  year     = {2024},
  volume   = {16},
  number   = {1},
  pages    = {2379440},
  doi      = {10.1080/19490976.2024.2379440},
  pmid     = {39132840},
  issn     = {1949-0984},
  mesh     = {*Helicobacter pylori/genetics/virology ; *Prophages/genetics/physiology ; *Genome, Bacterial ; Humans ; *Phylogeny ; *Genomics ; Helicobacter Infections/microbiology ; },
  abstract = {Prophages can have major clinical implications through their ability to change pathogenic bacterial traits. There is limited understanding of the prophage role in ecological, evolutionary, adaptive processes and pathogenicity of Helicobacter pylori, a widespread bacterium causally associated with gastric cancer. Inferring the exact prophage genomic location and completeness requires complete genomes. The international Helicobacter pylori Genome Project (HpGP) dataset comprises 1011 H. pylori complete clinical genomes enriched with epigenetic data. We thoroughly evaluated the H. pylori prophage genomic content in the HpGP dataset. We investigated population evolutionary dynamics through phylogenetic and pangenome analyses. Additionally, we identified genome rearrangements and assessed the impact of prophage presence on bacterial gene disruption and methylome. We found that 29.5% (298) of the HpGP genomes contain prophages, of which only 32.2% (96) were complete, minimizing the burden of prophage carriage. The prevalence of H. pylori prophage sequences was variable by geography and ancestry, but not by disease status of the human host. Prophage insertion occasionally results in gene disruption that can change the global bacterial epigenome. Gene function prediction allowed the development of the first model for lysogenic-lytic cycle regulation in H. pylori.
We have disclosed new prophage inactivation mechanisms that appear to occur by genome rearrangement, merger with other mobile elements, and pseudogene accumulation. Our analysis provides a comprehensive framework for H. pylori prophage biological and genomics, offering insights into lysogeny regulation and bacterial adaptation to prophages.},
}
@article{pmid39130480,
  author   = {Hasnat, S. and Hoque, M. N. and Mahbub, M. M. and Sakif, T. I. and Shahinuzzaman, A. D. A. and Islam, T.},
  title    = {Pantothenate kinase: A promising therapeutic target against pathogenic {Clostridium} species},
  journal  = {Heliyon},
  year     = {2024},
  volume   = {10},
  number   = {14},
  pages    = {e34544},
  pmid     = {39130480},
  issn     = {2405-8440},
  abstract = {Current treatment of clostridial infections includes broad-spectrum antibiotics and antitoxins, yet antitoxins are ineffective against all Clostridium species. Moreover, rising antimicrobial resistance (AMR) threatens treatment effectiveness and public health. This study therefore aimed to discover a common drug target for four pathogenic clostridial species, Clostridium botulinum, C. difficile, C. tetani, and C. perfringens through an in-silico core genomic approach. Using four reference genomes of C. botulinum, C. difficile, C. tetani, and C. perfringens, we identified 1484 core genomic proteins (371/genome) and screened them for potential drug targets. Through a subtractive approach, four core proteins were finally identified as drug targets, represented by type III pantothenate kinase (CoaX) and, selected for further analyses. Interestingly, the CoaX is involved in the phosphorylation of pantothenate (vitamin B5), which is a critical precursor for coenzyme A (CoA) biosynthesis. Investigation of druggability analysis on the identified drug target reinforces CoaX as a promising novel drug target for the selected Clostridium species. During the molecular screening of 1201 compounds, a known agonist drug compound (Vibegron) showed strong inhibitory activity against targeted clostridial CoaX. Additionally, we identified tazobactam, a beta-lactamase inhibitor, as effective against the newly proposed target, CoaX.
Therefore, identifying CoaX as a single drug target effective against all four clostridial pathogens presents a valuable opportunity to develop a cost-effective treatment for multispecies clostridial infections.},
}
@article{pmid39120932,
  year     = {2024},
  author   = {Krisna, M. A. and Jolley, K. A. and Monteith, W. and Boubour, A. and Hamers, R. L. and Brueggemann, A. B. and Harrison, O. B. and Maiden, M. C. J.},
  title    = {Development and implementation of a core genome multilocus sequence typing scheme for {Haemophilus} influenzae.},
  journal  = {Microbial genomics},
  volume   = {10},
  number   = {8},
  pages    = {},
  doi      = {10.1099/mgen.0.001281},
  pmid     = {39120932},
  issn     = {2057-5858},
  mesh     = {*Haemophilus influenzae/genetics/classification ; *Multilocus Sequence Typing/methods ; *Phylogeny ; *Genome, Bacterial ; Humans ; Haemophilus Infections/microbiology ; Genetic Variation ; },
  abstract = {Haemophilus influenzae is part of the human nasopharyngeal microbiota and a pathogen causing invasive disease. The extensive genetic diversity observed in H. influenzae necessitates discriminatory analytical approaches to evaluate its population structure. This study developed a core genome multilocus sequence typing (cgMLST) scheme for H. influenzae using pangenome analysis tools and validated the cgMLST scheme using datasets consisting of complete reference genomes (N = 14) and high-quality draft H. influenzae genomes (N = 2297). The draft genome dataset was divided into a development dataset (N = 921) and a validation dataset (N = 1376). The development dataset was used to identify potential core genes, and the validation dataset was used to refine the final core gene list to ensure the reliability of the proposed cgMLST scheme. Functional classifications were made for all the resulting core genes. Phylogenetic analyses were performed using both allelic profiles and nucleotide sequence alignments of the core genome to test congruence, as assessed by Spearman's correlation and ordinary least square linear regression tests. Preliminary analyses using the development dataset identified 1067 core genes, which were refined to 1037 with the validation dataset. More than 70\% of core genes were predicted to encode proteins essential for metabolism or genetic information processing. Phylogenetic and statistical analyses indicated that the core genome allelic profile accurately represented phylogenetic relatedness among the isolates ($R^{2}$ = 0.945). We used this cgMLST scheme to define a high-resolution population structure for H. influenzae, which enhances the genomic analysis of this clinically relevant human pathogen.},
}
@article{pmid39116952,
  year     = {2024},
  author   = {Khan, M. A. S. and Chaity, S. C. and Hosen, A. and Rahman, S. R.},
  title    = {Genomic epidemiology of multidrug-resistant clinical {Acinetobacter} baumannii in {Bangladesh}.},
  journal  = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases},
  volume   = {},
  number   = {},
  pages    = {105656},
  doi      = {10.1016/j.meegid.2024.105656},
  pmid     = {39116952},
  issn     = {1567-7257},
  abstract = {The rising frequency of multidrug-resistant (MDR) Acinetobacter baumannii infections represents a significant public health challenge in Bangladesh. Genomic analysis of bacterial pathogens enhances surveillance and control efforts by providing insights into genetic diversity, antimicrobial resistance (AMR) profiles, and transmission dynamics. In this study, we conducted a comprehensive bioinformatic analysis of 82 whole-genome sequences (WGS) of A. baumannii from Bangladesh to understand their genomic epidemiological characteristics. WGS of the MDR and biofilm-forming A. baumannii strain S1C revealed the presence of 28 AMR genes, predicting its pathogenicity and classification within sequence type ST2. Multi-locus sequence typing (MLST) genotyping suggested heterogeneity in the distribution of clinical A. baumannii strains in Bangladesh, with a predominance of ST575. The resistome diversity was evident from the detection of 82 different AMR genes, with antibiotic inactivation being the most prevalent resistance mechanism. All strains were predicted to be multidrug-resistant. The observed virulence genes were associated with immune evasion, biofilm formation, adherence, nutrient acquisition, effector delivery, and other mechanisms. Mobile genetic elements carrying AMR genes were predicted in 68.29\% (N = 56) of the genomes. The ``open'' state of the pan-genome and a high proportion of accessory genes highlighted the genome plasticity and diversity of A. baumannii in Bangladesh. Additionally, phylogenomic analysis indicated clustering of A. baumannii strains into three separate clades according to sequence type. In summary, our findings offer detailed insights into the genomic landscape of A. baumannii in Bangladesh, contributing to our understanding of its epidemiology and pathogenicity and informing strategies to combat this pathogen.},
}
@article{pmid39116702,
  year     = {2024},
  author   = {Singhvi, N. and Talwar, C. and Nagar, S. and Verma, H. and Kaur, J. and Mahato, N. K. and Ahmad, N. and Mondal, K. and Gupta, V. and Lal, R.},
  title    = {Insights into the radiation and oxidative stress mechanisms in genus {Deinococcus}.},
  journal  = {Computational biology and chemistry},
  volume   = {112},
  number   = {},
  pages    = {108161},
  doi      = {10.1016/j.compbiolchem.2024.108161},
  pmid     = {39116702},
  issn     = {1476-928X},
  abstract = {Deinococcus species, noted for their exceptional resistance to DNA-damaging environmental stresses, have piqued scientists' interest for decades. This study dives into the complex mechanisms underpinning radiation resistance in the Deinococcus genus. We have examined the genomes of 82 Deinococcus species and classified radiation-resistance proteins manually into five unique curated categories: DNA repair, oxidative stress defense, Ddr and Ppr proteins, regulatory proteins, and miscellaneous resistance components. This classification reveals important information about the various molecular mechanisms used by these extremophiles which have been less explored so far. We also investigated the presence or lack of these proteins in the context of phylogenetic relationships, core, and pan-genomes, which offered light on the evolutionary dynamics of radiation resistance. This comprehensive study provides a deeper understanding of the genetic underpinnings of radiation resistance in the Deinococcus genus, with potential implications for understanding similar mechanisms in other organisms using an interactomics approach. Finally, this study reveals the complexities of radiation resistance mechanisms, providing a comprehensive understanding of the genetic components that allow Deinococcus species to flourish under harsh environments. The findings add to our understanding of the larger spectrum of stress adaption techniques in bacteria and may have applications in sectors ranging from biotechnology to environmental research.},
}
@article{pmid39116055,
  year     = {2024},
  author   = {Rojas-Vargas, J. and Rebollar, E. A. and Sanchez-Flores, A. and Pardo-López, L.},
  title    = {A comparative genomic study of a hydrocarbon-degrading marine bacterial consortium.},
  journal  = {PloS one},
  volume   = {19},
  number   = {8},
  pages    = {e0303363},
  doi      = {10.1371/journal.pone.0303363},
  pmid     = {39116055},
  issn     = {1932-6203},
  mesh     = {*Biodegradation, Environmental ; *Hydrocarbons/metabolism ; *Phylogeny ; *Genome, Bacterial ; *Genomics/methods ; Microbial Consortia/genetics ; Bacteria/genetics/metabolism/classification ; Seawater/microbiology ; },
  abstract = {Ocean oil pollution has a large impact on the environment and the health of living organisms. Bioremediation cleaning strategies are promising eco-friendly alternatives for tackling this problem. Previously, we designed and reported a hydrocarbon (HC) degrading microbial consortium of four marine strains belonging to the species Alloalcanivorax xenomutans, Halopseudomonas aestusnigri, Paenarthrobacter sp., and Pseudomonas aeruginosa. However, the knowledge about the metabolic potential of this bacterial consortium for HC bioremediation is not yet well understood. Here, we analyzed the complete genomes of these marine bacterial strains accompanied by a phylogenetic reconstruction along with 138 bacterial strains. Synteny between complete genomes of the same species or genus, revealed high conservation among strains of the same species, covering over 91\% of their genomic sequences. Functional predictions highlighted a high abundance of genes related to HC degradation, which may result in functional redundancy within the consortium; however, unique and complete gene clusters linked to aromatic degradation were found in the four genomes, suggesting substrate specialization. Pangenome gain and loss analysis of genes involved in HC degradation provided insights into the evolutionary history of these capabilities, shedding light on the acquisition and loss of relevant genes related to alkane and aromatic degradation. Our work, including comparative genomic analyses, identification of secondary metabolites, and prediction of HC-degrading genes, enhances our understanding of the functional diversity and ecological roles of these marine bacteria in crude oil-contaminated marine environments and contributes to the applied knowledge of bioremediation.},
}
@article{pmid39114027,
  year     = {2024},
  author   = {Saikat, T. A. and Sayem Khan, M. A. and Islam, M. S. and Tasnim, Z. and Ahmed, S.},
  title    = {Characterization and genome mining of {Bacillus} subtilis {BDSA1} isolated from river water in {Bangladesh}: A promising bacterium with diverse biotechnological applications.},
  journal  = {Heliyon},
  volume   = {10},
  number   = {14},
  pages    = {e34369},
  pmid     = {39114027},
  issn     = {2405-8440},
  abstract = {The metabolic versatility of Bacillus subtilis makes it useful for a wide range of applications in biotechnology, from bioremediation to industrially important metabolite production. Understanding the molecular attributes of the biocontrol characteristics of B. subtilis is necessary for its tailored use in the environment and industry. Therefore, the present study aimed to conduct phenotypic characterization and whole genome analysis of the B. subtilis BDSA1 isolated from polluted river water from Dhaka, Bangladesh to explore its biotechnological potential. The chromium reduction capacity at 100 ppm Cr (VI) showed that B. subtilis BDSA1 reduced 40 \% of Cr (VI) within 24hrs at 37 °C. Exposure of this bacterium to 200 ppm cadmium resulted in 43 \% adsorption following one week of incubation at 37 °C. Molecular detection of chrA and czcC gene confirmed chromium and cadmium resistance characteristics of BDSA1. The size of the genome of the B. subtilis BDSA1 was 4.2 Mb with 43.4 \% GC content. Genome annotation detected the presence of numerous genes involved in the degradation of xenobiotics, resistance to abiotic stress, production of lytic enzymes, siderophore formation, and plant growth promotion. The assembled genome also carried chromium, cadmium, copper, and arsenic resistance-related genes, notably cadA, czcD, czrA, arsB etc. Genome mining revealed six biosynthetic gene clusters for bacillaene, bacillibacin, bacilysin, subtilosin, fengycin and surfactin. Importantly, BDSA1 was predicted to be non-pathogenic to humans and had only two acquired antimicrobial resistance genes. The pan-genome analysis showed the openness of the B. subtilis pan-genome. Our findings suggested that B. subtilis BDSA1 might be a promising candidate for diverse biotechnological uses.},
}
@article{pmid39113613,
  year     = {2024},
  author   = {Chen, M. and Trotter, V. V. and Walian, P. J. and Chen, Y. and Lopez, R. and Lui, L. M. and Nielsen, T. N. and Malana, R. G. and Thorgersen, M. P. and Hendrickson, A. J. and Carion, H. and Deutschbauer, A. M. and Petzold, C. J. and Smith, H. J. and Arkin, A. P. and Adams, M. W. W. and Fields, M. W. and Chakraborty, R.},
  title    = {Molecular mechanisms and environmental adaptations of flagellar loss and biofilm growth of {Rhodanobacter} under environmental stress.},
  journal  = {The ISME journal},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1093/ismejo/wrae151},
  pmid     = {39113613},
  issn     = {1751-7370},
  abstract = {Biofilms aid bacterial adhesion to surfaces via direct and indirect mechanisms, and formation of biofilms is considered as an important strategy for adaptation and survival in sub-optimal environmental conditions. However, the molecular underpinnings of biofilm formation in subsurface sediment/groundwater ecosystems where microorganisms often experience fluctuations in nutrient input, pH, nitrate or metal concentrations is underexplored. We examined biofilm formation under different nutrient, pH, metal, and nitrate regimes of 16 Rhodanobacter strains isolated from subsurface groundwater wells spanning diverse pH (3.5 to 5) and nitrate levels (13.7 to 146 mM). Eight Rhodanobacter strains demonstrated significant biofilm growth under low pH, suggesting adaptation to survive and grow at low pH. Biofilms intensified under aluminum stress, particularly in strains possessing fewer genetic traits associated with biofilm formation warranting further investigation. Through RB-TnSeq, proteomics, use of specific mutants and transmission electron microscopy analysis, we discovered flagellar loss under aluminum stress, indicating a potential relationship between motility, metal tolerance, and biofilm growth. Comparative genomic analyses revealed absence of flagella and chemotaxis genes, and presence of putative Type VI secretion system in the high biofilm-forming strain FW021-MT20. This study identifies genetic determinants associated with biofilm growth in a predominant environmental genus, Rhodanobacter, under metal stress and identifies traits aiding survival and adaptation to contaminated subsurface environments.},
}
@article{pmid39113147,
  year     = {2024},
  author   = {Wang, Z. and Wang, M. and Du, L.},
  title    = {Public perceptions of international genetic information sharing for biomedical research in {China}: a case study of the social media debate on the article {``A Pangenome Reference of 36 Chinese Populations''} published in {Nature}.},
  journal  = {Human genomics},
  volume   = {18},
  number   = {1},
  pages    = {86},
  pmid     = {39113147},
  issn     = {1479-7364},
  support  = {MYRG2020-00096-FLL//Universidade de Macau/ ; },
  abstract = {BACKGROUND: The international disclosure of Chinese human genetic data continues to be a contentious issue in China, generating public debates in both traditional and social media channels. Concerns have intensified after Chinese scientists' research on pangenome data was published in the prestigious journal Nature.
    METHODS: This study scrutinized microblogs posted on Weibo, a popular Chinese social media site, in the two months immediately following the publication (June 14, 2023-August 21, 2023). Content analysis was conducted to assess the nature of public responses, justifications for positive or negative attitudes, and the users' overall knowledge of how Chinese human genetic information is regulated and managed in China.
    RESULTS: Weibo users displayed contrasting attitudes towards the article's public disclose of pangenome research data, with 18\% positive, 64\% negative, and 18\% neutral. Positive attitudes came primarily from verified government and media accounts, which praised the publication. In contrast, negative attitudes originated from individual users who were concerned about national security and health risks and often believed that the researchers have betrayed China. The benefits of data sharing highlighted in the commentaries included advancements in disease research and scientific progress. Approximately 16\% of the microblogs indicated that Weibo users had misunderstood existing regulations and laws governing data sharing and stewardship.
    CONCLUSIONS: Based on the predominantly negative public attitudes toward scientific data sharing established by our study, we recommend enhanced outreach by scientists and scientific institutions to increase the public understanding of developments in genetic research, international data sharing, and associated regulations. Additionally, governmental agencies can alleviate public fears and concerns by being more transparent about their security reviews of international collaborative research involving Chinese human genetic data and its cross-border transfer.},
}
@article{pmid39113037,
  year     = {2024},
  author   = {Kuo, W. H. and Wright, S. J. and Small, L. L. and Olsen, K. M.},
  title    = {De novo genome assembly of white clover ({Trifolium} repens {L.}) reveals the role of copy number variation in rapid environmental adaptation.},
  journal  = {BMC biology},
  volume   = {22},
  number   = {1},
  pages    = {165},
  pmid     = {39113037},
  issn     = {1741-7007},
  support  = {IOS-1557770//Directorate for Biological Sciences/ ; DEB-1601641//Division of Environmental Biology/ ; DGE-1143954//Division of Graduate Education/ ; },
  abstract = {BACKGROUND: White clover (Trifolium repens) is a globally important perennial forage legume. This species also serves as an eco-evolutionary model system for studying within-species chemical defense variation; it features a well-studied polymorphism for cyanogenesis (HCN release following tissue damage), with higher frequencies of cyanogenic plants favored in warmer locations worldwide. Using a newly generated haplotype-resolved genome and two other long-read assemblies, we tested the hypothesis that copy number variants (CNVs) at cyanogenesis genes play a role in the ability of white clover to rapidly adapt to local environments. We also examined questions on subgenome evolution in this recently evolved allotetraploid species and on chromosomal rearrangements in the broader IRLC legume clade.
    RESULTS: Integration of PacBio HiFi, Omni-C, Illumina, and linkage map data yielded a completely de novo genome assembly for white clover (created without a priori sequence assignment to subgenomes). We find that white clover has undergone extensive transposon diversification since its origin but otherwise shows highly conserved genome organization and composition with its diploid progenitors. Unlike some other clover species, its chromosomal structure is conserved with other IRLC legumes. We further find extensive evidence of CNVs at the major cyanogenesis loci; these contribute to quantitative variation in the cyanogenic phenotype and to local adaptation across wild North American populations.
    CONCLUSIONS: This work provides a case study documenting the role of CNVs in local adaptation in a plant species, and it highlights the value of pan-genome data for identifying contributions of structural variants to adaptation in nature.},
}
@article{pmid39111568,
  year     = {2024},
  author   = {Joishy, T. K. and Bhattacharya, A. and Singh, C. T. and Mukherjee, A. K. and Khan, M. R.},
  title    = {Probiotic and anti-inflammatory properties of {Lactiplantibacillus} plantarum {MKTJ24} isolated from an artisanal fermented fish of {North-east India}.},
  journal  = {New biotechnology},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1016/j.nbt.2024.07.005},
  pmid     = {39111568},
  issn     = {1876-4347},
  abstract = {The study aimed to isolate and characterize lactic acid bacteria from various traditional fermented fish products from North East India, including Xindol, Hentak, and Ngari, which hold significant dietary importance for the indigenous tribes. Additionally, the study sought to examine their untargeted metabolomic profiles. A total of 43 strains of Bacillus, Priestia, Staphylococcus, Pediococcus, and Lactiplantibacillus were isolated, characterized by 16S rRNA gene and tested for probiotic properties. Five strains passed pH and bile salt tests with strain dependent antimicrobial activity, which exhibited moderate autoaggregation and hydrophobicity properties. Lactiplantibacillus plantarum MKTJ24 exhibited the highest hydrophobicity (42\%), which was further confirmed by adhesion assay in HT-29 cell lines (100\%). Lactiplantibacillus plantarum MKTJ24 treatment in LPS-stimulated HT-29 cells up-regulated expression of mucin genes compared to LPS-treated cells. Treatment of RAW 264.7 cells with Lactiplantibacillus plantarum MKTJ24 decreased LPS-induced reactive oxygen species (ROS) and nitric oxide (NO) productions. Further, genome analysis of Lactiplantibacillus plantarum MKTJ24 revealed the presence of several probiotic markers and immunomodulatory genes. The genome was found to harbour plantaracin operon involved in bacteriocin production. A pangenome analysis using all the publicly available L. plantarum genomes specifically isolated from fermented fish products identified 120 unique genes in Lactiplantibacillus plantarum MKTJ24. Metabolomic analysis indicated dominance of ascorbic acids, pentafluropropionate, cyclopropaneacetic acid, florobenzylamine, and furanonee in Xindol. This study suggests that Lactiplantibacillus plantarum MKTJ24 has potential probiotic and immunomodulatory properties that could be used in processing traditional fermented fish products on an industrial scale to improve their quality and enhance functional properties.},
}
@article{pmid39106791,
  year     = {2024},
  author   = {Gasparini, K. and Figueiredo, Y. G. and Araújo, W. L. and Peres, L. E. and Zsögön, A.},
  title    = {De novo domestication in the {Solanaceae}: advances and challenges.},
  journal  = {Current opinion in biotechnology},
  volume   = {89},
  number   = {},
  pages    = {103177},
  doi      = {10.1016/j.copbio.2024.103177},
  pmid     = {39106791},
  issn     = {1879-0429},
  abstract = {The advent of highly efficient genome editing (GE) tools, coupled with high-throughput genome sequencing, has paved the way for the accelerated domestication of crop wild relatives. New crops could thus be rapidly created that are well adapted to cope with drought, flooding, soil salinity, or insect damage. De novo domestication avoids the complexity of transferring polygenic stress resistance from wild species to crops. Instead, new crops can be created by manipulating major genes in stress-resistant wild species. However, the genetic basis of certain relevant domestication-related traits often involve epistasis and pleiotropy. Furthermore, pan-genome analyses show that structural variation driving gene expression changes has been selected during domestication. A growing body of work suggests that the Solanaceae family, which includes crop species such as tomatoes, potatoes, eggplants, peppers, and tobacco, is a suitable model group to dissect these phenomena and operate changes in wild relatives to improve agronomic traits rapidly with GE. We briefly discuss the prospects of this exciting novel field in the interface between fundamental and applied plant biology and its potential impact in the coming years.},
}
@article{pmid39107817,
  year     = {2024},
  author   = {Le, D. Q. and Nguyen, T. A. and Nguyen, S. H. and Nguyen, T. T. and Nguyen, C. H. and Phung, H. T. and Ho, T. H. and Vo, N. S. and Nguyen, T. and Nguyen, H. A. and Cao, M. D.},
  title    = {Efficient inference of large prokaryotic pangenomes with {PanTA}.},
  journal  = {Genome biology},
  volume   = {25},
  number   = {1},
  pages    = {209},
  pmid     = {39107817},
  issn     = {1474-760X},
  support  = {VINIF.2019.DA11//VinIF/ ; },
  abstract = {Pangenome inference is an indispensable step in bacterial genomics, yet its scalability poses a challenge due to the rapid growth of genomic collections. This paper presents PanTA, a software package designed for constructing pangenomes of large bacterial datasets, showing unprecedented efficiency levels multiple times higher than existing tools. PanTA introduces a novel mechanism to construct the pangenome progressively without rebuilding the accumulated collection from scratch. The progressive mode is shown to consume orders of magnitude less computational resources than existing solutions in managing growing datasets. The software is open source and is publicly available at https://github.com/amromics/panta and at 10.6084/m9.figshare.23724705 .},
}
@article{pmid39107305,
  year     = {2024},
  author   = {Cortinovis, G. and Vincenzi, L. and Anderson, R. and Marturano, G. and Marsh, J. I. and Bayer, P. E. and Rocchetti, L. and Frascarelli, G. and Lanzavecchia, G. and Pieri, A. and Benazzo, A. and Bellucci, E. and Di Vittori, V. and Nanni, L. and Ferreira Fernández, J. J. and Rossato, M. and Aguilar, O. M. and Morrell, P. L. and Rodriguez, M. and Gioia, T. and Neumann, K. and Alvarez Diaz, J. C. and Gratias, A. and Klopp, C. and Bitocchi, E. and Geffroy, V. and Delledonne, M. and Edwards, D. and Papa, R.},
  title    = {Adaptive gene loss in the common bean pan-genome during range expansion and domestication.},
  journal  = {Nature communications},
  volume   = {15},
  number   = {1},
  pages    = {6698},
  pmid     = {39107305},
  issn     = {2041-1723},
  support  = {862862//EC | Horizon 2020 Framework Programme (EU Framework Programme for Research and Innovation H2020)/ ; },
  mesh     = {*Phaseolus/genetics ; *Genome, Plant ; *Domestication ; Adaptation, Physiological/genetics ; Genotype ; Genetic Variation ; Crops, Agricultural/genetics ; Selection, Genetic ; Evolution, Molecular ; Mutation ; Plant Breeding/methods ; },
  abstract = {The common bean (Phaseolus vulgaris L.) is a crucial legume crop and an ideal evolutionary model to study adaptive diversity in wild and domesticated populations. Here, we present a common bean pan-genome based on five high-quality genomes and whole-genome reads representing 339 genotypes. It reveals {\textasciitilde}234 Mb of additional sequences containing 6,905 protein-coding genes missing from the reference, constituting 49\% of all presence/absence variants (PAVs). More non-synonymous mutations are found in PAVs than core genes, probably reflecting the lower effective population size of PAVs and fitness advantages due to the purging effect of gene loss. Our results suggest pan-genome shrinkage occurred during wild range expansion. Selection signatures provide evidence that partial or complete gene loss was a key adaptive genetic change in common bean populations with major implications for plant adaptation. The pan-genome is a valuable resource for food legume research and breeding for climate change mitigation and sustainable agriculture.},
}
@article{pmid39102038,
  year     = {2024},
  author   = {Olanrewaju, O. S. and Molale-Tom, L. G. and Bezuidenhout, C. C.},
  title    = {Genomic diversity, antibiotic resistance, and virulence in {South African} {Enterococcus} faecalis and {Enterococcus} lactis isolates.},
  journal  = {World journal of microbiology \& biotechnology},
  volume   = {40},
  number   = {10},
  pages    = {289},
  pmid     = {39102038},
  issn     = {1573-0972},
  mesh     = {South Africa ; *Genome, Bacterial ; *Enterococcus faecalis/genetics/drug effects/pathogenicity/isolation & purification ; Virulence/genetics ; *Genetic Variation ; *Anti-Bacterial Agents/pharmacology ; *Virulence Factors/genetics ; Humans ; Drug Resistance, Bacterial/genetics ; Genomic Islands/genetics ; Gram-Positive Bacterial Infections/microbiology ; Enterococcus/genetics/drug effects/pathogenicity/isolation & purification/classification ; Phylogeny ; Gene Transfer, Horizontal ; Genomics ; Microbial Sensitivity Tests ; },
  abstract = {This study presents the empirical findings of an in-depth genomic analysis of Enterococcus faecalis and Enterococcus lactis isolates from South Africa. It offers valuable insights into their genetic characteristics and their significant implications for public health. The study uncovers nuanced variations in the gene content of these isolates, despite their similar GC contents, providing a comprehensive view of the evolutionary diversity within the species. Genomic islands are identified, particularly in E. faecalis, emphasizing its propensity for horizontal gene transfer and genetic diversity, especially in terms of antibiotic resistance genes. Pangenome analysis reveals the existence of a core genome, accounting for a modest proportion of the total genes, with 2157 core genes, 1164 shell genes, and 4638 cloud genes out of 7959 genes in 52 South African E. faecalis genomes (2 from this study, 49 south Africa genomes downloaded from NCBI, and E. faecalis reference genome). Detecting large-scale genomic rearrangements, including chromosomal inversions, underscores the dynamic nature of bacterial genomes and their role in generating genetic diversity. The study uncovers an array of antibiotic resistance genes, with trimethoprim, tetracycline, glycopeptide, and multidrug resistance genes prevalent, raising concerns about the effectiveness of antibiotic treatment. Virulence gene profiling unveils a diverse repertoire of factors contributing to pathogenicity, encompassing adhesion, biofilm formation, stress resistance, and tissue damage. These empirical findings provide indispensable insights into these bacteria's genomic dynamics, antibiotic resistance mechanisms, and virulence potential, underlining the pressing need to address antibiotic resistance and implement robust control measures.},
}
@article{pmid39101619,
  year     = {2024},
  author   = {Kileeg, Z. and Wang, P. and Mott, G. A.},
  title    = {Chromosome-scale assembly and annotation of eight {Arabidopsis} thaliana ecotypes.},
  journal  = {Genome biology and evolution},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1093/gbe/evae169},
  pmid     = {39101619},
  issn     = {1759-6653},
  abstract = {The plant Arabidopsis thaliana is a model system used by researchers through much of plant research. Recent efforts have focused on discovering the genomic variation found in naturally occurring ecotypes isolated from around the world. These ecotypes have come from diverse climates and therefore have faced and adapted to a variety of abiotic and biotic stressors. The sequencing and comparative analysis of these genomes can offer insight into the adaptive strategies of plants. While there are a large number of ecotype genome sequences available, the majority were created using short-read technology. Mapping of short-reads containing structural variation to a reference genome bereft of that variation leads to incorrect mapping of those reads, resulting in a loss of genetic information and introduction of false heterozygosity. For this reason, long-read de novo sequencing of genomes is required to resolve structural variation events. In this paper, we sequenced the genomes of eight natural variants of A. thaliana using nanopore sequencing. This resulted in highly contiguous assemblies with $>$95\% of the genome contained within 5 contigs. The sequencing results from this study include 5 ecotypes from relict and African populations, an area of untapped genetic diversity. With this study, we increase the knowledge of diversity we have across A. thaliana ecotypes and contribute to ongoing production of an A. thaliana pan-genome.},
}
@article {pmid39095952, year = {2024}, author = {She, H. and Liu, Z. and Xu, Z. and Zhang, H. and Wu, J. and Cheng, F. and Wang, X. and Qian, W.}, title = {Pan-genome analysis of 13 {Spinacia} accessions reveals structural variations associated with sex chromosome evolution and domestication traits in spinach.}, journal = {Plant biotechnology journal}, doi = {10.1111/pbi.14433}, pmid = {39095952}, issn = {1467-7652}, support = {CAAS-ASTIP-IVFCAAS//the Chinese Academy of Agricultural Sciences Innovation Project/ ; IVF-BRF2023002//Central Public-interest Scientific Institution Basal Research Fund/ ; Y2023QC07//Central Public-interest Scientific Institution Basal Research Fund/ ; CARS-23-A-17//China Agricultural Research System/ ; }, abstract = {Structural variations (SVs) are major genetic variants that can be involved in the origin, adaptation and domestication of species. However, the identification and characterization of SVs in Spinacia species are rare due to the lack of a pan-genome. Here, we report eight chromosome-scale assemblies of cultivated spinach and its two wild species. After integration with five existing assemblies, we constructed a comprehensive Spinacia pan-genome and identified 193 661 pan-SVs, which were genotyped in 452 Spinacia accessions. Our pan-SVs enabled genome-wide association study identified signals associated with sex and clarified the evolutionary direction of spinach. Most sex-linked SVs (86\%) were biased to occur on the Y chromosome during the evolution of the sex-linked region, resulting in reduced Y-linked gene expression. The frequency of pan-SVs among Spinacia accessions further illustrated the contribution of these SVs to domestication, such as bolting time and seed dormancy. Furthermore, compared with SNPs, pan-SVs act as efficient variants in genomic selection (GS) because of their ability to capture missing heritability information and higher prediction accuracy.
Overall, this study provides a valuable resource for spinach genomics and highlights the potential utility of pan-SV in crop improvement and breeding programmes.}, }
@article {pmid39091872, year = {2024}, author = {Lin, M. J. and Langmead, B. and Safonova, Y.}, title = {{IGLoo}: Profiling the Immunoglobulin Heavy chain locus in Lymphoblastoid Cell Lines with {PacBio} High-Fidelity Sequencing reads.}, journal = {bioRxiv : the preprint server for biology}, doi = {10.1101/2024.07.20.604421}, pmid = {39091872}, issn = {2692-8205}, abstract = {New high-quality human genome assemblies derived from lymphoblastoid cell lines (LCLs) provide reference genomes and pangenomes for genomics studies. However, the characteristics of LCLs pose technical challenges to profiling immunoglobulin (IG) genes. IG loci in LCLs contain a mixture of germline and somatically recombined haplotypes, making them difficult to genotype or assemble accurately. To address these challenges, we introduce IGLoo, a software tool that implements novel methods for analyzing sequence data and genome assemblies derived from LCLs. IGLoo characterizes somatic V(D)J recombination events in the sequence data and identifies the breakpoints and missing IG genes in the LCL-based assemblies. Furthermore, IGLoo implements a novel reassembly framework to improve germline assembly quality by integrating information about somatic events and population structural variations in the IG loci. We applied IGLoo to study the assemblies from the Human Pangenome Reference Consortium, providing new insights into the mechanisms, gene usage, and patterns of V(D)J recombination, causes of assembly fragmentation in the IG heavy chain (IGH) locus, and improved representation of the IGH assemblies.}, }
@article {pmid39088040, year = {2024}, author = {Nguyen, A. K. and Schall, P. Z. and Kidd, J. M.}, title = {A map of canine sequence variation relative to a {Greenland} wolf outgroup.}, journal = {Mammalian genome : official journal of the International Mammalian Genome Society}, pmid = {39088040}, issn = {1432-1777}, abstract = {For over 15 years, canine genetics research relied on a reference assembly from a Boxer breed dog named Tasha (i.e., canFam3.1). Recent advances in long-read sequencing and genome assembly have led to the development of numerous high-quality assemblies from diverse canines. These assemblies represent notable improvements in completeness, contiguity, and the representation of gene promoters and gene models. Although genome graph and pan-genome approaches have promise, most genetic analyses in canines rely upon the mapping of Illumina sequencing reads to a single reference. The Dog10K consortium, and others, have generated deep catalogs of genetic variation through an alignment of Illumina sequencing reads to a reference genome obtained from a German Shepherd Dog named Mischka (i.e., canFam4, UU\_Cfam\_GSD\_1.0). However, alignment to a breed-derived genome may introduce bias in genotype calling across samples. Since the use of an outgroup reference genome may remove this effect, we have reprocessed 1929 samples analyzed by the Dog10K consortium using a Greenland wolf (mCanLor1.2) as the reference. We efficiently performed remapping and variant calling using a GPU-implementation of common analysis tools. The resulting call set removes the variability in genetic differences seen across samples and breed relationships revealed by principal component analysis are not affected by the choice of reference genome. Using this sequence data, we inferred the history of population sizes and found that village dog populations experienced a 9-13 fold reduction in historic effective population size relative to wolves.}, }
@article {pmid39087631, year = {2024}, author = {Cong, J. and Zhang, S. and Zhang, Q. and Yu, X. and Huang, J. and Wei, X. and Huang, X. and Qiu, J. and Zhou, X.}, title = {Conserved features and diversity attributes of chimeric {RNAs} across accessions in four plants.}, journal = {Plant biotechnology journal}, doi = {10.1111/pbi.14437}, pmid = {39087631}, issn = {1467-7652}, support = {2023ZD04073//National Key Research and Development Program of China/ ; 32370671//National Natural Science Foundation of China/ ; 22ZR1445800//Natural Science Foundation of Shanghai/ ; }, abstract = {As a non-collinear expression form of genetic information, chimeric RNAs increase the complexity of transcriptome in diverse organisms. Although chimeric RNAs have been identified in plants, few common features have been revealed. Here, we systemically explored the landscape of chimeric RNAs across multi-accession and multi-tissue using pan-genome and transcriptome data of four plants: rice, maize, soybean, and Arabidopsis. Among the four species, conserved characteristics of breakpoints and parental genes were discovered. In each species, chimeric RNAs displayed a high level of diversity among accessions, and the clustering of accessions using chimeric events was generally concordant with clustering based on genomic variants, implying a general relationship between genetic variations and chimeric RNAs. Through mass spectrometry, we confirmed a fusion protein OsNDC1-OsGID1L2 and observed its subcellular localization, which differed from the original proteins. Phenotypic cues in transgenic rice suggest the potential functions of OsNDC1-OsGID1L2. Moreover, an intriguing chimeric event Os01g0216500-Os01g0216900, generated by a large deletion in basmati rice, also exists in another accession without the deletion, demonstrating its convergence in evolution.
Our results illuminate the characteristics and hint at the evolutionary implications of plant chimeric RNAs, which serve as a supplement to genetic variations, thus expanding our understanding of genetic diversity.}, }
@article {pmid39083619, year = {2024}, author = {Rose, S. A. and Robicheau, B. M. and Tolman, J. and Fonseca-Batista, D. and Rowland, E. and Desai, D. and Ratten, J. M. and Kantor, E. J. H. and Comeau, A. M. and Langille, M. G. I. and Jerlström-Hultqvist, J. and Devred, E. and Sarthou, G. and Bertrand, E. M. and LaRoche, J.}, title = {Nitrogen fixation in the widely distributed marine γ-proteobacterial diazotroph {Candidatus} {Thalassolituus} haligoni.}, journal = {Science advances}, volume = {10}, number = {31}, pages = {eadn1476}, doi = {10.1126/sciadv.adn1476}, pmid = {39083619}, issn = {2375-2548}, mesh = {*Nitrogen Fixation ; *Gammaproteobacteria/genetics/metabolism/isolation \& purification/enzymology/classification ; *Phylogeny ; Nitrogenase/metabolism/genetics ; Seawater/microbiology ; Metagenome ; Oxidoreductases ; }, abstract = {The high diversity and global distribution of heterotrophic bacterial diazotrophs (HBDs) in the ocean has recently become apparent. However, understanding the role these largely uncultured microorganisms play in marine N2 fixation poses a challenge due to their undefined growth requirements and the complex regulation of the nitrogenase enzyme. We isolated and characterized Candidatus Thalassolituus haligoni, a member of a widely distributed clade of HBD belonging to the Oceanospirillales. Analysis of its nifH gene via amplicon sequencing revealed the extensive distribution of Cand. T. haligoni across the Pacific, Atlantic, and Arctic Oceans. Pangenome analysis indicates that the isolate shares >99\% identity with an uncultured metagenome-assembled genome called Arc-Gamma-03, recently recovered from the Arctic Ocean. Through combined genomic, proteomic, and physiological approaches, we confirmed that the isolate fixes N2 gas. However, the mechanisms governing nitrogenase regulation in Cand. T. haligoni remain unclear. We propose Cand. T. haligoni as a globally distributed, cultured HBD model species within this understudied clade of Oceanospirillales.}, }
@article {pmid39079170, year = {2024}, author = {Kim, J. I. and Manuele, A. and Maguire, F. and Zaheer, R. and McAllister, T. A. and Beiko, R.}, title = {Identification of key drivers of antimicrobial resistance in {Enterococcus} using machine learning.}, journal = {Canadian journal of microbiology}, doi = {10.1139/cjm-2024-0049}, pmid = {39079170}, issn = {1480-3275}, abstract = {With antimicrobial resistance (AMR) rapidly evolving in pathogens, quick and accurate identification of genetic determinants of phenotypic resistance is essential for improving surveillance, stewardship, and clinical mitigation. Machine learning (ML) models show promise for AMR prediction in diagnostics but require a deep understanding of internal processes to use effectively. Our study utilized AMR gene, pangenomic, and predicted plasmid features from 647 Enterococcus faecium and Enterococcus faecalis genomes across the One Health continuum, along with corresponding resistance phenotypes, to develop interpretive ML classifiers. Vancomycin resistance could be predicted with 99\% accuracy with AMR gene features, 98\% with pangenome features, and 96\% with plasmid clusters. Top pangenome features overlapped with the resistance genes of the vanA operon, which are often laterally transmitted via plasmids. Doxycycline resistance prediction achieved approximately 92\% accuracy with pangenome features, with the top feature being elements of Tn916 conjugative transposon, a tet(M) carrier. Erythromycin resistance prediction models achieved about 90\% accuracy, but top features were negatively correlated with resistance due to the confounding effect of population structure. This work demonstrates the importance of reviewing ML models' features to discern biological relevance even when achieving high-performance metrics.
Our workflow offers the potential to propose hypotheses for experimental testing, enhancing the understanding of AMR mechanisms, which are crucial for combating the AMR crisis.}, }
@article {pmid39073678, year = {2024}, author = {Gan, S. and Ruan, L. and Xu, X. and Luo, L. and Huo, Y. and Jiang, J. and Zhang, X. and Shang, C.}, title = {Whole genome sequencing and analysis of {Bacillus} sp. {TTMP2}, a tetramethylpyrazine-producing bacterium.}, journal = {Molecular biology reports}, volume = {51}, number = {1}, pages = {863}, pmid = {39073678}, issn = {1573-4978}, support = {AB21220057 and 2021AB27009//Guangxi Key Research and Development Program/ ; }, mesh = {*Bacillus/genetics/metabolism ; *Pyrazines/metabolism ; *Whole Genome Sequencing/methods ; *Genome, Bacterial/genetics ; *Phylogeny ; Metabolic Networks and Pathways/genetics ; Molecular Sequence Annotation ; }, abstract = {BACKGROUND: Tetramethylpyrazine has been extensively studied as an anticancer substance and a flavor substance in the fields of medicine and food industry. A strain with high tetramethylpyrazine production was screened from the fermented grains of Danquan winery. Genome sequencing can reveal the potential roles of bacteria by thoroughly examining the connection between genes and phenotypes from a genomic perspective.
METHODS AND RESULTS: In this study, whole genome of this strain was sequenced and analyzed. This paper summarized the genomic characteristics of strain TTMP2 and analyzed genes related to the synthesis of tetramethylpyrazine. Bacillus sp. TTMP2 has a complete metabolic pathway for acetoin and tetramethylpyrazine metabolism. Gene function was analyzed by COG annotation, GO annotation, KEGG annotation and functional annotations for lipoproteins, carbohydrate-active enzymes, and pathogen-host interactions. Phylogenetic analysis indicated that Bacillus velezensis had the high homology with Bacillus sp. TTMP2. Genomes of 16 Bacillus species cover all genes of Bacillus, suggesting that genus Bacillus has an open pan-genome and can survive in diverse environments.
CONCLUSION: The analysis of genome sequencing data from Bacillus sp. TTMP2 showed that its metabolic characteristics could be deeply understood, indicating that this bacterium had a particular role in tetramethylpyrazine synthesis.}, }
@article {pmid39071359, year = {2024}, author = {Kim, J. and Varki, R. and Oliva, M. and Boucher, C.}, title = {{Re$^2$Pair}: Increasing the Scalability of {RePair} by Decreasing Memory Usage.}, journal = {bioRxiv : the preprint server for biology}, doi = {10.1101/2024.07.11.603142}, pmid = {39071359}, issn = {2692-8205}, abstract = {UNLABELLED: The RePair compression algorithm produces a context-free grammar by iteratively substituting the most frequently occurring pair of consecutive symbols with a new symbol until all consecutive pairs of symbols appear only once in the compressed text. It is widely used in the settings of bioinformatics, machine learning, and information retrieval where random access to the original input text is needed. For example, in pangenomics, RePair is used for random access to a population of genomes. BigRePair improves the scalability of the original RePair algorithm by using Prefix-Free Parsing (PFP) to preprocess the text prior to building the RePair grammar. Despite the efficiency of PFP on repetitive text, there is a scalability issue with the size of the parse which causes a memory bottleneck in BigRePair. In this paper, we design and implement recursive RePair (denoted as Re$^2$Pair), which builds the RePair grammar using recursive PFP. Our novel algorithm faces the challenge of constructing the RePair grammar without direct access to the parse of text, relying solely on the dictionary of the text and the parse and dictionary of the parse of the text. We compare Re$^2$Pair to BigRePair using SARS-CoV-2 haplotypes and haplotypes from the 1000 Genomes Project. We show that our method Re$^2$Pair achieves over a 40\% peak memory reduction and a speed up ranging between 12\% to 79\% compared to BigRePair when compressing the largest input texts in all experiments. Re$^2$Pair is made publicly available under the GNU public license here: https://github.com/jkim210/Recursive-RePair.
Theory of computation → Formal languages and automata theory.}, }
@article {pmid39065181, year = {2024}, author = {Romanenko, L. and Bystritskaya, E. and Savicheva, Y. and Eremeev, V. and Otstavnykh, N. and Kurilenko, V. and Velansky, P. and Isaeva, M.}, title = {Description and Whole-Genome Sequencing of {Mariniflexile} litorale sp. nov., Isolated from the Shallow Sediments of the {Sea} of {Japan}.}, journal = {Microorganisms}, volume = {12}, number = {7}, doi = {10.3390/microorganisms12071413}, pmid = {39065181}, issn = {2076-2607}, support = {15.BRK.21.0004 (Contract No. 075-15-2021-1052)//Ministry of Science and Higher Education, Russian Federation/ ; }, abstract = {A Gram-negative, aerobic, rod-shaped, non-motile, yellow-pigmented bacterium, KMM 9835\textsuperscript{T}, was isolated from the sediment sample obtained from the Amur Bay of the Sea of Japan seashore, Russia. Phylogenetic analyses based on the 16S rRNA gene and whole genome sequences positioned the novel strain KMM 9835\textsuperscript{T} in the genus Mariniflexile as a separate line sharing the highest 16S rRNA gene sequence similarities of 96.6\% and 96.2\% with Mariniflexile soesokkakense RSSK-9\textsuperscript{T} and Mariniflexile fucanivorans SW5\textsuperscript{T}, respectively, and similarity values of <96\% to other recognized Mariniflexile species. The average nucleotide identity and digital DNA-DNA hybridization values between strain KMM 9835\textsuperscript{T} and M. soesokkakense KCTC 32427\textsuperscript{T}, Mariniflexile gromovii KCTC 12570\textsuperscript{T}, M. fucanivorans DSM 18792\textsuperscript{T}, and M. maritimum M5A1M\textsuperscript{T} were 83.0\%, 82.5\%, 83.4\%, and 78.3\% and 30.7\%, 29.6\%, 29.5\%, and 24.4\%, respectively. The genomic DNA GC content of strain KMM 9835\textsuperscript{T} was 32.5 mol\%. The dominant menaquinone was MK-6, and the major fatty acids were iso-C15:0, iso-C15:1ω10c, and C15:0. The polar lipids of strain KMM 9835\textsuperscript{T} consisted of phosphatidylethanolamine, two unidentified aminolipids, an unidentified phospholipid, and six unidentified lipids. A pan-genome analysis showed that the KMM 9835\textsuperscript{T} genome encoded 753 singletons.
The annotated singletons were more often related to transport protein systems (SusC), transcriptional regulators (AraC, LytTR, LacI), and enzymes (glycosylases). The KMM 9835\textsuperscript{T} genome was highly enriched in CAZyme-encoding genes, the proportion of which reached 7.3\%. Moreover, the KMM 9835\textsuperscript{T} genome was characterized by a high abundance of CAZyme gene families (GH43, GH28, PL1, PL10, CE8, and CE12), indicating its potential to catabolize pectin. This may represent part of an adaptation strategy facilitating microbial consumption of plant polymeric substrates in aquatic environments near shorelines and freshwater sources. Based on the combination of phylogenetic and phenotypic characterization, the marine sediment strain KMM 9835\textsuperscript{T} (=KCTC 92792\textsuperscript{T}) represents a novel species of the genus Mariniflexile, for which the name Mariniflexile litorale sp. nov. is proposed.}, }
@article {pmid39065090, year = {2024}, author = {Tatarenkov, A. and Muñoz-Gutiérrez, I. and Vargas, I. and Behnsen, J. and Mota-Bravo, L.}, title = {Pangenome Analysis Reveals Novel Contact-Dependent Growth Inhibition System and Phenazine Biosynthesis Operons in {Proteus} mirabilis {BL95} That Are Located in An Integrative and Conjugative Element.}, journal = {Microorganisms}, volume = {12}, number = {7}, doi = {10.3390/microorganisms12071321}, pmid = {39065090}, issn = {2076-2607}, support = {GM136498//National Institute of General Medical Sciences (NIGMS), NIH/ ; }, abstract = {Proteus mirabilis is a leading cause of urinary tract infections and a common commensal of the gastrointestinal tract. Our recent study (JB) showed that P. mirabilis strain BL95 employs a novel contact-dependent killing system against enteric bacteria in the mouse gut and in vitro. To uncover the genetic determinants of this system, we performed whole-genome sequencing of BL95 and compared it with 98 complete genomes of P. mirabilis. BL95 carries 56 coding sequences (CDSs) not found in other P. mirabilis. Over half of these unique genes are located on a novel integrative conjugative element (ICE) named ICEPm2, inserted in tRNA-Phe and exclusive to BL95. ICEPm2 has integration, conjugation, and DNA replication modules nearly identical to ICEPm1 (common in P. mirabilis), but ICEPm2 of BL95 carries two unique operons for P. mirabilis---a phenazine biosynthesis and a contact-dependent growth inhibition (CDI) system. ICEPm2 is absent in the P. mirabilis (AR\_0156) closest to BL95 and it is present in the genomes of several Escherichia coli from mouse intestines, indicating its recent horizontal mobilization. BL95 shares over 100 genes of five different secretion systems with other P. mirabilis, mostly poorly studied, making a large pool of candidate genes for the contact-dependent growth inhibition.}, }
@article {pmid39059904, year = {2024}, author = {Peng, J. and Xiao, R. and Wu, C. and Zheng, Z. and Deng, Y. and Chen, K. and Xiang, Y. and Xu, C. and Zou, L. and Liao, M. and Zhang, J.}, title = {Characterization of the prevalence of {Salmonella} in different retail chicken supply modes using genome-wide and machine-learning analyses.}, journal = {Food research international (Ottawa, Ont.)}, volume = {191}, pages = {114654}, doi = {10.1016/j.foodres.2024.114654}, pmid = {39059904}, issn = {1873-7145}, abstract = {Salmonella is a foodborne pathogen that causes salmonellosis, of which retail chicken meat is a major source. However, the prevalence of Salmonella in different retail chicken supply modes and the threat posed to consumers remains unclear. The prevalence, serotype distribution, antibiotic resistance, and genomic characteristics of Salmonella in three supply modes of retail chicken (live poultry, frozen, and chilled) were investigated using whole-genome sequencing (WGS) and machine learning (ML). In this study, 480 retail chicken samples from live poultry, frozen, and chilled supply modes in Guangzhou from 2020 to 2021, as well as 253 Salmonella isolates (total isolation rate = 53.1 \%), were collected. The prevalence of isolates in the live poultry mode (67.5 \%, 81/120) was statistically higher than in the frozen (50.0 \%, 120/240) and chilled (43.3 \%, 52/120) (P < 0.05) modes. Serotype identification showed significant differences in the serotype distribution of Salmonella in different supply modes. S. Enteritis (46.7 \%) and S. Indiana (14.2 \%) were predominant in the frozen mode. S. Agona (23.5 \%) and S. Saintpaul (13.6 \%) were predominant in live poultry, while S. Enteritis (40.4 \%) and S. Kentucky (17.3 \%) were predominant in chilled mode.
Antibiotic testing showed that frozen mode isolates were more resistant; the multidrug-resistant (MDR) rate of isolates in the frozen mode reached 91.8 \%, significantly higher than in the chilled (86.5 \%) and live (74.1 \%) (P < 0.05) modes. WGS was performed on 155 top serotypes (S. Enteritidis, S. Kentucky, S. Indiana, and S. Agona). The antibiotic resistance gene analysis showed that the abundance and carrying rate of antibiotic resistance genes of Salmonella in the frozen mode (54 types, 16.1 \%) were significantly higher than in other modes (live poultry: 36 types, 9.4 \%, P < 0.05; chilled: 31 types, 11.6 \%). The blaNDM-1 and blaNDM-9 genes encoding carbapenem resistance were found in frozen mode isolates on a complex transposon consisting of TnAS3-IS26. Virulence factors and plasmid replicons were abundant in the studied frozen mode isolates. In addition, single nucleotide polymorphism (SNP) phylogenetic tree results showed that in the frozen supply mode, the S. Enteritidis clonal clade continued to contaminate retail chicken meat and was homologous to S. Enteritidis strains found in farm chicken embryos, slaughterhouse chicken carcasses, and patients from hospitals in China (SNP 0 = 10). Notably, the pan-genome-based ML model showed that characteristic genes in frozen and live poultry isolates differed. The narZ gene was a key characteristic gene in frozen isolates, encoding nitrate reductase, relating to anaerobic bacterial growth. The ydgJ gene is a key characteristic gene in the live mode and encodes an oxidoreductase related to oxidative function in bacteria. The high prevalence of live poultry mode Salmonella and the transmission of frozen mode MDR Salmonella in this study pose serious risks to food safety and public health, emphasizing the importance of improving disinfection and cold storage measures to reduce Salmonella contamination and transmission.
In conclusion, the continued surveillance of Salmonella across different supply models and the development of an epidemiological surveillance system based on WGS is necessary.}, }
@article {pmid39059819, year = {2024}, author = {Radford, E. J. and Whitworth, D. E.}, title = {The genetic basis of predation by myxobacteria.}, journal = {Advances in microbial physiology}, volume = {85}, pages = {1--55}, doi = {10.1016/bs.ampbs.2024.04.001}, pmid = {39059819}, issn = {2162-5468}, abstract = {Myxobacteria (phylum Myxococcota) are abundant and virtually ubiquitous microbial predators. Facultatively multicellular organisms, they are able to form multicellular fruiting bodies and swarm across surfaces, cooperatively hunting for prey. Myxobacterial communities are able to kill a wide range of prey microbes, assimilating their biomass to fuel population growth. Their mechanism of predation is exobiotic - hydrolytic enzymes and toxic metabolites are secreted into the extracellular environment, killing and digesting prey cells from without. However, recent observations of single-cell predation and contact-dependent prey killing challenge the dogma of myxobacterial predation being obligately cooperative. Regardless of their predatory mechanisms, myxobacteria have a broad prey range, which includes Gram-negative bacteria, Gram-positive bacteria and fungi. Pangenome analyses have shown that their extremely large genomes are mainly composed of accessory genes, which are not shared by all members of their species. It seems that the diversity of accessory genes in different strains provides the breadth of activity required to prey upon such a smorgasbord of microbes, and also explains the considerable strain-to-strain variation in predatory efficiency against specific prey.
After providing a short introduction to general features of myxobacterial biology which are relevant to predation, this review brings together a rapidly growing body of work into the molecular mechanisms and genetic basis of predation, presenting a summary of current knowledge, highlighting trends in research and suggesting strategies by which we can potentially exploit myxobacterial predation in the future.}, }
@article {pmid39059732, year = {2024}, author = {Magome, T. G. and Ochai, S. O. and Hassim, A. and Bezuidenhout, C. C. and van Heerden, H. and Lekota, K. E.}, title = {A genome-based investigation of the {Priestia} species isolated from anthrax endemic regions in {Kruger} {National} {Park}.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, pages = {105649}, doi = {10.1016/j.meegid.2024.105649}, pmid = {39059732}, issn = {1567-7257}, abstract = {Priestia is a genus that was renamed from the genus Bacillus based on the conserved signature indels (CSIs) in protein sequences that separate Priestia species from Bacillus, with the latter only including closely related species to B. subtilis and B. cereus. Diagnosis of anthrax, a zoonotic disease is implicated by tripartite anthrax virulence genes (lef, pagA, and cya) and poly-γ-D-glutamic acid capsular genes (capABCDE) of Bacillus anthracis. Due to the amplification of anthrax virulence genes in Priestia isolates, the search for homologous anthrax virulence genes within the Priestia spp. (n = 9) isolated from animal blood smears was embarked upon through whole genome sequencing. In silico taxonomic identification of the isolates was conducted using genome taxonomy database (GTDB), average nucleotide identity (ANI), and multi-locus sequence typing (MLST), which identified the genomes as P. aryabhattai (n = 5), P. endophytica (n = 2) and P. megaterium (n = 2). A pan-genome analysis was further employed on the Priestia genomes, including the screening of virulence, antibiotic resistance genes and mobile genetic elements on the sequenced genomes. The oligoribonuclease NrnB protein sequences showed that Priestia spp. possess a unique CSI that is absent in other Bacillus species. Furthermore, the CSI in P. endophytica is unique from other Priestia spp. Pan-genomic analysis indicates that P. endophytica clusters separately from P.
aryabhattai and P. megaterium. In silico BLASTn genome analysis using the SYBR primers, Taqman probes and primers that target the chromosomal marker (Ba-1), protective antigen (pagA), and lethal factor (lef) on B. anthracis, showed partial binding to Priestia regions encoding for hypothetical proteins, pyridoxine biosynthesis, hydrolase, and inhibitory proteins. The antibiotic resistance genes (ARG) profile of Priestia spp. showed that the genomes contained no more than two ARGs. This included genes conferring resistance to rifamycin and fosfomycin (P. endophytica) as well as clindamycin (P. aryabhattai and P. megaterium). Priestia genomes lacked B. anthracis plasmids and consisted of plasmid replicon types with unknown functions. Furthermore, the amplification of Priestia strains may result in false positives when qPCR is used to detect the virulence genes of B. anthracis in soil, blood smears, and/or environmental samples.}, }
@article {pmid39058093, year = {2024}, author = {Peñil-Celis, A. and Tagg, K. A. and Webb, H. E. and Redondo-Salvo, S. and Francois Watkins, L. and Vielva, L. and Griffin, C. and Kim, J. Y. and Folster, J. P. and Garcillan-Barcia, M. P. and de la Cruz, F.}, title = {Mobile genetic elements define the non-random structure of the {Salmonella} enterica serovar {Typhi} pangenome.}, journal = {mSystems}, pages = {e0036524}, doi = {10.1128/msystems.00365-24}, pmid = {39058093}, issn = {2379-5077}, abstract = {Bacterial relatedness measured using select chromosomal loci forms the basis of public health genomic surveillance. While approximating vertical evolution through this approach has proven exceptionally valuable for understanding pathogen dynamics, it excludes a fundamental dimension of bacterial evolution---horizontal gene transfer. Incorporating the accessory genome is the logical remediation and has recently shown promise in expanding epidemiological resolution for enteric pathogens. Employing k-mer-based Jaccard index analysis, and a novel genome length distance metric, we computed pangenome (i.e., core and accessory) relatedness for the globally important pathogen Salmonella enterica serotype Typhi (Typhi), and graphically express both vertical (homology-by-descent) and horizontal (homology-by-admixture) evolutionary relationships in a reticulate network of over 2,200 U.S. Typhi genomes. This analysis revealed non-random structure in the Typhi pangenome that is driven predominantly by the gain and loss of mobile genetic elements, confirming and expanding upon known epidemiological patterns, revealing novel plasmid dynamics, and identifying avenues for further genomic epidemiological exploration.
With an eye to public health application, this work adds important biological context to the rapidly improving ways of analyzing bacterial genetic data and demonstrates the value of the accessory genome to infer pathogen epidemiology and evolution. IMPORTANCE: Given bacterial evolution occurs in both vertical and horizontal dimensions, inclusion of both core and accessory genetic material (i.e., the pangenome) is a logical step toward a more thorough understanding of pathogen dynamics. With an eye to public, and indeed, global health relevance, we couple contemporary tools for genomic analysis with decades of research on mobile genetic elements to demonstrate the value of the pangenome, known and unknown, annotated, and hypothetical, for stratification of Salmonella enterica serovar Typhi (Typhi) populations. We confirm and expand upon what is known about Typhi epidemiology, plasmids, and antimicrobial resistance dynamics, and offer new avenues of exploration to further deduce Typhi ecology and evolution, and ultimately to reduce the incidence of human disease.}, }
@article{pmid39058033,
  year     = {2024},
  author   = {Mortimer, TD},
  title    = {{mSphere} of {Influence}: Predicting the evolution of pathogen populations},
  journal  = {mSphere},
  pages    = {e0043224},
  doi      = {10.1128/msphere.00432-24},
  pmid     = {39058033},
  issn     = {2379-5042},
  abstract = {Tatum D. Mortimer works in the field of pathogen population genomics and evolution. In this mSphere of Influence article, she reflects on how ``Frequency-dependent selection can forecast evolution in Streptococcus pneumoniae'' by Azarian et al. and ``Contingency, repeatability, and predictability in the evolution of a prokaryotic pangenome'' by Beavan et al. made an impact on her by highlighting the ways in which genomic data can be used to predict pathogen evolution.},
}
@article{pmid39057398,
  year     = {2024},
  author   = {Cui, H and Fan, S and Ding, W and Zhang, W},
  title    = {Genomic Analysis of Novel {Sulfitobacter} Bacterial Strains Isolated from Marine Biofilms},
  journal  = {Marine drugs},
  volume   = {22},
  number   = {7},
  doi      = {10.3390/md22070289},
  pmid     = {39057398},
  issn     = {1660-3397},
  mesh     = {*Biofilms ; *Phylogeny ; *Genome, Bacterial ; Genomics/methods ; Aquatic Organisms ; Multigene Family ; },
  abstract = {Bacteria from the genus Sulfitobacter are distributed across various marine habitats and play a significant role in sulfur cycling. However, the metabolic features of Sulfitobacter inhabiting marine biofilms are still not well understood. Here, complete genomes and paired metatranscriptomes of eight Sulfitobacter strains, isolated from biofilms on subtidal stones, have been analyzed to explore their central energy metabolism and potential of secondary metabolite biosynthesis. Based on average nucleotide identity and phylogenetic analysis, the eight strains were classified into six novel species and two novel strains. The reconstruction of the metabolic pathways indicated that all strains had a complete Entner-Doudoroff pathway, pentose phosphate pathway, and diverse pathways for amino acid metabolism, suggesting the presence of an optimized central carbon metabolism. Pangenome analysis further revealed the differences between the gene cluster distribution patterns among the eight strains, suggesting significant functional variation. Moreover, a total of 47 biosynthetic gene clusters were discovered, which were further classified into 37 gene cluster families that showed low similarity with previously documented clusters. Furthermore, metatranscriptomic analysis revealed the expressions of key functional genes involved in the biosynthesis of ribosomal peptides in in situ marine biofilms.
Overall, this study sheds new light on the metabolic features, adaptive strategies, and value of genome mining in this group of biofilm-associated Sulfitobacter bacteria.},
}
@article{pmid39056703,
  year     = {2024},
  author   = {Abid, A and Alzahrani, B and Naz, S and Basheer, A and Bakhtiar, SM and Al-Asmari, F and Jamal, SB and Faheem, M},
  title    = {Reverse Vaccinology Approach to Identify Novel and Immunogenic Targets against {Streptococcus} gordonii},
  journal  = {Biology},
  volume   = {13},
  number   = {7},
  doi      = {10.3390/biology13070510},
  pmid     = {39056703},
  issn     = {2079-7737},
  abstract = {Streptococcus gordonii is a gram-positive, mutualistic bacterium found in the human body. It is found in the oral cavity, upper respiratory tract, and intestines, and presents a serious clinical problem because it can lead to opportunistic infections in individuals with weakened immune systems. Streptococci are the most prevalent inhabitants of oral microbial communities, and are typical oral commensals found in the human oral cavity. These streptococci, along with many other oral microbes, produce multispecies biofilms that can attach to salivary pellicle components and other oral bacteria via adhesin proteins expressed on the cell surface. Antibiotics are effective against this bacterium, but resistance against antibodies is increasing. Therefore, a more effective treatment is needed. Vaccines offer a promising method for preventing this issue. This study generated a multi-epitope vaccine against Streptococcus gordonii by targeting the completely sequenced proteomes of five strains. The vaccine targets are identified using a pangenome and subtractive proteomic approach. In the present study, 13 complete strains out of 91 strains of S. gordonii are selected. The pangenomics results revealed that out of 2835 pan genes, 1225 are core genes. Out of these 1225 core genes, 643 identified as non-homologous proteins by subtractive proteomics. A total of 20 essential proteins are predicted from non-homologous proteins. Among these 20 essential proteins, only five are identified as surface proteins.
The vaccine construct is designed based on selected B- and T-cell epitopes of the antigenic proteins with the help of linkers and adjuvants. The designed vaccine is docked against TLR2. The expression of the protein is determined using in silico gene cloning. Findings concluded that Vaccine I with adjuvant shows higher interactions with TLR2, suggesting that the vaccine has the ability to induce a humoral and cell-mediated response to treat and prevent infection; this makes it promising as a vaccine against infectious diseases caused by S. gordonii. Furthermore, validation of the vaccine construct is required by in vitro and in vivo trials to check its actual potency and safety for use to prevent infectious diseases caused by S. gordonii.},
}
@article{pmid39054411,
  year     = {2024},
  author   = {Mol, M and de Maayer, P},
  title    = {Elucidating the biotechnological potential of the genera {Parageobacillus} and {Saccharococcus} through comparative genomic and pan-genome analysis},
  journal  = {BMC genomics},
  volume   = {25},
  number   = {1},
  pages    = {723},
  pmid     = {39054411},
  issn     = {1471-2164},
  mesh     = {*Phylogeny ; *Genome, Bacterial ; *Genomics/methods ; *Bacillaceae/genetics/classification ; Biotechnology ; },
  abstract = {BACKGROUND: The genus Geobacillus and its associated taxa have been the focal point of numerous thermophilic biotechnological investigations, both at the whole cell and enzyme level. By contrast, comparatively little research has been done on its recently delineated sister genus, Parageobacillus. Here we performed pan-genomic analyses on a subset of publicly available Parageobacillus and Saccharococcus genomes to elucidate their biotechnological potential.
RESULTS: Phylogenomic analysis delineated the compared taxa into two distinct genera, Parageobacillus and Saccharococcus, with P. caldoxylosilyticus isolates clustering with S. thermophilus in the latter genus. Both genera present open pan-genomes, with the species P. toebii being characterized with the highest novel gene accrual. Diversification of the two genera is driven through the variable presence of plasmids, bacteriophages and transposable elements. Both genera present a range of potentially biotechnologically relevant features, including a source of novel antimicrobials, thermostable enzymes including DNA-active enzymes, carbohydrate active enzymes, proteases, lipases and carboxylesterases. Furthermore, they present a number of metabolic pathways pertinent to degradation of complex hydrocarbons and xenobiotics and for green energy production.
CONCLUSIONS: Comparative genomic analyses of Parageobacillus and Saccharococcus suggest that taxa in both of these genera can serve as a rich source of biotechnologically and industrially relevant secondary metabolites, thermostable enzymes and metabolic pathways that warrant further investigation.},
}
@article{pmid39052555,
  year     = {2024},
  author   = {Chekesa, B and Singh, H and Gonzalez-Juarbe, N and Vashee, S and Wiscovitch-Russo, R and Dupont, CL and Girma, M and Kerro, O and Gumi, B and Ameni, G},
  title    = {Pangenome and genomic signatures linked to the dominance of the lineage-4 of {Mycobacterium} tuberculosis isolated from extrapulmonary tuberculosis patients in western {Ethiopia}},
  journal  = {PloS one},
  volume   = {19},
  number   = {7},
  pages    = {e0304060},
  doi      = {10.1371/journal.pone.0304060},
  pmid     = {39052555},
  issn     = {1932-6203},
  mesh     = {Humans ; Ethiopia/epidemiology ; *Mycobacterium tuberculosis/genetics/isolation & purification ; *Genome, Bacterial ; *Tuberculosis/microbiology/epidemiology/genetics ; Cross-Sectional Studies ; *Genome-Wide Association Study ; Male ; Female ; Adult ; Phylogeny ; Genomics/methods ; Middle Aged ; Young Adult ; Adolescent ; Tuberculosis, Extrapulmonary ; },
  abstract = {BACKGROUND: The lineage 4 (L4) of Mycobacterium tuberculosis (MTB) is not only globally prevalent but also locally dominant, surpassing other lineages, with lineage 2 (L2) following in prevalence. Despite its widespread occurrence, factors influencing the expansion of L4 and its sub-lineages remain poorly understood both at local and global levels. Therefore, this study aimed to conduct a pan-genome and identify genomic signatures linked to the elevated prevalence of L4 sublineages among extrapulmonary TB (EPTB) patients in western Ethiopia.
METHODS: A cross-sectional study was conducted at an institutional level involving confirmed cases of extrapulmonary tuberculosis (EPTB) patients from August 5, 2018, to December 30, 2019. A total of 75 MTB genomes, classified under lineage 4 (L4), were used for conducting pan-genome and genome-wide association study (GWAS) analyses. After a quality check, variants were identified using MTBseq, and genomes were de novo assembled using SPAdes. Gene prediction and annotation were performed using Prokka. The pan-genome was constructed using GET\_HOMOLOGUES, and its functional analysis was carried out with the Bacterial Pan-Genome Analysis tool (BPGA). For GWAS analysis, Scoary was employed with Benjamini-Hochberg correction, with a significance threshold set at p-value ≤ 0.05.
RESULTS: The analysis revealed a total of 3,270 core genes, predominantly associated with orthologous groups (COG) functions, notably in the categories of '[R] General function prediction only' and '[I] Lipid transport and metabolism'. Conversely, functions related to '[N] Cell motility' and '[Q] Secondary metabolites biosynthesis, transport, and catabolism' were primarily linked to unique and accessory genes. The pan-genome of MTB L4 was found to be open. Furthermore, the GWAS study identified genomic signatures linked to the prevalence of sublineages L4.6.3 and L4.2.2.2.
CONCLUSIONS: Apart from host and environmental factors, the sublineage of L4 employs distinct virulence factors for successful dissemination in western Ethiopia. Given that the functions of these newly identified genes are not well understood, it is advisable to experimentally validate their roles, particularly in the successful transmission of specific L4 sublineages over others.},
}
@article{pmid39051872,
  year     = {2024},
  author   = {Dost, I and Abdel-Glil, M and Persson, S and Conza, KL and Oleastro, M and Alves, F and Maurischat, S and Scholtzek, A and Mazuet, C and Diancourt, L and Tenson, T and Schmoock, G and Neubauer, H and Schwarz, S and Seyboldt, C},
  title    = {Genomic study of {European} {Clostridioides} difficile ribotype 002/sequence type 8},
  journal  = {Microbial genomics},
  volume   = {10},
  number   = {7},
  doi      = {10.1099/mgen.0.001270},
  pmid     = {39051872},
  issn     = {2057-5858},
  mesh     = {*Clostridioides difficile/genetics/classification ; Humans ; *Ribotyping ; *Genome, Bacterial ; *Clostridium Infections/microbiology/epidemiology ; Multilocus Sequence Typing ; Phylogeny ; Animals ; Europe ; Denmark ; Whole Genome Sequencing ; Genomics ; Drug Resistance, Bacterial/genetics ; },
  abstract = {Clostridioides difficile has significant clinical importance as a leading cause of healthcare-associated infections, with symptoms ranging from mild diarrhoea to severe colitis, and possible life-threatening complications. C. difficile ribotype (RT) 002, mainly associated with MLST sequence type (ST) 8, is one of the most common RTs found in humans. This study aimed at investigating the genetic characteristics of 537 C. difficile genomes of ST8/RT002. To this end, we sequenced 298 C. difficile strains representing a new European genome collection, with strains from Germany, Denmark, France and Portugal. These sequences were analysed against a global dataset consisting of 1,437 ST8 genomes available through Enterobase. Our results showed close genetic relatedness among the studied ST8 genomes, a diverse array of antimicrobial resistance (AMR) genes and the presence of multiple mobile elements. Notably, the pangenome analysis revealed an open genomic structure. ST8 shows relatively low overall variation.
Thus, clonal isolates were found across different One Health sectors (humans, animals, environment and food), time periods, and geographical locations, suggesting the lineage's stability and a universal environmental source. Importantly, this stability did not hinder the acquisition of AMR genes, emphasizing the adaptability of this bacterium to different selective pressures. Although only 2.4 \% (41/1,735) of the studied genomes originated from non-human sources, such as animals, food, or the environment, we identified 9 cross-sectoral core genome multilocus sequence typing (cgMLST) clusters. Our study highlights the importance of ST8 as a prominent lineage of C. difficile with critical implications in the context of One Health. In addition, these findings strongly support the need for continued surveillance and investigation of non-human samples to gain a more comprehensive understanding of the epidemiology of C. difficile.},
}
@article{pmid39048791,
  year     = {2024},
  author   = {Garg, V and Bohra, A and Mascher, M and Spannagl, M and Xu, X and Bevan, MW and Bennetzen, JL and Varshney, RK},
  title    = {Unlocking plant genetics with telomere-to-telomere genome assemblies},
  journal  = {Nature genetics},
  pmid     = {39048791},
  issn     = {1546-1718},
  abstract = {Contiguous genome sequence assemblies will help us to realize the full potential of crop translational genomics. Recent advances in sequencing technologies, especially long-read sequencing strategies, have made it possible to construct gapless telomere-to-telomere (T2T) assemblies, thus offering novel insights into genome organization and function. Plant genomes pose unique challenges, such as a continuum of ancient to recent polyploidy and abundant highly similar and long repetitive elements. Owing to progress in sequencing approaches, for most crop plants, chromosome-scale reference genome assemblies are available, but T2T assembly construction remains challenging. Here we describe methods for haplotype-resolved, gapless T2T assembly construction in plants, including various crop species. We outline the impact of T2T assemblies in elucidating the roles of repetitive elements in gene regulation, as well as in pangenomics, functional genomics, genome-assisted breeding and targeted genome manipulation. In conjunction with sequence-enriched germplasm repositories, T2T assemblies thus hold great promise for basic and applied plant sciences.},
}
@article{pmid39046316,
  year     = {2024},
  author   = {Hong, UVT and Tamiru-Oli, M and Hurgobin, B and Lewsey, MG},
  title    = {Genomic and Cell-Specific Regulation of Benzylisoquinoline Alkaloid Biosynthesis in Opium Poppy},
  journal  = {Journal of experimental botany},
  doi      = {10.1093/jxb/erae317},
  pmid     = {39046316},
  issn     = {1460-2431},
  abstract = {Opium poppy is a crop of great commercial value as a source of several opium alkaloids for the pharmaceutical industries including morphine, codeine, thebaine, noscapine and papaverine. Most enzymes involved in benzylisoquinoline alkaloids (BIAs) biosynthesis in opium poppy have been functionally characterized, and opium poppy currently serves as a model system to study BIA metabolism in plants. BIA biosynthesis in opium poppy involves two biosynthetic gene clusters associated respectively with the morphine and noscapine branches. Recent reports have shown that genes in the same cluster are co-expressed, suggesting they might also be co-regulated. However, the transcriptional regulation of opium poppy BIA biosynthesis is not well studied. Opium poppy BIA biosynthesis involves three cell types associated with the phloem system: companion cells, sieve elements and laticifers. The transcripts and enzymes associated with BIA biosynthesis are distributed across cell types, requiring the translocation of key enzymes and pathway intermediates between cell types. Together, these suggest that the regulation of BIA biosynthesis in opium poppy is multilayered and complex, involving biochemical, genomic, and physiological mechanisms. In this review, we highlight recent advances in genome sequencing and single cell and spatial transcriptomics with a focus on how these efforts can improve our understanding of the genomic and cell-specific regulation of BIA biosynthesis.
Such knowledge is vital for opium poppy genetic improvement and metabolic engineering efforts targeting the modulation of alkaloid yield and composition.},
}
@article{pmid39044985,
  year     = {2024},
  author   = {Chintakovid, N and Singkhamanan, K and Yaikhan, T and Nokchan, N and Wonglapsuwan, M and Jitpakdee, J and Kantachote, D and Surachat, K},
  title    = {Probiogenomic analysis of {Lactiplantibacillus} plantarum {SPS109}: A potential {GABA}-producing and cholesterol-lowering probiotic strain},
  journal  = {Heliyon},
  volume   = {10},
  number   = {13},
  pages    = {e33823},
  pmid     = {39044985},
  issn     = {2405-8440},
  abstract = {Lactiplantibacillus plantarum SPS109, an isolated strain of lactic acid bacteria (LAB) from fermented foods, showed remarkable potential as a probiotic with dual capabilities in γ-aminobutyric acid (GABA) production and cholesterol reduction. This study employs genomic and comparative analyses to search into the strain's genetic profile, safety features, and probiotic attributes. The safety assessment reveals the absence of virulence factors and antimicrobial resistance genes, while the genome uncovers bacteriocin-related elements, including sactipeptides and a cluster for putative plantaricins, strengthening its ability to combat diverse pathogens. Pangenome analysis revealed unique bacteriocin-related genes, specifically lcnD and bcrA, distinguishing SPS109 from four other L. plantarum strains producing GABA. In addition, genomic study emphasizes SPS109 strain distinctive features, two GABA-related genes responsible for GABA production and a bile tolerance gene (cbh) crucial for cholesterol reduction. Additionally, the analysis highlights several genes of potential probiotic properties, including stress tolerance, vitamin production, and antioxidant activity. In summary, L. plantarum SPS109 emerges as a promising probiotic candidate with versatile applications in the food and beverage industries, supported by its unique genomic features and safety profile.},
}
@article{pmid39042999,
  year     = {2024},
  author   = {Rocha, JL and Lou, RN and Sudmant, PH},
  title    = {Structural variation in humans and our primate kin in the era of telomere-to-telomere genomes and pangenomics},
  journal  = {Current opinion in genetics \& development},
  volume   = {87},
  pages    = {102233},
  doi      = {10.1016/j.gde.2024.102233},
  pmid     = {39042999},
  issn     = {1879-0380},
  abstract = {Structural variants (SVs) account for the majority of base pair differences both within and between primate species. However, our understanding of inter- and intra-species SV has been historically hampered by the quality of draft primate genomes and the absence of genome resources for key taxa. Recently, advances in long-read sequencing and genome assembly have begun to radically reshape our understanding of SVs. Two landmark achievements include the publication of a human telomere-to-telomere (T2T) genome as well as the development of the first human pangenome reference. In this review, we first look back to the major works laying the foundation for these projects. We then examine the ways in which T2T genome assemblies and pangenomes are transforming our understanding of and approach to primate SV. Finally, we discuss what the future of primate SV research may look like in the era of T2T genomes and pangenomics.},
}
@article{pmid39041615,
  year     = {2024},
  author   = {Li, H and Marin, M and Farhat, MR},
  title    = {Exploring gene content with pangene graphs},
  journal  = {Bioinformatics (Oxford, England)},
  doi      = {10.1093/bioinformatics/btae456},
  pmid     = {39041615},
  issn     = {1367-4811},
  abstract = {MOTIVATION: The gene content regulates the biology of an organism. It varies between species and between individuals of the same species. Although tools have been developed to identify gene content changes in bacterial genomes, none is applicable to collections of large eukaryotic genomes such as the human pangenome.
RESULTS: We developed pangene, a computational tool to identify gene orientation, gene order and gene copy-number changes in a collection of genomes. Pangene aligns a set of input protein sequences to the genomes, resolves redundancies between protein sequences and constructs a gene graph with each genome represented as a walk in the graph. It additionally finds subgraphs, which we call bibubbles, that capture gene content changes. Applied to the human pangenome, pangene identifies known gene-level variations and reveals complex haplotypes that are not well studied before. Pangene also works with high-quality bacterial pangenome and reports similar numbers of core and accessory genes in comparison to existing tools.
Source code at https://github.com/lh3/pangene; pre-built pangene graphs can be downloaded from https://zenodo.org/records/8118576 and visualized at https://pangene.bioinweb.org.},
}
@article{pmid39037482,
  year     = {2024},
  author   = {Mazwi, KD and Lekota, KE and Glover, BA and Kolo, FB and Hassim, A and Rossouw, J and Jonker, A and Wojno, JM and Profiti, G and Martelli, PL and Casadio, R and Zilli, K and Janowicz, A and Marotta, F and Garofolo, G and van Heerden, H},
  title    = {Whole Genome Sequence Analysis of {Brucella} spp. from Human, Livestock, and Wildlife in {South Africa}},
  journal  = {Journal of microbiology (Seoul, Korea)},
  pmid     = {39037482},
  issn     = {1976-3794},
  support  = {98651//National Research Foundation/ ; },
  abstract = {Brucellosis is an economically important zoonotic disease affecting humans, livestock, and wildlife health globally and especially in Africa. Brucella abortus and B. melitensis have been isolated from human, livestock (cattle and goat), and wildlife (sable) in South Africa (SA) but with little knowledge of the population genomic structure of this pathogen in SA. As whole genome sequencing can assist to differentiate and trace the origin of outbreaks of Brucella spp. strains, the whole genomes of retrospective isolates (n = 19) from previous studies were sequenced. Sequences were analysed using average nucleotide identity (ANI), pangenomics, and whole genome single nucleotide polymorphism (wgSNP) to trace the geographical origin of cases of brucellosis circulating in human, cattle, goats, and sable from different provinces in SA. Pangenomics analysis of B. melitensis (n = 69) and B. abortus (n = 56) was conducted with 19 strains that included B. abortus from cattle (n = 3) and B. melitensis from a human (n = 1), cattle (n = 1), goat (n = 1), Rev1 vaccine strain (n = 1), and sable (n = 12). Pangenomics analysis of B. melitensis genomes, highlighted shared genes, that include 10 hypothetical proteins and genes that encodes for acetyl-coenzyme A synthetase (acs), and acylamidase (aam) amongst the sable genomes. The wgSNP analysis confirmed the B. melitensis isolated from human was more closely related to the goat from the Western Cape Province from the same outbreak than the B. melitensis cattle sample from different cases in the Gauteng Province. The B. melitensis sable strains could be distinguished from the African lineage, constituting their own African sub-clade. The sequenced B. abortus strains clustered in the C2 lineage that is closely related to the isolates from Mozambique and Zimbabwe. This study identified genetically diverse Brucella spp. among various hosts in SA. This study expands the limited known knowledge regarding the presence of B. melitensis in livestock and humans in SA, further building a foundation for future research on the distribution of the Brucella spp. worldwide and its evolutionary background.},
}
@article{pmid39035534,
  year     = {2024},
  author   = {Gheorghe-Barbu, I and Surleac, M and Barbu, IC and Paraschiv, S and Bănică, LM and Rotaru, LI and Vrâncianu, CO and Niță Lazăr, M and Oțelea, D and Chifiriuc, MC},
  title    = {Decoding the resistome, virulome and mobilome of clinical versus aquatic {Acinetobacter} baumannii in southern {Romania}},
  journal  = {Heliyon},
  volume   = {10},
  number   = {13},
  pages    = {e33372},
  pmid     = {39035534},
  issn     = {2405-8440},
  abstract = {Acinetobacter baumannii, a notorious opportunistic pathogen, presents a formidable challenge in both clinical and environmental fields due to its resilience and ability to acquire resistance. This study undertook a comprehensive analysis of 183 A. baumannii isolates collected between 2019 and 2022 from intra-hospital infections (IHI), hospital sewages (Hs), wastewater treatment plants (WWTP), and adjacent river waters from two Southern cities, focusing on their resistome, virulome, and mobilome through isolation on chromogenic media, identification by MALDI-TOF-MS and antibiotic susceptibility testing by disk diffusion) followed by genotypic characterization [Whole Genome Sequencing (WGS), 3rd generation sequencing through the MinION (ONT) platform, pangenome description, and respectively horizontal gene transfer through conjugation assays]. Our findings reveal significant genomic plasticity and the prevalence of high-risk international clones, underlining the potential of these isolates to act as reservoirs for antibiotic resistance genes (ARGs) that could be dynamically exchanged between clinical and environmental settings through mobile genetic elements (MGEs) such as the pMAL1 plasmids and the critical role of WWTPs in the persistence and spread of A. baumannii. Moreover, our study presents the first report of the co-occurrence of bla OXA-23 and bla OXA-72 in A. baumannii ST2 clone.
Thus, our research underscores the necessity for integrated surveillance and targeted interventions across healthcare and environmental sectors to mitigate the risk posed by this adaptable pathogen.},
}
@article{pmid39033143,
  year     = {2024},
  author   = {Xiang, Y and Zhu, K and Min, K and Zhang, Y and Liu, J and Liu, K and Han, Y and Li, X and Du, X and Wang, X and Huang, Y and Li, X and Peng, Y and Yang, C and Liu, H and Liu, H and Li, X and Wang, H and Wang, C and Wang, Q and Jia, H and Yang, M and Wang, L and Wu, Y and Cui, Y and Chen, F and Yang, H and Baker, S and Xu, X and Yang, J and Song, H and Qiu, S},
  title    = {Characterization of a {Salmonella} enterica serovar {Typhimurium} lineage with rough colony morphology and multidrug resistance},
  journal  = {Nature communications},
  volume   = {15},
  number   = {1},
  pages    = {6123},
  pmid     = {39033143},
  issn     = {2041-1723},
  support  = {82173580//National Natural Science Foundation of China (National Science Foundation of China)/ ; 82202538//National Natural Science Foundation of China (National Science Foundation of China)/ ; },
  mesh     = {*Salmonella typhimurium/genetics/drug effects/isolation & purification ; *Drug Resistance, Multiple, Bacterial/genetics ; *Phylogeny ; *Anti-Bacterial Agents/pharmacology ; *Whole Genome Sequencing ; *Biofilms/growth & development/drug effects ; China ; *Genome, Bacterial/genetics ; Plasmids/genetics ; Microbial Sensitivity Tests ; Humans ; Salmonella Infections/microbiology ; },
  abstract = {Salmonella enterica serovar Typhimurium (S. Typhimurium) is a major cause of salmonellosis, and the emergence of multidrug-resistant pathovariants has become a growing concern. Here, we investigate a distinct rough colony variant exhibiting a strong biofilm-forming ability isolated in China. Whole-genome sequencing on 2,212 Chinese isolates and 1,739 publicly available genomes reveals the population structure and evolutionary history of the rough colony variants. Characterized by macro, red, dry, and rough (mrdar) colonies, these variants demonstrate enhanced biofilm formation at 28 °C and 37 °C compared to typical rdar colonies.
The mrdar variants exhibit extensive multidrug resistance, with significantly higher resistance to at least five classes of antimicrobial agents compared to non-mrdar variants. This resistance is primarily conferred by an IncHI2 plasmid harboring 19 antimicrobial resistance genes. Phylogenomic analysis divides the global collections into six lineages. The majority of mrdar variants belong to sublineage L6.5, which originated from Chinese smooth colony strains and possibly emerged circa 1977. Among the mrdar variants, upregulation of the csgDEFG operons is observed, probably due to a distinct point mutation (-44G > T) in the csgD gene promoter. Pangenome and genome-wide association analyses identify 87 specific accessory genes and 72 distinct single nucleotide polymorphisms associated with the mrdar morphotype.},
}
@article{pmid39029396,
  year     = {2024},
  author   = {Fan, X and Kong, L and Wang, J and Tan, Y and Xu, X and Li, M and Zhu, L},
  title    = {Surface-programmed microbiome assembly in phycosphere to microplastics contamination},
  journal  = {Water research},
  volume   = {262},
  pages    = {122064},
  doi      = {10.1016/j.watres.2024.122064},
  pmid     = {39029396},
  issn     = {1879-2448},
  abstract = {Recalcitrance in microplastics accounts for ubiquitous white pollution. Of special interest are the capabilities of microorganisms to accelerate their degradation sustainably. Compared to the well-studied pure cultures in degrading natural polymers, the algal-bacterial symbiotic system is considered as a promising candidate for microplastics removal, cascading bottom-up impacts on ecosystem-scale processes. This study selected and enriched the algae-associated microbial communities hosted by the indigenous isolation Desmodesmus sp. in wastewater treatment plants with micro-polyvinyl chloride, polyethylene terephthalate, polyethylene, and polystyrene contamination. Results elaborated that multiple settled and specific affiliates were recruited by the uniform algae protagonist from the biosphere under manifold microplastic stress. Alteration of distinct chemical functionalities and deformation of polymers provide direct evidence of degradation in phycosphere under illumination. Microplastic-induced phycosphere-derived DOM created spatial gradients of aromatic protein, fulvic and humic acid-like and tryptophan components to expanded niche-width. Surface thermodynamic analysis was conducted to simulate the reciprocal and reversible interaction on algal-bacterial and phycosphere-microplastic interface, revealing the enhancement of transition to stable and irreversible aggregation for functional microbiota colonization and microplastics capture.
Furthermore, pangenomic analysis disclosed the genes related to the chemotaxis and the proposed microplastics biodegradation pathway in enriched algal-bacterial microbiome, orchestrating the evidence for common synthetic polymer particles and ultimately to confirm the effectiveness and potential. The present study emphasizes the necessity for future endeavors aimed at fully leveraging the potential of algal-bacterial mutualistic systems within sustainable bioremediation strategies targeting the eradication of microplastic waste.},
}
@article{pmid39026826,
  year     = {2024},
  author   = {Hatmaker, EA and Barber, AE and Drott, MT and Sauters, TJC and Alastruey-Izquierdo, A and Garcia-Hermoso, D and Kurzai, O and Rokas, A},
  title    = {Pathogenicity is associated with population structure in a fungal pathogen of humans},
  journal  = {bioRxiv : the preprint server for biology},
  doi      = {10.1101/2024.07.05.602241},
  pmid     = {39026826},
  issn     = {2692-8205},
  abstract = {Aspergillus flavus is a clinically and agriculturally important saprotrophic fungus responsible for severe human infections and extensive crop losses. We analyzed genomic data from 250 (95 clinical and 155 environmental) A. flavus isolates from 9 countries, including 70 newly sequenced clinical isolates, to examine population and pan-genome structure and their relationship to pathogenicity. We identified five A. flavus populations, including a new population, D, corresponding to distinct clades in the genome-wide phylogeny. Strikingly, > 75\% of clinical isolates were from population D. Accessory genes, including genes within biosynthetic gene clusters, were significantly more common in some populations but rare in others. Population D was enriched for genes associated with zinc ion binding, lipid metabolism, and certain types of hydrolase activity. In contrast to the major human pathogen Aspergillus fumigatus, A. flavus pathogenicity in humans is strongly associated with population structure, making it a great system for investigating how population-specific genes contribute to pathogenicity.},
}
% Kusza, Badaoui and Wanjala 2024 -- Scientific reports 14(1), article 16515 (PMID 39019985).
% NOTE(review): "though the lens" in the title reads like a typo for "through"; it is kept as found
% because it may match the indexed title -- confirm against the publisher record before changing it.
@article{pmid39019985,
  year     = {2024},
  author   = {Kusza, S. and Badaoui, B. and Wanjala, G.},
  title    = {Insights into the genomic homogeneity of {Moroccan} indigenous sheep breeds though the lens of runs of homozygosity},
  journal  = {Scientific reports},
  volume   = {14},
  number   = {1},
  pages    = {16515},
  pmid     = {39019985},
  issn     = {2045-2322},
  support  = {2021-1.2.4-TÉT-2021-00014//National Development, Research and Innovation Fund/ ; Hungarian Eötvös State Scholarship//Tempus Public Foundation/ ; Stipendium Hungaricum Scholarship//Tempus Public Foundation/ ; },
  mesh     = {Animals ; Morocco ; *Homozygote ; *Breeding ; Sheep/genetics ; Genomics/methods ; Genome ; Polymorphism, Single Nucleotide ; Genetics, Population ; Sheep, Domestic/genetics ; Whole Genome Sequencing ; },
  abstract = {Numerous studies have indicated that Morocco's indigenous sheep breeds are genetically homogenous, posing a risk to their survival in the challenging harsh climate conditions where they predominantly inhabit. To understand the genetic behind genetic homogeneity through the lens of runs of homozygosity (ROH), we analyzed the whole genome sequences of five indigenous sheep breeds (Beni Guil, Ouled Djellal, D'man, Sardi, Timahdite and Admixed). The results from principal component, admixture, Fst, and neighbour joining tree analyses consistently showed a homogenous genetic structure. This structure was characterized by an average length of 1.83 Mb for runs of homozygosity (ROH) segments, with a limited number of long ROH segments (24-48 Mb and > 48 Mb). The most common ROH segments were those ranging from 1-6 Mb. The most significant regions of homozygosity (ROH Islands) were mostly observed in two chromosomes, namely Chr1 and Chr5. Specifically, ROH Islands were exclusively discovered in the Ouled Djellal breed on Chr1, whereas Chr5 exhibited ROH Islands in all breeds. The analysis of ROH Island and iHS technique was employed to detect signatures of selection on Chr1 and Chr5.
The results indicate that Chr5 had a high level of homogeneity, with the same genes being discovered across all breeds. In contrast, Chr1 displays some genetic variances between breeds. Genes identified on Chr5 included SLC39A1, IL23A, CAST, IL5, IL13, and IL4 which are responsible for immune response while genes identified on Chr1 include SOD1, SLAMF9, RTP4, CLDN1, and PRKAA2. ROH segment profile and effective population sizes patterns suggests that the genetic uniformity of studied breeds is the outcome of events that transpired between 250 and 300 generations ago. This research not only contributes to the understanding of ROH distribution across breeds but helps design and implement native sheep breeding and conservation strategies in Morocco. Future research, incorporating a broader sample size and utilizing the pangenome for reference, is recommended to further elucidate these breeds' genomic landscapes and adaptive mechanisms.},
}
% Machado et al. 2024 -- Microbial genomics 10(7) (PMID 39016539). No page or article number is recorded for this record.
@article{pmid39016539,
  year     = {2024},
  author   = {Machado, E. and Vasconcellos, S. and Gomes, L. and Catanho, M. and Ramos, J. and de Carvalho, L. and Goldenberg, T. and Redner, P. and Caldas, P. and Campos, C. and Dalcolmo, M. and Lourenço, M. C. and Lasunskaia, E. and Mussi, V. and Spinassé, L. and Vinhas, S. and Rigouts, L. and Cogneau, S. and de Rijk, P. and Utpatel, C. and Kaustova, J. and van der Laan, T. and de Neeling, H. and Rastogi, N. and Levina, K. and Kütt, M. and Mokrousov, I. and Zhuravlev, V. and Makhado, N. and Žolnir-Dovč, M. and Jankovic, V. and de Waard, J. and Sisco, M. C. and van Soolingen, D. and Niemann, S. and de Jong, B. C. and Meehan, C. J. and Suffys, P.},
  title    = {Phylogenomic and genomic analysis reveals unique and shared genetic signatures of {Mycobacterium} kansasii complex species},
  journal  = {Microbial genomics},
  volume   = {10},
  number   = {7},
  doi      = {10.1099/mgen.0.001266},
  pmid     = {39016539},
  issn     = {2057-5858},
  mesh     = {*Mycobacterium kansasii/genetics/classification/isolation & purification ; *Phylogeny ; Humans ; *Genome, Bacterial ; *Mycobacterium Infections, Nontuberculous/microbiology ; *Genomics ; Animals ; Virulence/genetics ; },
  abstract = {Species belonging to the Mycobacterium kansasii complex (MKC) are frequently isolated from humans and the environment and can cause serious diseases. The most common MKC infections are caused by the species M. kansasii (sensu stricto), leading to tuberculosis-like disease. However, a broad spectrum of virulence, antimicrobial resistance and pathogenicity of these non-tuberculous mycobacteria (NTM) are observed across the MKC. Many genomic aspects of the MKC that relate to these broad phenotypes are not well elucidated. Here, we performed genomic analyses from a collection of 665 MKC strains, isolated from environmental, animal and human sources. We inferred the MKC pangenome, mobilome, resistome, virulome and defence systems and show that the MKC species harbours unique and shared genomic signatures.
High frequency of presence of prophages and different types of defence systems were observed. We found that the M. kansasii species splits into four lineages, of which three are lowly represented and mainly in Brazil, while one lineage is dominant and globally spread. Moreover, we show that four sub-lineages of this most distributed M. kansasii lineage emerged during the twentieth century. Further analysis of the M. kansasii genomes revealed almost 300 regions of difference contributing to genomic diversity, as well as fixed mutations that may explain the M. kansasii's increased virulence and drug resistance.},
}
% Prigozhin et al. 2024 -- Molecular plant-microbe interactions (PMID 39013614). No volume, issue or pages are recorded for this record.
@article{pmid39013614,
  year     = {2024},
  author   = {Prigozhin, D. M. and Sutherland, C. A. and Rangavajjhala, S. and Krasileva, K. V.},
  title    = {Majority of the highly variable {NLRs} in maize share genomic location and contain additional target-binding domains},
  journal  = {Molecular plant-microbe interactions : MPMI},
  doi      = {10.1094/MPMI-05-24-0047-FI},
  pmid     = {39013614},
  issn     = {0894-0282},
  abstract = {Nucleotide-binding, Leucine Rich Repeat proteins (NLRs) are a major class of immune receptors in plants. NLRs include both conserved and rapidly evolving members, however their evolutionary trajectory in crops remains understudied. Availability of crop pan-genomes enables analysis of the recent events in the evolution of this highly complex gene family within domesticated species. Here, we investigated the NLR complement of 26 nested association mapping (NAM) founder lines of maize. We found that maize has just four main subfamilies containing rapidly evolving highly variable NLR (hvNLR) receptors. Curiously, three of these phylogenetically distinct hvNLR lineages are located in adjacent clusters on chromosome 10. Members of the same hvNLR clade show variable expression and methylation across lines and tissues, consistent with their rapid evolution. By combining sequence diversity analysis and AlphaFold2 computational structure prediction we predicted ligand binding sites in the hvNLRs. We also observed novel insertion domains in the LRR regions of two hvNLR subfamilies that likely contribute to target recognition. To make this analysis accessible, we created NLRCladeFinder, a Google Colaboratory notebook, that accepts any newly identified NLR sequence, places it in the evolutionary context of the maize pan-NLRome, and provides an updated clade alignment, phylogenetic tree, and sequence diversity information for the gene of interest.},
}
% Chandra, Gibney and Jain 2024 -- Genome research (PMID 39013594). No volume, issue or pages are recorded for this record.
@article{pmid39013594,
  year     = {2024},
  author   = {Chandra, G. and Gibney, D. and Jain, C.},
  title    = {Haplotype-aware sequence alignment to pangenome graphs},
  journal  = {Genome research},
  doi      = {10.1101/gr.279143.124},
  pmid     = {39013594},
  issn     = {1549-5469},
  abstract = {Modern pangenome graphs are built using haplotype-resolved genome assemblies. When mapping reads to a pangenome graph, prioritizing alignments that are consistent with the known haplotypes improves genotyping accuracy. However, the existing rigorous formulations for co-linear chaining and alignment problems do not consider the haplotype paths in a pangenome graph. This often leads to spurious read alignments to those paths that are unlikely recombinations of the known haplotypes. In this paper, we develop novel formulations and algorithms for sequence-to-graph alignment and chaining problems. Inspired by the genotype imputation models, we assume that a query sequence is an imperfect mosaic of reference haplotypes. Accordingly, we introduce a recombination penalty in the scoring functions for each haplotype switch. First, we solve haplotype-aware sequence-to-graph alignment in O(|Q||E||H|) time, where Q is the query sequence, E is the set of edges, and H is the set of haplotypes represented in the graph. To complement our solution, we prove that an algorithm significantly faster than O(|Q||E||H|) is impossible under the Strong Exponential Time Hypothesis (SETH). Second, we propose a haplotype-aware chaining algorithm that runs in O(|H|N log|H|N) time after graph preprocessing, where N is the count of input anchors. We then establish that a chaining algorithm significantly faster than O(|H|N) is impossible under SETH. As a proof-of-concept, we implemented our chaining algorithm in the Minichain aligner.
By aligning sequences sampled from the human major histocompatibility complex (MHC) to a pangenome graph of 60 MHC haplotypes, we demonstrate that our algorithm achieves better consistency with ground-truth recombinations when compared to a haplotype-agnostic algorithm.},
}
% Uzuner et al. 2024 -- BMC bioinformatics 25(1), article 240 (PMID 39014339).
@article{pmid39014339,
  year     = {2024},
  author   = {Uzuner, H. and Paschen, A. and Schadendorf, D. and Köster, J.},
  title    = {Orthanq: transparent and uncertainty-aware haplotype quantification with application in {HLA}-typing},
  journal  = {BMC bioinformatics},
  volume   = {25},
  number   = {1},
  pages    = {240},
  pmid     = {39014339},
  issn     = {1471-2105},
  support  = {RTG 2535//Deutsche Forschungsgemeinschaft/ ; 70113455//Deutsche Krebshilfe/ ; },
  abstract = {BACKGROUND: Identification of human leukocyte antigen (HLA) types from DNA-sequenced human samples is important in organ transplantation and cancer immunotherapy and remains a challenging task considering sequence homology and extreme polymorphism of HLA genes.
RESULTS: We present Orthanq, a novel statistical model and corresponding application for transparent and uncertainty-aware quantification of haplotypes. We utilize our approach to perform HLA typing while, for the first time, reporting uncertainty of predictions and transparently observing mutations beyond reported HLA types. Using 99 gold standard samples from 1000 Genomes, Illumina Platinum Genomes and Genome In a Bottle projects, we show that Orthanq can provide overall superior accuracy and shorter runtimes than state-of-the-art HLA typers.
CONCLUSIONS: Orthanq is the first approach that allows to directly utilize existing pangenome alignments and type all HLA loci. Moreover, it can be generalized for usages beyond HLA typing, e.g. for virus lineage quantification. Orthanq is available under https://orthanq.github.io .},
}
% Ceres et al. 2024 -- Applied and environmental microbiology, article e0035424 (PMID 39012166). No volume or issue is recorded for this record.
% NOTE(review): the given-name token "Pk" for Sekhon is kept exactly as found; it is presumably the
% initials P. K. -- confirm against the published author list before normalising it.
@article{pmid39012166,
  year     = {2024},
  author   = {Ceres, K. and Zehr, J. D. and Murrell, C. and Millet, J. K. and Sun, Q. and McQueary, H. C. and Horton, A. and Cazer, C. and Sams, K. and Reboul, G. and Andreopoulos, W. B. and Mitchell, P. K. and Anderson, R. and Franklin-Guild, R. and Cronk, B. D. and Stanhope, B. J. and Burbick, C. R. and Wolking, R. and Peak, L. and Zhang, Y. and McDowall, R. and Krishnamurthy, A. and Slavic, D. and Sekhon, Pk and Tyson, G. H. and Ceric, O. and Stanhope, M. J. and Goodman, L. B.},
  title    = {Evolutionary genomic analyses of canine {E. coli} infections identify a relic capsular locus associated with resistance to multiple classes of antimicrobials},
  journal  = {Applied and environmental microbiology},
  pages    = {e0035424},
  doi      = {10.1128/aem.00354-24},
  pmid     = {39012166},
  issn     = {1098-5336},
  abstract = {UNLABELLED: Infections caused by antimicrobial-resistant Escherichia coli are the leading cause of death attributed to antimicrobial resistance (AMR) worldwide, and the known AMR mechanisms involve a range of functional proteins. Here, we employed a pan-genome wide association study (GWAS) approach on over 1,000 E. coli isolates from sick dogs collected across the US and Canada and identified a strong statistical association (empirical P < 0.01) of AMR, involving a range of antibiotics to a group 1 capsular (CPS) gene cluster. This cluster included genes under relaxed selection pressure, had several loci missing, and had pseudogenes for other key loci. Furthermore, this cluster is widespread in E. coli and Klebsiella clinical isolates across multiple host species. Earlier studies demonstrated that the octameric CPS polysaccharide export protein Wza can transmit macrolide antibiotics into the E. coli periplasm. We suggest that the CPS in question, and its highly divergent Wza, functions as an antibiotic trap, preventing antimicrobial penetration.
We also highlight the high diversity of lineages circulating in dogs across all regions studied, the overlap with human lineages, and regional prevalence of resistance to multiple antimicrobial classes.
IMPORTANCE: Much of the human genomic epidemiology data available for E. coli mechanism discovery studies has been heavily biased toward shiga-toxin producing strains from humans and livestock. E. coli occupies many niches and produces a wide variety of other significant pathotypes, including some implicated in chronic disease. We hypothesized that since dogs tend to share similar strains with their owners and are treated with similar antibiotics, their pathogenic isolates will harbor unexplored AMR mechanisms of importance to humans as well as animals. By comparing over 1,000 genomes with in vitro antimicrobial susceptibility data from sick dogs across the US and Canada, we identified a strong multidrug resistance association with an operon that appears to have once conferred a type 1 capsule production system.},
}
% Jespersen et al. 2024 -- Microbiology spectrum, article e0324023 (PMID 39012116). No volume or issue is recorded for this record.
@article{pmid39012116,
  year     = {2024},
  author   = {Jespersen, M. G. and Hayes, A. J. and Tong, S. Y. C. and Davies, M. R.},
  title    = {Pangenome evaluation of gene essentiality in {Streptococcus} pyogenes},
  journal  = {Microbiology spectrum},
  pages    = {e0324023},
  doi      = {10.1128/spectrum.03240-23},
  pmid     = {39012116},
  issn     = {2165-0497},
  abstract = {Bacterial species often consist of strains with variable gene content, collectively referred to as the pangenome. Variations in the genetic makeup of strains can alter bacterial physiology and fitness. To define biologically relevant genes of a genome, genome-wide transposon mutant libraries have been used to identify genes essential for survival or virulence in a given strain. Such phenotypic studies have been conducted in four different genotypes of the human pathogen Streptococcus pyogenes, yet challenges exist in comparing results across studies conducted in different genetic backgrounds and conditions. To advance genotype to phenotype inferences across different S. pyogenes strains, we built a pangenome database of 249 S. pyogenes reference genomes. We systematically re-analyzed publicly available transposon sequencing datasets from S. pyogenes using a transposon sequencing-specific analysis pipeline, Transit. Across four genetic backgrounds and nine phenotypic conditions, 355 genes were essential for survival, corresponding to $\sim$24\% of the core genome. Clusters of Orthologous Genes (COG) categories related to coenzyme and lipid transport and growth functions were overrepresented as essential. Finally, essential operons across S. pyogenes genotypes were defined, with an increased number of essential operons detected under in vivo conditions. This study provides an extendible database to which new studies can be added, and a searchable html-based resource to direct future investigations into S. pyogenes biology. IMPORTANCE: Streptococcus pyogenes is a human-adapted pathogen occupying restricted ecological niches.
Understanding the essentiality of genes across different strains and experimental conditions is important to direct research questions and efforts to prevent the large burden of disease caused by S. pyogenes. To this end we systematically reanalyzed transposon sequencing studies in S. pyogenes using transposon sequencing-specific methods, integrating them into an extendible meta-analysis framework. This provides a repository of gene essentiality in S. pyogenes which was used to highlight specific genes of interest and for the community to guide future phenotypic studies.},
}
% Brejová et al. 2024 -- Frontiers in bioinformatics 4, article 1391086 (PMID 39011297). No issue number is recorded for this record.
@article{pmid39011297,
  year     = {2024},
  author   = {Brejová, B. and Gagie, T. and Herencsárová, E. and Vinař, T.},
  title    = {Maximum-scoring path sets on pangenome graphs of constant treewidth},
  journal  = {Frontiers in bioinformatics},
  volume   = {4},
  pages    = {1391086},
  pmid     = {39011297},
  issn     = {2673-7647},
  abstract = {We generalize a problem of finding maximum-scoring segment sets, previously studied by Csűrös (IEEE/ACM Transactions on Computational Biology and Bioinformatics, 2004, 1, 139-150), from sequences to graphs. Namely, given a vertex-weighted graph G and a non-negative startup penalty c, we can find a set of vertex-disjoint paths in G with maximum total score when each path's score is its vertices' total weight minus c. We call this new problem maximum-scoring path sets (MSPS). We present an algorithm that has a linear-time complexity for graphs with a constant treewidth. Generalization from sequences to graphs allows the algorithm to be used on pangenome graphs representing several related genomes and can be seen as a common abstraction for several biological problems on pangenomes, including searching for CpG islands, ChIP-seq data analysis, analysis of region enrichment for functional elements, or simple chaining problems.},
}
% Montecillo 2024 -- Indian journal of microbiology 64(2), pp. 762-772 (PMID 39011009).
@article{pmid39011009,
  year     = {2024},
  author   = {Montecillo, J. A. V.},
  title    = {Genomics of the Thermophilic Bacterium {Thermosulfidibacter} takaii Reveals Novel Lineage of Deep-Branching Bacterial Phylum},
  journal  = {Indian journal of microbiology},
  volume   = {64},
  number   = {2},
  pages    = {762--772},
  pmid     = {39011009},
  issn     = {0046-8991},
  abstract = {UNLABELLED: The thermophilic bacterium Thermosulfidibacter takaii is affiliated to the deep-branching bacterial lineage in the phylum Aquificota. However, the recent taxonomic study of the phylum Aquificota revealed that T. takaii has no specific association with the phylum. The fact that T. takaii is considered an important model organism for studying the evolution and kinetics of ancestral carbon metabolism pathways, its proper classification is therefore of significant interest. In this work, phylogenomics and comparative genomic analyses were employed to ascertain the taxonomic placement of T. takaii. Results from the phylogenetic analyses based on 16S rRNA gene and core genome sequences confirmed the exclusion of T. takaii from the phylum Aquificota and further revealed a phylum-level lineage for T. takaii. The analysis of conserved signature indels (CSIs) specific for the phylum Aquificota also supported the exclusion of T. takaii from the phylum. Pan-genome analysis of T. takaii along with the members of the closely related clade from the phylum Thermodesulfobacteriota revealed that T. takaii was indeed distinct, supporting its phylum-level placement. Furthermore, the presence of CSIs specific to T. takaii, and the results from the average nucleotide identity and average amino acid identity analyses, together with the unique characteristic of T. takaii also provided evidence supporting its assignment to a novel phylum. Based on these results, T. takaii is proposed to be transferred to a novel family, Thermosulfidibacteraceae fam. nov., of a novel order, Thermosulfidibacterales ord.
nov., and a novel class, Thermosulfidibacteria classis nov., within a novel phylum Thermosulfidibacterota phyl. nov.
SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s12088-024-01214-9.},
}
% Rodenburg et al. 2024 -- Molecular plant-microbe interactions, article MPMI12230200R (PMID 38648121). No volume or issue is recorded for this record.
@article{pmid38648121,
  year     = {2024},
  author   = {Rodenburg, S. Y. A. and de Ridder, D. and Govers, F. and Seidl, M. F.},
  title    = {Oomycete Metabolism Is Highly Dynamic and Reflects Lifestyle Adaptations},
  journal  = {Molecular plant-microbe interactions : MPMI},
  pages    = {MPMI12230200R},
  doi      = {10.1094/MPMI-12-23-0200-R},
  pmid     = {38648121},
  issn     = {0894-0282},
  abstract = {The selective pressure of pathogen-host symbiosis drives adaptations. How these interactions shape the metabolism of pathogens is largely unknown. Here, we use comparative genomics to systematically analyze the metabolic networks of oomycetes, a diverse group of eukaryotes that includes saprotrophs as well as animal and plant pathogens, with the latter causing devastating diseases with significant economic and/or ecological impacts. In our analyses of 44 oomycete species, we uncover considerable variation in metabolism that can be linked to lifestyle differences. Comparisons of metabolic gene content reveal that plant pathogenic oomycetes have a bipartite metabolism consisting of a conserved core and an accessory set. The accessory set can be associated with the degradation of defense compounds produced by plants when challenged by pathogens. Obligate biotrophic oomycetes have smaller metabolic networks, and taxonomically distantly related biotrophic lineages display convergent evolution by repeated gene losses in both the conserved as well as the accessory set of metabolisms. When investigating to what extent the metabolic networks in obligate biotrophs differ from those in hemibiotrophic plant pathogens, we observe that the losses of metabolic enzymes in obligate biotrophs are not random and that gene losses predominantly influence the terminal branches of the metabolic networks.
Our analyses represent the first metabolism-focused comparison of oomycetes at this scale and will contribute to a better understanding of the evolution of oomycete metabolism in relation to lifestyle adaptation. Numerous oomycete species are devastating plant pathogens that cause major damage in crops and natural ecosystems. Their interactions with hosts are shaped by strong selection, but how selection affects adaptation of the primary metabolism to a pathogenic lifestyle is not yet well established. By pan-genome and metabolic network analyses of distantly related oomycete pathogens and their nonpathogenic relatives, we reveal considerable lifestyle- and lineage-specific adaptations. This study contributes to a better understanding of metabolic adaptations in pathogenic oomycetes in relation to lifestyle, host, and environment, and the findings will help in pinpointing potential targets for disease control. [Formula: see text] Copyright © 2024 The Author(s). This is an open access article distributed under the CC BY-NC-ND 4.0 International license.},
}
% Chang et al. 2024 -- Microbiome 12(1), article 126 (PMID 39010229).
@article{pmid39010229,
  year     = {2024},
  author   = {Chang, T. and Gavelis, G. S. and Brown, J. M. and Stepanauskas, R.},
  title    = {Genomic representativeness and chimerism in large collections of {SAGs} and {MAGs} of marine prokaryoplankton},
  journal  = {Microbiome},
  volume   = {12},
  number   = {1},
  pages    = {126},
  pmid     = {39010229},
  issn     = {2049-2618},
  support  = {827839//Simons Foundation/ ; 827839//Simons Foundation/ ; 827839//Simons Foundation/ ; 827839//Simons Foundation/ ; OIA-1826734//National Science Foundation/ ; OIA-1826734//National Science Foundation/ ; OIA-1826734//National Science Foundation/ ; OIA-1826734//National Science Foundation/ ; },
  abstract = {BACKGROUND: Single amplified genomes (SAGs) and metagenome-assembled genomes (MAGs) are the predominant sources of information about the coding potential of uncultured microbial lineages, but their strengths and limitations remain poorly understood. Here, we performed a direct comparison of two previously published collections of thousands of SAGs and MAGs obtained from the same, global environment.
RESULTS: We found that SAGs were less prone to chimerism and more accurately reflected the relative abundance and the pangenome content of microbial lineages inhabiting the epipelagic of the tropical and subtropical ocean, as compared to MAGs. SAGs were also better suited to link genome information with taxa discovered through 16S rRNA amplicon analyses. Meanwhile, MAGs had the advantage of more readily recovering genomes of rare lineages.
CONCLUSIONS: Our analyses revealed the relative strengths and weaknesses of the two most commonly used genome recovery approaches in environmental microbiology. These considerations, as well as the need for better tools for genome quality assessment, should be taken into account when designing studies and interpreting data that involve SAGs or MAGs. Video Abstract.},
}
% Bosi et al. 2024 -- Genome biology and evolution 16(7) (PMID 39007295). No page or article number is recorded for this record.
@article{pmid39007295,
  year     = {2024},
  author   = {Bosi, E. and Taviani, E. and Avesani, A. and Doni, L. and Auguste, M. and Oliveri, C. and Leonessi, M. and Martinez-Urtaza, J. and Vetriani, C. and Vezzulli, L.},
  title    = {Pan-Genome Provides Insights into {Vibrio} Evolution and Adaptation to Deep-Sea Hydrothermal Vents},
  journal  = {Genome biology and evolution},
  volume   = {16},
  number   = {7},
  doi      = {10.1093/gbe/evae131},
  pmid     = {39007295},
  issn     = {1759-6653},
  support  = {CN00000033//"National Biodiversity Future Center-NBFC/ ; 201728ZA49_002//PRIN 2017 "Emergence of virulence/ ; 2021 SGR 00526//Ministerio de Ciencia e Innovación/ ; //Generalitat de Catalunya/ ; //National Science Foundation/ ; 19-48623//OCE/ ; 19-51690//IOS/ ; 80NSSC21K0485/NASA/NASA/United States ; },
  mesh     = {*Vibrio/genetics ; *Hydrothermal Vents/microbiology ; *Genome, Bacterial ; *Phylogeny ; Evolution, Molecular ; Adaptation, Physiological/genetics ; Pacific Ocean ; },
  abstract = {This study delves into the genomic features of 10 Vibrio strains collected from deep-sea hydrothermal vents in the Pacific Ocean, providing insights into their evolutionary history and ecological adaptations. Through sequencing and pan-genome analysis involving 141 Vibrio species, we found that deep-sea strains exhibit larger genomes with unique gene distributions, suggesting adaptation to the vent environment. The phylogenomic reconstruction of the investigated isolates revealed the presence of 2 main clades: The first is monophyletic, consisting exclusively of Vibrio alginolyticus, while the second forms a monophyletic clade comprising both Vibrio antiquarius and Vibrio diabolicus species, which were previously isolated from deep-sea vents. All strains carry virulence and antibiotic resistance genes related to those found in human pathogenic Vibrio species which may play a wider ecological role other than host infection in these environments.
In addition, functional genomic analysis identified genes potentially related to deep-sea survival and stress response, alongside candidate genes encoding for novel antimicrobial agents. Ultimately, the pan-genome we generated represents a valuable resource for future studies investigating the taxonomy, evolution, and ecology of Vibrio species.},
}
% Seru et al. 2024 -- BMC genomics 25(1), article 691 (PMID 39004696).
@article{pmid39004696,
  year     = {2024},
  author   = {Seru, L. V. and Forde, T. L. and Roberto-Charron, A. and Mavrot, F. and Niu, Y. D. and Kutz, S. J.},
  title    = {Genomic characterization and virulence gene profiling of {Erysipelothrix} rhusiopathiae isolated from widespread muskox mortalities in the {Canadian Arctic Archipelago}},
  journal  = {BMC genomics},
  volume   = {25},
  number   = {1},
  pages    = {691},
  pmid     = {39004696},
  issn     = {1471-2164},
  support  = {NST-2122-0049//Polar Knowledge Canada/ ; Canada Research Chair in Arctic One Health (CRC-2020-00315)//Natural Sciences and Engineering Research Council of Canada/ ; },
  mesh     = {Arctic Regions ; *Erysipelothrix/genetics/pathogenicity/isolation & purification ; Canada ; Animals ; Virulence/genetics ; Genomics ; Genome, Bacterial ; Phylogeny ; Erysipelothrix Infections/microbiology ; Virulence Factors/genetics ; Genome-Wide Association Study ; Genomic Islands ; },
  abstract = {BACKGROUND: Muskoxen are important ecosystem components and provide food, economic opportunities, and cultural well-being for Indigenous communities in the Canadian Arctic. Between 2010 and 2021, Erysipelothrix rhusiopathiae was isolated from carcasses of muskoxen, caribou, a seal, and an Arctic fox during multiple large scale mortality events in the Canadian Arctic Archipelago. A single strain ('Arctic clone') of E. rhusiopathiae was associated with the mortalities on Banks, Victoria and Prince Patrick Islands, Northwest Territories and Nunavut, Canada (2010-2017). The objectives of this study were to (i) characterize the genomes of E. rhusiopathiae isolates obtained from more recent muskox mortalities in the Canadian Arctic in 2019 and 2021; (ii) identify and compare common virulence traits associated with the core genome and mobile genetic elements (i.e. pathogenicity islands and prophages) among Arctic clone versus other E.
rhusiopathiae genomes; and (iii) use pan-genome wide association studies (GWAS) to determine unique genetic contents of the Arctic clone that may encode virulence traits and that could be used for diagnostic purposes.
RESULTS: Phylogenetic analyses revealed that the newly sequenced E. rhusiopathiae isolates from Ellesmere Island, Nunavut (2021) also belong to the Arctic clone. Of 17 virulence genes analysed among 28 Arctic clone isolates, four genes - adhesin, rhusiopathiae surface protein-A (rspA), choline binding protein-B (cbpB) and CDP-glycerol glycerophosphotransferase (tagF) - had amino acid sequence variants unique to this clone when compared to 31 other E. rhusiopathiae genomes. These genes encode proteins that facilitate E. rhusiopathiae to attach to the host endothelial cells and form biofilms. GWAS analyses using Scoary found several unique genes to be overrepresented in the Arctic clone.
CONCLUSIONS: The Arctic clone of E. rhusiopathiae was associated with multiple muskox mortalities spanning over a decade and multiple Arctic islands with distances over 1000 km, highlighting the extent of its spatiotemporal spread. This clone possesses unique gene content, as well as amino acid variants in multiple virulence genes that are distinct from the other closely related E. rhusiopathiae isolates. This study establishes an essential foundation on which to investigate whether these differences are correlated with the apparent virulence of this specific clone through in vitro and in vivo studies.},
}
% Biswas et al. 2024 -- Biologicals 87, article 101782 (PMID 39003966). No issue number is recorded for this record.
@article{pmid39003966,
  year     = {2024},
  author   = {Biswas, R. and Swetha, R. G. and Basu, S. and Roy, A. and Ramaiah, S. and Anbarasu, A.},
  title    = {Designing multi-epitope vaccine against human cytomegalovirus integrating pan-genome and reverse vaccinology pipelines},
  journal  = {Biologicals : journal of the International Association of Biological Standardization},
  volume   = {87},
  pages    = {101782},
  doi      = {10.1016/j.biologicals.2024.101782},
  pmid     = {39003966},
  issn     = {1095-8320},
  abstract = {Human cytomegalovirus (HCMV) is accountable for high morbidity in neonates and immunosuppressed individuals. Due to the high genetic variability of HCMV, current prophylactic measures are insufficient. In this study, we employed a pan-genome and reverse vaccinology approach to screen the target for efficient vaccine candidates. Four proteins, envelope glycoprotein M, UL41A, US23, and US28, were shortlisted based on cellular localization, high solubility, antigenicity, and immunogenicity. A total of 29 B-cell and 44 T-cell highly immunogenic and antigenic epitopes with high global population coverage were finalized using immunoinformatics tools and algorithms. Further, the epitopes that were overlapping among the finalized B-cell and T-cell epitopes were linked with suitable linkers to form various combinations of multi-epitopic vaccine constructs. Among 16 vaccine constructs, Vc12 was selected based on physicochemical and structural properties. The docking and molecular simulations of VC12 were performed, which showed its high binding affinity (-23.35 kcal/mol) towards TLR4 due to intermolecular hydrogen bonds, salt bridges, and hydrophobic interactions, and there were only minimal fluctuations. Furthermore, Vc12 eliciting a good response was checked for its expression in Escherichia coli through in silico cloning and codon optimization, suggesting it to be a potent vaccine candidate.},
}
% Egor et al. 2024 (PMID 39003441): known variants incorporated into the minimap2 index to improve SNV calling.
% The acronym in the title is brace-protected against style recasing; underscores in the abstract are escaped for LaTeX.
% NOTE(review): the author list looks like given names stored as family names (PubMed record) -- confirm against the publisher page.
@article{pmid39003441,
  author   = {Egor, G and Artem, K and Maksim, B and Gaukhar, Z and Ekaterina, K and Vsevolod, M and Evgeny, K},
  title    = {Enhancing {SNV} identification in whole-genome sequencing data through the incorporation of known genetic variants into the minimap2 index.},
  journal  = {BMC bioinformatics},
  year     = {2024},
  volume   = {25},
  number   = {1},
  pages    = {238},
  pmid     = {39003441},
  issn     = {1471-2105},
  support  = {075-15-2022-294//Ministry of Science and Higher Education of the Russian Federation/ ; 075-15-2022-294//Ministry of Science and Higher Education of the Russian Federation/ ; 075-15-2021-595//Megagrant from the Government of the Russian Federation/ ; },
  mesh     = {Humans ; *Whole Genome Sequencing/methods ; *Genome, Human ; *Genetic Variation/genetics ; High-Throughput Nucleotide Sequencing/methods ; Polymorphism, Single Nucleotide/genetics ; Sequence Alignment/methods ; Software ; Algorithms ; Genome-Wide Association Study/methods ; },
  abstract = {MOTIVATION: Alignment of reads to a reference genome sequence is one of the key steps in the analysis of human whole-genome sequencing data obtained through Next-generation sequencing (NGS) technologies. The quality of the subsequent steps of the analysis, such as the results of clinical interpretation of genetic variants or the results of a genome-wide association study, depends on the correct identification of the position of the read as a result of its alignment. The amount of human NGS whole-genome sequencing data is constantly growing. There are a number of human genome sequencing projects worldwide that have resulted in the creation of large-scale databases of genetic variants of sequenced human genomes. Such information about known genetic variants can be used to improve the quality of alignment at the read alignment stage when analysing sequencing data obtained for a new individual, for example, by creating a genomic graph.
While existing methods for aligning reads to a linear reference genome have high alignment speed, methods for aligning reads to a genomic graph have greater accuracy in variable regions of the genome. The development of a read alignment method that takes into account known genetic variants in the linear reference sequence index allows combining the advantages of both sets of methods.
RESULTS: In this paper, we present the minimap2\_index\_modifier tool, which enables the construction of a modified index of a reference genome using known single nucleotide variants and insertions/deletions (indels) specific to a given human population. The use of the modified minimap2 index improves variant calling quality without modifying the bioinformatics pipeline and without significant additional computational overhead. Using the PrecisionFDA Truth Challenge V2 benchmark data (for HG002 short-read data aligned to the GRCh38 linear reference (GCA\_000001405.15) with parameters k = 27 and w = 14) it was demonstrated that the number of false negative genetic variants decreased by more than 9500, and the number of false positives decreased by more than 7000 when modifying the index with genetic variants from the Human Pangenome Reference Consortium.},
}
% Yang et al. 2024 (PMID 39003298): chromosome-level genome assembly of a sea buckthorn hybrid variety.
% Percent signs in the abstract are escaped (a raw percent sign starts a TeX comment once the field reaches a .bbl);
% the genus name in the title is brace-protected against style recasing.
@article{pmid39003298,
  author   = {Yang, X and Luo, S and Yang, S and Duoji, C and Wang, Q and Chen, Z and Yang, D and Yang, T and Wan, X and Yang, Y and Liu, T and Yang, Y},
  title    = {Chromosome-level genome assembly of {Hippophae} rhamnoides variety.},
  journal  = {Scientific data},
  year     = {2024},
  volume   = {11},
  number   = {1},
  pages    = {776},
  pmid     = {39003298},
  issn     = {2052-4463},
  mesh     = {*Hippophae/genetics ; *Genome, Plant ; *Chromosomes, Plant/genetics ; Transcriptome ; Molecular Sequence Annotation ; },
  abstract = {Fructus hippophae (Hippophae rhamnoides spp. mongolica×Hippophae rhamnoides sinensis), a hybrid variety of sea buckthorn that Hippophae rhamnoides spp. mongolica serves as the female parent and Hippophae rhamnoides sinensis serves as the male parent, is a traditional plant with great potentials of economic and medical values. Herein, we gained a chromosome-level genome of Fructus hippophae about 918.59 Mb, with the scaffolds N50 reaching 83.65 Mb. Then, we anchored 440 contigs with 97.17\% of the total genome sequences onto 12 pseudochromosomes. Next, de-novo, homology and transcriptome assembly strategies were adopted for gene structure prediction. This predicted 36475 protein-coding genes, of which 36226 genes could be functionally annotated. Simultaneously, various strategies were used for quality assessment, both the complete BUSCO value (98.80\%) and the mapping rate indicated the high assembly quality. Repetitive elements, which occupied 63.68\% of the genome, and 1483600 bp of non-coding RNA were annotated. Here, we provide genomic information on female plants of a popular variety, which can provide data for pan-genomic construction of sea buckthorn and for the resolution of the mechanism of sex differentiation.},
}
% Gao et al. 2024 (PMID 39000250): review of beef cattle genome sequencing, assembly and functional gene discovery.
@article{pmid39000250,
  author   = {Gao, Z and Lu, Y and Chong, Y and Li, M and Hong, J and Wu, J and Wu, D and Xi, D and Deng, W},
  title    = {Beef Cattle Genome Project: Advances in Genome Sequencing, Assembly, and Functional Genes Discovery.},
  journal  = {International journal of molecular sciences},
  year     = {2024},
  volume   = {25},
  number   = {13},
  pages    = {},
  doi      = {10.3390/ijms25137147},
  pmid     = {39000250},
  issn     = {1422-0067},
  support  = {202202AE090005//Major Science and Technology Projects in Yunnan Province/ ; 202401BD070001-008//Yunnan Provincial Agricultural Union Foundation/ ; XDYC-CYCX-2022-0029//"Xingdian Talent" Industry Innovation Talent Program in Yunnan Province/ ; },
  mesh     = {Animals ; Cattle/genetics ; *Genome ; Genomics/methods ; Breeding/methods ; Whole Genome Sequencing/methods ; Red Meat ; Quantitative Trait Loci ; },
  abstract = {Beef is a major global source of protein, playing an essential role in the human diet. The worldwide production and consumption of beef continue to rise, reflecting a significant trend. However, despite the critical importance of beef cattle resources in agriculture, the diversity of cattle breeds faces severe challenges, with many breeds at risk of extinction. The initiation of the Beef Cattle Genome Project is crucial. By constructing a high-precision functional annotation map of their genome, it becomes possible to analyze the genetic mechanisms underlying important traits in beef cattle, laying a solid foundation for breeding more efficient and productive cattle breeds. This review details advances in genome sequencing and assembly technologies, iterative upgrades of the beef cattle reference genome, and its application in pan-genome research. Additionally, it summarizes relevant studies on the discovery of functional genes associated with key traits in beef cattle, such as growth, meat quality, reproduction, polled traits, disease resistance, and environmental adaptability.
Finally, the review explores the potential of telomere-to-telomere (T2T) genome assembly, structural variations (SVs), and multi-omics techniques in future beef cattle genetic breeding. These advancements collectively offer promising avenues for enhancing beef cattle breeding and improving genetic traits.},
}
% Huang et al. 2024 (PMID 38999668): genome assembly and structural variation analysis of Luffa acutangula.
% Percent signs in the abstract are escaped (a raw percent sign starts a TeX comment once the field reaches a .bbl);
% the genus name in the title is brace-protected against style recasing.
@article{pmid38999668,
  author   = {Huang, A and Feng, S and Ye, Z and Zhang, T and Chen, S and Chen, C and Chen, S},
  title    = {Genome Assembly and Structural Variation Analysis of {Luffa} acutangula Provide Insights on Flowering Time and Ridge Development.},
  journal  = {Plants (Basel, Switzerland)},
  year     = {2024},
  volume   = {13},
  number   = {13},
  pages    = {},
  doi      = {10.3390/plants13131828},
  pmid     = {38999668},
  issn     = {2223-7747},
  abstract = {Luffa spp. is an important worldwide cultivated vegetable and medicinal plant from the Cucurbitaceae family. In this study, we report a high-quality chromosome-level genome of the high-generation inbred line SG261 of Luffa acutangula. The genomic sequence was determined by PacBio long reads, Hi-C sequencing reads, and 10× Genomics sequencing, with an assembly size of 739.82 Mb, contig N50 of 18.38 Mb, and scaffold N50 of 56.08 Mb. The genome of L. acutangula SG261 was predicted to contain 27,312 protein-coding genes and 72.56\% repetitive sequences, of which long terminal repeats (LTRs) were an important form of repetitive sequences, accounting for 67.84\% of the genome. Phylogenetic analysis reveals that L. acutangula evolved later than Luffa cylindrica, and Luffa is closely related to Momodica charantia. Comparing the genome of L. acutangula SG261 and L. cylindrica with PacBio data, 67,128 high-quality structural variations (SVs) and 55,978 presence-absence variations (PAVs) were identified in SG261, resulting in 2424 and 1094 genes with variation in the CDS region, respectively, and there are 287 identical genes affected by two different structural variation analyses. In addition, we found that the transcription factor FY (FLOWERING LOCUS Y) families had a large expansion in L. acutangula SG261 (flowering in the morning) compared to L. cylindrica (flowering in the afternoon), which may result in the early flowering time in L. acutangula SG261.
This study provides valuable reference for the breeding of and pan-genome research into Luffa species.},
}
% Miga 2024 (PMID 38996470): ASHG 2023 Distinguished Speakers Symposium piece on complete genomes and pangenomes.
% The page range uses the BibTeX double hyphen so styles typeset an en dash.
@article{pmid38996470,
  author   = {Miga, KH},
  title    = {From complete genomes to pangenomes.},
  journal  = {American journal of human genetics},
  year     = {2024},
  volume   = {111},
  number   = {7},
  pages    = {1265--1268},
  doi      = {10.1016/j.ajhg.2024.05.012},
  pmid     = {38996470},
  issn     = {1537-6605},
  mesh     = {Humans ; *Genome, Human ; *Genomics/methods ; Human Genetics/history ; },
  abstract = {Highlighting the Distinguished Speakers Symposium on "The Future of Human Genetics and Genomics," this collection of articles is based on presentations at the ASHG 2023 Annual Meeting in Washington, DC, in celebration of all our field has accomplished in the past 75 years, since the founding of ASHG in 1948.},
}
% Barcia-Cruz et al. 2024 (PMID 38995500): comparative genomics of six Vibrio toranzoniae strains.
% The percent sign in the abstract is escaped (a raw percent sign starts a TeX comment once the field reaches a .bbl);
% the genus name in the title is brace-protected against style recasing.
@article{pmid38995500,
  author   = {Barcia-Cruz, R and Balboa, S and Lema, A and Romalde, JL},
  title    = {Comparative genomics of {Vibrio} toranzoniae strains.},
  journal  = {International microbiology : the official journal of the Spanish Society for Microbiology},
  year     = {2024},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {38995500},
  issn     = {1618-1905},
  support  = {AGL2016-77539-R//Agencia Estatal de Investigación/ ; ED431C 2022/23//Consellería de Cultura, Educación e Ordenación Universitaria, Xunta de Galicia/ ; },
  abstract = {Vibrio toranzoniae is a marine bacterium belonging to the Splendidus clade that was originally isolated from healthy clams in Galicia (NW Spain). Its isolation from different hosts and seawater indicated two lifestyles and wide geographical distribution. The aim of the present study was to determine the differences at the genomic level among six strains (4 isolated from clam and 2 from seawater) and to determine their phylogeny. For this purpose, whole genomes of the six strains were sequenced by different technologies including Illumina and PacBio, and the resulting sequences were corrected. Genomes were annotated and compared using different online tools. Furthermore, the study of core- and pan-genomes were examined, and the phylogeny was inferred. The content of the core genome ranged from 2953 to 2766 genes and that of the pangenome ranged from 6278 to 6132, depending on the tool used. Although the strains shared certain homology, with DDH values ranging from 77.10 to 82.30 and values of OrthoANI values higher than 97\%, some differences were found related to motility, capsule synthesis, iron acquisition systems or mobile genetic elements. Phylogenetic analysis of the core genome did not reveal a differentiation of the strains according to their lifestyle (commensal or free-living), but that of the pangenome indicated certain geographical isolation in the same growing area.
This study led to the reclassification of some isolates formerly described as V. toranzoniae and demonstrated the importance of cured deposited sequences to proper phylogenetic assignment.},
}
% Li et al. 2024 (PMID 38995188): description of Noviherbaspirillum album sp. nov. isolated from urban air.
% Percent signs in the abstract are escaped (a raw percent sign starts a TeX comment once the field reaches a .bbl);
% place names in the title are brace-protected against style recasing.
@article{pmid38995188,
  author   = {Li, XY and Fang, XM and Jia, HT and Bai, JL and Su, J and Zhang, YQ and Yu, LY},
  title    = {Noviherbaspirillum album sp. nov., an airborne bacteria isolated from an urban area of {Beijing}, {China}.},
  journal  = {International journal of systematic and evolutionary microbiology},
  year     = {2024},
  volume   = {74},
  number   = {7},
  pages    = {},
  doi      = {10.1099/ijsem.0.006450},
  pmid     = {38995188},
  issn     = {1466-5034},
  mesh     = {*Phylogeny ; *RNA, Ribosomal, 16S/genetics ; Beijing ; *Base Composition ; *DNA, Bacterial/genetics ; *Fatty Acids/analysis ; *Ubiquinone ; *Bacterial Typing Techniques ; *Air Microbiology ; *Nucleic Acid Hybridization ; *Sequence Analysis, DNA ; *Phospholipids/analysis ; },
  abstract = {A Gram-negative, ellipsoidal to short-rod-shaped, motile bacterium was isolated from Beijing's urban air. The isolate exhibited the closest kinship with Noviherbaspirillum aerium 122213-3[T], exhibiting 98.4 \% 16S rRNA gene sequence similarity. Phylogenetic analyses based on 16S rRNA gene sequences and genomes showed that it clustered closely with N. aerium 122213-3[T], thus forming a distinct phylogenetic lineage within the genus Noviherbaspirillum. The average nucleotide identity and digital DNA-DNA hybridization values between strain I16B-00201[T] and N. aerium 122213-3[T] were 84.6 and 29.4 \%, respectively. The respiratory ubiquinone was ubiquinone 8. The major fatty acids (>10 \%) were summed feature 3 (C16:1ω6c/C16:1ω7c, 43.3 \%), summed feature 8 (C18:1ω7c/C18:1ω6c, 15.9 \%) and C12:0 (11.0 \%). The polyamine profile showed putrescine as the predominant compound. The polar lipid profile consisted of diphosphatidylglycerol, phosphatidylglycerol, phosphatidylethanolamine, phosphatidylcholine, unknown lipids and unknown phosphatidylaminolipids.
The phenotypic, phylogenetic and chemotaxonomic results consistently supported that strain I16B-00201[T] represented a novel species of the genus Noviherbaspirillum, for which the name Noviherbaspirillum album sp. nov. is proposed, with I16B-00201[T] (=CPCC 100848[T]=KCTC 52095[T]) designated as the type strain. Its DNA G+C content is 59.4 mol\%. Pan-genome analysis indicated that some Noviherbaspirillum species possess diverse nitrogen and aromatic compound metabolism pathways, suggesting their potential value in pollutant treatment.},
}
% Schüler et al. 2024 (PMID 38993487): genome comparison of clinical and non-clinical Clostridioides difficile strains.
% The genus name in the title is brace-protected so sentence-casing styles do not lowercase it.
@article{pmid38993487,
  author   = {Schüler, MA and Riedel, T and Overmann, J and Daniel, R and Poehlein, A},
  title    = {Comparative genome analyses of clinical and non-clinical {Clostridioides} difficile strains.},
  journal  = {Frontiers in microbiology},
  year     = {2024},
  volume   = {15},
  number   = {},
  pages    = {1404491},
  doi      = {10.3389/fmicb.2024.1404491},
  pmid     = {38993487},
  issn     = {1664-302X},
  abstract = {The pathogenic bacterium Clostridioides difficile is a worldwide health burden with increasing morbidity, mortality and antibiotic resistances. Therefore, extensive research efforts are made to unravel its virulence and dissemination. One crucial aspect for C. difficile is its mobilome, which for instance allows the spread of antibiotic resistance genes (ARG) or influence strain virulence. As a nosocomial pathogen, the majority of strains analyzed originated from clinical environments and infected individuals. Nevertheless, C. difficile can also be present in human intestines without disease development or occur in diverse environmental habitats such as puddle water and soil, from which several strains could already be isolated. We therefore performed comprehensive genome comparisons of closely related clinical and non-clinical strains to identify the effects of the clinical background. Analyses included the prediction of virulence factors, ARGs, mobile genetic elements (MGEs), and detailed examinations of the pan genome. Clinical-related trends were thereby observed. While no significant differences were identified in fundamental C. difficile virulence factors, the clinical strains carried more ARGs and MGEs, and possessed a larger accessory genome. Detailed inspection of accessory genes revealed higher abundance of genes with unknown function, transcription-associated, or recombination-related activity. Accessory genes of these functions were already highlighted in other studies in association with higher strain virulence.
This specific trend might allow the strains to react more efficiently on changing environmental conditions in the human host such as emerging stress factors, and potentially increase strain survival, colonization, and strain virulence. These findings indicated an adaptation of the strains to the clinical environment. Further, implementation of the analysis results in pairwise genome comparisons revealed that the majority of these accessory genes were encoded on predicted MGEs, shedding further light on the mobile genome of C. difficile. We therefore encourage the inclusion of non-clinical strains in comparative analyses.},
}
% Zomer et al. 2024 (PMID 38990940): ecogenomics of structural color in bacteria.
@article{pmid38990940,
  author   = {Zomer, A and Ingham, CJ and von Meijenfeldt, FAB and Escobar Doncel, Á and van de Kerkhof, GT and Hamidjaja, R and Schouten, S and Schertel, L and Müller, KH and Catón, L and Hahnke, RL and Bolhuis, H and Vignolini, S and Dutilh, BE},
  title    = {Structural color in the bacterial domain: The ecogenomics of a 2-dimensional optical phenotype.},
  journal  = {Proceedings of the National Academy of Sciences of the United States of America},
  year     = {2024},
  volume   = {121},
  number   = {29},
  pages    = {e2309757121},
  doi      = {10.1073/pnas.2309757121},
  pmid     = {38990940},
  issn     = {1091-6490},
  support  = {40-43500-98-4102/435004516//ZonMw (Netherlands Organisation for Health Research and Development)/ ; 860125//EC | HORIZON EUROPE Framework Programme (Horizon Europe)/ ; 2110570//UKRI | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; 722842//EC | HORIZON EUROPE Framework Programme (Horizon Europe)/ ; P2ZHP2_183998/SNSF_/Swiss National Science Foundation/Switzerland ; SNSF3//Isaac Newton Trust/ ; SNSF 40B1-0_198708/SNSF_/Swiss National Science Foundation/Switzerland ; 865694//EC | European Research Council (ERC)/ ; 101001637//EC | European Research Council (ERC)/ ; BB/V00364X/1//UKRI | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; 390713860//Deutsche Forschungsgemeinschaft (DFG)/ ; },
  mesh     = {*Genome, Bacterial ; Phenotype ; Color ; Bacteria/genetics/metabolism ; Proteobacteria/genetics/metabolism ; Phylogeny ; Metagenome ; Genome-Wide Association Study ; Bacteroidetes/genetics/metabolism ; },
  abstract = {Structural color is an optical phenomenon resulting from light interacting with nanostructured materials. Although structural color (SC) is widespread in the tree of life, the underlying genetics and genomics are not well understood. Here, we collected and sequenced a set of 87 structurally colored bacterial isolates and 30 related strains lacking SC.
Optical analysis of colonies indicated that diverse bacteria from at least two different phyla (Bacteroidetes and Proteobacteria) can create two-dimensional packing of cells capable of producing SC. A pan-genome-wide association approach was used to identify genes associated with SC. The biosynthesis of uroporphyrin and pterins, as well as carbohydrate utilization and metabolism, was found to be involved. Using this information, we constructed a classifier to predict SC directly from bacterial genome sequences and validated it by cultivating and scoring 100 strains that were not part of the training set. We predicted that SCr is widely distributed within gram-negative bacteria. Analysis of over 13,000 assembled metagenomes suggested that SC is nearly absent from most habitats associated with multicellular organisms except macroalgae and is abundant in marine waters and surface/air interfaces. This work provides a large-scale ecogenomics view of SC in bacteria and identifies microbial pathways and evolutionary relationships that underlie this optical phenomenon.},
}
% Perrier and Barber 2024 (PMID 38990800): pangenomics of human fungal pathogens; the record carries no abstract.
@article{pmid38990800,
  author   = {Perrier, M and Barber, AE},
  title    = {Unraveling the genomic diversity and virulence of human fungal pathogens through pangenomics.},
  journal  = {PLoS pathogens},
  year     = {2024},
  volume   = {20},
  number   = {7},
  pages    = {e1012313},
  doi      = {10.1371/journal.ppat.1012313},
  pmid     = {38990800},
  issn     = {1553-7374},
  mesh     = {Humans ; Virulence/genetics ; *Genomics/methods ; *Fungi/genetics/pathogenicity ; *Genome, Fungal ; *Genetic Variation ; *Mycoses/microbiology/genetics ; },
}
% Seersholm et al. 2024 (PMID 38987589): ancient genomics of plague infections in Scandinavian Neolithic individuals.
% The percent sign in the abstract is escaped (a raw percent sign starts a TeX comment once the field reaches a .bbl);
% the period name in the title is brace-protected against style recasing.
@article{pmid38987589,
  author   = {Seersholm, FV and Sjögren, KG and Koelman, J and Blank, M and Svensson, EM and Staring, J and Fraser, M and Pinotti, T and McColl, H and Gaunitz, C and Ruiz-Bedoya, T and Granehäll, L and Villegas-Ramirez, B and Fischer, A and Price, TD and Allentoft, ME and Iversen, AKN and Axelsson, T and Ahlström, T and Götherström, A and Storå, J and Kristiansen, K and Willerslev, E and Jakobsson, M and Malmström, H and Sikora, M},
  title    = {Repeated plague infections across six generations of {Neolithic} Farmers.},
  journal  = {Nature},
  year     = {2024},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {38987589},
  issn     = {1476-4687},
  abstract = {In the period between 5,300 and 4,900 calibrated years before present (cal. BP), populations across large parts of Europe underwent a period of demographic decline[1,2]. However, the cause of this so-called Neolithic decline is still debated. Some argue for an agricultural crisis resulting in the decline[3], others for the spread of an early form of plague[4]. Here we use population-scale ancient genomics to infer ancestry, social structure and pathogen infection in 108 Scandinavian Neolithic individuals from eight megalithic graves and a stone cist. We find that the Neolithic plague was widespread, detected in at least 17\% of the sampled population and across large geographical distances. We demonstrate that the disease spread within the Neolithic community in three distinct infection events within a period of around 120 years. Variant graph-based pan-genomics shows that the Neolithic plague genomes retained ancestral genomic variation present in Yersinia pseudotuberculosis, including virulence factors associated with disease outcomes. In addition, we reconstruct four multigeneration pedigrees, the largest of which consists of 38 individuals spanning six generations, showing a patrilineal social organization.
Lastly, we document direct genomic evidence for Neolithic female exogamy in a woman buried in a different megalithic tomb than her brothers. Taken together, our findings provide a detailed reconstruction of plague spread within a large patrilineal kinship group and identify multiple plague infections in a population dated to the beginning of the Neolithic decline.},
}
% Khan et al. 2024 (PMID 38987396): transcriptome and metabolome profiling of sorghum seed development.
@article{pmid38987396,
  author   = {Khan, A and Tian, R and Bean, SR and Yerka, M and Jiao, Y},
  title    = {Transcriptome and metabolome analyses reveal regulatory networks associated with nutrition synthesis in sorghum seeds.},
  journal  = {Communications biology},
  year     = {2024},
  volume   = {7},
  number   = {1},
  pages    = {841},
  pmid     = {38987396},
  issn     = {2399-3642},
  mesh     = {*Sorghum/genetics/metabolism ; *Seeds/metabolism/genetics/growth & development ; *Metabolome ; *Transcriptome ; *Gene Expression Regulation, Plant ; Gene Regulatory Networks ; Gene Expression Profiling ; Endosperm/metabolism/genetics ; Starch/biosynthesis/metabolism ; Edible Grain/genetics/metabolism ; },
  abstract = {Cereal seeds are vital for food, feed, and agricultural sustainability because they store and provide essential nutrients to human and animal food and feed systems. Unraveling molecular processes in seed development is crucial for enhancing cereal grain yield and quality. We analyze spatiotemporal transcriptome and metabolome profiles during sorghum seed development in the inbred line 'BTx623'. Morphological and molecular analyses identify the key stages of seed maturation, specifying starch biosynthesis onset at 5 days post-anthesis (dpa) and protein at 10 dpa. Transcriptome profiling from 1 to 25 dpa reveal dynamic gene expression pathways, shifting from cellular growth and embryo development (1-5 dpa) to cell division, fatty acid biosynthesis (5-25 dpa), and seed storage compounds synthesis in the endosperm (5-25 dpa). Network analysis identifies 361 and 207 hub genes linked to starch and protein synthesis in the endosperm, respectively, which will help breeders enhance sorghum grain quality. The availability of this data in the sorghum reference genome line establishes a baseline for future studies as new pangenomes emerge, which will consider copy number and presence-absence variation in functional food traits.},
}
% Zhang et al. 2024 (PMID 38977857): telomere-to-telomere super-pangenome of the genus Citrullus (watermelon).
% The genus name in the title is brace-protected so sentence-casing styles do not lowercase it.
@article{pmid38977857,
  author   = {Zhang, Y and Zhao, M and Tan, J and Huang, M and Chu, X and Li, Y and Han, X and Fang, T and Tian, Y and Jarret, R and Lu, D and Chen, Y and Xue, L and Li, X and Qin, G and Li, B and Sun, Y and Deng, XW and Deng, Y and Zhang, X and He, H},
  title    = {Telomere-to-telomere {Citrullus} super-pangenome provides direction for watermelon breeding.},
  journal  = {Nature genetics},
  year     = {2024},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {38977857},
  issn     = {1546-1718},
  abstract = {To decipher the genetic diversity within the cucurbit genus Citrullus, we generated telomere-to-telomere (T2T) assemblies of 27 distinct genotypes, encompassing all seven Citrullus species. This T2T super-pangenome has expanded the previously published reference genome, T2T-G42, by adding 399.2 Mb and 11,225 genes. Comparative analysis has unveiled gene variants and structural variations (SVs), shedding light on watermelon evolution and domestication processes that enhanced attributes such as bitterness and sugar content while compromising disease resistance. Multidisease-resistant loci from Citrullus amarus and Citrullus mucosospermus were successfully introduced into cultivated Citrullus lanatus. The SVs identified in C. lanatus have not only been inherited from cordophanus but also from C. mucosospermus, suggesting additional ancestors beyond cordophanus in the lineage of cultivated watermelon. Our investigation substantially improves the comprehension of watermelon genome diversity, furnishing comprehensive reference genomes for all Citrullus species. This advancement aids in the exploration and genetic enhancement of watermelon using its wild relatives.},
}
% Vakirlis and Kupczok 2024 (PMID 38977308): species-specific orphan genes in the human gut microbiome.
% Percent signs in the abstract are escaped (a raw percent sign starts a TeX comment once the field reaches a .bbl).
@article{pmid38977308,
  author   = {Vakirlis, N and Kupczok, A},
  title    = {Large-scale investigation of species-specific orphan genes in the human gut microbiome elucidates their evolutionary origins.},
  journal  = {Genome research},
  year     = {2024},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1101/gr.278977.124},
  pmid     = {38977308},
  issn     = {1549-5469},
  abstract = {Species-specific genes, also known as orphans, are ubiquitous across life's domains. In prokaryotes, species-specific orphan genes (SSOGs) are mostly thought to originate in external elements such as viruses followed by horizontal gene transfer, whereas the scenario of native origination, through rapid divergence or de novo, is mostly dismissed. However, quantitative evidence supporting either scenario is lacking. Here, we systematically analyzed genomes from 4644 human gut microbiome species and identified more than 600,000 unique SSOGs, representing an average of 2.6\% of a given species' pangenome. These sequences are mostly rare within each species yet show signs of purifying selection. Overall, SSOGs use optimal codons less frequently, and their proteins are more disordered than those of conserved genes (i.e., non-SSOGs). Importantly, across species, the GC content of SSOGs closely matches that of conserved ones. In contrast, the ∼5\% of SSOGs that share similarity to known viral sequences have distinct characteristics, including lower GC content. Thus, SSOGs with similarity to viruses differ from the remaining SSOGs, contrasting an external origination scenario for most of them. By examining the orthologous genomic region in closely related species, we show that a small subset of SSOGs likely evolved natively de novo and find that these genes also differ in their properties from the remaining SSOGs.
Our results challenge the notion that external elements are the dominant source of prokaryotic genetic novelty and will enable future studies into the biological role and relevance of species-specific genes in the human gut.},
}
% Lee, Hunter and Shim 2024 (PMID 38974320): pangenome analysis of bacteriophages infecting ESKAPE pathogens.
% The acronym in the title is brace-protected so sentence-casing styles do not lowercase it.
@article{pmid38974320,
  author   = {Lee, J and Hunter, B and Shim, H},
  title    = {A pangenome analysis of {ESKAPE} bacteriophages: the underrepresentation may impact machine learning models.},
  journal  = {Frontiers in molecular biosciences},
  year     = {2024},
  volume   = {11},
  number   = {},
  pages    = {1395450},
  pmid     = {38974320},
  issn     = {2296-889X},
  abstract = {Bacteriophages are the most prevalent biological entities in the biosphere. However, limitations in both medical relevance and sequencing technologies have led to a systematic underestimation of the genetic diversity within phages. This underrepresentation not only creates a significant gap in our understanding of phage roles across diverse biosystems but also introduces biases in computational models reliant on these data for training and testing. In this study, we focused on publicly available genomes of bacteriophages infecting high-priority ESKAPE pathogens to show the extent and impact of this underrepresentation. First, we demonstrate a stark underrepresentation of ESKAPE phage genomes within the public genome and protein databases. Next, a pangenome analysis of these ESKAPE phages reveals extensive sharing of core genes among phages infecting the same host. Furthermore, genome analyses and clustering highlight close nucleotide-level relationships among the ESKAPE phages, raising concerns about the limited diversity within current public databases. Lastly, we uncover a scarcity of unique lytic phages and phage proteins with antimicrobial activities against ESKAPE pathogens. This comprehensive analysis of the ESKAPE phages underscores the severity of underrepresentation and its potential implications. This lack of diversity in phage genomes may restrict the resurgence of phage therapy and cause biased outcomes in data-driven computational models due to incomplete and unbalanced biological datasets.},
}
@article {pmid38967823, year = {2024}, author = {Lu, YT and Wu, YY and Li, YN and Zheng, WY and Liu, WZ}, title = {Saccharopolyspora mangrovi sp. nov., a novel mangrove soil actinobacterium with distinct metabolic potential revealed by comparative genomic analysis.}, journal = {Archives of microbiology}, volume = {206}, number = {8}, pages = {342}, pmid = {38967823}, issn = {1432-072X}, support = {32202121//National Natural Science Foundation of China/ ; }, mesh = {*Soil Microbiology ; *Phylogeny ; *Saccharopolyspora/genetics/metabolism/classification ; *RNA, Ribosomal, 16S/genetics ; *Genome, Bacterial ; *DNA, Bacterial/genetics ; Multigene Family ; Genomics ; Sequence Analysis, DNA ; Wetlands ; Nucleic Acid Hybridization ; Bacterial Typing Techniques ; }, abstract = {A novel mangrove soil-derived actinomycete, strain S2-29[T], was found to be most closely related to Saccharopolyspora karakumensis 5K548[T] based on 16S rRNA sequence (99.24% similarity) and genomic phylogenetic analyses. However, significant divergence in digital DNA-DNA hybridization, average nucleotide identity, and unique biosynthetic gene cluster possession distinguished S2-29[T] as a distinct Saccharopolyspora species. Pan genome evaluation revealed exceptional genomic flexibility in genus Saccharopolyspora, with > 95% accessory genome content. Strain S2-29[T] harbored 718 unique genes, largely implicated in energetic metabolisms, indicating different metabolic capacities from its close relatives. Several uncharacterized biosynthetic gene clusters in strain S2-29[T] highlighted the strain's untapped capacity to produce novel functional compounds with potential biotechnological applications. Designation as novel species Saccharopolyspora mangrovi sp. nov. (type strain S2-29[T] = JCM 34548[T] = CGMCC 4.7716[T]) was warranted, expanding the known Saccharopolyspora diversity and ecology. 
The discovery of this mangrove-adapted strain advances understanding of the genus while highlighting an untapped source of chemical diversity.}, }
@article {pmid38966864, year = {2024}, author = {Wang, Y and Ding, K and Li, H and Kuang, Y and Liang, Z}, title = {Biography of {Vitis} genomics: recent advances and prospective.}, journal = {Horticulture research}, volume = {11}, number = {7}, pages = {uhae128}, doi = {10.1093/hr/uhae128}, pmid = {38966864}, issn = {2662-6810}, abstract = {The grape genome is the basis for grape studies and breeding, and is also important for grape industries. In the last two decades, more than 44 grape genomes have been sequenced. Based on these genomes, researchers have made substantial progress in understanding the mechanism of biotic and abiotic resistance, berry quality formation, and breeding strategies. In addition, this work has provided essential data for future pangenome analyses. Apart from de novo assembled genomes, more than six whole-genome sequencing projects have provided datasets comprising almost 5000 accessions. Based on these datasets, researchers have explored the domestication and origins of the grape and clarified the gene flow that occurred during its dispersed history. Moreover, genome-wide association studies and other methods have been used to identify more than 900 genes related to resistance, quality, and developmental phases of grape. These findings have benefited grape studies and provide some basis for smart genomic selection breeding. Moreover, the grape genome has played a great role in grape studies and the grape industry, and the importance of genomics will increase sharply in the future.}, }
@article {pmid38965152, year = {2024}, author = {Sundaresan, AK and Gangwar, J and Murugavel, A and Malli Mohan, GB and Ramakrishnan, J}, title = {Complete genome sequence, phenotypic correlation and pangenome analysis of uropathogenic {Klebsiella} spp.}, journal = {AMB Express}, volume = {14}, number = {1}, pages = {78}, pmid = {38965152}, issn = {2191-0855}, abstract = {Urinary tract infections (UTI) by antibiotic resistant and virulent K. pneumoniae are a growing concern. Understanding the genome and validating the genomic profile along with pangenome analysis will facilitate surveillance of high-risk clones of K. pneumoniae to underpin management strategies toward early detection. The present study aims to correlate resistome with phenotypic antimicrobial resistance and virulome with pathogenicity in Klebsiella spp. The present study aimed to perform complete genome sequences of Klebsiella spp. and to analyse the correlation of resistome with phenotypic antimicrobial resistance and virulome with pathogenicity. To understand the resistome, pangenome and virulome in the Klebsiella spp, the ResFinder, CARD, IS Finder, PlasmidFinder, PHASTER, Roary, VFDB were used. The phenotypic susceptibility profiling identified the uropathogenic kp3 to exhibit multi drug resistance. The resistome and in vitro antimicrobial profiling showed concordance with all the tested antibiotics against the study strains. Hypermucoviscosity was not observed for any of the test isolates; this phenotypic character matches perfectly with the absence of rmpA and magA genes. To the best of our knowledge, this is the first report on the presence of ste, stf, stc and sti major fimbrial operons of Salmonella enterica serotype Typhimurium in K. pneumoniae genome. The study identifies the discordance of virulome and virulence in Klebsiella spp. The complete genome analysis and phenotypic correlation identify uropathogenic K. pneumoniae kp3 as a carbapenem-resistant and virulent pathogen. 
The Pangenome of K. pneumoniae was open suggesting high genetic diversity. Diverse K serotypes were observed. Sequence typing reveals the prevalence of K. pneumoniae high-risk clones in UTI catheterised patients. The study also highlights the concordance of resistome and in vitro susceptibility tests. Importantly, the study identifies the necessity of virulome and phenotypic virulence markers for timely diagnosis and immediate treatment for the management of high-risk K. pneumoniae clones.}, }
@article {pmid38962716, year = {2024}, author = {Li, X and Dai, X and He, H and Lv, Y and Yang, L and He, W and Liu, C and Wei, H and Liu, X and Yuan, Q and Wang, X and Wang, T and Zhang, B and Zhang, H and Chen, W and Leng, Y and Yu, X and Qian, H and Zhang, B and Guo, M and Zhang, Z and Shi, C and Zhang, Q and Cui, Y and Xu, Q and Cao, X and Chen, D and Zhou, Y and Qian, Q and Shang, L}, title = {A {pan-TE} map highlights transposable elements underlying domestication and agronomic traits in {Asian} rice.}, journal = {National science review}, volume = {11}, number = {6}, pages = {nwae188}, pmid = {38962716}, issn = {2053-714X}, abstract = {Transposable elements (TEs) are ubiquitous genomic components and hard to study due to being highly repetitive. Here we assembled 232 chromosome-level genomes based on long-read sequencing data. Coupling the 232 genomes with 15 existing assemblies, we developed a pan-TE map comprising both cultivated and wild Asian rice. We detected 177 084 high-quality TE variations and inferred their derived state using outgroups. We found TEs were one source of phenotypic variation during rice domestication and differentiation. We identified 1246 genes whose expression variation was associated with TEs but not single-nucleotide polymorphisms (SNPs), such as OsRbohB, and validated OsRbohB's relative expression activity using a dual-Luciferase (LUC) reporter assays system. Our pan-TE map allowed us to detect multiple novel loci associated with agronomic traits. Collectively, our findings highlight the contributions of TEs to domestication, differentiation and agronomic traits in rice, and there is massive potential for gene cloning and molecular breeding by the high-quality Asian pan-TE map we generated.}, }
@article {pmid38962128, year = {2024}, author = {Zhang, B and Ren, H and Wang, X and Han, C and Jin, Y and Hu, X and Shi, R and Li, C and Wang, Y and Li, Y and Lu, S and Liu, Z and Hu, P}, title = {Comparative genomics analysis to explore the biodiversity and mining novel target genes of {Listeria} monocytogenes strains from different regions.}, journal = {Frontiers in microbiology}, volume = {15}, pages = {1424868}, pmid = {38962128}, issn = {1664-302X}, abstract = {As a common foodborne pathogen, infection with L. monocytogenes poses a significant threat to human life and health. The objective of this study was to employ comparative genomics to unveil the biodiversity and evolutionary characteristics of L. monocytogenes strains from different regions, screening for potential target genes and mining novel target genes, thus providing significant reference value for the specific molecular detection and therapeutic targets of L. monocytogenes strains. Pan-genomic analysis revealed that L. monocytogenes from different regions have open genomes, providing a solid genetic basis for adaptation to different environments. These strains contain numerous virulence genes that contribute to their high pathogenicity. They also exhibit relatively high resistance to phosphonic acid, glycopeptide, lincosamide, and peptide antibiotics. The results of mobile genetic elements indicate that, despite being located in different geographical locations, there is a certain degree of similarity in bacterial genome evolution and adaptation to specific environmental pressures. The potential target genes identified through pan-genomics are primarily associated with the fundamental life activities and infection invasion of L. monocytogenes, including known targets such as inlB, which can be utilized for molecular detection and therapeutic purposes. 
After screening a large number of potential target genes, we further screened them using hub gene selection methods to mining novel target genes. The present study employed eight different hub gene screening methods, ultimately identifying ten highly connected hub genes (bglF_1, davD, menE_1, tilS, dapX, iolC, gshAB, cysG, trpA, and hisC), which play crucial roles in the pathogenesis of L. monocytogenes. The results of pan-genomic analysis showed that L. monocytogenes from different regions exhibit high similarity in bacterial genome evolution. The PCR results demonstrated the excellent specificity of the bglF_1 and davD genes for L. monocytogenes. Therefore, the bglF_1 and davD genes hold promise as specific molecular detection and therapeutic targets for L. monocytogenes strains from different regions.}, }
@article {pmid38960860, year = {2024}, author = {Heumos, S and Guarracino, A and Schmelzle, JM and Li, J and Zhang, Z and Hagmann, J and Nahnsen, S and Prins, P and Garrison, E}, title = {Pangenome graph layout by {Path-Guided Stochastic Gradient Descent}.}, journal = {Bioinformatics (Oxford, England)}, doi = {10.1093/bioinformatics/btae363}, pmid = {38960860}, issn = {1367-4811}, abstract = {MOTIVATION: The increasing availability of complete genomes demands for models to study genomic variability within entire populations. Pangenome graphs capture the full genomic similarity and diversity between multiple genomes. In order to understand them, we need to see them. For visualization, we need a human readable graph layout: A graph embedding in low (e.g. two) dimensional depictions. Due to a pangenome graph's potential excessive size, this is a significant challenge.
RESULTS: In response, we introduce a novel graph layout algorithm: the Path-Guided Stochastic Gradient Descent (PG-SGD). PG-SGD uses the genomes, represented in the pangenome graph as paths, as an embedded positional system to sample genomic distances between pairs of nodes. This avoids the quadratic cost seen in previous versions of graph drawing by Stochastic Gradient Descent (SGD). We show that our implementation efficiently computes the low dimensional layouts of gigabase-scale pangenome graphs, unveiling their biological features.
AVAILABILITY: We integrated PG-SGD in ODGI which is released as free software under the MIT open source license. Source code is available at https://github.com/pangenome/odgi.}, }
@article {pmid38955273, year = {2024}, author = {Tong, W and Yang, D and Qiu, S and Tian, S and Ye, Z and Yang, S and Yan, L and Li, W and Li, N and Pei, X and Sun, Z and Liu, C and Peng, S and Li, Y and Wang, Q and Peng, Z}, title = {Relevance of genetic causes and environmental adaptation of {Cronobacter} spp. isolated from infant and follow-up formula production factories and retailed products in {China}: A 7-year period of continuous surveillance based on genome-wide analysis.}, journal = {The Science of the total environment}, pages = {174368}, doi = {10.1016/j.scitotenv.2024.174368}, pmid = {38955273}, issn = {1879-1026}, abstract = {The possible contamination routes, environmental adaptation, and genetic basis of Cronobacter spp. in infant and follow-up formula production factories and retailed products in mainland China have been determined by laboratory studies and whole-genome comparative analysis in a 7-year nationwide continuous surveillance spanning from 2012 to 2018. The 2-year continuous multicenter surveillance of the production process (conducted in 2013 and 2014) revealed that the source of Cronobacter spp. in the dry-blending process was the raw dry ingredients and manufacturing environment (particularly in the vibro sieve and vacuum cleaner), while in the combined process, the main contamination source was identified as the packing room. It is important to note that, according to the contamination control knowledge obtained from the production process surveillance, the contamination rate of retail powdered infant formula (PIF) and follow-up formula (FUF) products in China decreased significantly from 2016 onward, after improving the hygiene management practices in factories. The prevalence of Cronobacter spp. in retailed PIF and FUF in China in 2018 was dramatically reduced from 1.55 % (61/3925, in 2012) to an average as low as 0.17 % (13/7655 in 2018). 
Phenotype determination and genomic analysis were performed on a total of 90 Cronobacter spp. isolates obtained from the surveillance. Of the 90 isolates, only two showed resistance to either cefazolin or cefoxitin. The multilocus sequence typing results revealed that C. sakazakii sequence type 1 (ST1), ST37, and C. malonaticus ST7 were the dominant sequence types (STs) collected from the production factories, while C. sakazakii ST1, ST4, ST64, and ST8 were the main STs detected in the retailed PIF and FUF nationwide. One C. sakazakii ST4 isolate (1.1 %, 1/90) had strong biofilm-forming ability and 13 isolates (14.4 %, 13/90) had weak biofilm-forming ability. Genomic analysis revealed that Cronobacter spp. have a relatively stable core-genome and an increasing pan-genome size. Plasmid IncFIB (pCTU3) was prevalent in this genus and some contained 14 antibacterial biocide- and metal-resistance genes (BMRGs) including copper, silver, and arsenic resistant genes. Plasmid IncN_1 was predicted to contain 6 ARGs. This is the first time that a multi-drug resistance IncN_1 type plasmid has been reported in Cronobacter spp. Genomic variations with respect to BMRGs, virulence genes, antimicrobial resistance genes (ARGs), and genes involved in biofilm formation were observed among strains of this genus. There were apparent differences in copies of bcsG and flgJ between the biofilm-forming group and non-biofilm-forming group, indicating that these two genes play key roles in biofilm formation. The findings of this study have improved our understanding of the contamination characteristics and genetic basis of Cronobacter spp. in PIF and FUF and their production environment in China and provide important guidance to reduce contamination with this pathogen during the production of PIF and FUF.}, }
@article {pmid38955887, year = {2024}, author = {Dahiya, P and Kumar, P and Rani, S and Dang, AS and Suneja, P}, title = {Comparative Genomic and Functional Analyses for Insights into {Pantoea} agglomerans Strains Adaptability in Diverse Ecological Niches.}, journal = {Current microbiology}, volume = {81}, number = {8}, pages = {254}, pmid = {38955887}, issn = {1432-0991}, mesh = {*Pantoea/genetics/physiology/classification ; *Genome, Bacterial ; *Phylogeny ; Genomics ; Adaptation, Physiological ; Humans ; Animals ; Plants/microbiology ; Bacterial Proteins/genetics/metabolism ; }, abstract = {Pantoea agglomerans inhabit diverse ecological niches, ranging from epiphytes and endophytes in plants, body of animals, and occasionally in the human system. This multifaceted bacterium contributes substantially to plant growth promotion, stress resilience, and biocontrol but can also act as a pathogen to its host. The genetic determinants underlying these diverse functions remain largely unfathomed and to uncover this phenomenon, nineteen strains of Pantoea agglomerans were selected and analyzed. Genome-to-Genome Distance Calculator (GGDC) which uses the Genome Blast Distance Phylogeny (GBDP) technique to calculate digital DDH values. Phylogenetic analysis via Genome-to-Genome distance, Average Nucleotide Identity, and Amino Acid Identity calculation revealed that all strains belonged to the genus Pantoea. However, strain 33.1 had a lower value than the threshold for the same species delineation. Bacterial Pan Genome Analysis (BPGA) Pipeline and MinPath analysis revealed genetic traits associated with environmental resilience, such as oxidative stress, UV radiation, temperature extremes, and metabolism of distinct host-specific carbohydrates. 
Protein-protein interactome analysis illustrated osmotic stress proteins closely linked with core proteins, while heavy metal tolerance, nitrogen metabolism, and Type III and VI secretion systems proteins generally associated with pathogenicity formed a separate network, indicating strain-specific characteristics. These findings shed new light on the intricate genetic architecture of Pantoea agglomerans, revealing its adaptability to inhabit diverse niches and thrive in varied environments.}, }
@article {pmid38952705, year = {2024}, author = {Shchyogolev, SY and Burygin, GL and Dykman, LA and Matora, LY}, title = {Phylogenetic and pangenomic analyses of members of the family {Micrococcaceae} related to a plant-growth-promoting rhizobacterium isolated from the rhizosphere of potato ({Solanum} tuberosum {L.}).}, journal = {Vavilovskii zhurnal genetiki i selektsii}, volume = {28}, number = {3}, pages = {308--316}, doi = {10.18699/vjgb-24-35}, pmid = {38952705}, issn = {2500-0462}, abstract = {We report the results of taxonomic studies on members of the family Micrococcaceae that, according to the 16S rRNA, internal transcribed spacer 1 (ITS1), average nucleotide identity (ANI), and average amino acid identity (AAI) tests, are related to Kocuria rosea strain RCAM04488, a plant-growth-promoting rhizobacterium (PGPR) isolated from the rhizosphere of potato (Solanum tuberosum L.). In these studies, we used whole-genome phylogenetic tests and pangenomic analysis. According to the ANI > 95 % criterion, several known members of K. salina, K. polaris, and K. rosea (including K. rosea type strain ATCC 186T) that are related most closely to isolate RCAM04488 in the ITS1 test should be assigned to the same species with appropriate strain verification. However, these strains were isolated from strongly contrasting ecological and geographical habitats, which could not but affect their genotypes and phenotypes and which should be taken into account in evaluation of their systematic position. This contradiction was resolved by a pangenomic analysis, which showed that the strains differed strongly in the number of accessory and strain-specific genes determining their individuality and possibly their potential for adaptation to different ecological niches. Similar results were obtained in a full-scale AAI test against the UniProt database (about 250 million records), by using the AAI-profiler program and the proteome of K. rosea strain ATCC 186T as a query. 
According to the AAI > 65 % criterion, members of the genus Arthrobacter and several other genera belonging to the class Actinomycetes, with a very wide geographical and ecological range of sources of isolation, should be placed into the same genus as Kocuria. Within the paradigm with vertically inherited phylogenetic markers, this could be regarded as a signal for their following taxonomic reclassification. An important factor in this case may be the detailing of the gene composition of the strains and the taxonomic ratios resulting from analysis of the pangenomes of the corresponding clades.}, }
@article {pmid38951917, year = {2024}, author = {Niu, J and Wang, W and Wang, Z and Chen, Z and Zhang, X and Qin, Z and Miao, L and Yang, Z and Xie, C and Xin, M and Peng, H and Yao, Y and Liu, J and Ni, Z and Sun, Q and Guo, W}, title = {Tagging large {CNV} blocks in wheat boosts digitalization of germplasm resources by ultra-low-coverage sequencing.}, journal = {Genome biology}, volume = {25}, number = {1}, pages = {171}, pmid = {38951917}, issn = {1474-760X}, support = {2020YFE0202300//National Key Research and Development Program of China/ ; 32322059//National Natural Science Foundation of China/ ; PC2023B01016//Pinduoduo-China Agricultural University Research Fund/ ; BX20230414//National Postdoctoral Program for Innovative Talents/ ; }, mesh = {*Triticum/genetics ; *DNA Copy Number Variations ; Genome, Plant ; High-Throughput Nucleotide Sequencing ; Genetic Markers ; Alleles ; }, abstract = {BACKGROUND: The massive structural variations and frequent introgression highly contribute to the genetic diversity of wheat, while the huge and complex genome of polyploid wheat hinders efficient genotyping of abundant varieties towards accurate identification, management, and exploitation of germplasm resources.
RESULTS: We develop a novel workflow that identifies 1240 high-quality large copy number variation blocks (CNVb) in wheat at the pan-genome level, demonstrating that CNVb can serve as an ideal DNA fingerprinting marker for discriminating massive varieties, with the accuracy validated by PCR assay. We then construct a digitalized genotyping CNVb map across 1599 global wheat accessions. Key CNVb markers are linked with trait-associated introgressions, such as the 1RS·1BL translocation and 2N[v]S translocation, and the beneficial alleles, such as the end-use quality allele Glu-D1d (Dx5 + Dy10) and the semi-dwarf r-e-z allele. Furthermore, we demonstrate that these tagged CNVb markers promote a stable and cost-effective strategy for evaluating wheat germplasm resources with ultra-low-coverage sequencing data, competing with SNP array for applications such as evaluating new varieties, efficient management of collections in gene banks, and describing wheat germplasm resources in a digitalized manner. We also develop a user-friendly interactive platform, WheatCNVb (http://wheat.cau.edu.cn/WheatCNVb/), for exploring the CNVb profiles over ever-increasing wheat accessions, and also propose a QR-code-like representation of individual digital CNVb fingerprint. This platform also allows uploading new CNVb profiles for comparison with stored varieties.
CONCLUSIONS: The CNVb-based approach provides a low-cost and high-throughput genotyping strategy for enabling digitalized wheat germplasm management and modern breeding with precise and practical decision-making.}, }
@article {pmid38951884, year = {2024}, author = {Lamkiewicz, K and Barf, LM and Sachse, K and Hölzer, M}, title = {{RIBAP}: a comprehensive bacterial core genome annotation pipeline for pangenome calculation beyond the species level.}, journal = {Genome biology}, volume = {25}, number = {1}, pages = {170}, pmid = {38951884}, issn = {1474-760X}, support = {NFDI 28/1//Deutsche Forschungsgemeinschaft/ ; FZT 118//Deutsche Forschungsgemeinschaft/ ; SFB 1076/3 A06//Deutsche Forschungsgemeinschaft/ ; }, mesh = {*Genome, Bacterial ; *Molecular Sequence Annotation ; *Software ; Brucella/genetics/classification ; Bacteria/genetics/classification ; Chlamydia/genetics ; Enterococcus/genetics ; Klebsiella/genetics ; }, abstract = {Microbial pangenome analysis identifies present or absent genes in prokaryotic genomes. However, current tools are limited when analyzing species with higher sequence diversity or higher taxonomic orders such as genera or families. The Roary ILP Bacterial core Annotation Pipeline (RIBAP) uses an integer linear programming approach to refine gene clusters predicted by Roary for identifying core genes. RIBAP successfully handles the complexity and diversity of Chlamydia, Klebsiella, Brucella, and Enterococcus genomes, outperforming other established and recent pangenome tools for identifying all-encompassing core genes at the genus level. RIBAP is a freely available Nextflow pipeline at github.com/hoelzer-lab/ribap and zenodo.org/doi/10.5281/zenodo.10890871.}, }
@article {pmid38950433, year = {2024}, author = {Rodriguez-Valera, F and Bellas, C}, title = {How Viruses Shape Microbial Plankton Microdiversity.}, journal = {Annual review of marine science}, doi = {10.1146/annurev-marine-040623-090847}, pmid = {38950433}, issn = {1941-0611}, abstract = {One major conundrum of modern microbiology is the large pangenome (gene pool) present in microbes, which is much larger than those found in complex organisms such as humans. Here, we argue that this diversity of gene pools carried by different strains is maintained largely due to the control exercised by viral predation. Viruses maintain a high strain diversity through time that we describe as constant-diversity equilibrium, preventing the hoarding of resources by specific clones. Thus, viruses facilitate the release and degradation of dissolved organic matter in the ocean, which may lead to better ecosystem functioning by linking top-down to bottom-up control. By maintaining this equilibrium, viruses act as a key element of the adaptation of marine microbes to their environment and likely evolve as a single evolutionary unit.}, }
@article {pmid38948819, year = {2024}, author = {Kantor, EJH and Robicheau, BM and Tolman, J and Archibald, JM and LaRoche, J}, title = {Targeted metagenomics reveals pangenomic diversity of the nitroplast ({UCYN-A}) and its algal host plastid.}, journal = {bioRxiv : the preprint server for biology}, doi = {10.1101/2024.06.19.599377}, pmid = {38948819}, issn = {2692-8205}, abstract = {UCYN-A (Cand. Atelocyanobacterium thalassa) has recently been recognized as a globally-distributed, early stage, nitrogen-fixing organelle (the 'nitroplast') of cyanobacterial origin present in select species of haptophyte algae (e.g., Braarudosphaera bigelowii). Although the nitroplast was recognized as the UCYN-A2 sublineage, it is yet to be confirmed in other sublineages of the algal/UCYN-A complex. We used water samples collected from Halifax Harbour (Bedford Basin, Nova Scotia, Canada) and the offshore Scotian Shelf to further our understanding of B. bigelowii and UCYN-A in the coastal Northwest Atlantic. Sequencing data revealed UCYN-A-associated haptophyte signatures and yielded near-complete metagenome-assembled genomes (MAGs) for UCYN-A1, UCYN-A4, and the plastid of the A4-associated haptophyte. Comparative genomics provided new insights into the pangenome of UCYN-A. The UCYN-A4 MAG is the first genome sequenced from this sublineage and shares ~85% identity with the UCYN-A2 nitroplast. Genes missing in the reduced genome of the nitroplast were also missing in the A4 MAG supporting its likely classification as a nitroplast as well. The UCYN-A1 MAG was found to be nearly 100% identical to the reference genome despite coming from different ocean basins. Time-series data paired with the recurrence of specific microbes in enrichment cultures gave insight into the microbes that frequently co-occur with the algal/UCYN-A complex (e.g., Pelagibacter ubique). 
Overall, our study expands knowledge of UCYN-A and its host across major ocean basins and investigates their co-occurring microbes in the coastal Northwest Atlantic (NWA), thereby facilitating future studies on the underpinnings of haptophyte-associated diazotrophy in the sea.}, }
@article {pmid38948139, year = {2024}, author = {Zhang, P and Zhang, B and Ji, YY and Jiao, J and Zhang, Z and Tian, CF}, title = {Cofitness network connectivity determines a fuzzy essential zone in open bacterial pangenome.}, journal = {mLife}, volume = {3}, number = {2}, pages = {277--290}, pmid = {38948139}, issn = {2770-100X}, abstract = {Most in silico evolutionary studies commonly assumed that core genes are essential for cellular function, while accessory genes are dispensable, particularly in nutrient-rich environments. However, this assumption is seldom tested genetically within the pangenome context. In this study, we conducted a robust pangenomic Tn-seq analysis of fitness genes in a nutrient-rich medium for Sinorhizobium strains with a canonical open pangenome. To evaluate the robustness of fitness category assignment, Tn-seq data for three independent mutant libraries per strain were analyzed by three methods, which indicates that the Hidden Markov Model (HMM)-based method is most robust to variations between mutant libraries and not sensitive to data size, outperforming the Bayesian and Monte Carlo simulation-based methods. Consequently, the HMM method was used to classify the fitness category. Fitness genes, categorized as essential (ES), advantage (GA), and disadvantage (GD) genes for growth, are enriched in core genes, while nonessential genes (NE) are over-represented in accessory genes. Accessory ES/GA genes showed a lower fitness effect than core ES/GA genes. Connectivity degrees in the cofitness network decrease in the order of ES, GD, and GA/NE. In addition to accessory genes, 1599 out of 3284 core genes display differential essentiality across test strains. Within the pangenome core, both shared quasi-essential (ES and GA) and strain-dependent fitness genes are enriched in similar functional categories. 
Our analysis demonstrates a considerable fuzzy essential zone determined by cofitness connectivity degrees in Sinorhizobium pangenome and highlights the power of the cofitness network in understanding the genetic basis of ever-increasing prokaryotic pangenome data.}, }
@article {pmid38947078, year = {2024}, author = {Socarras, KM and Marino, MC and Earl, JP and Ehrlich, RL and Cramer, NA and Mell, JC and Sen, B and Ahmed, A and Marconi, RT and Ehrlich, GD}, title = {Characterization of the family-level {Borreliaceae} pan-genome and development of an episomal typing protocol.}, journal = {Research square}, doi = {10.21203/rs.3.rs-4491589/v1}, pmid = {38947078}, abstract = {Background The Borreliaceae family includes many obligate parasitic bacterial species which are etiologically associated with a myriad of zoonotic borrelioses including Lyme disease and vector-borne relapsing fevers. Infections by the Borreliaceae are difficult to detect by both direct and indirect methods, often leading to delayed and missed diagnoses. Efforts to improve diagnoses center around the development of molecular diagnostics (MDx), but due to deep tissue sequestration of the causative spirochaetes and the lack of persistent bacteremias, even MDx assays suffer from a lack of sensitivity. Additionally, the highly extensive genomic heterogeneity among isolates, even within the same species, contributes to the lack of assay sensitivity as single target assays cannot provide universal coverage. This within-species heterogeneity is partly due to differences in replicon repertoires and genomic structures that have likely arisen to support the complex Borreliaceae lifecycle in which these parasites have to survive in multiple hosts each with unique immune responses. Results We constructed a Borreliaceae family-level pangenome and characterized the phylogenetic relationships among the constituent taxa which supports the recent taxonomy of splitting the family into at least two genera. Gene content profiles were created for the majority of the Borreliaceae replicons, providing for the first time their unambiguous molecular typing. 
Conclusion Our characterization of the Borreliaceae pan-genome supports the splitting of the former Borrelia genus into two genera and provides for the phylogenetic placement of several non-species designated isolates. Mining this family-level pangenome will enable precision diagnostics corresponding to gene content-driven clinical outcomes while also providing targets for interventions.}, }
@article {pmid38943059, year = {2024}, author = {Eynard, SE and Klopp, C and Canale-Tabet, K and Marande, W and Vandecasteele, C and Roques, C and Donnadieu, C and Boone, Q and Servin, B and Vignal, A}, title = {The black honey bee genome: insights on specific structural elements and a first step towards pangenomes.}, journal = {Genetics, selection, evolution : GSE}, volume = {56}, number = {1}, pages = {51}, pmid = {38943059}, issn = {1297-9686}, mesh = {Bees/genetics ; Animals ; *Genome, Insect ; }, abstract = {BACKGROUND: The honey bee reference genome, HAv3.1, was produced from a commercial line sample that was thought to have a largely dominant Apis mellifera ligustica genetic background. Apis mellifera mellifera, often referred to as the black bee, has a separate evolutionary history and is the original type in western and northern Europe. Growing interest in this subspecies for conservation and non-professional apicultural practices, together with the necessity of deciphering genome backgrounds in hybrids, triggered the necessity for a specific genome assembly. Moreover, having several high-quality genomes is becoming key for taking structural variations into account in pangenome analyses.
RESULTS: Pacific Bioscience technology long reads were produced from a single haploid black bee drone. Scaffolding contigs into chromosomes was done using a high-density genetic map. This allowed for re-estimation of the recombination rate, which was over-estimated in some previous studies due to mis-assemblies, which resulted in spurious inversions in the older reference genomes. The sequence continuity obtained was very high and the only limit towards continuous chromosome-wide sequences seemed to be due to tandem repeat arrays that were usually longer than 10 kb and that belonged to two main families, the 371 and 91 bp repeats, causing problems in the assembly process due to high internal sequence similarity. Our assembly was used together with the reference genome to genotype two structural variants by a pangenome graph approach with Graphtyper2. Genotypes obtained were either correct or missing, when compared to an approach based on sequencing depth analysis, and genotyping rates were 89 and 76\% for the two variants.
CONCLUSIONS: Our new assembly for the Apis mellifera mellifera honey bee subspecies demonstrates the utility of multiple high-quality genomes for the genotyping of structural variants, with a test case on two insertions and deletions. It will therefore be an invaluable resource for future studies, for instance by including structural variants in GWAS. Having used a single haploid drone for sequencing allowed a refined analysis of very large tandem repeat arrays, raising the question of their function in the genome. High quality genome assemblies for multiple subspecies such as presented here, are crucial for emerging projects using pangenomes.}, }
@article {pmid38940135, year = {2024}, author = {Shivakumar, VS and Ahmed, OY and Kovaka, S and Zakeri, M and Langmead, B}, title = {Sigmoni: classification of nanopore signal with a compressed pangenome index.}, journal = {Bioinformatics (Oxford, England)}, volume = {40}, number = {Supplement\_1}, pages = {i287--i296}, doi = {10.1093/bioinformatics/btae213}, pmid = {38940135}, issn = {1367-4811}, support = {//Advanced Research Computing at Hopkins/ ; OAC 1920103//National Science Foundation/ ; R01HG011392/HG/NHGRI NIH HHS/United States ; IIBR 2029552//National Science Foundation/ ; U01CA253481/NH/NIH HHS/United States ; RGP0025/2021//Human Frontier Science Program/ ; }, mesh = {Humans ; *Algorithms ; Nanopore Sequencing/methods ; Software ; Nanopores ; Genome, Human ; Genomics/methods ; Sequence Analysis, DNA/methods ; }, abstract = {SUMMARY: Improvements in nanopore sequencing necessitate efficient classification methods, including pre-filtering and adaptive sampling algorithms that enrich for reads of interest. Signal-based approaches circumvent the computational bottleneck of basecalling. But past methods for signal-based classification do not scale efficiently to large, repetitive references like pangenomes, limiting their utility to partial references or individual genomes. We introduce Sigmoni: a rapid, multiclass classification method based on the r-index that scales to references of hundreds of Gbps. Sigmoni quantizes nanopore signal into a discrete alphabet of picoamp ranges. It performs rapid, approximate matching using matching statistics, classifying reads based on distributions of picoamp matching statistics and co-linearity statistics, all in linear query time without the need for seed-chain-extend. Sigmoni is 10-100× faster than previous methods for adaptive sampling in host depletion experiments with improved accuracy, and can query reads against large microbial or human pangenomes. 
Sigmoni is the first signal-based tool to scale to a complete human genome and pangenome while remaining fast enough for adaptive sampling applications.
Sigmoni is implemented in Python, and is available open-source at https://github.com/vshiv18/sigmoni.}, }
@article {pmid38934790, year = {2024}, author = {Cohen, ZP and Perkin, LC and Wagner, TA and Liu, J and Bell, AA and Arick, II, MA and Grover, CE and Yu, JZ and Udall, JA and Suh, CP}, title = {Nematode-resistance loci in {Upland} cotton genomes are associated with structural differences.}, journal = {G3 (Bethesda, Md.)}, doi = {10.1093/g3journal/jkae140}, pmid = {38934790}, issn = {2160-1836}, abstract = {Reniform and root-knot nematode are two of the most destructive pests of conventional upland cotton, Gossypium hirsutum, L. and continue to be a major threat to cotton fiber production in semi-arid regions of the southern United States and Central America. Fortunately, naturally occurring tolerance to these nematodes has been identified in the Pima cotton species (G. barbadense) and several upland cotton varieties (G. hirsutum), which has led to a robust breeding program that has successfully introgressed and stacked these independent resistant traits into several upland cotton lineages with superior agronomic traits, e.g. BAR 32-30 and BARBREN-713. This work identifies the genomic variations of these nematode tolerant accessions by comparing their respective genomes to the susceptible, high-quality fiber producing parental line of this lineage: Phytogen 355 (PSC355). We discover several large genomic differences within marker regions that harbor putative resistance genes as well as expression mechanisms shared by the two resistant lines, with respect to the susceptible PSC355 parental line. This work emphasizes the utility of whole genome comparisons as a means of elucidating large and small nuclear differences by lineage and phenotype.}, }
@article {pmid38934646, year = {2024}, author = {Raghuram, V and Petit, RA and Karol, Z and Mehta, R and Weissman, DB and Read, TD}, title = {Average nucleotide identity-based {Staphylococcus} aureus strain grouping allows identification of strain-specific genes in the pangenome.}, journal = {mSystems}, pages = {e0014324}, doi = {10.1128/msystems.00143-24}, pmid = {38934646}, issn = {2379-5077}, abstract = {UNLABELLED: Staphylococcus aureus causes both hospital- and community-acquired infections in humans worldwide. Due to the high incidence of infection, S. aureus is also one of the most sampled and sequenced pathogens today, providing an outstanding resource to understand variation at the bacterial subspecies level. We processed and downsampled 83,383 public S. aureus Illumina whole-genome shotgun sequences and 1,263 complete genomes to produce 7,954 representative substrains. Pairwise comparison of average nucleotide identity revealed a natural boundary of 99.5\% that could be used to define 145 distinct strains within the species. We found that intermediate frequency genes in the pangenome (present in 10\%-95\% of genomes) could be divided into those closely linked to strain background ("strain-concentrated") and those highly variable within strains ("strain-diffuse"). Non-core genes had different patterns of chromosome location. Notably, strain-diffuse genes were associated with prophages; strain-concentrated genes were associated with the vSaβ genome island and rare genes (<10\% frequency) concentrated near the origin of replication. Antibiotic resistance genes were enriched in the strain-diffuse class, while virulence genes were distributed between strain-diffuse, strain-concentrated, core, and rare classes. This study shows how different patterns of gene movement help create strains as distinct subspecies entities and provide insight into the diverse histories of important S. aureus functions.
IMPORTANCE: We analyzed the genomic diversity of Staphylococcus aureus, a globally prevalent bacterial species that causes serious infections in humans. Our goal was to build a genetic picture of the different strains of S. aureus and which genes may be associated with them. We reprocessed >84,000 genomes and subsampled to remove redundancy. We found that individual samples sharing >99.5\% of their genome could be grouped into strains. We also showed that a portion of genes that are present in intermediate frequency in the species are strongly associated with some strains but completely absent from others, suggesting a role in strain specificity. This work lays the foundation for understanding individual gene histories of the S. aureus species and also outlines strategies for processing large bacterial genomic data sets.}, }
@article {pmid38934605, year = {2024}, author = {Burcham, ZM}, title = {Comparative genomic analysis of an emerging {Pseudomonadaceae} member, {Thiopseudomonas} alkaliphila.}, journal = {Microbiology spectrum}, pages = {e0415723}, doi = {10.1128/spectrum.04157-23}, pmid = {38934605}, issn = {2165-0497}, abstract = {Thiopseudomonas alkaliphila, an organism recently classified within the Pseudomonadaceae family, has been detected in diverse sources such as human tissues, animal guts, industrial fermenters, and decomposition environments, suggesting a diverse ecological role. However, a large knowledge gap exists in how T. alkaliphila functions. In this comparative genomic analysis, adaptations indicative of habitat specificity among strains and genomic similarity to known opportunistic pathogens are revealed. Genomic investigation reveals a core metabolic utilization of multiple oxidative and non-oxidative catabolic pathways, suggesting adaptability to varied environments and carbon sources. The genomic repertoire of T. alkaliphila includes secondary metabolites, such as antimicrobials and siderophores, indicative of its involvement in microbial competition and resource acquisition. Additionally, the presence of transposases, prophages, plasmids, and Clustered Regularly Interspaced Short Palindromic Repeats-Cas systems in T. alkaliphila genomes suggests mechanisms for horizontal gene transfer and defense against viral predation. This comprehensive genomic analysis expands our understanding on the ecological functions, community interactions, and potential virulence of T. alkaliphila, while emphasizing its adaptability and diverse capabilities across environmental and host-associated ecosystems. IMPORTANCE: As the microbial world continues to be explored, new organisms will emerge with beneficial and/or pathogenetic impact. 
Thiopseudomonas alkaliphila is a species originally isolated from clinical human tissue and fluid samples but has not been attributed to disease. Since its classification, T. alkaliphila has been found in animal guts, animal waste, decomposing remains, and biogas fermentation reactors. This is the first study to provide an in-depth view of the metabolic potential of publicly available genomes belonging to this species through a comparative genomics and draft pangenome calculation approach. It was found that T. alkaliphila is metabolically versatile and likely adapts to diverse energy sources and environments, which may make it useful for bioremediation and in industrial settings. A range of virulence factors and antibiotic resistances were also detected, suggesting T. alkaliphila may operate as an undescribed opportunistic pathogen.}, }
@article {pmid38934546, year = {2024}, author = {Oles, RE and Carrillo Terrazas, M and Loomis, LR and Hsu, C-Y and Tribelhorn, C and Belda-Ferre, P and Ea, AC and Bryant, M and Young, JA and Carrow, HC and Sandborn, WJ and Dulai, PS and Sivagnanam, M and Pride, D and Knight, R and Chu, H}, title = {Pangenome comparison of {Bacteroides} fragilis genomospecies unveils genetic diversity and ecological insights.}, journal = {mSystems}, pages = {e0051624}, doi = {10.1128/msystems.00516-24}, pmid = {38934546}, issn = {2379-5077}, abstract = {UNLABELLED: Bacteroides fragilis is a Gram-negative commensal bacterium commonly found in the human colon, which differentiates into two genomospecies termed divisions I and II. Through a comprehensive collection of 694 B. fragilis whole genome sequences, we identify novel features distinguishing these divisions. Our study reveals a distinct geographic distribution with division I strains predominantly found in North America and division II strains in Asia. Additionally, division II strains are more frequently associated with bloodstream infections, suggesting a distinct pathogenic potential. We report differences between the two divisions in gene abundance related to metabolism, virulence, stress response, and colonization strategies. Notably, division II strains harbor more antimicrobial resistance (AMR) genes than division I strains. These findings offer new insights into the functional roles of division I and II strains, indicating specialized niches within the intestine and potential pathogenic roles in extraintestinal sites.
IMPORTANCE: Understanding the distinct functions of microbial species in the gut microbiome is crucial for deciphering their impact on human health. Classifying division II strains as Bacteroides fragilis can lead to erroneous associations, as researchers may mistakenly attribute characteristics observed in division II strains to the more extensively studied division I B. fragilis. Our findings underscore the necessity of recognizing these divisions as separate species with distinct functions. We unveil new findings of differential gene prevalence between division I and II strains in genes associated with intestinal colonization and survival strategies, potentially influencing their role as gut commensals and their pathogenicity in extraintestinal sites. Despite the significant niche overlap and colonization patterns between these groups, our study highlights the complex dynamics that govern strain distribution and behavior, emphasizing the need for a nuanced understanding of these microorganisms.}, }
@article {pmid38931094, year = {2024}, author = {Mather, D and Vassos, E and Sheedy, J and Guo, W and McKay, A}, title = {A Quantitative Trait Locus with a Major Effect on Root-Lesion Nematode Resistance in Barley.}, journal = {Plants (Basel, Switzerland)}, volume = {13}, number = {12}, pages = {1663}, doi = {10.3390/plants13121663}, pmid = {38931094}, issn = {2223-7747}, support = {DAS00141, UA00143, USQ00019 USQ1702-007RTSX//Grains Research and Development Corporation/ ; }, abstract = {Although the root-lesion nematode Pratylenchus thornei is known to affect barley (Hordeum vulgare L.), there have been no reports on the genetic control of P. thornei resistance in barley. In this research, P. thornei resistance was assessed for a panel of 46 barley mapping parents and for two mapping populations (Arapiles/Franklin and Denar/Baudin). With both populations, a highly significant quantitative trait locus (QTL) was mapped at the same position on the long arm of chromosome 7H. Single-nucleotide polymorphisms (SNPs) in this region were anchored to an RGT Planet pan-genome assembly and assayed on the mapping parents and other barley varieties. The results indicate that Arapiles, Denar, RGT Planet and several other varieties likely have the same resistance gene on chromosome 7H. Marker assays reported here could be used to select for P. thornei resistance in barley breeding. Analysis of existing barley pan-genomic and pan-transcriptomic data provided a list of candidate genes along with information on the expression and differential expression of some of those genes in barley root tissue. Further research is required to identify a specific barley gene that affects root-lesion nematode resistance.}, }
@article{pmid38926873,
  author   = {Sierra, P and Durbin, R},
  title    = {Identification of transposable element families from pangenome polymorphisms.},
  journal  = {Mobile DNA},
  year     = {2024},
  volume   = {15},
  number   = {1},
  pages    = {13},
  issn     = {1759-8753},
  pmid     = {38926873},
  support  = {956229//Horizon 2020 Framework Programme/ ; 207492/WT_/Wellcome Trust/United Kingdom ; },
  abstract = {BACKGROUND: Transposable Elements (TEs) are segments of DNA, typically a few hundred base pairs up to several tens of thousands bases long, that have the ability to generate new copies of themselves in the genome. Most existing methods used to identify TEs in a newly sequenced genome are based on their repetitive character, together with detection based on homology and structural features. As new high quality assemblies become more common, including the availability of multiple independent assemblies from the same species, an alternative strategy for identification of TE families becomes possible in which we focus on the polymorphism at insertion sites caused by TE mobility.
RESULTS: We develop the idea of using the structural polymorphisms found in pangenomes to create a library of the TE families recently active in a species, or in a closely related group of species. We present a tool, pantera, that achieves this task, and illustrate its use both on species with well-curated libraries, and on new assemblies.
CONCLUSIONS: Our results show that pantera is sensitive and accurate, tending to correctly identify complete elements with precise boundaries, and is particularly well suited to detect larger, low copy number TEs that are often undetected with existing de novo methods.},
}
@article {pmid38921378, year = {2024}, author = {Casimiro-Ramos, A and Bautista-Crescencio, C and Vidal-Montiel, A and González, GM and Hernández-García, JA and Hernández-Rodríguez, C and Villa-Tanaca, L}, title = {Comparative Genomics of the First Resistant {Candida} auris Strain Isolated in {Mexico}: Phylogenomic and Pan-Genomic Analysis and Mutations Associated with Antifungal Resistance.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {10}, number = {6}, pages = {392}, doi = {10.3390/jof10060392}, pmid = {38921378}, issn = {2309-608X}, support = {SIP 20240946//Instituto Politécnico Nacional/ ; SIP 20231481//Instituto Politécnico Nacional/ ; }, abstract = {Candida auris is an emerging multidrug-resistant and opportunistic pathogenic yeast. Whole-genome sequencing analysis has defined five major clades, each from a distinct geographic region. The current study aimed to examine the genome of the C. auris 20-1498 strain, which is the first isolate of this fungus identified in Mexico. Based on whole-genome sequencing, the draft genome was found to contain 70 contigs. It had a total genome size of 12.86 Mbp, an N50 value of 1.6 Mbp, and an average guanine-cytosine (GC) content of 45.5\%. Genome annotation revealed a total of 5432 genes encoding 5515 proteins. According to the genomic analysis, the C. auris 20-1498 strain belongs to clade IV (containing strains endemic to South America). Of the two genes (ERG11 and FKS1) associated with drug resistance in C. auris, a mutation was detected in K143R, a gene located in a mutation hotspot of ERG11 (lanosterol 14-α-demethylase), an antifungal drug target. The focus on whole-genome sequencing and the identification of mutations linked to the drug resistance of fungi could lead to the discovery of new therapeutic targets and new antifungal compounds.}, }
@article {pmid38920366, year = {2024}, author = {Ardalani, O and Phaneuf, PV and Mohite, OS and Nielsen, LK and Palsson, BO}, title = {Pangenome reconstruction of {Lactobacillaceae} metabolism predicts species-specific metabolic traits.}, journal = {mSystems}, pages = {e0015624}, doi = {10.1128/msystems.00156-24}, pmid = {38920366}, issn = {2379-5077}, abstract = {Strains across the Lactobacillaceae family form the basis for a trillion-dollar industry. Our understanding of the genomic basis for their key traits is fragmented, however, including the metabolism that is foundational to their industrial uses. Pangenome analysis of publicly available Lactobacillaceae genomes allowed us to generate genome-scale metabolic network reconstructions for 26 species of industrial importance. Their manual curation led to more than 75,000 gene-protein-reaction associations that were deployed to generate 2,446 genome-scale metabolic models. Cross-referencing genomes and known metabolic traits allowed for manual metabolic network curation and validation of the metabolic models. As a result, we provide the first pangenomic basis for metabolism in the Lactobacillaceae family and a collection of predictive computational metabolic models that enable a variety of practical uses. IMPORTANCE: Lactobacillaceae, a bacterial family foundational to a trillion-dollar industry, is increasingly relevant to biosustainability initiatives. Our study, leveraging approximately 2,400 genome sequences, provides a pangenomic analysis of Lactobacillaceae metabolism, creating over 2,400 curated and validated genome-scale models (GEMs). 
These GEMs successfully predict (i) unique, species-specific metabolic reactions; (ii) niche-enriched reactions that increase organism fitness; (iii) essential media components, offering insights into the global amino acid essentiality of Lactobacillaceae; and (iv) fermentation capabilities across the family, shedding light on the metabolic basis of Lactobacillaceae-based commercial products. This quantitative understanding of Lactobacillaceae metabolic properties and their genomic basis will have profound implications for the food industry and biosustainability, offering new insights and tools for strain selection and manipulation.}, }
@article {pmid38919498, year = {2024}, author = {Zambounis, A and Boutsika, A and Gray, N and Hossain, M and Chatzidimopoulos, M and Tsitsigiannis, DI and Paplomatas, E and Hane, J}, title = {Pan-genome survey of {Septoria} pistaciarum, causal agent of {Septoria} leaf spot of pistachios, across three {Aegean} sub-regions of {Greece}.}, journal = {Frontiers in microbiology}, volume = {15}, pages = {1396760}, pmid = {38919498}, issn = {1664-302X}, abstract = {Septoria pistaciarum, a causal agent of Septoria leaf spot disease of pistachio, is a fungal pathogen that causes substantial losses in the cultivation, worldwide. This study describes the first pan-genome-based survey of this phytopathogen-comprising a total of 27 isolates, with 9 isolates each from 3 regional units of Greece (Pieria, Larissa and Fthiotida). The reference isolate (SPF8) assembled into a total of 43.1 Mb, with 38.6\% contained within AT-rich regions of approximately 37.5\% G:C. The genomes of the 27 isolates exhibited on average 42\% gene-coding and 20\% repetitive regions. The genomes of isolates from the southern Fthiotida region appeared to more diverged from each other than the other regions based on SNP-derived trees, and also contained isolates similar to both the Pieria and Larissa regions. In contrast, isolates of the Pieria and Larissa were less diverse and distinct from one another. Asexual reproduction appeared to be typical, with no MAT1-2 locus detected in any isolate. Genome-based prediction of infection mode indicated hemibiotrophic and saprotrophic adaptations, consistent with its long latent phase. Gene prediction and orthology clustering generated a pan-genome-wide gene set of 21,174 loci. A total of 59 ortholog groups were predicted to contain candidate effector proteins, with 36 (61\%) of these either having homologs to known effectors from other species or could be assigned predicted functions from matches to conserved domains. Overall, effector prediction suggests that S. 
pistaciarum employs a combination of defensive effectors with roles in suppression of host defenses, and offensive effectors with a range of cytotoxic activities. Some effector-like ortholog groups presented as divergent versions of the same protein, suggesting region-specific adaptations may have occurred. These findings provide insights and future research directions in uncovering the pathogenesis and population dynamics of S. pistaciarum toward the efficient management of Septoria leaf spot of pistachio.}, }
@article {pmid38918389, year = {2024}, author = {Hämälä, T and Moore, C and Cowan, L and Carlile, M and Gopaulchan, D and Brandrud, MK and Birkeland, S and Loose, M and Kolář, F and Koch, MA and Yant, L}, title = {Impact of whole-genome duplications on structural variant evolution in {Cochlearia}.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {5377}, pmid = {38918389}, issn = {2041-1723}, support = {679056//EC | EU Framework Programme for Research and Innovation H2020 | H2020 Excellent Science (H2020 Priority Excellent Science)/ ; 850852//EC | EU Framework Programme for Research and Innovation H2020 | H2020 Excellent Science (H2020 Priority Excellent Science)/ ; RPG-2020-367//Leverhulme Trust/ ; }, mesh = {*Polyploidy ; *Genome, Plant/genetics ; *Evolution, Molecular ; *Gene Duplication ; Genomic Structural Variation/genetics ; Mutation ; }, abstract = {Polyploidy, the result of whole-genome duplication (WGD), is a major driver of eukaryote evolution. Yet WGDs are hugely disruptive mutations, and we still lack a clear understanding of their fitness consequences. Here, we study whether WGDs result in greater diversity of genomic structural variants (SVs) and how they influence evolutionary dynamics in a plant genus, Cochlearia (Brassicaceae). By using long-read sequencing and a graph-based pangenome, we find both negative and positive interactions between WGDs and SVs. Masking of recessive mutations due to WGDs leads to a progressive accumulation of deleterious SVs across four ploidal levels (from diploids to octoploids), likely reducing the adaptive potential of polyploid populations. However, we also discover putative benefits arising from SV accumulation, as more ploidy-specific SVs harbor signals of local adaptation in polyploids than in diploids. Together, our results suggest that SVs play diverse and contrasting roles in the evolutionary trajectories of young polyploids.}, }
@article {pmid38916318, year = {2024}, author = {Lypaczewski, P and Chac, D and Dunmire, CN and Tandoc, KM and Chowdhury, F and Khan, AI and Bhuiyan, TR and Harris, JB and LaRocque, RC and Calderwood, SB and Ryan, ET and Qadri, F and Shapiro, BJ and Weil, AA}, title = {{Vibrio} cholerae {O1} experiences mild bottlenecks through the gastrointestinal tract in some but not all cholera patients.}, journal = {Microbiology spectrum}, pages = {e0078524}, doi = {10.1128/spectrum.00785-24}, pmid = {38916318}, issn = {2165-0497}, abstract = {UNLABELLED: Vibrio cholerae O1 causes the diarrheal disease cholera, and the small intestine is the site of active infection. During cholera, cholera toxin is secreted from V. cholerae and induces a massive fluid influx into the small intestine, which causes vomiting and diarrhea. Typically, V. cholerae genomes are sequenced from bacteria passed in stool, but rarely from vomit, a fluid that may more closely represents the site of active infection. We hypothesized that V. cholerae O1 population bottlenecks along the gastrointestinal tract would result in reduced genetic variation in stool compared to vomit. To test this, we sequenced V. cholerae genomes from 10 cholera patients with paired vomit and stool samples. Genetic diversity was low in both vomit and stool, consistent with a single infecting population rather than coinfection with divergent V. cholerae O1 lineages. The amount of single-nucleotide variation decreased from vomit to stool in four patients, increased in two, and remained unchanged in four. The variation in gene presence/absence decreased between vomit and stool in eight patients and increased in two. Pangenome analysis of assembled short-read sequencing demonstrated that the toxin-coregulated pilus operon more frequently contained deletions in genomes from vomit compared to stool. 
However, these deletions were not detected by PCR or long-read sequencing, indicating that interpreting gene presence or absence patterns from short-read data alone may be incomplete. Overall, we found that V. cholerae O1 isolated from stool is genetically similar to V. cholerae recovered from the upper intestinal tract.
IMPORTANCE: Vibrio cholerae O1, the bacterium that causes cholera, is ingested in contaminated food or water and then colonizes the upper small intestine and is excreted in stool. Shed V. cholerae genomes from stool are usually studied, but V. cholerae isolated from vomit may be more representative of where V. cholerae colonizes in the upper intestinal epithelium. V. cholerae may experience bottlenecks, or large reductions in bacterial population sizes and genetic diversity, as it passes through the gut. Passage through the gut may select for distinct V. cholerae mutants that are adapted for survival and gut colonization. We did not find strong evidence for such adaptive mutations, and instead observed that passage through the gut results in modest reductions in V. cholerae genetic diversity, and only in some patients. These results fill a gap in our understanding of the V. cholerae life cycle, transmission, and evolution.}, }
@article{pmid38916315,
  year     = {2024},
  author   = {Bhalla, N. and Nanda, R. K.},
  title    = {Pangenome-wide association study reveals the selective absence of {CRISPR} genes ({Rv2816c-19c}) in drug-resistant {Mycobacterium} tuberculosis.},
  journal  = {Microbiology spectrum},
  pages    = {e0052724},
  doi      = {10.1128/spectrum.00527-24},
  pmid     = {38916315},
  issn     = {2165-0497},
  abstract = {The presence of intermittently dispersed insertion sequences and transposases in the Mycobacterium tuberculosis (Mtb) genome makes intra-genome recombination events inevitable. Understanding their effect on the gene repertoires (GR), which may contribute to the development of drug-resistant Mtb, is critical. In this study, publicly available WGS data of clinical Mtb isolates (endemic region n = 2,601; non-endemic region n = 1,130) were de novo assembled, filtered, scaffolded into assemblies, and functionally annotated. Out of 2,601 Mtb WGS data sets from endemic regions, 2,184 (drug resistant/sensitive: 1,386/798) qualified as high quality. We identified 3,784 core genes, 123 softcore genes, 224 shell genes, and 762 cloud genes in the pangenome of Mtb clinical isolates from endemic regions. Sets of 33 and 39 genes showed positive and negative associations (P < 0.01) with drug resistance status, respectively. Gene ontology clustering showed compromised immunity to phages and impaired DNA repair in drug-resistant Mtb clinical isolates compared to the sensitive ones. Multidrug efflux pump repressor genes (Rv3830c and Rv3855c) and CRISPR genes (Rv2816c-19c) were absent in the drug-resistant Mtb. A separate WGS data analysis of drug-resistant Mtb clinical isolates from the Netherlands (n = 1130) also showed the absence of CRISPR genes (Rv2816c-17c).
This study highlights the role of CRISPR genes in drug resistance development in Mtb clinical isolates and helps in understanding its evolutionary trajectory and as useful targets for diagnostics development.
IMPORTANCE: The results from the present Pan-GWAS study comparing gene sets in drug-resistant and drug-sensitive Mtb clinical isolates revealed intricate presence-absence patterns of genes encoding DNA-binding proteins having gene regulatory as well as DNA modification and DNA repair roles. Apart from the genes with known functions, some uncharacterized and hypothetical genes that seem to have a potential role in drug resistance development in Mtb were identified. We have been able to extrapolate many findings of the present study with the existing literature on the molecular aspects of drug-resistant Mtb, further strengthening the relevance of the results presented in this study.},
}
@article{pmid38916310,
  year     = {2024},
  author   = {Mahmoud, F. M. and Pritsch, K. and Siani, R. and Benning, S. and Radl, V. and Kublik, S. and Bunk, B. and Spröer, C. and Schloter, M.},
  title    = {Comparative genomic analysis of strain {Priestia} megaterium {B1} reveals conserved potential for adaptation to endophytism and plant growth promotion.},
  journal  = {Microbiology spectrum},
  pages    = {e0042224},
  doi      = {10.1128/spectrum.00422-24},
  pmid     = {38916310},
  issn     = {2165-0497},
  abstract = {In our study, we aimed to explore the genomic and phenotypic traits of Priestia megaterium strain B1, which was isolated from root material of healthy apple plants, to adapt to the endophytic lifestyle and promote plant growth. We identified putative genes encoding proteins involved in chemotaxis, flagella biosynthesis, biofilm formation, secretory systems, detoxification, transporters, and transcription regulation. Furthermore, B1 exhibited both swarming and swimming motilities, along with biofilm formation. Both genomic and physiological analyses revealed the potential of B1 to promote plant growth through the production of indole-3-acetic acid and siderophores, as well as the solubilization of phosphate and zinc. To deduce potential genomic features associated with endophytism across members of P. megaterium strains, we conducted a comparative genomic analysis involving 27 and 31 genomes of strains recovered from plant and soil habitats, respectively, in addition to our strain B1. Our results indicated a closed pan genome and comparable genome size of strains from both habitats, suggesting a facultative host association and adaptive lifestyle to both habitats. Additionally, we performed a sparse Partial Least Squares Discriminant Analysis to infer the most discriminative functional features of the two habitats based on Pfam annotation.
Despite the distinctive clustering of both groups, functional enrichment analysis revealed no significant enrichment of any Pfam domain in both habitats. Furthermore, when assessing genetic elements related to adaptation to endophytism in each individual strain, we observed their widespread presence among strains from both habitats. Moreover, all members displayed potential genetic elements for promoting plant growth.
IMPORTANCE: Both genomic and phenotypic analyses yielded valuable insights into the capacity of P. megaterium B1 to adapt to the plant niche and enhance its growth. The comparative genomic analysis revealed that P. megaterium members, whether derived from soil or plant sources, possess the essential genetic machinery for interacting with plants and enhancing their growth. The conservation of these traits across various strains of this species extends its potential application as a bio-stimulant in diverse environments. This significance also applies to strain B1, particularly regarding its application to enhance the growth of plants facing apple replant disease conditions.},
}
@article{pmid38915671,
  year     = {2024},
  author   = {Parmigiani, L. and Garrison, E. and Stoye, J. and Marschall, T. and Doerr, D.},
  title    = {Panacus: fast and exact pangenome growth and core size estimation.},
  journal  = {bioRxiv : the preprint server for biology},
  doi      = {10.1101/2024.06.11.598418},
  pmid     = {38915671},
  abstract = {MOTIVATION: Using a single linear reference genome poses a limitation to exploring the full genomic diversity of a species. The release of a draft human pangenome underscores the increasing relevance of pangenomics to overcome these limitations. Pangenomes are commonly represented as graphs, which can represent billions of base pairs of sequence. Presently, there is a lack of scalable software able to perform key tasks on pangenomes, such as quantifying universally shared sequence across genomes (the core genome) and measuring the extent of genomic variability as a function of sample size (pangenome growth).
RESULTS: We introduce Panacus (pangenome-abacus), a tool designed to rapidly perform these tasks and visualize the results in interactive plots. Panacus can process GFA files, the accepted standard for pangenome graphs, and is able to analyze a human pangenome graph with 110 million nodes in less than one hour.
AVAILABILITY: Panacus is implemented in Rust and is published as Open Source software under the MIT license. The source code and documentation are available at https://github.com/marschall-lab/panacus . Panacus can be installed via Bioconda at https://bioconda.github.io/recipes/panacus/README.html .
CONTACT: Luca Parmigiani (luca.parmigiani@uni-bielefeld.de), Daniel Doerr (daniel.doerr@hhu.de).},
}
@article{pmid38915450,
  year     = {2024},
  author   = {Trouche, B. and Schrieke, H. and Duron, O. and Eren, A. M. and Reveillaud, J.},
  title    = {Wolbachia populations across organs of individual {Culex} pipiens: highly conserved intra-individual core pangenome with inter-individual polymorphisms.},
  journal  = {ISME communications},
  volume   = {4},
  number   = {1},
  pages    = {ycae078},
  pmid     = {38915450},
  issn     = {2730-6151},
  abstract = {Wolbachia is a maternally inherited intracellular bacterium that infects a wide range of arthropods including mosquitoes. The endosymbiont is widely used in biocontrol strategies due to its capacity to modulate arthropod reproduction and limit pathogen transmission. Wolbachia infections in Culex spp. are generally assumed to be monoclonal but the potential presence of genetically distinct Wolbachia subpopulations within and between individual organs has not been investigated using whole genome sequencing. Here we reconstructed Wolbachia genomes from ovary and midgut metagenomes of single naturally infected Culex pipiens mosquitoes from Southern France to investigate patterns of intra- and inter-individual differences across mosquito organs. Our analyses revealed a remarkable degree of intra-individual conservancy among Wolbachia genomes from distinct organs of the same mosquito both at the level of gene presence-absence signal and single-nucleotide polymorphisms (SNPs). Yet, we identified several synonymous and non-synonymous substitutions between individuals, demonstrating the presence of some level of genomic heterogeneity among Wolbachia that infect the same C. pipiens field population. Overall, the absence of genetic heterogeneity within Wolbachia populations in a single individual confirms the presence of a dominant Wolbachia that is maintained under strong purifying forces of evolution.},
}
@article{pmid38905343,
  year     = {2024},
  author   = {Buschi, E. and Dell'Anno, A. and Tangherlini, M. and Candela, M. and Rampelli, S. and Turroni, S. and Palladino, G. and Esposito, E. and Martire, M. L. and Musco, L. and Stefanni, S. and Munari, C. and Fiori, J. and Danovaro, R. and Corinaldesi, C.},
  title    = {Resistance to freezing conditions of endemic {Antarctic} polychaetes is enhanced by cryoprotective proteins produced by their microbiome.},
  journal  = {Science advances},
  volume   = {10},
  number   = {25},
  pages    = {eadk9117},
  doi      = {10.1126/sciadv.adk9117},
  pmid     = {38905343},
  issn     = {2375-2548},
  mesh     = {*Polychaeta/microbiology ; Animals ; Antarctic Regions ; *Microbiota ; *Freezing ; Phylogeny ; Bacterial Proteins/metabolism/genetics ; },
  abstract = {The microbiome plays a key role in the health of all metazoans. Whether and how the microbiome favors the adaptation processes of organisms to extreme conditions, such as those of Antarctica, which are incompatible with most metazoans, is still unknown. We investigated the microbiome of three endemic and widespread species of Antarctic polychaetes: Leitoscoloplos geminus, Aphelochaeta palmeri, and Aglaophamus trissophyllus. We report here that these invertebrates contain a stable bacterial core dominated by Meiothermus and Anoxybacillus, equipped with a versatile genetic makeup and a unique portfolio of proteins useful for coping with extremely cold conditions as revealed by pangenomic and metaproteomic analyses. The close phylosymbiosis between Meiothermus and Anoxybacillus and these Antarctic polychaetes indicates a connection with their hosts that started in the past to support holobiont adaptation to the Antarctic Ocean. The wide suite of bacterial cryoprotective proteins found in Antarctic polychaetes may be useful for the development of nature-based biotechnological applications.},
}
@article{pmid38904756,
  year     = {2024},
  author   = {Kaur, J. and Verma, H. and Kaur, J. and Lata, P. and Dhingra, G. G. and Lal, R.},
  title    = {In Silico Analysis of the Phylogenetic and Physiological Characteristics of {Sphingobium} indicum {B90A}: A Hexachlorocyclohexane-Degrading Bacterium.},
  journal  = {Current microbiology},
  volume   = {81},
  number   = {8},
  pages    = {233},
  pmid     = {38904756},
  issn     = {1432-0991},
  mesh     = {*Phylogeny ; *Sphingomonadaceae/genetics/metabolism/classification ; *Genome, Bacterial ; *Hexachlorocyclohexane/metabolism ; *Computer Simulation ; *Biodegradation, Environmental ; Genomic Islands ; Gene Transfer, Horizontal ; },
  abstract = {The study focuses on the in silico genomic characterization of Sphingobium indicum B90A, revealing a wealth of genes involved in stress response, carbon monoxide oxidation, β-carotene biosynthesis, heavy metal resistance, and aromatic compound degradation, suggesting its potential as a bioremediation agent. Furthermore, genomic adaptations among nine Sphingomonad strains were explored, highlighting shared core genes via pangenome analysis, including those related to the shikimate pathway and heavy metal resistance. The majority of genes associated with aromatic compound degradation, heavy metal resistance, and stress response were found within genomic islands across all strains. Sphingobium indicum UT26S exhibited the highest number of genomic islands, while Sphingopyxis alaskensis RB2256 had the maximum fraction of its genome covered by genomic islands. The distribution of lin genes varied among the strains, indicating diverse genetic responses to environmental pressures. Additionally, in silico evidence of horizontal gene transfer (HGT) between plasmids pSRL3 and pISP3 of the Sphingobium and Sphingomonas genera, respectively, has been provided. The manuscript offers novel insights into strain B90A, highlighting its role in horizontal gene transfer and refining evolutionary relationships among Sphingomonad strains.
The discovery of stress response genes and the czcABCD operon emphasizes the potential of Sphingomonads in consortia development, supported by genomic island analysis.},
}
@article{pmid38902723,
  year     = {2024},
  author   = {Liang, Y. and Dikow, R. B. and Su, X. and Wen, J. and Ren, Z.},
  title    = {Comparative genomics of the primary endosymbiont {Buchnera} aphidicola in aphid hosts and their coevolutionary relationships.},
  journal  = {BMC biology},
  volume   = {22},
  number   = {1},
  pages    = {137},
  pmid     = {38902723},
  issn     = {1741-7007},
  support  = {31870366//National Natural Science Foundation of China/ ; 31170359//National Natural Science Foundation of China/ ; 201803D421051//International Science and Technology Cooperation Program of Shanxi Province/ ; 2020-018//Shanxi Scholarship Council of China/ ; 2014AA021802//National High-tech Research and Development Program/ ; },
  mesh     = {*Aphids/microbiology/genetics ; Animals ; *Buchnera/genetics/physiology ; *Symbiosis/genetics ; *Genome, Bacterial ; *Genomics ; *Phylogeny ; Biological Coevolution ; },
  abstract = {BACKGROUND: Coevolution between modern aphids and their primary obligate, bacterial endosymbiont, Buchnera aphidicola, has been previously reported at different classification levels based on molecular phylogenetic analyses. However, the Buchnera genome remains poorly understood within the Rhus gall aphids.
RESULTS: We assembled the complete genome of the endosymbiont Buchnera in 16 aphid samples, representing 13 species in all six genera of Rhus gall aphids by shotgun genome skimming method. We compared the newly assembled genomes with those from GenBank to comprehensively investigate patterns of coevolution between the bacteria Buchnera and their aphid hosts. Buchnera genomes were mostly collinear, and the pan-genome contained 684 genes, in which the core genome contained 256 genes with some lineages having large numbers of tandem gene duplications. There has been substantial gene-loss in each Buchnera lineage. We also reconstructed the phylogeny for Buchnera and their host aphids, respectively, using 72 complete genomes of Buchnera, along with the complete mitochondrial genomes and three nuclear genes of 31 corresponding host aphid accessions. The cophylogenetic test demonstrated significant coevolution between these two partner groups at individual, species, generic, and tribal levels.
CONCLUSIONS: Buchnera exhibits very high levels of genomic sequence divergence but relative stability in gene order. The relationship between the symbionts Buchnera and its aphid hosts shows a significant coevolutionary pattern and supports complexity of the obligate symbiotic relationship.},
}
@article{pmid38897667,
  year     = {2024},
  author   = {Yang, Z. D. and Kuo, H. Y. and Hsieh, P. W. and Hung, J. H.},
  title    = {Efficient Construction and Utilization of k-Ordered {FM}-indexes with {kISS} for Ultra-Fast Read Mapping in Large Genomes.},
  journal  = {Bioinformatics (Oxford, England)},
  doi      = {10.1093/bioinformatics/btae409},
  pmid     = {38897667},
  issn     = {1367-4811},
  abstract = {MOTIVATION: The Full-text index in Minute space (FM-index) is a memory-efficient data structure widely used in bioinformatics for solving the fundamental pattern-matching task of searching for short patterns within a long reference. With the demand for short query patterns, the k-ordered concept has been proposed for FM-indexes. However, few construction algorithms in the state of the art fully exploit this idea to achieve significant speedups in the pan-genome era.
RESULTS: We introduce the k-ordered Induced Suffix Sorting (kISS) for efficient construction and utilization of k-ordered FM-indexes. We present an algorithmic workflow for building k-ordered suffix arrays, incorporating two novel strategies to improve time and memory efficiency. We also demonstrate the compatibility of integrating k-ordered FM-indexes with locate operations in FMtree. Experiments show that kISS can improve the construction time, and the generated k-ordered suffix array can also be applied to FMtree without any additional in computation or memory usage.
AVAILABILITY: https://github.com/jhhung/kISS.
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
}
@article{pmid38897058,
  year     = {2024},
  author   = {Sreya, P. K. and Hari Naga Papa Rao, A. and Suresh, G. and Sasikala, C. and Venkata Ramana, C.},
  title    = {Genomic and functional insights of a mucin foraging {Rhodopirellula} halodulae sp. nov.},
  journal  = {Systematic and applied microbiology},
  volume   = {47},
  number   = {4},
  pages    = {126523},
  doi      = {10.1016/j.syapm.2024.126523},
  pmid     = {38897058},
  issn     = {1618-0984},
  abstract = {Nine novel strains were obtained from various algal and seagrass samples. The analysis of the 16S rRNA gene-based phylogenetic tree revealed monophyletic placement of all novel strains within the Rhodopirellula genus. The type strain was identified as JC737[T], which shared 99.1 \% 16S rRNA gene sequence identity with Rhodopirellula baltica SH1[T], while strain JC740 was designated as an additional strain. The genome sizes of strains JC737[T] and JC740 were 6.6 and 6.7 Mb, respectively, and the G + C content was 56.2 \%. The strains cladded distinctly in the phylogenomic tree, and the ANI and dDDH values of the strain JC737[T] were 75.8-76.1 \% and 20.8-21.3 \%, respectively, in comparison to other Rhodopirellula members. The strain demonstrated a versatile degradation capability, exhibiting a diverse array of complex polysaccharides, including mucin which had not been previously identified within the members of the phylum Planctomycetota. The phylogenomic, pan-genomic, morphological, physiological, and genomic characterization of the strain lead to the proposal to describe the strain as Rhodopirellula halodulae sp. nov.},
}
@article{pmid38890994,
  year     = {2024},
  author   = {Sung, K. and Nawaz, M. and Park, M. and Chon, J. and Khan, S. A. and Alotaibi, K. and Revollo, J. and Miranda, J. A. and Khan, A. A.},
  title    = {Whole-Genome Sequence Analysis of Antibiotic Resistance, Virulence, and Plasmid Dynamics in Multidrug-Resistant {E. coli} Isolates from Imported Shrimp.},
  journal  = {Foods (Basel, Switzerland)},
  volume   = {13},
  number   = {11},
  pmid     = {38890994},
  issn     = {2304-8158},
  support  = {E0782801//United States Food and Drug Administration/ ; },
  abstract = {We analyzed antimicrobial resistance and virulence traits in multidrug-resistant (MDR) E. coli isolates obtained from imported shrimp using whole-genome sequences (WGSs). Antibiotic resistance profiles were determined phenotypically. WGSs identified key characteristics, including their multilocus sequence type (MLST), serotype, virulence factors, antibiotic resistance genes, and mobile elements. Most of the isolates exhibited resistance to gentamicin, streptomycin, ampicillin, chloramphenicol, nalidixic acid, ciprofloxacin, tetracycline, and trimethoprim/sulfamethoxazole. Multilocus sequence type (MLST), serotype, average nucleotide identity (ANI), and pangenome analysis showed high genomic similarity among isolates, except for EC15 and ECV01. The EC119 plasmid contained a variety of efflux pump genes, including those encoding the acid resistance transcriptional activators (gadE, gadW, and gadX), resistance-nodulation-division-type efflux pumps (mdtE and mdtF), and a metabolite, H1 symporter (MHS) family major facilitator superfamily transporter (MNZ41\_23075). Virulence genes displayed diversity, particularly EC15, whose plasmids carried genes for adherence (faeA and faeC-I), invasion (ipaH and virB), and capsule (caf1A and caf1M). This comprehensive analysis illuminates antimicrobial resistance, virulence, and plasmid dynamics in E.
coli from imported shrimp and has profound implications for public health, emphasizing the need for continued surveillance and research into the evolution of these important bacterial pathogens.},
}
@article{pmid38885946,
  year     = {2024},
  author   = {Jackson, T. K. and Rhode, C.},
  title    = {Comparative genomics of dusky kob ({Argyrosomus} japonicus, {Sciaenidae}) conspecifics: Evidence for speciation and the genetic mechanisms underlying traits.},
  journal  = {Journal of fish biology},
  doi      = {10.1111/jfb.15844},
  pmid     = {38885946},
  issn     = {1095-8649},
  support  = {MCR180616347589//National Research Foundation/ ; },
  abstract = {Dusky kob (Argyrosomus japonicus) is a commercially important finfish, indigenous to South Africa, Australia, and China. Previous studies highlighted differences in genetic composition, life history, and morphology of the species across geographic regions. A draft genome sequence of 0.742 Gb (N50 = 5.49 Mb; BUSCO completeness = 97.8\%) and 22,438 predicted protein-coding genes was generated for the South African (SA) conspecific. A comparison with the Chinese (CN) conspecific revealed a core set of 32,068 orthologous protein clusters across both genomes. The SA genome exhibited 440 unique clusters compared to 1928 unique clusters in the CN genome. Transportation and immune response processes were overrepresented among the SA accessory genome, whereas the CN accessory genome was enriched for immune response, DNA transposition, and sensory detection (FDR-adjusted p < 0.01). These unique clusters may represent an adaptive component of the species' pangenome that could explain population divergence due to differential environmental specialisation. Furthermore, 700 single-copy orthologues (SCOs) displayed evidence of positive selection between the SA and CN genomes, and globally these genomes shared only 92\% similarity, suggesting they might be distinct species. These genes primarily play roles in metabolism and digestion, illustrating the evolutionary pathways that differentiate the species.
Understanding these genomic mechanisms underlying adaptation and evolution within and between species provides valuable insights into growth and maturation of kob, traits that are particularly relevant to commercial aquaculture.},
}
@article{pmid38882499,
  year     = {2024},
  author   = {Lin, X. and Hu, T. and Wu, Z. and Li, L. and Wang, Y. and Wen, D. and Liu, X. and Li, W. and Liang, H. and Jin, X. and Xu, X. and Wang, J. and Yang, H. and Kristiansen, K. and Xiao, L. and Zou, Y.},
  title    = {Isolation of potentially novel species expands the genomic and functional diversity of {Lachnospiraceae}.},
  journal  = {iMeta},
  volume   = {3},
  number   = {2},
  pages    = {e174},
  pmid     = {38882499},
  issn     = {2770-596X},
  abstract = {The Lachnospiraceae family holds promise as a source of next-generation probiotics, yet a comprehensive delineation of its diversity is lacking, hampering the identification of suitable strains for future applications. To address this knowledge gap, we conducted an in-depth genomic and functional analysis of 1868 high-quality genomes, combining data from public databases with our new isolates. This data set represented 387 colonization-selective species-level clusters, of which eight genera represented multilineage clusters. Pan-genome analysis, single-nucleotide polymorphism (SNP) identification, and probiotic functional predictions revealed that species taxonomy, habitats, and geography together shape the functional diversity of Lachnospiraceae. Moreover, analyses of associations with atherosclerotic cardiovascular disease (ACVD) and inflammatory bowel disease (IBD) indicated that several strains of potentially novel Lachnospiraceae species possess the capacity to reduce the abundance of opportunistic pathogens, thereby imparting potential health benefits. Our findings shed light on the untapped potential of novel species enabling knowledge-based selection of strains for the development of next-generation probiotics holding promise for improving human health and disease management.},
}
@article{pmid38873647,
  year     = {2024},
  author   = {Chanket, W. and Pipatthana, M. and Sangphukieo, A. and Harnvoravongchai, P. and Chankhamhaengdecha, S. and Janvilisri, T. and Phanchana, M.},
  title    = {The complete catalog of antimicrobial resistance secondary active transporters in {Clostridioides} difficile: evolution and drug resistance perspective.},
  journal  = {Computational and structural biotechnology journal},
  volume   = {23},
  pages    = {2358--2374},
  doi      = {10.1016/j.csbj.2024.05.027},
  pmid     = {38873647},
  issn     = {2001-0370},
  abstract = {Secondary active transporters shuttle substrates across eukaryotic and prokaryotic membranes, utilizing different electrochemical gradients. They are recognized as one of the antimicrobial efflux pumps among pathogens. While primary active transporters within the genome of C. difficile 630 have been completely cataloged, the systematical study of secondary active transporters remains incomplete. Here, we not only identify secondary active transporters but also disclose their evolution and role in drug resistance in C. difficile 630. Our analysis reveals that C. difficile 630 carries 147 secondary active transporters belonging to 27 (super)families. Notably, 50 (34\%) of them potentially contribute to antimicrobial resistance (AMR). AMR-secondary active transporters are structurally classified into five (super)families: the p-aminobenzoyl-glutamate transporter (AbgT), drug/metabolite transporter (DMT) superfamily, major facilitator (MFS) superfamily, multidrug and toxic compound extrusion (MATE) family, and resistance-nodulation-division (RND) family. Surprisingly, complete RND genes found in C. difficile 630 are likely an evolutionary leftover from the common ancestor with the diderm. Through protein structure comparisons, we have potentially identified six novel AMR-secondary active transporters from DMT, MATE, and MFS (super)families.
Pangenome analysis revealed that half of the AMR-secondary transporters are accessory genes, which indicates an important role in adaptive AMR function rather than innate physiological homeostasis. Gene expression profile firmly supports their ability to respond to a wide spectrum of antibiotics. Our findings highlight the evolution of AMR-secondary active transporters and their integral role in antibiotic responses. This marks AMR-secondary active transporters as interesting therapeutic targets to synergize with other antibiotic activity.},
}
@article{pmid38873165,
  year     = {2024},
  author   = {Li, W. and Lin, X. and Liang, H. and Wu, Z. and Wang, M. and Sun, J. and Li, X. and He, W. and Gao, X. and Hu, T. and Xiao, L. and Zou, Y.},
  title    = {Genomic and functional diversity of the human-derived isolates of {Faecalibacterium}.},
  journal  = {Frontiers in microbiology},
  volume   = {15},
  pages    = {1379500},
  doi      = {10.3389/fmicb.2024.1379500},
  pmid     = {38873165},
  issn     = {1664-302X},
  abstract = {INTRODUCTION: Faecalibacterium is one of the most abundant bacteria in the gut microbiota of healthy adults, highly regarded as a next-generation probiotic. However, the functions of Faecalibacterium genomes from cultured strains and the distribution of different species in populations may differ among different sources.
METHODS: We here performed an extensive analysis of pan-genomes, functions, and safety evaluation of 136 Faecalibacterium genomes collected from 10 countries.
RESULTS: The genomes are clustered into 11 clusters, with only five of them were characterized and validly nomenclated. Over 80\% of the accessory genes and unique genes of Faecalibacterium are found with unknown function, which reflects the importance of expanding the collection of Faecalibacterium strains. All the genomes have the potential to produce acetic acid and butyric acid. Nine clusters of Faecalibacterium are found significantly enriched in the healthy individuals compared with patients with type II diabetes.
DISCUSSION: This study provides a comprehensive view of genomic characteristic and functions and of culturable Faecalibacterium bacterium from human gut, and enables clinical advances in the future.},
}
@article{pmid38872883,
  year     = {2024},
  author   = {Logachev, A. and Kanapin, A. and Rozhmina, T. and Stanin, V. and Bankin, M. and Samsonova, A. and Orlova, E. and Samsonova, M.},
  title    = {Pangenomics of flax fungal parasite {Fusarium} oxysporum f. sp. lini.},
  journal  = {Frontiers in plant science},
  volume   = {15},
  pages    = {1383914},
  doi      = {10.3389/fpls.2024.1383914},
  pmid     = {38872883},
  issn     = {1664-462X},
  abstract = {To assess the genomic diversity of Fusarium oxysporum f. sp. lini strains and compile a comprehensive gene repertoire, we constructed a pangenome using 13 isolates from four different clonal lineages, each exhibiting distinct levels of virulence. Syntenic analyses of two selected genomes revealed significant chromosomal rearrangements unique to each genome. A comprehensive examination of both core and accessory pangenome content and diversity points at an open genome state. Additionally, Gene Ontology (GO) enrichment analysis indicated that non-core pangenome genes are associated with pathogen recognition and immune signaling. Furthermore, the Folini pansecterome, encompassing secreted proteins critical for fungal pathogenicity, primarily consists of three functional classes: effector proteins, CAZYmes, and proteases. These three classes account for approximately 3.5\% of the pangenome. Each functional class within the pansecterome was meticulously annotated and characterized with respect to pangenome category distribution, PFAM domain frequency, and strain virulence assessment. This analysis revealed that highly virulent isolates have specific types of PFAM domains that are exclusive to them. Upon examining the repertoire of SIX genes known for virulence in other formae speciales, it was found that all isolates had a similar gene content except for two, which lacked SIX genes entirely.},
}
@article {pmid38872506, year = {2024}, author = {Tariq, A and Meng, M and Jiang, X and Bolger, A and Beier, S and Buchmann, JP and Fernie, AR and Wen, W and Usadel, B}, title = {In-depth exploration of the genomic diversity in tea varieties based on a newly constructed pangenome of Camellia sinensis.}, journal = {The Plant journal : for cell and molecular biology}, volume = {}, number = {}, pages = {}, doi = {10.1111/tpj.16874}, pmid = {38872506}, issn = {1365-313X}, support = {390686111//Deutsche Forschungsgemeinschaft/ ; 468870408//Deutsche Forschungsgemeinschaft/ ; 3211101118//National Natural Science Foundation of China/ ; U23A20213//National Natural Science Foundation of China/ ; 2662023PY011//Fundamental Research Funds for the Central Universities/ ; }, abstract = {Tea, one of the most widely consumed beverages globally, exhibits remarkable genomic diversity in its underlying flavour and health-related compounds. In this study, we present the construction and analysis of a tea pangenome comprising a total of 11 genomes, with a focus on three newly sequenced genomes comprising the purple-leaved assamica cultivar "Zijuan", the temperature-sensitive sinensis cultivar "Anjibaicha" and the wild accession "L618" whose assemblies exhibited excellent quality scores as they profited from latest sequencing technologies. Our analysis incorporates a detailed investigation of transposon complement across the tea pangenome, revealing shared patterns of transposon distribution among the studied genomes and improved transposon resolution with long read technologies, as shown by long terminal repeat (LTR) Assembly Index analysis. Furthermore, our study encompasses a gene-centric exploration of the pangenome, exploring the genomic landscape of the catechin pathway with our study, providing insights on copy number alterations and gene-centric variants, especially for Anthocyanidin synthases. 
We constructed a gene-centric pangenome by structurally and functionally annotating all available genomes using an identical pipeline, which both increased gene completeness and allowed for a high functional annotation rate. This improved and consistently annotated gene set will allow for a better comparison between tea genomes. We used this improved pangenome to capture the core and dispensable gene repertoire, elucidating the functional diversity present within the tea species. This pangenome resource might serve as a valuable resource for understanding the fundamental genetic basis of traits such as flavour, stress tolerance, and disease resistance, with implications for tea breeding programmes.}, }
@article {pmid38871498, year = {2024}, author = {Doukbi, E and Ancel, P and Dutour, A and Soghomonian, A and Ahmed, S and Castejon, V and Piperoglou, C and Gariboldi, V and Lenoir, M and Lechevallier, E and Gondran-Tellier, B and Boissier, R and Ebbo, M and Vély, F and Gaborit, B}, title = {Human epicardial adipose tissue contains innate and adaptive lymphoid cells and a higher proportion of innate type 2 lymphoid cells compared to other adipose tissues.}, journal = {Annales d'endocrinologie}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.ando.2024.05.009}, pmid = {38871498}, issn = {2213-3941}, abstract = {IMPORTANCE: Epicardial adipose tissue (EAT) is a biologically active organ surrounding myocardium and coronary arteries that has been associated with coronary artery disease (CAD) and atrial fibrillation. Previous work has shown that EAT exhibits beige features.
OBJECTIVE: Our objective was to determine whether the stromal vascular fraction of the human EAT contains innate or adaptive lymphoid cells compared to thoracic subcutaneous (thSAT), visceral abdominal (VAT) and subcutaneous abdominal (abSAT).
PARTICIPANTS: New pangenomic microarray analysis was performed on previous transcriptomic dataset using significance analysis of microarray and ingenuity pathway analysis (n=41) to identify specific immune signature and its link with browning genes. EAT, thSAT, VAT and abSAT samples from explanted patients with severe cardiomyopathies and multi-organ donor patients (n=17) were used for flow cytometry (FC) immunophenotyping assay. Patients were on average 55±16 years-old; 47% had hypertension and 6% CAD. Phenotypic adaptive and innate immune profiles were performed using a TBNK panel and a specific ILC1-2-3 panel including CD127, CD117, CRTH2 (CD294) and activation markers such as CD25 and CD69.
RESULTS: Transcriptomic analysis showed a significant positive correlation between the TH2 immune pathway (IL-4, IL-5, IL-13, IL-25, IL-33) and browning genes (UCP-1, PRDM16, TMEM26, CITED1, TBX1) in EAT versus thSAT (R=0.82, P<0.0001). Regarding adaptive immune cells, a preponderance of CD8T cells, a contingent of CD4T cells, and a few B cells were observed in all ATs (P<0.0001). In innate lymphoid cells (ILCs), an increase was observed in visceral ATs (i.e. EAT; VAT 35±8ILCs/g of tissue) compared to their subcutaneous counterpart (i.e. thSAT+abSAT: 8±3 ILCs/g of AT, P=0.002), with a difference in the proportion of the 3 subtypes of ILCs (ILC1>ILC3>ILC2). In addition, we observed an increase in EAT-ILC2 compared to other ATs and almost all these EAT-ILC2 expressed CD69 and/or CD25 activation markers (99.75±0.16%; P<0.0001). We also observed more NKs in EAT and VAT (1520±71 cells/g of AT) than in SATs (562±17 cells/g of AT); P=0.01.
CONCLUSION: This is the first study to provide a comparison between innate and adaptive lymphoid cells in human epicardial versus abdominal or thoracic adipose tissues. Further studies are ongoing to decipher whether these cells could be involved in EAT beiging.
TRIAL REGISTRATION: CODECOH No. DC-2021-4518 The French agency of biomedicine PFS21-005.}, }
@article {pmid38868520, year = {2024}, author = {Wang, K and Hua, G and Li, J and Yang, Y and Zhang, C and Yang, L and Hu, X and Scheben, A and Wu, Y and Gong, P and Zhang, S and Fan, Y and Zeng, T and Lu, L and Gong, Y and Jiang, R and Sun, G and Tian, Y and Kang, X and Hu, H and Li, W}, title = {Duck pan-genome reveals two transposon insertions caused bodyweight enlarging and white plumage phenotype formation during evolution.}, journal = {iMeta}, volume = {3}, number = {1}, pages = {e154}, doi = {10.1002/imt2.154}, pmid = {38868520}, issn = {2770-596X}, abstract = {Structural variations (SVs) are a major source of domestication and improvement traits. We present the first duck pan-genome constructed using five genome assemblies capturing ∼40.98 Mb new sequences. This pan-genome together with high-depth sequencing data (∼46.5×) identified 101,041 SVs, of which substantial proportions were derived from transposable element (TE) activity. Many TE-derived SVs anchoring in a gene body or regulatory region are linked to duck's domestication and improvement. By combining quantitative genetics with molecular experiments, we, for the first time, unraveled a 6945 bp Gypsy insertion as a functional mutation of the major gene IGF2BP1 associated with duck bodyweight. This Gypsy insertion, to our knowledge, explains the largest effect on bodyweight among avian species (27.61% of phenotypic variation). In addition, we also examined another 6634 bp Gypsy insertion in MITF intron, which triggers a novel transcript of MITF, thereby contributing to the development of white plumage. Our findings highlight the importance of using a pan-genome as a reference in genomics studies and illuminate the impact of transposons in trait formation and livestock breeding.}, }
@article {pmid38867900, year = {2022}, author = {Liu, D and Zhang, Y and Fan, G and Sun, D and Zhang, X and Yu, Z and Wang, J and Wu, L and Shi, W and Ma, J}, title = {IPGA: A handy integrated prokaryotes genome and pan-genome analysis web service.}, journal = {iMeta}, volume = {1}, number = {4}, pages = {e55}, doi = {10.1002/imt2.55}, pmid = {38867900}, issn = {2770-596X}, abstract = {Pan-genomics is one of the most powerful means to study genomic variation and obtain a sketch of genes within a defined clade of species. Though there are a lot of computational tools to achieve this, an integrated framework to evaluate their performance and offer the best choice to users has never been achieved. To ease the process of large-scale prokaryotic genome analysis, we introduce Integrated Prokaryotes Genome and pan-genome Analysis (IPGA), a one-stop web service to analyze, compare, and visualize pan-genome as well as individual genomes, that rids users of installing any specific tools. IPGA features a scoring system that helps users to evaluate the reliability of pan-genome profiles generated by different packages. Thus, IPGA can help users ascertain the profiling method that is most suitable for their data set for the following analysis. In addition, IPGA integrates several downstream comparative analysis and genome analysis modules to make users achieve diverse targets.}, }
@article {pmid38868710, year = {2022}, author = {Hu, H and Tan, Y and Li, C and Chen, J and Kou, Y and Xu, ZZ and Liu, YY and Tan, Y and Dai, L}, title = {StrainPanDA: Linked reconstruction of strain composition and gene content profiles via pangenome-based decomposition of metagenomic data.}, journal = {iMeta}, volume = {1}, number = {3}, pages = {e41}, doi = {10.1002/imt2.41}, pmid = {38868710}, issn = {2770-596X}, abstract = {Microbial strains of variable functional capacities coexist in microbiomes. Current bioinformatics methods of strain analysis cannot provide the direct linkage between strain composition and their gene contents from metagenomic data. Here we present Strain-level Pangenome Decomposition Analysis (StrainPanDA), a novel method that uses the pangenome coverage profile of multiple metagenomic samples to simultaneously reconstruct the composition and gene content variation of coexisting strains in microbial communities. We systematically validate the accuracy and robustness of StrainPanDA using synthetic data sets. To demonstrate the power of gene-centric strain profiling, we then apply StrainPanDA to analyze the gut microbiome samples of infants, as well as patients treated with fecal microbiota transplantation. We show that the linked reconstruction of strain composition and gene content profiles is critical for understanding the relationship between microbial adaptation and strain-specific functions (e.g., nutrient utilization and pathogenicity). Finally, StrainPanDA has minimal requirements for computing resources and can be scaled to process multiple species in a community in parallel. In short, StrainPanDA can be applied to metagenomic data sets to detect the association between molecular functions and microbial/host phenotypes to formulate testable hypotheses and gain novel biological insights at the strain or subspecies level.}, }
@article {pmid38866938, year = {2024}, author = {Jia, M and Zhu, S and Xue, MY and Chen, H and Xu, J and Song, M and Tang, Y and Liu, X and Tao, Y and Zhang, T and Liu, JX and Wang, Y and Sun, HZ}, title = {Single-cell transcriptomics across 2,534 microbial species reveals functional heterogeneity in the rumen microbiome.}, journal = {Nature microbiology}, volume = {}, number = {}, pages = {}, pmid = {38866938}, issn = {2058-5276}, abstract = {Deciphering the activity of individual microbes within complex communities and environments remains a challenge. Here we describe the development of microbiome single-cell transcriptomics using droplet-based single-cell RNA sequencing and pangenome-based computational analysis to characterize the functional heterogeneity of the rumen microbiome. We generated a microbial genome database (the Bovine Gastro Microbial Genome Map) as a functional reference map for the construction of a single-cell transcriptomic atlas of the rumen microbiome. The atlas includes 174,531 microbial cells and 2,534 species, of which 172 are core active species grouped into 12 functional clusters. We detected single-cell-level functional roles, including a key role for Basfia succiniciproducens in the carbohydrate metabolic niche of the rumen microbiome. Furthermore, we explored functional heterogeneity and reveal metabolic niche trajectories driven by biofilm formation pathway genes within B. succiniciproducens. Our results provide a resource for studying the rumen microbiome and illustrate the diverse functions of individual microbial cells that drive their ecological niche stability or adaptation within the ecosystem.}, }
@article {pmid38866876, year = {2024}, author = {Yu, D and Stothard, P and Neumann, NF}, title = {Emergence of potentially disinfection-resistant, naturalized Escherichia coli populations across food- and water-associated engineered environments.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {13478}, pmid = {38866876}, issn = {2045-2322}, mesh = {*Escherichia coli/genetics ; *Wastewater/microbiology ; *Phylogeny ; Disinfection/methods ; Water Microbiology ; Food Microbiology ; Multilocus Sequence Typing ; Biofilms/growth & development ; }, abstract = {The Escherichia coli species is comprised of several 'ecotypes' inhabiting a wide range of host and natural environmental niches. Recent studies have suggested that novel naturalized ecotypes have emerged across wastewater treatment plants and meat processing facilities. Phylogenetic and multilocus sequence typing analyses clustered naturalized wastewater and meat plant E. coli strains into two main monophyletic clusters corresponding to the ST635 and ST399 sequence types, with several serotypes identified by serotyping, potentially representing distinct lineages that have naturalized across wastewater treatment plants and meat processing facilities. This evidence, taken alongside ecotype prediction analyses that distinguished the naturalized strains from their host-associated counterparts, suggests these strains may collectively represent a novel ecotype that has recently emerged across food- and water-associated engineered environments. Interestingly, pan-genomic analyses revealed that the naturalized strains exhibited an abundance of biofilm formation, defense, and disinfection-related stress resistance genes, but lacked various virulence and colonization genes, indicating that their naturalization has come at the cost of fitness in the original host environment.}, }
@article {pmid38864630, year = {2024}, author = {Deschner, D and Voordouw, MJ and Fernando, C and Campbell, J and Waldner, CL and Hill, JE}, title = {Identification of genetic markers of resistance to macrolide class antibiotics in Mannheimia haemolytica isolates from a Saskatchewan feedlot.}, journal = {Applied and environmental microbiology}, volume = {}, number = {}, pages = {e0050224}, doi = {10.1128/aem.00502-24}, pmid = {38864630}, issn = {1098-5336}, abstract = {Mannheimia haemolytica is a major contributor to bovine respiratory disease (BRD), which causes substantial economic losses to the beef industry, and there is an urgent need for rapid and accurate diagnostic tests to provide evidence for treatment decisions and support antimicrobial stewardship. Diagnostic sequencing can provide information about antimicrobial resistance genes in M. haemolytica more rapidly than conventional diagnostics. Realizing the full potential of diagnostic sequencing requires a comprehensive understanding of the genetic markers of antimicrobial resistance. We identified genetic markers of resistance in M. haemolytica to macrolide class antibiotics commonly used for control of BRD. Genome sequences were determined for 99 M. haemolytica isolates with six different susceptibility phenotypes collected over 2 years from a feedlot in Saskatchewan, Canada. Known macrolide resistance genes estT, msr(E), and mph(E) were identified in most resistant isolates within predicted integrative and conjugative elements (ICEs). ICE sequences lacking antibiotic resistance genes were detected in 10 of 47 susceptible isolates. No resistance-associated polymorphisms were detected in ribosomal RNA genes, although previously unreported mutations in the L22 and L23 ribosomal proteins were identified in 12 and 27 resistant isolates, respectively. Pangenome analysis led to the identification of 79 genes associated with resistance to gamithromycin, of which 95% (75 of 79) had no functional annotation. 
Most of the observed phenotypic resistance was explained by previously identified antibiotic resistance genes, although resistance to the macrolides gamithromycin and tulathromycin was not explained in 39 of 47 isolates, demonstrating the need for continued surveillance for novel determinants of macrolide resistance.
IMPORTANCE: Bovine respiratory disease is the costliest disease of beef cattle in North America and the most common reason for injectable antibiotic use in beef cattle. Metagenomic sequencing offers the potential to make economically significant reductions in turnaround time for diagnostic information for evidence-based selection of antibiotics for use in the feedlot. The success of diagnostic sequencing depends on a comprehensive catalog of antimicrobial resistance genes and other genome features associated with reduced susceptibility. We analyzed the genome sequences of isolates of Mannheimia haemolytica, a major bovine respiratory disease pathogen, and identified both previously known and novel genes associated with reduced susceptibility to macrolide class antimicrobials. These findings reinforce the need for ongoing surveillance for markers of antimicrobial resistance to support improved diagnostics and antimicrobial stewardship.}, }
@article {pmid38859586, year = {2024}, author = {Li, Q and Qiao, X and Li, L and Gu, C and Yin, H and Qi, K and Xie, Z and Yang, S and Zhao, Q and Wang, Z and Yang, Y and Pan, J and Li, H and Wang, J and Wang, C and Rieseberg, LH and Zhang, S and Tao, S}, title = {Haplotype-resolved T2T genome assemblies and pangenome graph of pear reveal diverse patterns of allele-specific expression and genomic basis of fruit quality traits.}, journal = {Plant communications}, volume = {}, number = {}, pages = {101000}, doi = {10.1016/j.xplc.2024.101000}, pmid = {38859586}, issn = {2590-3462}, abstract = {Hybrid crops often exhibit increased yield and greater resilience, yet the genomic mechanism(s) underlying hybrid vigor or heterosis remain unclear, hindering our ability to predict the expression of phenotypic traits in hybrid breeding. Here, we generated haplotype-resolved T2T genome assemblies of two pear hybrid varieties 'Yuluxiangli' (YLX) and 'Hongxiangsu' (HXS) that share the same maternal parent, but differ in their paternal parents. We then used these assemblies to explore genome-scale landscape of allele-specific expression and create a pangenome graph for pear. Allele specific expression (ASE) was observed for close to 6000 genes in both hybrid cultivars. A subset of ASEGs related to fruit quality including sugar, organic acid and cuticular wax were identified, suggesting their important contributions to heterosis. Specifically, Ma1, a gene regulating fruit acidity, was absent in the paternal haplotypes of HXS and YLX. Further, a pangenome graph was built based on our assemblies and eight published pear genomes. Resequencing data for 139 cultivated pear genotypes (including 97 genotypes sequenced here) were subsequently aligned to the pangenome graph, revealing numerous SV hotspots and selective sweeps during pear diversification. 
As predicted, the Ma1 allele was found to be absent in varieties with low organic acid content, an association that was functionally validated by Ma1 over-expression in pear fruit and calli. Overall, the results unraveled contributions of allele-specific expression to heterosis involving fruit quality and provided a robust pangenome reference for high resolution allele discovery and association mapping.}, }
@article {pmid38855144, year = {2024}, author = {Roy, A and Swetha, RG and Basu, S and Biswas, R and Ramaiah, S and Anbarasu, A}, title = {Integrating pan-genome and reverse vaccinology to design multi-epitope vaccine against Herpes simplex virus type-1.}, journal = {3 Biotech}, volume = {14}, number = {7}, pages = {176}, pmid = {38855144}, issn = {2190-572X}, abstract = {UNLABELLED: Herpes simplex virus type-1 (HSV-1), the etiological agent of sporadic encephalitis and recurring oral (sometimes genital) infections in humans, affects millions each year. The evolving viral genome reduces susceptibility to existing antivirals and, thus, necessitates new therapeutic strategies. Immunoinformatics strategies have shown promise in designing novel vaccine candidates in the absence of a clinically licensed vaccine to prevent HSV-1. However, to encourage clinical translation, the HSV-1 pan-genome was integrated with the reverse-vaccinology pipeline for rigorous screening of universal vaccine candidates. Viral targets were screened from 104 available complete genomes. Among 364 proteins, envelope glycoprotein D being an outer membrane protein with a high antigenicity score (> 0.4) and solubility (> 0.6) was selected for epitope screening. A total of 17 T-cell and 4 B-cell epitopes with highly antigenic, immunogenic, non-toxic properties and high global population coverage were identified. Furthermore, 8 vaccine constructs were designed using different combinations of epitopes and suitable linkers. VC-8 was identified as the most potential vaccine candidate regarding chemical and structural stability. Molecular docking revealed high interactive affinity (low binding energy: -56.25 kcal/mol) of VC-8 with the target elicited by firm intermolecular H-bonds, salt-bridges, and hydrophobic interactions, which was validated with simulations. Compatibility of the vaccine candidate to be expressed in pET-29(a) + plasmid was established by in silico cloning studies. 
Immune simulations confirmed the potential of VC-8 to trigger robust B-cell, T-cell, cytokine, and antibody-mediated responses, thereby suggesting a promising candidate for the future of HSV-1 prevention.
SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s13205-024-04022-6.}, }
@article {pmid38854079, year = {2024}, author = {Depuydt, L and Renders, L and Van de Vyver, S and Veys, L and Gagie, T and Fostier, J}, title = {b-move: faster bidirectional character extensions in a run-length compressed index.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.05.30.596587}, pmid = {38854079}, abstract = {UNLABELLED: Due to the increasing availability of high-quality genome sequences, pan-genomes are gradually replacing single consensus reference genomes in many bioinformatics pipelines to better capture genetic diversity. Traditional bioinformatics tools using the FM-index face memory limitations with such large genome collections. Recent advancements in run-length compressed indices like Gagie et al.'s r-index and Nishimoto and Tabei's move structure, alleviate memory constraints but focus primarily on backward search for MEM-finding. Arakawa et al.'s br-index initiates complete approximate pattern matching using bidirectional search in run-length compressed space, but with significant computational overhead due to complex memory access patterns. We introduce b-move, a novel bidirectional extension of the move structure, enabling fast, cache-efficient bidirectional character extensions in run-length compressed space. It achieves bidirectional character extensions up to 8 times faster than the br-index, closing the performance gap with FM-index-based alternatives, while maintaining the br-index's favorable memory characteristics. For example, all available complete E. coli genomes on NCBI's RefSeq collection can be compiled into a b-move index that fits into the RAM of a typical laptop. Thus, b-move proves practical and scalable for pan-genome indexing and querying. We provide a C++ implementation of b-move, supporting efficient lossless approximate pattern matching including locate functionality, available at https://github.com/biointec/b-move under the AGPL-3.0 license.
FUNDING: Lore Depuydt : PhD Fellowship FR (1117322N), Research Foundation - Flanders (FWO) Luca Renders : PhD Fellowship SB (1SE7822N), Research Foundation - Flanders (FWO) Travis Gagie : NSERC Discovery Grant RGPIN-07185-2020 to Travis Gagie and NIH grant R01HG011392 to Ben Langmead.}, }
@article {pmid38853857, year = {2024}, author = {Park, A and Koslicki, D}, title = {Prokrustean Graph: A substring index supporting rapid enumeration across a range of k-mer sizes.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.11.21.568151}, pmid = {38853857}, abstract = {UNLABELLED: Despite the widespread adoption of k-mer-based methods in bioinformatics, a fundamental question persists: How can we quantify the influence of k sizes in applications? With no universal answer available, choosing an optimal k size or employing multiple k sizes remains application-specific, arbitrary, and computationally expensive. The assessment of the primary parameter k is typically empirical, based on the end products of applications which pass complex processes of genome analysis, comparison, assembly, alignment, and error correction. The elusiveness of the problem stems from a limited understanding of the transitions of k-mers with respect to k sizes. Indeed, there is considerable room for improving both practice and theory by exploring k-mer-specific quantities across multiple k sizes. This paper introduces an algorithmic framework built upon a novel substring representation: the Prokrustean graph. The primary functionality of this framework is to extract various k-mer-based quantities across a range of k sizes, but its computational complexity depends only on maximal repeats, not on the k range. For example, counting maximal unitigs of de Bruijn graphs for k = 10, …, 100 takes just a few seconds with a Prokrustean graph built on a read set of gigabases in size. This efficiency sets the graph apart from other substring indices, such as the FM-index, which are normally optimized for string pattern searching rather than for depicting the substring structure across varying lengths. 
However, the Prokrustean graph is expected to close this gap, as it can be built using the extended Burrows-Wheeler Transform (eBWT) in a space-efficient manner. The framework is particularly useful in pangenome and metagenome analyses, where the demand for precise multi-k approaches is increasing due to the complex and diverse nature of the information being managed. We introduce four applications implemented with the framework that extract key quantities actively utilized in modern pangenomics and metagenomics. Code implementing our data structure and algorithms (along with correctness tests) is available at https://github.com/KoslickiLab/prokrustean .
ACM SUBJECT CLASSIFICATION: 2012 Applied computing → Computational biology.
DIGITAL OBJECT IDENTIFIER: 10.4230/LIPIcs.WABI.2024.YY.
SUPPLEMENTARY MATERIAL: https://github.com/KoslickiLab/prokrustean.}, }
@article {pmid38850255, year = {2024}, author = {Tkalec, KI and Hayes, AJ and Lim, KS and Lewis, JM and Davies, MR and Scott, NE}, title = {Glycan-Tailored Glycoproteomic Analysis Reveals Serine is the Sole Residue Subjected to O-Linked Glycosylation in Acinetobacter baumannii.}, journal = {Journal of proteome research}, volume = {}, number = {}, pages = {}, doi = {10.1021/acs.jproteome.4c00148}, pmid = {38850255}, issn = {1535-3907}, abstract = {Protein glycosylation is a ubiquitous process observed across all domains of life. Within the human pathogen Acinetobacter baumannii, O-linked glycosylation is required for virulence; however, the targets and conservation of glycosylation events remain poorly defined. In this work, we expand our understanding of the breadth and site specificity of glycosylation within A. baumannii by demonstrating the value of strain specific glycan electron-transfer/higher-energy collision dissociation (EThcD) triggering for bacterial glycoproteomics. By coupling tailored EThcD-triggering regimes to complementary glycopeptide enrichment approaches, we assessed the observable glycoproteome of three A. baumannii strains (ATCC19606, BAL062, and D1279779). Combining glycopeptide enrichment techniques including ion mobility (FAIMS), metal oxide affinity chromatography (titanium dioxide), and hydrophilic interaction liquid chromatography (ZIC-HILIC), as well as the use of multiple proteases (trypsin, GluC, pepsin, and thermolysis), we expand the known A. baumannii glycoproteome to 33 unique glycoproteins containing 42 glycosylation sites. We demonstrate that serine is the sole residue subjected to glycosylation with the substitution of serine for threonine abolishing glycosylation in model glycoproteins. An A. baumannii pan-genome built from 576 reference genomes identified that serine glycosylation sites are highly conserved. Combined this work expands our knowledge of the conservation and site specificity of A. 
baumannii O-linked glycosylation.}, }
@article {pmid38843609, year = {2024}, author = {Bouzid, N and Bugada, M and Pissaloux, D and Burillon, C and Tirode, F and Barbier, J and de la Fouchardière, A and Kielwasser, G}, title = {An orbital perivascular epithelioid cell tumor (PEComa) in a 9-year-old boy: Case report and review of the literature.}, journal = {Journal francais d'ophtalmologie}, volume = {47}, number = {7}, pages = {104215}, doi = {10.1016/j.jfo.2024.104215}, pmid = {38843609}, issn = {1773-0597}, abstract = {Perivascular epithelioid cell tumors (PEComas) are a family of benign neoplasms characterized by smooth muscle and melanocytic differentiation. Orbital cases are rare. A 9-year-old male presented with a slowly growing orbital mass. Magnetic resonance imaging (MRI) revealed a well-defined orbital mass without intracranial extension. The microscopic appearance of the complete resection specimen showed large nests of epithelioid cells with wide cytoplasm containing melanin pigment and round to oval nuclei with mild cytonuclear atypia and low mitotic activity. Immunohistochemistry was positive for HMB45 and negative for melanA, smooth muscle actin, desmin and S-100 protein. Pangenomic RNA-sequencing identified an in-frame NONO-TFE3 rearrangement, and clustering data showed that the tumor's gene expression profile was grouped with other previously studied PEComas. A diagnosis of orbital pigmented PEComa with uncertain malignant potential associated with a NONO-TFE3 rearrangement was made. There was no recurrence after 1 year of follow-up.}, }
% Klepa et al. 2024, Microbiology spectrum. Identified by article number (pages); no volume or issue data is available for this record.
@article{pmid38842312,
  year     = {2024},
  author   = {Klepa, MS and diCenzo, GC and Hungria, M},
  title    = {Comparative genomic analysis of Bradyrhizobium strains with natural variability in the efficiency of nitrogen fixation, competitiveness, and adaptation to stressful edaphoclimatic conditions.},
  journal  = {Microbiology spectrum},
  pages    = {e0026024},
  doi      = {10.1128/spectrum.00260-24},
  pmid     = {38842312},
  issn     = {2165-0497},
  abstract = {Bradyrhizobium is known for fixing atmospheric nitrogen in symbiosis with agronomically important crops. This study focused on two groups of strains, each containing eight natural variants of the parental strains, Bradyrhizobium japonicum SEMIA 586 (=CNPSo 17) or Bradyrhizobium diazoefficiens SEMIA 566 (=CNPSo 10). CNPSo 17 and CNPSo 10 were used as commercial inoculants for soybean crops in Brazil at the beginning of the crop expansion in the southern region in the 1960s-1970s. Variants derived from these parental strains were obtained in the late 1980s through a strain selection program aimed at identifying elite strains adapted to a new cropping frontier in the central-western Cerrado region, with a higher capacity of biological nitrogen fixation (BNF) and competitiveness. Here, we aimed to detect genetic variations possibly related to BNF, competitiveness for nodule occupancy, and adaptation to the stressful conditions of the Brazilian Cerrado soils. High-quality genome assemblies were produced for all strains. The core genome phylogeny revealed that strains of each group are closely related, as confirmed by high average nucleotide identity values. However, variants accumulated divergences resulting from horizontal gene transfer, genomic rearrangements, and nucleotide polymorphisms. The B. japonicum group presented a larger pangenome and a higher number of nucleotide polymorphisms than the B. diazoefficiens group, possibly due to its longer adaptation time to the Cerrado soil. Interestingly, five strains of the B. japonicum group carry two plasmids. The genetic variability found in both groups is discussed considering the observed differences in their BNF capacity, competitiveness for nodule occupancy, and environmental adaptation. IMPORTANCE: Today, Brazil is a global leader in the study and use of biological nitrogen fixation with soybean crops. As Brazilian soils are naturally void of soybean-compatible bradyrhizobia, strain selection programs were established, starting with foreign isolates. Selection searched for adaptation to the local edaphoclimatic conditions, higher efficiency of nitrogen fixation, and strong competitiveness for nodule occupancy. We analyzed the genomes of two parental strains of Bradyrhizobium japonicum and Bradyrhizobium diazoefficiens and eight variant strains derived from each parental strain. We detected two plasmids in five strains and several genetic differences that might be related to adaptation to the stressful conditions of the soils of the Brazilian Cerrado biome. We also detected genetic variations in specific regions that may impact symbiotic nitrogen fixation. Our analysis contributes to new insights into the evolution of Bradyrhizobium, and some of the identified differences may be applied as genetic markers to assist strain selection programs.},
}
% Cui et al. 2024, The New phytologist. No volume, issue, or page data and no abstract are available for this record.
@article{pmid38840572,
  year     = {2024},
  author   = {Cui, Y and Lin, Y and Wei, H and Pan, Y and He, H and Qian, H and Yang, L and Cao, X and Zhang, Z and Zeng, X and Wang, T and He, W and Liu, X and Shi, C and Yuan, Q and Yu, X and Chen, L and Wang, F and Zhu, Y and Qian, Q and Shang, L},
  title    = {Identification of salt tolerance-associated presence-absence variations in the OsMADS56 gene through the integration of DEGs dataset and eQTL analysis.},
  journal  = {The New phytologist},
  doi      = {10.1111/nph.19887},
  pmid     = {38840572},
  issn     = {1469-8137},
  support  = {2023ZD04076//STI2030-Major Projects/ ; 2021R1027005//The Special Project for Public Welfare Research Institute of Fujian Province/ ; 32188102//The National Natural Science Foundation of China/ ; 32301882//The National Natural Science Foundation of China/ ; Y2023QC36//Innovation Program of Chinese Academy of Agricultural Sciences, Youth Innovation of Chinese Academy of Agricultural Sciences/ ; },
}
% Mukhopadhyay et al. 2024, Microbes and environments 39(2). No page data is available for this record.
@article{pmid38839365,
  year     = {2024},
  author   = {Mukhopadhyay, S and Singh, M and Ghosh, MM and Chakrabarti, S and Ganguli, S},
  title    = {Comparative Genomics and Characterization of Shigella flexneri Isolated from Urban Wastewater.},
  journal  = {Microbes and environments},
  volume   = {39},
  number   = {2},
  doi      = {10.1264/jsme2.ME23105},
  pmid     = {38839365},
  issn     = {1347-4405},
  mesh     = {*Shigella flexneri/genetics/isolation & purification/classification/drug effects ; *Genome, Bacterial/genetics ; *Genomics ; *Wastewater/microbiology ; *Anti-Bacterial Agents/pharmacology ; Phylogeny ; Whole Genome Sequencing ; Drug Resistance, Multiple, Bacterial/genetics ; Virulence/genetics ; },
  abstract = {Shigella species are a group of highly transmissible Gram-negative pathogens. Increasing reports of infection with extensively drug-resistant varieties of this stomach bug has convinced the World Health Organization to prioritize Shigella for novel therapeutic interventions. We herein coupled the whole-genome sequencing of a natural isolate of Shigella flexneri with a pangenome analysis to characterize pathogen genomics within this species, which will provide us with an insight into its existing genomic diversity and highlight the root causes behind the emergence of quick vaccine escape variants. The isolated novel strain of S. flexneri contained ~4,500 protein-coding genes, 57 of which imparted resistance to antibiotics. A comparative pan-genomic analysis revealed genomic variability of ~64%, the shared conservation of core genes in central metabolic processes, and the enrichment of unique/accessory genes in virulence and defense mechanisms that contributed to much of the observed antimicrobial resistance (AMR). A pathway analysis of the core genome mapped 22 genes to 2 antimicrobial resistance pathways, with the bulk coding for multidrug efflux pumps and two component regulatory systems that are considered to work synergistically towards the development of resistance phenotypes. The prospective evolvability of Shigella species as witnessed by the marked difference in genomic content, the strain-specific essentiality of unique/accessory genes, and the inclusion of a potent resistance mechanism within the core genome, strengthens the possibility of novel serotypes emerging in the near future and emphasizes the importance of tracking down genomic diversity in drug/vaccine design and AMR governance.},
}
% Islam et al. 2024, Microbial genomics 10(6). No page data is available for this record.
@article{pmid38836744,
  year     = {2024},
  author   = {Islam, MM and Kolling, GL and Glass, EM and Goldberg, JB and Papin, JA},
  title    = {Model-driven characterization of functional diversity of Pseudomonas aeruginosa clinical isolates with broadly representative phenotypes.},
  journal  = {Microbial genomics},
  volume   = {10},
  number   = {6},
  doi      = {10.1099/mgen.0.001259},
  pmid     = {38836744},
  issn     = {2057-5858},
  mesh     = {*Pseudomonas aeruginosa/genetics/isolation & purification ; Humans ; *Phenotype ; *Pseudomonas Infections/microbiology ; *Genotype ; *Metabolic Networks and Pathways/genetics ; Whole Genome Sequencing/methods ; Multilocus Sequence Typing ; Genome, Bacterial ; Genetic Variation ; },
  abstract = {Pseudomonas aeruginosa is a leading cause of infections in immunocompromised individuals and in healthcare settings. This study aims to understand the relationships between phenotypic diversity and the functional metabolic landscape of P. aeruginosa clinical isolates. To better understand the metabolic repertoire of P. aeruginosa in infection, we deeply profiled a representative set from a library of 971 clinical P. aeruginosa isolates with corresponding patient metadata and bacterial phenotypes. The genotypic clustering based on whole-genome sequencing of the isolates, multilocus sequence types, and the phenotypic clustering generated from a multi-parametric analysis were compared to each other to assess the genotype-phenotype correlation. Genome-scale metabolic network reconstructions were developed for each isolate through amendments to an existing PA14 network reconstruction. These network reconstructions show diverse metabolic functionalities and enhance the collective P. aeruginosa pangenome metabolic repertoire. Characterizing this rich set of clinical P. aeruginosa isolates allows for a deeper understanding of the genotypic and metabolic diversity of the pathogen in a clinical setting and lays a foundation for further investigation of the metabolic landscape of this pathogen and host-associated metabolic differences during infection.},
}
% Xue et al. 2024, Briefings in bioinformatics 25(4). No page data is available for this record.
@article{pmid38836702,
  year     = {2024},
  author   = {Xue, Z and Zhou, A and Zhu, X and Li, L and Zhu, H and Jin, X and Wang, J},
  title    = {NIPT-PG: empowering non-invasive prenatal testing to learn from population genomics through an incremental pan-genomic approach.},
  journal  = {Briefings in bioinformatics},
  volume   = {25},
  number   = {4},
  doi      = {10.1093/bib/bbae266},
  pmid     = {38836702},
  issn     = {1477-4054},
  support  = {72293581//National Natural Science Foundation of China/ ; },
  mesh     = {Humans ; Female ; Pregnancy ; *Noninvasive Prenatal Testing/methods ; *Aneuploidy ; Algorithms ; Genomics/methods ; Prenatal Diagnosis/methods ; Sequence Analysis, DNA/methods ; },
  abstract = {Non-invasive prenatal testing (NIPT) is a quite popular approach for detecting fetal genomic aneuploidies. However, due to the limitations on sequencing read length and coverage, NIPT suffers a bottleneck on further improving performance and conducting earlier detection. The errors mainly come from reference biases and population polymorphism. To break this bottleneck, we proposed NIPT-PG, which enables the NIPT algorithm to learn from population data. A pan-genome model is introduced to incorporate variant and polymorphic loci information from tested population. Subsequently, we proposed a sequence-to-graph alignment method, which considers the read mis-match rates during the mapping process, and an indexing method using hash indexing and adjacency lists to accelerate the read alignment process. Finally, by integrating multi-source aligned read and polymorphic sites across the pan-genome, NIPT-PG obtains a more accurate z-score, thereby improving the accuracy of chromosomal aneuploidy detection. We tested NIPT-PG on two simulated datasets and 745 real-world cell-free DNA sequencing data sets from pregnant women. Results demonstrate that NIPT-PG outperforms the standard z-score test. Furthermore, combining experimental and theoretical analyses, we demonstrate the probably approximately correct learnability of NIPT-PG. In summary, NIPT-PG provides a new perspective for fetal chromosomal aneuploidies detection. NIPT-PG may have broad applications in clinical testing, and its detection results can serve as a reference for false positive samples approaching the critical threshold.},
}
% Juby et al. 2024, Current microbiology 81(7):209. This record carries no DOI; it is identified by PMID only.
@article{pmid38834921,
  year     = {2024},
  author   = {Juby, S and Soumya, P and Jayachandran, K and Radhakrishnan, EK},
  title    = {Morphological, Metabolomic and Genomic Evidences on Drought Stress Protective Functioning of the Endophyte Bacillus safensis Ni7.},
  journal  = {Current microbiology},
  volume   = {81},
  number   = {7},
  pages    = {209},
  pmid     = {38834921},
  issn     = {1432-0991},
  mesh     = {*Endophytes/genetics/metabolism/physiology ; *Bacillus/genetics/metabolism/physiology ; *Droughts ; *Stress, Physiological ; *Biofilms/growth & development ; *Genome, Bacterial ; Metabolomics ; Whole Genome Sequencing ; Genomics ; Base Composition ; Capsicum/microbiology ; },
  abstract = {The metabolomic and genomic characterization of an endophytic Bacillus safensis Ni7 was carried out in this study. This strain has previously been isolated from the xerophytic plant Nerium indicum L. and reported to enhance the drought tolerance in Capsicum annuum L. seedlings. The effects of drought stress on the morphology, biofilm production, and metabolite production of B. safensis Ni7 are analyzed in the current study. From the results obtained, the organism was found to have multiple strategies such as aggregation and clumping, robust biofilm production, and increased production of surfactin homologues under the drought induced condition when compared to non-stressed condition. Further the whole genome sequencing (WGS) based analysis has demonstrated B. safensis Ni7 to have a genome size of 3,671,999 bp, N50 value of 3,527,239, and a mean G+C content of 41.58%. Interestingly the organism was observed to have the presence of various stress-responsive genes (13, 20U, 16U,160, 39, 17M, 18, 26, and ctc) and genes responsible for surfactin production (srfAA, srfAB, srfAC, and srfAD), biofilm production (epsD, epsE, epsF, epsG, epsH, epsI, epsK, epsL, epsM, epsN, and pel), chemotaxis (cheB_1, cheB_2, cheB_3, cheW_1, cheW_2 cheR, cheD, cheC, cheA, cheY, cheV, and cheB_4), flagella synthesis (flgG_1, flgG_2, flgG_3, flgC, and flgB) as supportive to the drought tolerance. Besides these, the genes responsible for plant growth promotion (PGP), including the genes for nitrogen (nasA, nasB, nasC, nasD, and nasE) and sulfur assimilation (cysL_1&L_2, cysI) and genes for phosphate solubilization (phoA, phoP_1& phoP_2, and phoR) could also be predicted. Along with the same, the genes for catalase, superoxide dismutase, protein homeostasis, cellular fitness, osmoprotectants production, and protein folding could also be predicted from its WGS data. Further pan-genome analysis with plant associated B. safensis strains available in the public databases revealed B. safensis Ni7 to have the presence of a total of 5391 gene clusters. Among these, 3207 genes were identified as core genes, 954 as shell genes and 1230 as cloud genes. This variation in gene content could be taken as an indication of evolution of strains of Bacillus safensis as per specific conditions and hence in the case of B. safensis Ni7 its role in habitat adaptation of plant is well expected. This diversity in endophytic bacterial genes may attribute its role to support the plant system to cope up with stress conditions. Overall, the study provides genomic evidence on Bacillus safensis Ni7 as a stress alleviating microbial partner in plants.},
}
% Shrestha et al. 2024, GigaScience 13. No issue or page data is available for this record.
% The abstract is structured (BACKGROUND / RESULTS / CONCLUSIONS); each section starts on its own line.
@article{pmid38832465,
  year     = {2024},
  author   = {Shrestha, AMS and Gonzales, MEM and Ong, PCL and Larmande, P and Lee, HS and Jeung, JU and Kohli, A and Chebotarov, D and Mauleon, RP and Lee, JS and McNally, KL},
  title    = {RicePilaf: a post-GWAS/QTL dashboard to integrate pangenomic, coexpression, regulatory, epigenomic, ontology, pathway, and text-mining information to provide functional insights into rice QTLs and GWAS loci.},
  journal  = {GigaScience},
  volume   = {13},
  doi      = {10.1093/gigascience/giae013},
  pmid     = {38832465},
  issn     = {2047-217X},
  support  = {PJ016405//Rural Development Administration/ ; },
  mesh     = {*Oryza/genetics ; *Quantitative Trait Loci ; *Genome-Wide Association Study ; *Data Mining ; Software ; Epigenomics/methods ; Computational Biology/methods ; Polymorphism, Single Nucleotide ; Genomics/methods ; Genome, Plant ; Chromosome Mapping ; Databases, Genetic ; },
  abstract = {BACKGROUND: As the number of genome-wide association study (GWAS) and quantitative trait locus (QTL) mappings in rice continues to grow, so does the already long list of genomic loci associated with important agronomic traits. Typically, loci implicated by GWAS/QTL analysis contain tens to hundreds to thousands of single-nucleotide polymorphisms (SNPs)/genes, not all of which are causal and many of which are in noncoding regions. Unraveling the biological mechanisms that tie the GWAS regions and QTLs to the trait of interest is challenging, especially since it requires collating functional genomics information about the loci from multiple, disparate data sources.
RESULTS: We present RicePilaf, a web app for post-GWAS/QTL analysis, that performs a slew of novel bioinformatics analyses to cross-reference GWAS results and QTL mappings with a host of publicly available rice databases. In particular, it integrates (i) pangenomic information from high-quality genome builds of multiple rice varieties, (ii) coexpression information from genome-scale coexpression networks, (iii) ontology and pathway information, (iv) regulatory information from rice transcription factor databases, (v) epigenomic information from multiple high-throughput epigenetic experiments, and (vi) text-mining information extracted from scientific abstracts linking genes and traits. We demonstrate the utility of RicePilaf by applying it to analyze GWAS peaks of preharvest sprouting and genes underlying yield-under-drought QTLs.
CONCLUSIONS: RicePilaf enables rice scientists and breeders to shed functional light on their GWAS regions and QTLs, and it provides them with a means to prioritize SNPs/genes for further experiments. The source code, a Docker image, and a demo version of RicePilaf are publicly available at https://github.com/bioinfodlsu/rice-pilaf.},
}
% Hwang et al. 2024, bioRxiv preprint. No volume, issue, page, or ISSN data is available for this record.
@article{pmid38826299,
  year     = {2024},
  author   = {Hwang, S and Brown, NK and Ahmed, OY and Jenike, KM and Kovaka, S and Schatz, MC and Langmead, B},
  title    = {MEM-based pangenome indexing for k-mer queries.},
  journal  = {bioRxiv : the preprint server for biology},
  doi      = {10.1101/2024.05.20.595044},
  pmid     = {38826299},
  abstract = {Pangenomes are growing in number and size, thanks to the prevalence of high-quality long-read assemblies. However, current methods for studying sequence composition and conservation within pangenomes have limitations. Methods based on graph pangenomes require a computationally expensive multiple-alignment step, which can leave out some variation. Indexes based on k-mers and de Bruijn graphs are limited to answering questions at a specific substring length k. We present Maximal Exact Match Ordered (MEMO), a pangenome indexing method based on maximal exact matches (MEMs) between sequences. A single MEMO index can handle arbitrary-length queries over pangenomic windows. MEMO enables both queries that test k-mer presence/absence (membership queries) and that count the number of genomes containing k-mers in a window (conservation queries). MEMO's index for a pangenome of 89 human autosomal haplotypes fits in 2.04 GB, 8.8 × smaller than a comparable KMC3 index and 11.4 × smaller than a PanKmer index. MEMO indexes can be made smaller by sacrificing some counting resolution, with our decile-resolution HPRC index reaching 0.67 GB. MEMO can conduct a conservation query for 31-mers over the human leukocyte antigen locus in 13.89 seconds, 2.5x faster than other approaches. MEMO's small index size, lack of k-mer length dependence, and efficient queries make it a flexible tool for studying and visualizing substring conservation in pangenomes.},
}
% Barcia-Cruz et al. 2024, Research square. No volume, issue, page, or ISSN data is available for this record.
@article{pmid38826277,
  year     = {2024},
  author   = {Barcia-Cruz, R and Balboa, S and Lema, A and Romalde, JL},
  title    = {Comparative genomics of Vibrio toranzoniae strains.},
  journal  = {Research square},
  doi      = {10.21203/rs.3.rs-4360386/v1},
  pmid     = {38826277},
  abstract = {Vibrio toranzoniae is a marine bacterium belonging to the Splendidus clade, originally isolated from healthy clams in Galicia (NW Spain). Its isolation from different hosts and seawater indicated two lifestyles and wide geographical distribution. The aim of the present study was to determine the differences at genome level among strains, as well as to determine their phylogeny. For this purpose, whole genomes were sequenced by different technologies and the resulting sequences corrected. Genomes were annotated and compared with different online tools. Furthermore, the study of core and pan genome was examined, and the phylogeny was inferred. The content of the core genome ranged from 2,953 to 2,766 genes and that of the pangenome from 6,278 to 6,132, depending on the tool used. The comparison revealed that although the strains shared certain homology, with DDH values ranging from 77.10 to 82.30 and values of OrthoANI higher than 97%, notable differences were found related to motility, capsule synthesis, iron acquisition system or mobile genetic elements. The phylogenetic analysis of the core genome did not reveal a differentiation of the strains according to their lifestyle, but that of the pangenome pointed out certain geographical isolation in the same growing area. The study led to a reclassification of some isolates formerly described as V. toranzoniae and manifested the importance of cured deposited sequences to proper phylogenetic assignment.},
}
% Qiu et al. 2024, Gut microbes 16(1):2359691: review of ExPEC vaccine research.
@article{pmid38825856,
  year     = {2024},
  author   = {Qiu, L and Chirman, D and Clark, JR and Xing, Y and Hernandez Santos, H and Vaughan, EE and Maresso, AW},
  title    = {Vaccines against extraintestinal pathogenic Escherichia coli (ExPEC): progress and challenges.},
  journal  = {Gut microbes},
  volume   = {16},
  number   = {1},
  pages    = {2359691},
  doi      = {10.1080/19490976.2024.2359691},
  pmid     = {38825856},
  issn     = {1949-0984},
  mesh     = {Humans ; *Escherichia coli Infections/microbiology/prevention & control ; *Escherichia coli Vaccines/immunology ; *Extraintestinal Pathogenic Escherichia coli/genetics ; Animals ; },
  abstract = {The emergence of antimicrobial resistance (AMR) is a principal global health crisis projected to cause 10 million deaths annually worldwide by 2050. While the Gram-negative bacteria Escherichia coli is commonly found as a commensal microbe in the human gut, some strains are dangerously pathogenic, contributing to the highest AMR-associated mortality. Strains of E. coli that can translocate from the gastrointestinal tract to distal sites, called extraintestinal E. coli (ExPEC), are particularly problematic and predominantly afflict women, the elderly, and immunocompromised populations. Despite nearly 40 years of clinical trials, there is still no vaccine against ExPEC. One reason for this is the remarkable diversity in the ExPEC pangenome across pathotypes, clades, and strains, with hundreds of genes associated with pathogenesis including toxins, adhesins, and nutrient acquisition systems. Further, ExPEC is intimately associated with human mucosal surfaces and has evolved creative strategies to avoid the immune system. This review summarizes previous and ongoing preclinical and clinical ExPEC vaccine research efforts to help identify key gaps in knowledge and remaining challenges.},
}
% Ramos and Cunha 2024, Environmental pollution. Identified by article number (pages); no volume or issue data is available for this record.
@article{pmid38825220,
  year     = {2024},
  author   = {Ramos, B and Cunha, MV},
  title    = {The mobilome of Staphylococcus aureus from wild ungulates reveals epidemiological links at the animal-human interface.},
  journal  = {Environmental pollution (Barking, Essex : 1987)},
  pages    = {124241},
  doi      = {10.1016/j.envpol.2024.124241},
  pmid     = {38825220},
  issn     = {1873-6424},
  abstract = {Staphylococcus aureus thrives at animal-human-environment interfaces. A large-scale work from our group indicated that antimicrobial resistance (AMR) in commensal S. aureus strains from wild ungulates is associated with agricultural land cover and livestock farming, raising the hypothesis that AMR genes in wildlife strains may originate from different hosts, namely via exchange of mobile genetic elements (MGE). In this work, we generate the largest available dataset of S. aureus draft genomes from wild ungulates in Portugal and explore their mobilome, which can determine important traits such as AMR, virulence, and host specificity, to understand MGE exchange. Core genome multi-locus sequence typing based on 98 newly generated draft genomes and 101 publicly available genomes from Portugal demonstrated that the genomic relatedness of S. aureus from wild ungulates assigned to livestock-associated sequence types (ST) is greater compared to wild ungulate isolates assigned to human-associated STs. Screening of host specificity determinants disclosed the unexpected presence in wildlife of the immune evasion cluster encoded in φSa3 prophage, described as a human-specific virulence determinant. Additionally, two plasmids, pAVX and pETB, previously associated with avian species and humans, respectively, and the Tn553 transposon were detected. Both pETB and Tn553 encode penicillin resistance through blaZ. Pangenome analysis of wild ungulate isolates shows a core genome fraction of 2133 genes, with isolates assigned to ST72 and ST3224 being distinguished from the remaining by MGEs, although there is no reported role of these in adaptation to wildlife. AMR related gene clusters found in the shell genome are directly linked to resistance against penicillin, macrolides, fosfomycin, and aminoglycosides, and they represent mobile ARGs. Altogether, our findings support epidemiological interactions of human and non-human hosts at interfaces, with MGE exchange, including AMR determinants, associated with putative indirect movements of S. aureus among human and wildlife hosts that might be bridged by livestock.},
}
% de Almeida et al. 2024, International journal of food microbiology 420:110767. No issue number is available for this record.
@article{pmid38820989,
  year     = {2024},
  author   = {de Almeida, OGG and Bertozzi, BG and de Oliveira Rocha, L and von Hertwig, AM and Arroyo, DMD and de Martinis, ECP and Nascimento, MS},
  title    = {Genomic-wide analysis of Salmonella enterica strains isolated from peanuts in Brazil.},
  journal  = {International journal of food microbiology},
  volume   = {420},
  pages    = {110767},
  doi      = {10.1016/j.ijfoodmicro.2024.110767},
  pmid     = {38820989},
  issn     = {1879-3460},
  abstract = {Peanut-based products have been associated with Salmonella foodborne outbreaks and/or recalls worldwide. The ability of Salmonella to persist for a long time in a low moisture environment can contribute to this kind of contamination. The objective of this study was to analyse the genome of five S. enterica enterica strains isolated from the peanut supply chain in Brazil, as well as to identify genetic determinants for survival under desiccation and validate these findings by phenotypic test of desiccation stress. The strains were in silico serotyped using the platform SeqSero2 as Miami (M2851), Javiana (M2973), Oranienburg (M2976), Muenster (M624), and Glostrup/Chomedey (M7864); with phylogenomic analysis support. Based on Multilocus Sequence Typing (MLST) the strains were assigned to STs 140, 1674, 321, 174, and 2519. In addition, eight pathogenicity islands were found in all the genomes using the SPIFinder 2.0 (SPI-1, SPI-2, SPI-3, SPI-5, SPI-9, SPI-13, SPI-14). The absence of a SPI-4 may indicate a loss of this island in the surveyed genomes. For the pangenomic analysis, 49 S. enterica genomes were input into the Roary pipeline. The majority of the stress related genes were considered as soft-core genes and were located on the chromosome. A desiccation stress phenotypic test was performed in trypticase soy broth (TSB) with four different water activity (aw) values. M2976 and M7864, both isolated from the peanut samples with the lowest aw, showed the highest OD570nm in TSB aw 0.964 and were statistically different (p < 0.05) from the strain isolated from the peanut sample with the highest aw (0.997). In conclusion, genome analyses have revealed signatures of desiccation adaptation in Salmonella strains, but phenotypic analyses suggested the environment influences the adaptive ability of Salmonella to overcome desiccation stress.},
}
% Carhuaricra-Huaman and Setubal 2024, Methods in molecular biology 2802, pp. 107--134. No issue number or DOI is available for this record.
@article{pmid38819558,
  year     = {2024},
  author   = {Carhuaricra-Huaman, D and Setubal, JC},
  title    = {Step-by-Step Bacterial Genome Comparison.},
  journal  = {Methods in molecular biology (Clifton, N.J.)},
  volume   = {2802},
  pages    = {107--134},
  pmid     = {38819558},
  issn     = {1940-6029},
  mesh     = {*Genome, Bacterial ; *Genomics/methods ; *Software ; *Phylogeny ; *Computational Biology/methods ; Workflow ; Databases, Genetic ; Molecular Sequence Annotation ; Salmonella typhimurium/genetics ; },
  abstract = {Thanks to advancements in genome sequencing and bioinformatics, thousands of bacterial genome sequences are available in public databases. This presents an opportunity to study bacterial diversity in unprecedented detail. This chapter describes a complete bioinformatics workflow for comparative genomics of bacterial genomes, including genome annotation, pangenome reconstruction and visualization, phylogenetic analysis, and identification of sequences of interest such as antimicrobial-resistance genes, virulence factors, and phage sequences. The workflow uses state-of-the-art, open-source tools. The workflow is presented by means of a comparative analysis of Salmonella enterica serovar Typhimurium genomes. The workflow is based on Linux commands and scripts, and result visualization relies on the R environment. The chapter provides a step-by-step protocol that researchers with basic expertise in bioinformatics can easily follow to conduct investigations on their own genome datasets.},
}
% Schulz et al. 2024, Methods in molecular biology 2802, pp. 73--106. No issue number or DOI is available for this record.
@article{pmid38819557,
  year     = {2024},
  author   = {Schulz, T and Parmigiani, L and Rempel, A and Stoye, J},
  title    = {Methods for Pangenomic Core Detection.},
  journal  = {Methods in molecular biology (Clifton, N.J.)},
  volume   = {2802},
  pages    = {73--106},
  pmid     = {38819557},
  issn     = {1940-6029},
  mesh     = {*Software ; *Genomics/methods ; *Algorithms ; *Computational Biology/methods ; Sequence Analysis, DNA/methods ; Humans ; High-Throughput Nucleotide Sequencing/methods ; },
  abstract = {Computational pangenomics deals with the joint analysis of all genomic sequences of a species. It has already been successfully applied to various tasks in many research areas. Further advances in DNA sequencing technologies constantly let more and more genomic sequences become available for many species, leading to an increasing attractiveness of pangenomic studies. At the same time, larger datasets also pose new challenges for data structures and algorithms that are needed to handle the data. Efficient methods oftentimes make use of the concept of k-mers. Core detection is a common way of analyzing a pangenome. The pangenome's core is defined as the subset of genomic information shared among all individual members. Classically, it is not only determined on the abstract level of genes but can also be described on the sequence level. In this chapter, we provide an overview of k-mer-based methods in the context of pangenomics studies. We first revisit existing software solutions for k-mer counting and k-mer set representation. Afterward, we describe the usage of two k-mer-based approaches, Pangrowth and Corer, for pangenomic core detection.},
}
% Kimchi et al. 2024, The ISME journal. No volume, issue, or page data is available for this record; add the fields once assigned.
@article{pmid38818736,
  year     = {2024},
  author   = {Kimchi, O and Meir, Y and Wingreen, NS},
  title    = {Lytic and temperate phage naturally coexist in a dynamic population model.},
  journal  = {The ISME journal},
  doi      = {10.1093/ismejo/wrae093},
  pmid     = {38818736},
  issn     = {1751-7370},
  abstract = {When phage infect their bacterial hosts, they may either lyse the cell and generate a burst of new phage, or lysogenize the bacterium, incorporating the phage genome into it. Phage lysis/lysogeny strategies are assumed to be highly optimized, with the optimal tradeoff depending on environmental conditions. However, in nature, phage of radically different lysis/lysogeny strategies coexist in the same environment, preying on the same bacteria. How can phage preying on the same bacteria coexist if one is more optimal than the other? Here, we address this conundrum within a modeling framework, simulating the population dynamics of communities of phage and their lysogens. We find that coexistence between phage of different lysis/lysogeny strategies is a natural outcome of chaotic population dynamics that arise within sufficiently diverse communities, which ensure no phage is able to absolutely dominate its competitors. Our results further suggest a bet-hedging mechanism at the level of the phage pan-genome, wherein obligate lytic (virulent) strains typically outcompete temperate strains, but also more readily fluctuate to extinction within a local community.},
}
@article {pmid38817968, year = {2024}, author = {Zhao, Y and Ding, WJ and Xu, L and Sun, JQ}, title = {A comprehensive comparative genomic analysis revealed that plant growth promoting traits are ubiquitous in strains of {Stenotrophomonas}.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1395477}, doi = {10.3389/fmicb.2024.1395477}, pmid = {38817968}, issn = {1664-302X}, abstract = {Stenotrophomonas strains, which are often described as plant growth promoting (PGP) bacteria, are ubiquitous in many environments. A total of 213 genomes of strains of Stenotrophomonas were analyzed using comparative genomics to better understand the ecological roles of these bacteria in the environment. The pan-genome of the 213 strains of Stenotrophomonas consists of 27,186 gene families, including 710 core gene families, 11,039 unique genes and 15,437 accessory genes. Nearly all strains of Stenotrophomonas harbor the genes for GH3-family cellulose degradation and GH2- and GH31-family hemicellulose hydrolase, as well as intact glycolysis and tricarboxylic acid cycle pathways. These abilities suggest that the strains of this genus can easily obtain carbon and energy from the environment. The Stenotrophomonas strains can respond to oxidative stress by synthesizing catalase, superoxide dismutase, methionine sulfoxide reductase, and disulfide isomerase, as well as managing their osmotic balance by accumulating potassium and synthesizing compatible solutes, such as betaine, trehalose, glutamate, and proline. Each Stenotrophomonas strain also contains many genes for resistance to antibiotics and heavy metals. These genes that mediate stress tolerance increase the ability of Stenotrophomonas strains to survive in extreme environments. In addition, many functional genes related to attachment and plant colonization, growth promotion and biocontrol were identified. 
In detail, the genes associated with flagellar assembly, motility, chemotaxis and biofilm formation enable the strains of Stenotrophomonas to effectively colonize host plants. The presence of genes for phosphate-solubilization and siderophore production and the polyamine, indole-3-acetic acid, and cytokinin biosynthetic pathways confer the ability to promote plant growth. These strains can produce antimicrobial compounds, chitinases, lipases and proteases. Each Stenotrophomonas genome contained 1-9 prophages and 17-60 genomic islands, and the genes related to antibiotic and heavy metal resistance and the biosynthesis of polyamines, indole-3-acetic acid, and cytokinin may be acquired by horizontal gene transfer. This study demonstrates that strains of Stenotrophomonas are highly adaptable for different environments and have strong potential for use as plant growth-promoting bacteria.}, }
@article {pmid38818281, year = {2023}, author = {Martínez-Guardiola, C and Brown, NK and Silva-Coira, F and Köppl, D and Gagie, T and Ladra, S}, title = {Augmented Thresholds for {MONI}.}, journal = {Proceedings. Data Compression Conference}, volume = {2023}, number = {}, pages = {268--277}, doi = {10.1109/dcc55655.2023.00035}, pmid = {38818281}, issn = {2375-0383}, abstract = {MONI (Rossi et al., 2022) can store a pangenomic dataset T in small space and later, given a pattern P, quickly find the maximal exact matches (MEMs) of P with respect to T. In this paper we consider its one-pass version (Boucher et al., 2021), whose query times are dominated in our experiments by longest common extension (LCE) queries. We show how a small modification lets us avoid most of these queries which significantly speeds up MONI in practice while only slightly increasing its size.}, }
@article {pmid38811845, year = {2024}, author = {Khan, AW and Garg, V and Sun, S and Gupta, S and Dudchenko, O and Roorkiwal, M and Chitikineni, A and Bayer, PE and Shi, C and Upadhyaya, HD and Bohra, A and Bharadwaj, C and Mir, RR and Baruch, K and Yang, B and Coyne, CJ and Bansal, KC and Nguyen, HT and Ronen, G and Aiden, EL and Veneklaas, E and Siddique, KHM and Liu, X and Edwards, D and Varshney, RK}, title = {Author Correction: {Cicer} super-pangenome provides insights into species evolution and agronomic trait loci for crop improvement in chickpea.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, doi = {10.1038/s41588-024-01813-8}, pmid = {38811845}, issn = {1546-1718}, }
@article {pmid38812828, year = {2022}, author = {Oliva, M and Cenzato, D and Rossi, M and Lipták, Z and Gagie, T and Boucher, C}, title = {{CSTs} for Terabyte-Sized Data.}, journal = {Proceedings. Data Compression Conference}, volume = {2022}, number = {}, pages = {93--102}, pmid = {38812828}, issn = {2375-0383}, abstract = {Generating pangenomic datasets is becoming increasingly common but there are still few tools able to handle them and even fewer accessible to non-specialists. Building compressed suffix trees (CSTs) for pangenomic datasets is still a major challenge but could be enormously beneficial to the community. In this paper, we present a method, which we refer to as RePFP-CST, for building CSTs in a manner that is scalable. To accomplish this, we show how to build a CST directly from VCF files without decompressing them, and to prune from the prefix-free parse (PFP) phrase boundaries whose removal reduces the total size of the dictionary and the parse. We show that these improvements reduce the time and space required for the construction of the CST, and the memory footprint of the finished CST, enabling us to build a CST for a terabyte of DNA for the first time in the literature.}, }
@article {pmid38810119, year = {2024}, author = {Ramsbottom, KA and Prakash, A and Perez-Riverol, Y and Camacho, OM and Sun, Z and Kundu, DJ and Bowler-Barnett, E and Martin, M and Fan, J and Chebotarov, D and McNally, KL and Deutsch, EW and Vizcaíno, JA and Jones, AR}, title = {Meta-Analysis of Rice Phosphoproteomics Data to Understand Variation in Cell Signaling Across the Rice Pan-Genome.}, journal = {Journal of proteome research}, volume = {}, number = {}, pages = {}, doi = {10.1021/acs.jproteome.4c00187}, pmid = {38810119}, issn = {1535-3907}, abstract = {Phosphorylation is the most studied post-translational modification, and has multiple biological functions. In this study, we have reanalyzed publicly available mass spectrometry proteomics data sets enriched for phosphopeptides from Asian rice (Oryza sativa). In total we identified 15,565 phosphosites on serine, threonine, and tyrosine residues on rice proteins. We identified sequence motifs for phosphosites, and link motifs to enrichment of different biological processes, indicating different downstream regulation likely caused by different kinase groups. We cross-referenced phosphosites against the rice 3,000 genomes, to identify single amino acid variations (SAAVs) within or proximal to phosphosites that could cause loss of a site in a given rice variety and clustered the data to identify groups of sites with similar patterns across rice family groups. The data has been loaded into UniProt Knowledge-Base—enabling researchers to visualize sites alongside other data on rice proteins, e.g., structural models from AlphaFold2, PeptideAtlas, and the PRIDE database—enabling visualization of source evidence, including scores and supporting mass spectra.}, }
@article {pmid38808122, year = {2024}, author = {Xie, X and Deng, X and Chen, J and Chen, L and Yuan, J and Chen, H and Wei, C and Liu, X and Qiu, G}, title = {Two new clades recovered at high temperatures provide novel phylogenetic and genomic insights into {Candidatus} {Accumulibacter}.}, journal = {ISME communications}, volume = {4}, number = {1}, pages = {ycae049}, pmid = {38808122}, issn = {2730-6151}, abstract = {Candidatus Accumulibacter, a key genus of polyphosphate-accumulating organisms, plays key roles in lab- and full-scale enhanced biological phosphorus removal (EBPR) systems. A total of 10 high-quality Ca. Accumulibacter genomes were recovered from EBPR systems operated at high temperatures, providing significantly updated phylogenetic and genomic insights into the Ca. Accumulibacter lineage. Among these genomes, clade IIF members SCELSE-3, SCELSE-4, and SCELSE-6 represent the to-date known genomes encoding a complete denitrification pathway, suggesting that Ca. Accumulibacter alone could achieve complete denitrification. Clade IIC members SSA1, SCUT-1, SCELCE-2, and SCELSE-8 lack the entire set of denitrifying genes, representing to-date known non-denitrifying Ca. Accumulibacter. A pan-genomic analysis with other Ca. Accumulibacter members suggested that all Ca. Accumulibacter likely has the potential to use dicarboxylic amino acids. Ca. Accumulibacter aalborgensis AALB and Ca. Accumulibacter affinis BAT3C720 seemed to be the only two members capable of using glucose for EBPR. A heat shock protein Hsp20 encoding gene was found exclusively in genomes recovered at high temperatures, which was absent in clades IA, IC, IG, IIA, IIB, IID, IIG, and II-I members. High transcription of this gene in clade IIC members SCUT-2 and SCUT-3 suggested its role in surviving high temperatures for Ca. Accumulibacter. Ambiguous clade identity was observed for newly recovered genomes (SCELSE-9 and SCELSE-10). 
Five machine learning models were developed using orthogroups as input features. Prediction results suggested that they belong to a new clade (IIK). The phylogeny of Ca. Accumulibacter was re-evaluated based on the laterally derived polyphosphokinase 2 gene, showing improved resolution in differentiating different clades.}, }
@article {pmid38806511, year = {2024}, author = {Medvedev, KE and Zhang, J and Schaeffer, RD and Kinch, LN and Cong, Q and Grishin, NV}, title = {Structure classification of the proteins from {Salmonella} enterica pangenome revealed novel potential pathogenicity islands.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {12260}, pmid = {38806511}, issn = {2045-2322}, support = {GM147367/GM/NIGMS NIH HHS/United States ; GM127390/GM/NIGMS NIH HHS/United States ; I-2095-20220331//Welch Foundation/ ; I-1505//Welch Foundation/ ; DBI 2224128//National Science Foundation/ ; }, mesh = {*Genomic Islands/genetics ; *Salmonella enterica/genetics/pathogenicity/classification ; *Bacterial Proteins/genetics/chemistry/metabolism ; *Genome, Bacterial ; Humans ; Protein Domains ; }, abstract = {Salmonella enterica is a pathogenic bacterium known for causing severe typhoid fever in humans, making it important to study due to its potential health risks and significant impact on public health. This study provides evolutionary classification of proteins from Salmonella enterica pangenome. We classified 17,238 domains from 13,147 proteins from 79,758 Salmonella enterica strains and studied in detail domains of 272 proteins from 14 characterized Salmonella pathogenicity islands (SPIs). Among SPIs-related proteins, 90 proteins function in the secretion machinery. 41% domains of SPI proteins have no previous sequence annotation. By comparing clinical and environmental isolates, we identified 3682 proteins that are overrepresented in clinical group that we consider as potentially pathogenic. Among domains of potentially pathogenic proteins only 50% domains were annotated by sequence methods previously. Moreover, 36% (1330 out of 3682) of potentially pathogenic proteins cannot be classified into Evolutionary Classification of Protein Domains database (ECOD). 
Among classified domains of potentially pathogenic proteins the most populated homology groups include helix-turn-helix (HTH), Immunoglobulin-related, and P-loop domains-related. Functional analysis revealed overrepresentation of these protein in biological processes related to viral entry into host cell, antibiotic biosynthesis, DNA metabolism and conformation change, and underrepresentation in translational processes. Analysis of the potentially pathogenic proteins indicates that they form 119 clusters or novel potential pathogenicity islands (NPPIs) within the Salmonella genome, suggesting their potential contribution to the bacterium's virulence. One of the NPPIs revealed significant overrepresentation of potentially pathogenic proteins. Overall, our analysis revealed that identified potentially pathogenic proteins are poorly studied.}, }
@article {pmid38805143, year = {2024}, author = {Tian, S and Jiang, Y and Han, Q and Meng, C and Ji, F and Zhou, B and Ye, M}, title = {Putative Probiotic {Ligilactobacillus} salivarius Strains Isolated from the Intestines of Meat-Type Pigeon Squabs.}, journal = {Probiotics and antimicrobial proteins}, volume = {}, number = {}, pages = {}, pmid = {38805143}, issn = {1867-1314}, support = {SJCX22_1770//The Postgraduate Research & Practice Innovation Programs of Jiangsu Province (Yangzhou University), Jiangsu Provincial Department of Education/ ; SJCX23_1960//The Postgraduate Research & Practice Innovation Programs of Jiangsu Province (Yangzhou University), Jiangsu Provincial Department of Education/ ; R2110//Jiangsu Key Laboratory of Zoonosis/ ; R2110//Jiangsu Key Laboratory of Zoonosis/ ; R2110//Jiangsu Key Laboratory of Zoonosis/ ; R2110//Jiangsu Key Laboratory of Zoonosis/ ; }, abstract = {This study aims to screen for potential probiotic lactic acid bacteria from the intestines of meat-type pigeon squabs. Ligilactobacillus salivarius YZU37 was identified as the best comprehensive performed strain. Being acid- and bile salt-tolerant, it displayed growth-inhibition activities against Staphylococcus aureus ATCC25923, Escherichia coli ATCC25922, and Salmonella typhimurium SL1344, exhibited sensitivity to 6 commonly used antibiotics, and endowed with good cell surface hydrophobicity, auto-aggregation property, and anti-oxidant activities. Results of in vitro experiments indicated that the bacteriostatic effects of this strain were related to the production of proteinaceous substances that depend on acidic conditions. Whole-genome sequencing of L. salivarius YZU37 was performed to elucidate the genetic basis underlying its probiotic potential. Pangenome analysis of L. salivarius YZU37 and other 212 L. 
salivarius strains available on NCBI database revealed a pigeon-unique gene coding choloylglycine hydrolase (CGH), which had higher enzyme-substrate binding affinity than that of the common CGH shared by L. salivarius strains of other sources. Annotation of the functional genes in the genome of L. salivarius YZU37 revealed genes involved in responses to acid, bile salt, heat, cold, heavy metal, and oxidative stresses. The whole genome analysis also revealed the absence of virulence and toxin genes and the presence of 65 genes distributed under 4 CAZymes classes, 2 CRISPR-cas regions, and 3 enterolysin A clusters which may confer the acid-dependent antimicrobial potential of L. salivarius YZU37. Altogether, our results highlighted the probiotic potential of L. salivarius YZU37. Further in vivo investigations are required to elucidate its beneficial effects on pigeons.}, }
@article {pmid38802754, year = {2024}, author = {Shaaban, MT and Abdel-Raouf, M and Zayed, M and Emara, MA}, title = {Microbiological and molecular studies on a multidrug-resistant {Pseudomonas} aeruginosa from a liver transplant patient with urinary tract infection in {Egypt}.}, journal = {BMC microbiology}, volume = {24}, number = {1}, pages = {184}, pmid = {38802754}, issn = {1471-2180}, abstract = {BACKGROUND: Pseudomonas aeruginosa is an opportunistic pathogen responsible for complicated UTIs and exhibits high antibiotic resistance, leading to increased mortality rates, especially in cases of multidrug-resistant strains. This study aimed to investigate the antibiotic susceptibility patterns and genomic characterization of XDR strains identified in end-stage liver disease patients who underwent liver transplants.
METHODS: In this study, a number of 30 individuals who underwent liver transplants were registered. Ninety urine and 60 wound site swab samples were collected and processed for culturing, identification, and antimicrobial sensitivity. Extensively drug-resistant strain EMARA01 was confirmed through Sanger sequencing and was then processed for whole genome sequencing to characterize the genomic pattern. Sequencing data were processed for de novo assembly using various tools and databases, including genome annotation, serotype identification, virulence factor genes, and antimicrobial resistance gene. Pangenome analysis of randomly selected 147 reference strains and EMAR01 sequenced strain was performed using the Bacterial Pan Genome Analysis (BPGA) software.
RESULTS: Of these total examined samples, nosocomial infection due to P. aeruginosa was detected in twelve patients' samples. AST analysis showed that P. aeruginosa strains exhibit resistance to tobramycin, erythromycin, and gentamicin, followed by piperacillin and ofloxacin, and no strains exhibit resistance to meropenem and imipenem. The CARD database identified 59 AMR genes similar to the EMAR01 strain genome and mostly belong to the family involved in the resistance-nodulation-cell division (RND) antibiotic efflux pump. Five genes; nalC, nalD, MexR, MexA, and MexB, exhibit resistance to 14 classes of antibiotics, while two AMR; CpxR, and OprM, exhibit resistance to 15 classes of drugs. Pangenome analysis revealed that the pan-genome remained open, suggesting the potential for acquiring accessory and unique genes. Notably, the genes predominantly involved in amino acid transport metabolism were identified using the KEGG database.
CONCLUSIONS: This study provides valuable insights into the antimicrobial resistance profile, genetic features, and genomic evolution of P. aeruginosa strains causing UTIs in liver transplant patients. The findings emphasize the significance of comprehending AMR mechanisms and genetic diversity in P. aeruginosa for developing effective treatment strategies and infection control measures.}, }
@article {pmid38802738, year = {2024}, author = {Tian, X and Teo, WFA and Yang, Y and Dong, L and Wong, A and Chen, L and Ahmed, H and Choo, SW and Jakubovics, NS and Tan, GYA}, title = {Genome characterisation and comparative analysis of {Schaalia} dentiphila sp. nov. and its subspecies, {S.} dentiphila subsp. denticola subsp. nov., from the human oral cavity.}, journal = {BMC microbiology}, volume = {24}, number = {1}, pages = {185}, pmid = {38802738}, issn = {1471-2180}, abstract = {BACKGROUND: Schaalia species are primarily found among the oral microbiota of humans and other animals. They have been associated with various infections through their involvement in biofilm formation, modulation of host responses, and interaction with other microorganisms. In this study, two strains previously indicated as Actinomyces spp. were found to be novel members of the genus Schaalia based on their whole genome sequences.
RESULTS: Whole-genome sequencing revealed both strains with a genome size of 2.3 Mbp and GC contents of 65.5%. Phylogenetics analysis for taxonomic placement revealed strains NCTC 9931 and C24 as distinct species within the genus Schaalia. Overall genome-relatedness indices including digital DNA-DNA hybridization (dDDH), and average nucleotide/amino acid identity (ANI/AAI) confirmed both strains as distinct species, with values below the species boundary thresholds (dDDH < 70%, and ANI and AAI < 95%) when compared to nearest type strain Schaalia odontolytica NCTC 9935[T]. Pangenome and orthologous analyses highlighted their differences in gene properties and biological functions compared to existing type strains. Additionally, the identification of genomic islands (GIs) and virulence-associated factors indicated their genetic diversity and potential adaptive capabilities, as well as potential implications for human health. Notably, CRISPR-Cas systems in strain NCTC 9931 underscore its adaptive immune mechanisms compared to strain C24.
CONCLUSIONS: Based on these findings, strain NCTC 9931[T] (= ATCC 17982[T] = DSM 43331[T] = CIP 104728[T] = CCUG 18309[T] = NCTC 14978[T] = CGMCC 1.90328[T]) represents a novel species, for which the name Schaalia dentiphila subsp. dentiphila sp. nov. subsp. nov. is proposed, while strain C24[T] (= NCTC 14980[T] = CGMCC 1.90329[T]) represents a distinct novel subspecies, for which the name Schaalia dentiphila subsp. denticola subsp. nov. is proposed. This study enriches our understanding of the genomic diversity of Schaalia species and paves the way for further investigations into their roles in oral health.
SIGNIFICANCE: This research reveals two Schaalia strains, NCTC 9931[T] and C24[T], as novel entities with distinct genomic features. Expanding the taxonomic framework of the genus Schaalia, this study offers a critical resource for probing the metabolic intricacies and resistance patterns of these bacteria. This work stands as a cornerstone for microbial taxonomy, paving the way for significant advances in clinical diagnostics.}, }
@article {pmid38802662, year = {2024}, author = {Ode, H and Matsuda, M and Shigemi, U and Mori, M and Yamamura, Y and Nakata, Y and Okazaki, R and Kubota, M and Setoyama, Y and Imahashi, M and Yokomaku, Y and Iwatani, Y}, title = {Population-based nanopore sequencing of the {HIV-1} pangenome to identify drug resistance mutations.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {12099}, pmid = {38802662}, issn = {2045-2322}, support = {JP23K07938//Japan Society for the Promotion of Science/ ; JP22H02882//Japan Society for the Promotion of Science/ ; JP23fk0410050h0402//Japan Agency for Medical Research and Development/ ; JP23fk0410058h0001//Japan Agency for Medical Research and Development/ ; }, abstract = {HIV-1 drug resistance genotypic tests have primarily been performed by Sanger sequencing of gene segments encoding different drug target proteins. Since the number of targets has increased with the addition of a new class of antiretroviral drugs, a simple high-throughput system for assessing nucleotide sequences throughout the HIV-1 genome is required. Here, we developed a new solution using nanopore sequencing of viral pangenomes amplified by PCR. Benchmark tests using HIV-1 molecular clones demonstrated an accuracy of up to 99.9%. In addition, validation tests of our protocol in 106 clinical samples demonstrated high concordance of drug resistance and tropism genotypes (92.5% and 98.1%, respectively) between the nanopore sequencing-based results and archived clinical determinations made based on Sanger sequencing data. These results suggest that our new approach will be a powerful solution for the comprehensive survey of HIV-1 drug resistance mutations in clinical settings.}, }
@article {pmid38800696, year = {2024}, author = {Sourty, B and Basset, L and Fontaine, A and Garcion, E and Rousseau, A}, title = {Chromothripsis is rare in {IDH}-mutant gliomas compared to {IDH}-wild-type glioblastomas whereas whole-genome duplication is equally frequent in both tumor types.}, journal = {Neuro-oncology advances}, volume = {6}, number = {1}, pages = {vdae059}, pmid = {38800696}, issn = {2632-2498}, abstract = {BACKGROUND: Adult-type diffuse gliomas comprise IDH (isocitrate dehydrogenase)-mutant astrocytomas, IDH-mutant 1p/19q-codeleted oligodendrogliomas (ODG), and IDH-wild-type glioblastomas (GBM). GBM displays genome instability, which may result from 2 genetic events leading to massive chromosome alterations: Chromothripsis (CT) and whole-genome duplication (WGD). These events are scarcely described in IDH-mutant gliomas. The better prognosis of the latter may be related to their genome stability compared to GBM.
METHODS: Pangenomic profiles of 297 adult diffuse gliomas were analyzed at initial diagnosis using SNP arrays, including 192 GBM and 105 IDH-mutant gliomas (61 astrocytomas and 44 ODG). Tumor ploidy was assessed with Genome Alteration Print and CT events with CTLPScanner and through manual screening. Survival data were compared using the Kaplan-Meier method.
RESULTS: At initial diagnosis, 37 GBM (18.7%) displayed CT versus 5 IDH-mutant gliomas (4.7%; P = .0008), the latter were all high-grade (grade 3 or 4) astrocytomas. WGD was detected at initial diagnosis in 18 GBM (9.3%) and 9 IDH-mutant gliomas (5 astrocytomas and 4 oligodendrogliomas, either low- or high-grade; 8.5%). Neither CT nor WGD was associated with overall survival in GBM or in IDH-mutant gliomas.
CONCLUSIONS: CT is less frequent in IDH-mutant gliomas compared to GBM. The absence of CT in ODG and grade 2 astrocytomas might, in part, explain their genome stability and better prognosis, while CT might underlie aggressive biological behavior in some high-grade astrocytomas. WGD is a rare and early event occurring equally in IDH-mutant gliomas and GBM.}, }
@article {pmid38798703, year = {2024}, author = {Yang, S and Yu, X and Gao, X and Fatima, K and Tahir Ul Qamar, M}, title = {Comparative genomic profiling of transport inhibitor {Response1/Auxin} signaling {F-box} ({TIR1/AFB}) genes in eight {Pyrus} genomes revealed the intraspecies diversity and stress responsiveness patterns.}, journal = {Frontiers in genetics}, volume = {15}, number = {}, pages = {1393487}, pmid = {38798703}, issn = {1664-8021}, abstract = {In the genomics of plants and the phytoecosystem, Pyrus (pear) is among the most nutritious fruits and contains fiber that has great health benefits to humans. It is mostly cultivated in temperate regions and is one of the most cultivated pome fruits globally. Pears are highly subjected to biotic and abiotic stresses that affect their yield. TIR1/AFB proteins act as auxin co-receptors during the signaling of nuclear auxins and play a primary role in development-related regulatory processes and responses to biotic and abiotic stresses. However, this gene family and its members have not been explored in Pyrus genomes, and understanding these genes will help obtain useful insights into stress tolerance and ultimately help maintain a high yield of pears. This study reports a pangenome-wide investigation of TIR1/AFB genes from eight Pyrus genomes: Cuiguan (Pyrus pyrifolia), Shanxi Duli (P. betulifolia), Zhongai 1 [(P. ussuriensis × communis) × spp.], Nijisseiki (P. pyrifolia), Yunhong No.1 (P. pyrifolia), d'Anjou (P. communis), Bartlett v2.0 (P. communis), and Dangshansuli v.1.1 (P. bretschneideri). These genes were randomly distributed on 17 chromosomes in each genome. Based on phylogenetics, the identified TIR1/AFB genes were divided into six groups. Their gene structure and motif pattern showed the intraspecific structural conservation as well as evolutionary patterns of Pyrus TIR1/AFBs. The expansion of this gene family in Pyrus is mainly caused by segmental duplication; however, a few genes showed tandem duplication. 
Moreover, positive and negative selection pressure equally directed the gene's duplication process. The GO and PPI analysis showed that Pyrus TIR1/AFB genes are associated with abiotic stress- and development-related signaling pathways. The promoter regions of Pyrus TIR1/AFB genes were enriched in hormone-, light-, development-, and stress-related cis elements. Furthermore, publicly available RNA-seq data analysis showed that DaTIR1/AFBs have varied levels of expression in various tissues and developmental stages, fruit hardening disease conditions, and drought stress conditions. This indicated that DaTIR1/AFB genes might play critical roles in response to biotic and abiotic stresses. The DaTIR1/AFBs have similar protein structures, which show that they are involved in the same function. Hence, this study will broaden our knowledge of the TIR1/AFB gene family in Pyrus, elucidating their contribution to conferring resistance against various environmental stresses, and will also provide valuable insights for future researchers.}, }
@article {pmid38797020, year = {2024}, author = {Arrieta-Gisasola, A and Martínez-Ballesteros, I and Martinez-Malaxetxebarria, I and Garrido, V and Grilló, MJ and Bikandi, J and Laorden, L}, title = {Pan-Genome-Wide Association Study reveals a key role of the salmochelin receptor {IroN} in the biofilm formation of {Salmonella} {Typhimurium} and its monophasic variant 4,[5],12:i:-.}, journal = {International journal of food microbiology}, volume = {419}, number = {}, pages = {110753}, doi = {10.1016/j.ijfoodmicro.2024.110753}, pmid = {38797020}, issn = {1879-3460}, abstract = {Salmonella enterica subsp. enterica serovar Typhimurium variant 4,[5],12:i:- (so called S. 4,[5],12:i:-) has rapidly become one of the most prevalent serovars in humans in Europe, with clinical cases associated with foodborne from pork products. The mechanisms, genetic basis and biofilms relevance by which S. 4,[5],12:i:- maintains and spreads its presence in pigs remain unclear. In this study, we examined the genetic basis of biofilm production in 78 strains of S. 4,[5],12:i:- (n = 57) and S. Typhimurium (n = 21), from human gastroenteritis, food products and asymptomatic pigs. The former showed a lower Specific Biofilm Formation index (SBF) and distant phylogenetic clades, suggesting that the ability to form biofilms is not a crucial adaptation for the S. 4,[5],12:i:- emerging success in pigs. However, using a pan-Genome-Wide Association Study (pan-GWAS) we identified genetic determinants of biofilm formation, revealing 167 common orthologous groups and genes associated with the SBF. The analysis of annotated sequences highlighted specific genetic deletions in three chromosomal regions of S. 4,[5],12:i:- correlating with SBF values: i) the complete fimbrial operon stbABCDE widely recognized as the most critical factor involved in Salmonella adherence; ii) the hxlA, hlxB, and pgiA genes, which expression in S. 
Typhimurium is induced in the tonsils during swine infection, and iii) the entire iroA locus related to the characteristic deletion of the second-phase flagellar genomic region in S. 4,[5],12:i:-. Consequently, we further investigated the role of the iro-genes on biofilm by constructing S. Typhimurium deletion mutants in iroBCDE and iroN. While iroBCDE showed no significant impact, iroN clearly contributed to S. Typhimurium biofilm formation. In conclusion, the pan-GWAS approach allowed us to uncover complex interactions between genetic and phenotypic factors influencing biofilm formation in S. 4,[5],12:i:- and S. Typhimurium.}, }
@article{pmid38796709,
  author   = {Zhang, Z and Liu, D and Li, B and Wang, W and Zhang, J and Xin, M and Hu, Z and Liu, J and Du, J and Peng, H and Hao, C and Zhang, X and Ni, Z and Sun, Q and Guo, W and Yao, Y},
  title    = {A k-mer-based pangenome approach for cataloging seed-storage-protein genes in wheat to facilitate genotype-to-phenotype prediction and improvement of end-use quality.},
  journal  = {Molecular plant},
  year     = {2024},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1016/j.molp.2024.05.006},
  pmid     = {38796709},
  issn     = {1752-9867},
  abstract = {Wheat (Triticum aestivum L.) is a staple food for more than 35% of the world's population, with flour used to make hundreds of baked goods. Superior end-use quality is a major breeding target, however, improving it is especially time-consuming and expensive. Furthermore, genes encoding seed-storage proteins (SSPs) form multi-gene families and are repetitive, with gaps commonplace in several genome assemblies. To overcome these barriers and efficiently identify superior wheat SSP alleles, we developed 'PanSK' (Pan-SSP k-mer) for genotype-to-phenotype prediction based on a SSP-based pangenome resource. PanSK uses 29-mer sequences that represent each SSP gene at the pangenomic level to reveal untapped diversity across landraces and modern cultivars. Genome-wide association studies with k-mer identified 23 SSP genes associated with end-use quality representing novel targets for improvement. We evaluated the effect of rye secalin genes on end-use quality and found that removing ω-secalins from 1BL/1RS wheat translocation lines is associated with enhanced end-use quality. Finally, using machine-learning-based prediction inspired by PanSK, we predict quality phenotypes with high accuracy from genotype alone. This study provides an effective approach for genome design based on SSP genes, enabling breeding of wheat varieties with superior processing capabilities and improved end-use quality.},
}
@article{pmid38792845,
  author   = {Toth, R. and Ilic, A. M. and Huettel, B. and Duduk, B. and Kube, M.},
  title    = {Divergence within the Taxon '{Candidatus} {Phytoplasma} asteris' Confirmed by Comparative Genome Analysis of Carrot Strains},
  journal  = {Microorganisms},
  year     = {2024},
  volume   = {12},
  number   = {5},
  doi      = {10.3390/microorganisms12051016},
  pmid     = {38792845},
  issn     = {2076-2607},
  abstract = {Phytoplasmas are linked to diseases in hundreds of economically important crops, including carrots. In carrots, phytoplasmosis is associated with leaf chlorosis and necrosis, coupled with inhibited root system development, ultimately leading to significant economic losses. During a field study conducted in Baden-Württemberg (Germany), two strains of the provisional taxon 'Candidatus Phytoplasma asteris' were identified within a carrot plot. For further analysis, strains M8 and M33 underwent shotgun sequencing, utilising single-molecule-real-time (SMRT) long-read sequencing and sequencing-by-synthesis (SBS) paired-end short-read sequencing techniques. Hybrid assemblies resulted in complete de novo assemblies of two genomes harboring circular chromosomes and two plasmids. Analyses, including average nucleotide identity and sequence comparisons of established marker genes, confirmed the phylogenetic divergence of 'Ca. P. asteris' and a different assignment of strains to the 16S rRNA subgroup I-A for M33 and I-B for M8. These groups exhibited unique features, encompassing virulence factors and genes, associated with the mobilome. In contrast, pan-genome analysis revealed a highly conserved gene set related to metabolism across these strains. This analysis of the Aster Yellows (AY) group reaffirms the perception of phytoplasmas as bacteria that have undergone extensive genome reduction during their co-evolution with the host and an increase of genome size by mobilome.},
}
@article{pmid38792815,
  author   = {Wang, T. and Shi, Y. and Zheng, M. and Zheng, J.},
  title    = {Comparative Genomics Unveils Functional Diversity, Pangenome Openness, and Underlying Biological Drivers among {Bacillus} subtilis Group},
  journal  = {Microorganisms},
  year     = {2024},
  volume   = {12},
  number   = {5},
  doi      = {10.3390/microorganisms12050986},
  pmid     = {38792815},
  issn     = {2076-2607},
  support  = {31970003//National Natural Science Foundation of China/ ; },
  abstract = {The Bacillus subtilis group (Bs group), with Bacillus subtilis as its core species, holds significant research and economic value in various fields, including science, industrial production, food, and pharmaceuticals. However, most studies have been confined to comparative genomics analyses and exploration within individual genomes at the level of species, with few conducted within groups across different species. This study focused on Bacillus subtilis, the model of Gram-positive bacteria, and 14 other species with significant research value, employing comparative pangenomics as well as population enrichment analysis to ascertain the functional enrichment and diversity. Through the quantification of pangenome openness, this work revealed the underlying biological drivers and significant correlation between pangenome openness and various factors, including the distribution of toxin-antitoxin- and integrase-related genes, as well as the number of endonucleases, recombinases, repair system-related genes, prophages, integrases, and transfer mobile elements. Furthermore, the functional enrichment results indicated the potential for secondary metabolite, probiotic, and antibiotic exploration in Bacillus licheniformis, Bacillus paralicheniformis, and Bacillus spizizenii, respectively.
In general, this work systematically exposed the quantification of pangenome openness, biological drivers, the pivotal role of genomic instability factors, and mobile elements, providing targeted exploration guidance for the Bs group.},
}
@article{pmid38792744,
  author   = {Xia, Y. and Wang, Z. and Hu, Y. and Zhao, P. and Li, J. and Zhang, L. and Fang, R. and Zhao, J.},
  title    = {Isolation, Identification, Genomic Diversity, and Antimicrobial Resistance Analysis of {Streptococcus} suis in {Hubei} Province of {China} from 2021 to 2023},
  journal  = {Microorganisms},
  year     = {2024},
  volume   = {12},
  number   = {5},
  doi      = {10.3390/microorganisms12050917},
  pmid     = {38792744},
  issn     = {2076-2607},
  support  = {2662020DKPY016//Fundamental Research Funds for the Central Universities in China/ ; },
  abstract = {Streptococcus suis (S. suis) is a zoonotic pathogen capable of causing severe diseases in humans and pigs, including meningitis, sepsis, polyserositis, arthritis, and endocarditis. This study aimed to investigate the biological characteristics of 19 strains of S. suis isolated from diseased pigs in Hubei Province between 2021 and 2023. Through bioinformatics analysis, we investigated the serotype, MLST, pan-genome characteristics, SNP, AMR, and ICE of the 19 S. suis isolates. Among the 19 S. suis strains, ten serotypes were identified, and serotype 9 was the most prevalent (21.05\%). Ten new alleles and nine new sequence types (STs) were discovered, with ST28 and ST243 emerging as the predominant STs. The results of the pan-genomic analysis of S. suis indicate that there are 943 core genes, 2259 shell genes, and 5663 cloud genes. Through SNP evolutionary analysis, we identified a strong genetic similarity between SS31 and the reference genome P1/7. The analysis of antibiotic resistance genes revealed widespread presence of erm(B) and tet(O) genes among 19 strains of S. suis. This association may be linked to the high resistance of S. suis to lincosamides, macrolides, and tetracyclines. Integrative and conjugative elements (ICEs) and integrative and mobilizable elements (IMEs) were identified in 16 strains, with a carriage rate of 84.21\%, and resistance genes were identified within the ICE/IME elements of 8 strains.
Antimicrobial susceptibility testing revealed that all strains showed sensitivity to vancomycin and lincomycin but resistance to tilmicosin, tiamulin, amoxicillin, and doxycycline. This study contributes to our understanding of the genomic diversity of S. suis in Hubei Province of China, providing essential data for the comprehensive prevention and control of S. suis infections in China.},
}
@article{pmid38792675,
  author   = {He, X. and Yu, Y. and Kemperman, R. and Jimenez, L. and Ahmed Sadiq, F. and Zhang, G.},
  title    = {Comparative Genomics Reveals Genetic Diversity and Variation in Metabolic Traits in {Fructilactobacillus} sanfranciscensis Strains},
  journal  = {Microorganisms},
  year     = {2024},
  volume   = {12},
  number   = {5},
  doi      = {10.3390/microorganisms12050845},
  pmid     = {38792675},
  issn     = {2076-2607},
  support  = {32172179//National Natural Science Foundation of China/ ; },
  abstract = {Fructilactobacillus sanfranciscensis is a significant and dominant bacterial species of sourdough microbiota from ecological and functional perspectives. Despite the remarkable prevalence of different strains of this species in sourdoughs worldwide, the drivers behind the genetic diversity of this species needed to be clarified. In this research, 14 F. sanfranciscensis strains were isolated from sourdough samples to evaluate the genetic diversity and variation in metabolic traits. These 14 and 31 other strains (obtained from the NCBI database) genomes were compared. The values for genome size and GC content, on average, turned out to 1.31 Mbp and 34.25\%, respectively. In 45 F. sanfranciscensis strains, there were 162 core genes and 0 to 51 unique genes present in each strain. The primary functions of core genes were related to nucleotide, lipid transport, and amino acid, as well as carbohydrate metabolism. The size of core genes accounted for 41.18\% of the pan-genome size in 14 F. sanfranciscensis strains, i.e., 0.70 Mbp of 1.70 Mbp. There were genetic variations among the 14 strains involved in carbohydrate utilization and antibiotic resistance. Moreover, exopolysaccharides biosynthesis-related genes were annotated, including epsABD, wxz, wzy. The Type IIA \& IE CRISPR-Cas systems, pediocin PA-1 and Lacticin\_3147\_A1 bacteriocins operons were also discovered in F. sanfranciscensis. These findings can help to select desirable F.
sanfranciscensis strains to develop standardized starter culture for sourdough fermentation, and expect to provide traditional fermented pasta with a higher quality and nutritional value for the consumers.},
}
@article{pmid38785821,
  author   = {Frantsuzova, E. and Bogun, A. and Kopylova, O. and Vetrova, A. and Solyanikova, I. and Streletskii, R. and Delegan, Y.},
  title    = {Genomic, Phylogenetic and Physiological Characterization of the {PAH}-Degrading Strain {Gordonia} polyisoprenivorans 135},
  journal  = {Biology},
  year     = {2024},
  volume   = {13},
  number   = {5},
  doi      = {10.3390/biology13050339},
  pmid     = {38785821},
  issn     = {2079-7737},
  support  = {22-74-10082//Russian Science Foundation/ ; },
  abstract = {The strain Gordonia polyisoprenivorans 135 is able to utilize a wide range of aromatic compounds. The aim of this work was to study the features of genetic organization and biotechnological potential of the strain G. polyisoprenivorans 135 as a degrader of aromatic compounds. The study of the genome of the strain 135 and the pangenome of the G. polyisoprenivorans species revealed that some genes, presumably involved in PAH catabolism, are atypical for Gordonia and belong to the pangenome of Actinobacteria. Analyzing the intergenic regions of strain 135 alongside the "panIGRome" of G. polyisoprenivorans showed that some intergenic regions in strain 135 also differ from those located between the same pairs of genes in related strains. The strain G. polyisoprenivorans 135 in our work utilized naphthalene (degradation degree 39.43\%) and grew actively on salicylate. At present, this is the only known strain of G. polyisoprenivorans with experimentally confirmed ability to utilize these compounds.},
}
@article{pmid38784818,
  author   = {He, L. and Huang, R. and Chen, H. and Zhao, L. and Zhang, Z.},
  title    = {Discovery and characterization of a novel pathogen {Erwinia} pyri sp. nov. associated with pear dieback: taxonomic insights and genomic analysis},
  journal  = {Frontiers in microbiology},
  year     = {2024},
  volume   = {15},
  pages    = {1365685},
  doi      = {10.3389/fmicb.2024.1365685},
  pmid     = {38784818},
  issn     = {1664-302X},
  abstract = {In 2022, a novel disease similar to pear fire blight was found in a pear orchard in Zhangye City, Gansu Province, China. The disease mainly damages the branches, leaves, and fruits of the plant. To identify the pathogen, tissue isolation and pathogenicity testing (inoculating the potential pathogen on healthy plant tissues) were conducted. Furthermore, a comprehensive analysis encompassing the pathogen's morphological, physiological, and biochemical characteristics and whole-genome sequencing was conducted. The results showed that among the eight isolates, the symptoms on the detached leaves and fruits inoculated with isolate DE2 were identical to those observed in the field. Verifying Koch's postulates confirmed that DE2 was the pathogenic bacterium that causes the disease. Based on a 16S rRNA phylogenetic tree, isolate DE2 belongs to the genus Erwinia. Biolog and API 20E results also indicated that isolate DE2 is an undescribed species of Erwinia. Isolate DE2 was negative for oxidase. Subsequently, the complete genome sequence of isolate DE2 was determined and compared to the complete genome sequences of 29 other Erwinia species based on digital DNA-DNA hybridization (dDDH) and average nucleotide identity (ANI) analyses. The ANI and dDDH values between strain DE2 and Erwinia species were both below the species thresholds (ANI < 95-96\%, dDDH<70\%), suggesting that isolate DE2 is a new species of Erwinia. We will temporarily name strain DE2 as Erwinia pyri sp. nov.
There were 548 predicted virulence factors in the genome of strain DE2, comprising 534 on the chromosome and 5 in the plasmids. The whole genome sequence of strain DE2 has been submitted to the NCBI database (ASM3075845v1) with accession number GCA\_030758455.1. The strain DE2 has been preserved at the China Center for Type Culture Collection (CCTCC) under the deposit number CCTCC AB 2024080. This study represents the initial report of a potentially new bacterial species in the genus Erwinia that causes a novel pear dieback disease. The findings provide a valuable strain resource for the study of the genus Erwinia and establish a robust theoretical foundation for the prevention and control of emerging pear dieback diseases.},
}
@article{pmid38784399,
  author   = {Vilela, F. P. and Felice, A. G. and Seribelli, A. A. and Rodrigues, D. P. and Soares, S. C. and Allard, M. W. and Falcão, J. P.},
  title    = {Comparative genomics reveals high genetic similarity among strains of {Salmonella} enterica serovar {Infantis} isolated from multiple sources in {Brazil}},
  journal  = {PeerJ},
  year     = {2024},
  volume   = {12},
  pages    = {e17306},
  doi      = {10.7717/peerj.17306},
  pmid     = {38784399},
  issn     = {2167-8359},
  mesh     = {Brazil ; *Salmonella enterica/genetics/isolation & purification ; *Genome, Bacterial/genetics ; Humans ; *Genomics ; Animals ; *Multilocus Sequence Typing ; Salmonella Infections/microbiology/epidemiology ; Serogroup ; Food Microbiology ; Phylogeny ; Salmonella Infections, Animal/microbiology/epidemiology ; },
  abstract = {BACKGROUND: Salmonella enterica serovar Infantis (Salmonella Infantis) is a zoonotic, ubiquitous and foodborne pathogen of worldwide distribution. Despite Brazil's relevance as a major meat exporter, few studies were conducted to characterize strains of this serovar by genomic analyses in this country. Therefore, this study aimed to assess the diversity of 80 Salmonella Infantis strains isolated from veterinary, food and human sources in Brazil between 2013 and 2018 by comparative genomic analyses. Additional genomes of non-Brazilian countries (n = 18) were included for comparison purposes in some analyses.
METHODS: Analyses of whole-genome multi-locus sequence typing (wgMLST), using PGAdb-builder, and of fragmented genomes, using Gegenees, were conducted to compare the 80 Brazilian strains to the 18 non-Brazilian genomes. Pangenome analyses and calculations were performed for all Salmonella Infantis genomes analyzed. The presence of prophages was determined using PHASTER for the 80 Brazilian strains. The genome plasticity using BLAST Ring Image Generator (BRIG) and gene synteny using Mauve were evaluated for 20 selected Salmonella Infantis genomes from Brazil and ten from non-Brazilian countries. Unique orthologous protein clusters were searched in ten selected Salmonella Infantis genomes from Brazil and ten from non-Brazilian countries.
RESULTS: wgMLST and Gegenees showed a high genomic similarity among some Brazilian Salmonella Infantis genomes, and also the correlation of some clusters with non-Brazilian genomes. Gegenees also showed an overall similarity >91\% among all Salmonella Infantis genomes. Pangenome calculations revealed an open pangenome for all Salmonella Infantis subsets analyzed and a high gene content in the core genomes. Fifteen types of prophages were detected among 97.5\% of the Brazilian strains. BRIG and Mauve demonstrated a high structural similarity among the Brazilian and non-Brazilian isolates. Unique orthologous protein clusters related to biological processes, molecular functions, and cellular components were detected among Brazilian and non-Brazilian genomes.
CONCLUSION: The results presented using different genomic approaches emphasized the significant genomic similarity among Brazilian Salmonella Infantis genomes analyzed, suggesting wide distribution of closely related genotypes among diverse sources in Brazil. The data generated contributed to novel information regarding the genomic diversity of Brazilian and non-Brazilian Salmonella Infantis in comparison. The different genetically related subtypes of Salmonella Infantis from Brazil can either occur exclusively within the country, or also in other countries, suggesting that some exportation of the Brazilian genotypes may have already occurred.},
}
@article{pmid38783120,
  author   = {Khan, A. W. and Garg, V. and Sun, S. and Gupta, S. and Dudchenko, O. and Roorkiwal, M. and Chitikineni, A. and Bayer, P. E. and Shi, C. and Upadhyaya, H. D. and Bohra, A. and Bharadwaj, C. and Mir, R. R. and Baruch, K. and Yang, B. and Coyne, C. J. and Bansal, K. C. and Nguyen, H. T. and Ronen, G. and Aiden, E. L. and Veneklaas, E. and Siddique, K. H. M. and Liu, X. and Edwards, D. and Varshney, R. K.},
  title    = {Cicer super-pangenome provides insights into species evolution and agronomic trait loci for crop improvement in chickpea},
  journal  = {Nature genetics},
  year     = {2024},
  pmid     = {38783120},
  issn     = {1546-1718},
  abstract = {Chickpea (Cicer arietinum L.)-an important legume crop cultivated in arid and semiarid regions-has limited genetic diversity. Efforts are being undertaken to broaden its diversity by utilizing its wild relatives, which remain largely unexplored. Here, we present the Cicer super-pangenome based on the de novo genome assemblies of eight annual Cicer wild species. We identified 24,827 gene families, including 14,748 core, 2,958 softcore, 6,212 dispensable and 909 species-specific gene families. The dispensable genome was enriched for genes related to key agronomic traits. Structural variations between cultivated and wild genomes were used to construct a graph-based genome, revealing variations in genes affecting traits such as flowering time, vernalization and disease resistance. These variations will facilitate the transfer of valuable traits from wild Cicer species into elite chickpea varieties through marker-assisted selection or gene-editing. This study offers valuable insights into the genetic diversity and potential avenues for crop improvement in chickpea.},
}
@article{pmid38782931,
  author   = {Sroithongkham, P. and Nittayasut, N. and Yindee, J. and Nimsamer, P. and Payungporn, S. and Pinpimai, K. and Ponglowhapan, S. and Chanchaithong, P.},
  title    = {Multidrug-resistant {Escherichia} coli causing canine pyometra and urinary tract infections are genetically related but distinct from those causing prostatic abscesses},
  journal  = {Scientific reports},
  year     = {2024},
  volume   = {14},
  number   = {1},
  pages    = {11848},
  pmid     = {38782931},
  issn     = {2045-2322},
  support  = {CU_FRB65_hea (90)_195_31_14//Thailand Science Research and Innovation/ ; N42A660897//National Research Council of Thailand/ ; },
  mesh     = {Dogs ; Animals ; *Urinary Tract Infections/microbiology/veterinary ; *Drug Resistance, Multiple, Bacterial/genetics ; Male ; *Dog Diseases/microbiology ; Cats ; *Escherichia coli Infections/microbiology/veterinary ; *Pyometra/microbiology/veterinary/genetics ; *Abscess/microbiology/veterinary ; Female ; Cat Diseases/microbiology ; Uropathogenic Escherichia coli/genetics/drug effects/pathogenicity ; Escherichia coli/genetics/pathogenicity/drug effects ; Anti-Bacterial Agents/pharmacology ; Prostatic Diseases/microbiology/veterinary/genetics ; Virulence/genetics ; Virulence Factors/genetics ; },
  abstract = {Despite extensive characterisation of uropathogenic Escherichia coli (UPEC) causing urinary tract infections (UTIs), the genetic background of non-urinary extraintestinal pathogenic E. coli (ExPEC) in companion animals remains inadequately understood. In this study, we characterised virulence traits of 104 E. coli isolated from canine pyometra (n = 61) and prostatic abscesses (PAs) (n = 38), and bloodstream infections (BSIs) in dogs (n = 2), and cats (n = 3). A stronger association with UPEC of pyometra strains in comparison to PA strains was revealed. Notably, 44 isolates exhibited resistance to third-generation cephalosporins and/or fluoroquinolones, 15 were extended-spectrum β-lactamase-producers.
Twelve multidrug-resistant (MDR) strains, isolated from pyometra (n = 4), PAs (n = 5), and BSIs (n = 3), along with 7 previously characterised UPEC strains from dogs and cats, were sequenced. Genomic characteristics revealed that MDR E. coli associated with UTIs, pyometra, and BSIs belonged to international high-risk E. coli clones, including sequence type (ST) 38, ST131, ST617, ST648, and ST1193. However, PA strains belonged to distinct lineages, including ST12, ST44, ST457, ST744, and ST13037. The coreSNPs, cgMLST, and pan-genome illustrated intra-clonal variations within the same ST from different sources. The high-risk ST131 and ST1193 (phylogroup B2) contained high numbers of ExPEC virulence genes on pathogenicity islands, predominating in pyometra and UTI. Hybrid MDR/virulence IncF multi-replicon plasmids, containing aerobactin genes, were commonly found in non-B2 phylogroups from all sources. These findings offer genomic insights into non-urinary ExPEC, highlighting its potential for invasive infections in pets beyond UTIs, particularly with regards to high-risk global clones.},
}
@article{pmid38777691,
  author   = {Cantu, D. and Massonnet, M. and Cochetel, N.},
  title    = {The wild side of grape genomics},
  journal  = {Trends in genetics : TIG},
  year     = {2024},
  doi      = {10.1016/j.tig.2024.04.014},
  pmid     = {38777691},
  issn     = {0168-9525},
  abstract = {With broad genetic diversity and as a source of key agronomic traits, wild grape species (Vitis spp.) are crucial to enhance viticulture's climatic resilience and sustainability. This review discusses how recent breakthroughs in the genome assembly and analysis of wild grape species have led to discoveries on grape evolution, from wild species' adaptation to environmental stress to grape domestication. We detail how diploid chromosome-scale genomes from wild Vitis spp. have enabled the identification of candidate disease-resistance and flower sex determination genes and the creation of the first Vitis graph-based pangenome. Finally, we explore how wild grape genomics can impact grape research and viticulture, including aspects such as data sharing, the development of functional genomics tools, and the acceleration of genetic improvement.},
}
@article{pmid38775217,
  author   = {Messyasz, A. and Männistö, M. K. and Kerkhof, L. J. and Häggblom, M. M.},
  title    = {Genome analysis and description of {Tunturibacter} gen. nov. expands the diversity of {Terriglobia} in tundra soils},
  journal  = {Environmental microbiology},
  year     = {2024},
  volume   = {26},
  number   = {5},
  pages    = {e16640},
  doi      = {10.1111/1462-2920.16640},
  pmid     = {38775217},
  issn     = {1462-2920},
  support  = {Project accession number 1012785//USDA National Institute of Food and Agriculture/ ; Hatch Project NJ01160//USDA National Institute of Food and Agriculture/ ; DEB 2129351//US National Science Foundation/ ; 130507//Academy of Finland/ ; 310776//Academy of Finland/ ; },
  mesh     = {*Soil Microbiology ; *Phylogeny ; *Genome, Bacterial ; *Tundra ; Acidobacteria/genetics/classification/isolation & purification ; RNA, Ribosomal, 16S/genetics ; Arctic Regions ; },
  abstract = {Increased temperatures in Arctic tundra ecosystems are leading to higher microbial respiration rates of soil organic matter, resulting in the release of carbon dioxide and methane. To understand the effects of this microbial activity, it is important to better characterize the diverse microbial communities in Arctic soil. Our goal is to refine our understanding of the phylogenetic diversity of Terriglobia, a common but elusive group within the Acidobacteriota phylum. This will help us link this diversity to variations in carbon and nitrogen usage patterns. We used long-read Oxford Nanopore MinION sequences in combination with metagenomic short-read sequences to assemble complete Acidobacteriota genomes. This allowed us to build multi-locus phylogenies and annotate pangenome markers to distinguish Acidobacteriota strains from several tundra soil isolates. We identified a phylogenetic cluster containing four new species previously associated with Edaphobacter lichenicola. We conclude that this cluster represents a new genus, which we have named Tunturibacter.
We describe four new species: Tunturibacter lichenicola comb. nov., Tunturibacter empetritectus sp. nov., Tunturibacter gelidoferens sp. nov., and Tunturibacter psychrotolerans sp. nov. By uncovering new species and strains within the Terriglobia and improving the accuracy of their phylogenetic placements, we hope to enhance our understanding of this complex phylum and shed light on the mechanisms that shape microbial communities in polar soils.},
}
@article{pmid38774514,
  author   = {Lamoureux, C. R. and Phaneuf, P. V. and Palsson, B. O. and Zielinski, D. C.},
  title    = {Escherichia coli non-coding regulatory regions are highly conserved},
  journal  = {NAR genomics and bioinformatics},
  year     = {2024},
  volume   = {6},
  number   = {2},
  pages    = {lqae041},
  pmid     = {38774514},
  issn     = {2631-9268},
  abstract = {Microbial genome sequences are rapidly accumulating, enabling large-scale studies of sequence variation. Existing studies primarily focus on coding regions to study amino acid substitution patterns in proteins. However, non-coding regulatory regions also play a distinct role in determining physiologic responses. To investigate intergenic sequence variation on a large-scale, we identified non-coding regulatory region alleles across 2350 Escherichia coli strains. This 'alleleome' consists of 117 781 unique alleles for 1169 reference regulatory regions (transcribing 1975 genes) at single base-pair resolution. We find that 64\% of nucleotide positions are invariant, and variant positions vary in a median of just 0.6\% of strains. Additionally, non-coding alleles are sufficient to recover E. coli phylogroups. We find that core promoter elements and transcription factor binding sites are significantly conserved, especially those located upstream of essential or highly-expressed genes. However, variability in conservation of transcription factor binding sites is significant both within and across regulons. Finally, we contrast mutations acquired during adaptive laboratory evolution with wild-type variation, finding that the former preferentially alter positions that the latter conserves. Overall, this analysis elucidates the wealth of information found in E. coli non-coding sequence variation and expands pangenomic studies to non-coding regulatory regions at single-nucleotide resolution.},
}
@article{pmid38769697,
  author   = {Vaughn, J. N. and Korani, W. and Clevenger, J. and Ozias-Akins, P.},
  title    = {{Agile Genetics}: Single gene resolution without the fuss},
  journal  = {BioEssays : news and reviews in molecular, cellular and developmental biology},
  year     = {2024},
  pages    = {e2300206},
  doi      = {10.1002/bies.202300206},
  pmid     = {38769697},
  issn     = {1521-1878},
  support  = {6066-21310-005-000-D//USDA/ ; 6066-21310-006-000-D//USDA/ ; 2023-78408-39694//AFRI/ ; },
  abstract = {Gene discovery reveals new biology, expands the utility of marker-assisted selection, and enables targeted mutagenesis. Still, such discoveries can take over a decade. We present a general strategy, "Agile Genetics," that uses nested, structured populations to overcome common limits on gene resolution. Extensive simulation work on realistic genetic architectures shows that, at population sizes of >5000 samples, single gene-resolution can be achieved using bulk segregant pools. At this scale, read depth and technical replication become major drivers of resolution. Emerging enrichment methods to address coverage are on the horizon; we describe one possibility - iterative depth sequencing (ID-seq). In addition, graph-based pangenomics in experimental populations will continue to maximize accuracy and improve interpretation. Based on this merger of agronomic scale with molecular and bioinformatic innovation, we predict a new age of rapid gene discovery.},
}
@article{pmid38769598,
  author   = {Miranda-López, D. C. and Pérez-Rueda, E. and Rojas-Vargas, J. and Cortez, C. H. and Saldaña-Padilla, A. and Castelán-Sánchez, H. G. and Castro-Escarpulli, G.},
  title    = {Comprehensive comparative analysis of the periodontal pathogen {Porphyromonas} gingivalis: exploring the pan-genome, the reconstruction of the gene regulatory network and genome-scale metabolic network},
  journal  = {Letters in applied microbiology},
  year     = {2024},
  doi      = {10.1093/lambio/ovae048},
  pmid     = {38769598},
  issn     = {1472-765X},
  abstract = {Porphyromonas gingivalis is a nonmotile, obligate anaerobic, Gram-negative bacterium known for its association with periodontal disease and its involvement in systemic diseases such as atherosclerosis, cardiovascular disease, colon cancer and Alzheimer's disease. This bacterium produces several virulence factors, including capsules, fimbriae, lipopolysaccharides, proteolytic enzymes and hemagglutinins. A comparative genomic analysis revealed the open pangenome of P. gingivalis and identified complete type IV secretion systems (T4SS) in strain KCOM2805 and almost complete type VI secretion systems (T6SS) in strains KCOM2798 and ATCC49417, which is a new discovery as previous studies did not find the proteins involved in secretion systems IV and VI. Conservation of some virulence factors between different strains was observed, regardless of their genetic diversity and origin. In addition, we performed for the first time a reconstruction analysis of the gene regulatory network (GRN), identifying transcription factors and proteins involved in the regulatory mechanisms of bacterial pathogenesis. In particular, QseB regulates the expression of hemagglutinin and arginine deaminase, while Rex may suppress the release of gingipain through interactions with PorV and the formatum/nitrate transporter.
Our study highlights the central role of conserved virulence factors and regulatory pathways, particularly QseB and Rex, in P. gingivalis and provides insights into potential therapeutic targets.},
}
@article{pmid38769436,
  author  = {Vassallo, C. N. and Doering, C. R. and Littlehale, M. L. and Teodoro, G. I. C. and Laub, M. T.},
  title   = {Author Correction: A functional selection reveals previously undetected anti-phage defence systems in the {E.} coli pangenome},
  journal = {Nature microbiology},
  year    = {2024},
  doi     = {10.1038/s41564-024-01724-8},
  pmid    = {38769436},
  issn    = {2058-5276},
}
@article{pmid38764023,
  year     = {2024},
  author   = {Chang, C. M. and Chang, W. C. and Hsieh, S. L.},
  title    = {Characterization of the genetic variation and evolutionary divergence of the {CLEC18} family.},
  journal  = {Journal of biomedical science},
  volume   = {31},
  number   = {1},
  pages    = {53},
  pmid     = {38764023},
  issn     = {1423-0127},
  support  = {107-2101-01-18-03//Academia Sinica/ ; AS-TM-108-02-10//Academia Sinica/ ; AS-BRPT-110-02//Academia Sinica/ ; AS-IA-109-L02//Academia Sinica/ ; 109-2101-01-19-20//Academia Sinica/ ; VTA109-A-3-1//Academia Sinica/ ; MOST107-2321-B-001-015//Ministry of Science and Technology, Taiwan/ ; NSTC112-2320-B-001-027//National Science and Technology Council/ ; NSTC112-2320-B-038-026-MY3//National Science and Technology Council/ ; IM-112-PP-01//National Health Research Institutes/ ; },
  mesh     = {Humans ; *Lectins, C-Type/genetics/metabolism ; *Genetic Variation ; *Evolution, Molecular ; Animals ; Primates/genetics ; },
  abstract = {BACKGROUND: The C-type lectin family 18 (CLEC18) with lipid and glycan binding capabilities is important to metabolic regulation and innate immune responses against viral infection. However, human CLEC18 comprises three paralogous genes with highly similar sequences, making it challenging to distinguish genetic variations, expression patterns, and biological functions of individual CLEC18 paralogs. Additionally, the evolutionary relationship between human CLEC18 and its counterparts in other species remains unclear.
METHODS: To identify the sequence variation and evolutionary divergence of human CLEC18 paralogs, we conducted a comprehensive analysis using various resources, including human and non-human primate reference genome assemblies, human pangenome assemblies, and long-read-based whole-genome and -transcriptome sequencing datasets.
RESULTS: We uncovered paralogous sequence variants (PSVs) and polymorphic variants (PVs) of human CLEC18 proteins, and identified distinct signatures specific to each CLEC18 paralog. Furthermore, we unveiled a novel segmental duplication for human CLEC18A gene. By comparing CLEC18 across human and non-human primates, our research showed that the CLEC18 paralogy probably occurred in the common ancestor of human and closely related non-human primates, and the lipid-binding CAP/SCP/TAPS domain of CLEC18 is more diverse than its glycan-binding CTLD. Moreover, we found that certain amino acids alterations at variant positions are exclusive to human CLEC18 paralogs.
CONCLUSIONS: Our findings offer a comprehensive profiling of the intricate variations and evolutionary characteristics of human CLEC18.},
}
@article{pmid38763984,
  year     = {2024},
  author   = {Mouren, A. and Chansavang, A. and Hamzaoui, N. and Srikaran, A. and Laurent-Puig, P. and Marisa, L. and De Percin, S. and Lupo, A. and Larousserie, F. and Blons, H. and L'Haridon, A. and Burnichon, N. and Pasmant, E. and Tlemsani, C.},
  title    = {A de novo germline pathogenic {BRCA1} variant identified following an osteosarcoma pangenomic molecular analysis.},
  journal  = {Familial cancer},
  pmid     = {38763984},
  issn     = {1573-7292},
  abstract = {De novo germline pathogenic variants (gPV) of the BReast CAncer 1 (BRCA1) gene are very rare. Only a few have been described up to date, usually in patients with a history of ovarian or breast cancer. Here, we report the first case of an incidental de novo BRCA1 germline pathogenic variant which was identified within the framework of the Plan France Médecine Génomique (PFMG) 2025 French national tumor sequencing program. The proband was a 29-year-old man diagnosed with metastatic osteosarcoma. Tumor whole exome sequencing identified a BRCA1 c.3756_3759del p.(Ser1253Argfs*10) pathogenic variant without loss-of-heterozygosity. A low genomic instability score and the absence of single base substitution signatures of homologous recombination deficiency suggested that the BRCA1 variant was not driver in the osteosarcoma tumorigenesis. Germline whole genome sequencing asserted the germline nature of this variant, with a 36% allele frequency, suggesting a mosaicism caused by a post-zygotic mutational event. The proband's family (parents and siblings) were not carriers of this variant confirming the de novo occurrence. Tumor sequencing programs like the French PFMG 2025 have been implemented worldwide and may help identify new gPV, including de novo variants.},
}
@article{pmid38757662,
  year     = {2024},
  author   = {Wang, Y. and Li, P. and Zhu, Y. and Zhang, F. and Zhang, S. and He, Y. and Wu, Y. and Lin, Y. and Wang, H. and Ren, W. and Wang, L. and Yang, Y. and Wang, R. and Zheng, P. and Liu, Y. and Wang, S. and Yue, J.},
  title    = {Graph-Based Pangenome of {Actinidia} chinensis Reveals Structural Variations Mediating Fruit Degreening.},
  journal  = {Advanced science (Weinheim, Baden-Wurttemberg, Germany)},
  pages    = {e2400322},
  doi      = {10.1002/advs.202400322},
  pmid     = {38757662},
  issn     = {2198-3844},
  support  = {U23A20204//National Natural Science Foundation of China/ ; 31972474//National Natural Science Foundation of China/ ; 31471157//National Natural Science Foundation of China/ ; //Wanjiang Scholar" Program (Anhui Province)/ ; 2308085MC69//Anhui Provincial Natural Science Foundation/ ; },
  abstract = {Fruit ripening is associated with the degreening process (loss of chlorophyll) that occurs in most fruit species. Kiwifruit is one of the special species whose fruits may maintain green flesh by accumulating a large amount of chlorophyll even after ripening. However, little is known about the genetic variations related to the fruit degreening process. Here, a graph-based kiwifruit pangenome by analyzing 14 chromosome-scale haplotype-resolved genome assemblies from seven representative cultivars or lines in Actinidia chinensis is built. A total of 49,770 non-redundant gene families are identified, with core genes constituting 46.6%, and dispensable genes constituting 53.4%. A total of 84,591 non-redundant structural variations (SVs) are identified. The pangenome graph integrating both reference genome sequences and variant information facilitates the identification of SVs related to fruit color.
The SV in the promoter of the AcBCM gene determines its high expression in the late developmental stage of fruits, which causes chlorophyll accumulation in the green-flesh fruits by post-translationally regulating AcSGR2, a key enzyme of chlorophyll catabolism. Taken together, a high-quality pangenome is constructed, unraveled numerous genetic variations, and identified a novel SV mediating fruit coloration and fruit quality, providing valuable information for further investigating genome evolution and domestication, QTL genes function, and genomics-assisted breeding.},
}
@article{pmid38755527,
  year     = {2024},
  author   = {Le, D. Q. and Nguyen, S. H. and Nguyen, T. T. and Nguyen, C. H. and Ho, T. H. and Vo, N. S. and Nguyen, T. and Nguyen, H. A. and Cao, M. D.},
  title    = {{AMRViz} enables seamless genomics analysis and visualization of antimicrobial resistance.},
  journal  = {BMC bioinformatics},
  volume   = {25},
  number   = {1},
  pages    = {193},
  pmid     = {38755527},
  issn     = {1471-2105},
  support  = {VINIF.2019.DA11//Vingroup Innovation Foundation (VINIF)/ ; VINIF.2019.DA11//Vingroup Innovation Foundation (VINIF)/ ; VINIF.2019.DA11//Vingroup Innovation Foundation (VINIF)/ ; VINIF.2019.DA11//Vingroup Innovation Foundation (VINIF)/ ; VINIF.2019.DA11//Vingroup Innovation Foundation (VINIF)/ ; VINIF.2019.DA11//Vingroup Innovation Foundation (VINIF)/ ; VINIF.2019.DA11//Vingroup Innovation Foundation (VINIF)/ ; VINIF.2019.DA11//Vingroup Innovation Foundation (VINIF)/ ; },
  mesh     = {*Genomics/methods ; *Software ; *Genome, Bacterial ; Drug Resistance, Bacterial/genetics ; Phylogeny ; Bacteria/genetics/drug effects ; Anti-Bacterial Agents/pharmacology ; },
  abstract = {We have developed AMRViz, a toolkit for analyzing, visualizing, and managing bacterial genomics samples. The toolkit is bundled with the current best practice analysis pipeline allowing researchers to perform comprehensive analysis of a collection of samples directly from raw sequencing data with a single command line. The analysis results in a report showing the genome structure, genome annotations, antibiotic resistance and virulence profile for each sample. The pan-genome of all samples of the collection is analyzed to identify core- and accessory-genes. Phylogenies of the whole genome as well as all gene clusters are also generated. The toolkit provides a web-based visualization dashboard allowing researchers to interactively examine various aspects of the analysis results.
Availability: AMRViz is implemented in Python and NodeJS, and is publicly available under open source MIT license at https://github.com/amromics/amrviz .},
}
@article{pmid38753867,
  year     = {2024},
  author   = {Qureshi, H. and Basheer, A. and Sajjad, W. and Faheem, M. and Babar Jamal, S.},
  title    = {An integrated in-silico approach for drug target identification in human pathogen {Shigella} dysenteriae.},
  journal  = {PloS one},
  volume   = {19},
  number   = {5},
  pages    = {e0303048},
  doi      = {10.1371/journal.pone.0303048},
  pmid     = {38753867},
  issn     = {1932-6203},
  mesh     = {*Shigella dysenteriae/drug effects/genetics/pathogenicity ; Humans ; *Anti-Bacterial Agents/pharmacology ; *Molecular Docking Simulation ; *Computer Simulation ; *Dysentery, Bacillary/microbiology/drug therapy ; Genome, Bacterial ; Bacterial Proteins/genetics/metabolism ; Computational Biology/methods ; },
  abstract = {Shigella dysenteriae, is a Gram-negative bacterium that emerged as the second most significant cause of bacillary dysentery. Antibiotic treatment is vital in lowering Shigella infection rates, yet the growing global resistance to broad-spectrum antibiotics poses a significant challenge. The persistent multidrug resistance of S. dysenteriae complicates its management and control. Hence, there is an urgent requirement to discover novel therapeutic targets and potent medications to prevent and treat this disease. Therefore, the integration of bioinformatics methods such as subtractive and comparative analysis provides a pathway to compute the pan-genome of S. dysenteriae. In our study, we analysed a dataset comprising 27 whole genomes. The S. dysenteriae strain SD197 was used as the reference for determining the core genome. Initially, our focus was directed towards the identification of the proteome of the core genome. Moreover, several filters were applied to the core genome, including assessments for non-host homology, protein essentiality, and virulence, in order to prioritize potential drug targets. Among these targets were Integration host factor subunit alpha and Tyrosine recombinase XerC.
Furthermore, four drug-like compounds showing potential inhibitory effects against both target proteins were identified. Subsequently, molecular docking analysis was conducted involving these targets and the compounds. This initial study provides the list of novel targets against S. dysenteriae. Conclusively, future in vitro investigations could validate our in-silico findings and uncover potential therapeutic drugs for combating bacillary dysentery infection.},
}
@article{pmid38747582,
  year     = {2024},
  author   = {Sun, R.-Y. and Fang, L.-X. and Dai, J.-J. and Chen, K.-C. and Ke, B.-X. and Sun, J. and Ke, C.-W. and Wai Chi Chan, E. and Liu, Y.-H. and Chen, S. and Liao, X.-P.},
  title    = {Antimicrobial resistance and population genomics of emerging multidrug-resistant {Salmonella} 4,[5],12:i:- in {Guangdong}, {China}.},
  journal  = {mSystems},
  pages    = {e0116423},
  doi      = {10.1128/msystems.01164-23},
  pmid     = {38747582},
  issn     = {2379-5077},
  abstract = {Salmonella 4,[5],12:i:-, a monophasic variant of Salmonella Typhimurium, has emerged as a global cause of multidrug-resistant salmonellosis and has become endemic in many developing and developed countries, especially in China. Here, we have sequenced 352 clinical isolates in Guangdong, China, during 2009-2019 and performed a large-scale collection of Salmonella 4,[5],12:i:- with whole genome sequencing (WGS) data across the globe, to better understand the population structure, antimicrobial resistance (AMR) genomic characterization, and transmission routes of Salmonella 4,[5],12:i:- across Guangdong. Salmonella 4,[5],12:i:- strains showed broad genetic diversity; Guangdong isolates were found to be widely distributed among the global lineages. Of note, we identified the formation of a novel Guangdong clade (Bayesian analysis of population structure lineage 1 [BAPS1]) genetically diversified from the global isolates and likely emerged around 1990s. BAPS1 exhibits unique genomic features, including large pan-genome, decreased ciprofloxacin susceptibility due to mutation in gyrA and carriage of plasmid-mediated quinolone resistance (PMQR) genes, and the multidrug-resistant IncHI2 plasmid. Furthermore, high genetic similarity was found between strains collected from Guangdong, Europe, and North America, indicating the association with multiple introductions from overseas.
These results suggested that global dissemination and local clonal expansion simultaneously occurred in Guangdong, China, and horizontally acquired resistance to first-line and last-line antimicrobials at local level, underlying emergences of extensive drug and pan-drug resistance. Our findings have increased the knowledge of global and local epidemics of Salmonella 4,[5],12:i:- in Guangdong, China, and provided a comprehensive baseline data set essential for future molecular surveillance.
IMPORTANCE: Salmonella 4,[5],12:i:- has been regarded as the predominant pandemic serotype causing diarrheal diseases globally, while multidrug resistance (MDR) constitutes great public health concerns. This study provided a detailed and comprehensive genome-scale analysis of this important Salmonella serovar in the past decade in Guangdong, China. Our results revealed the complexity of two distinct transmission modes, namely global transmission and local expansion, circulating in Guangdong over a decade. Using phylogeography models, the origin of Salmonella 4,[5],12:i:- was predicted from two aspects, year and country, that is, Salmonella 4,[5],12:i:- emerged in 1983, and was introduced from the UK, and subsequently differentiated into the local endemic lineage circa 1991. Additionally, based on the pan-genome analysis, it was found that the gene accumulation rate in local endemic BAPS 1 lineage was higher than in other lineages, and the horizontal transmission of MDR IncHI2 plasmid associated with high resistance played a major role, which showed the potential threat to public health.},
}
@article{pmid38747118,
  year     = {2024},
  author   = {Doukbi, E. and Ancel, P. and Dutour, A. and Soghomonian, A. and Ahmed, S. and Castejon, V. and Piperoglou, C. and Gariboldi, V. and Lenoir, M. and Lechevallier, E. and Gondran-Tellier, B. and Boissier, R. and Ebbo, M. and Vély, F. and Gaborit, B.},
  title    = {Human epicardial fat has a beige profile and contains higher type 2 innate lymphoid cells than subcutaneous fat.},
  journal  = {Obesity (Silver Spring, Md.)},
  doi      = {10.1002/oby.24023},
  pmid     = {38747118},
  issn     = {1930-739X},
  support  = {AMX-21-PEP-026//Aix-Marseille University Excellence Initiative-A*MIDEX/ ; AP-RM-22-017//Fondation de l'Avenir/ ; },
  abstract = {OBJECTIVE: Epicardial adipose tissue (EAT) is a visceral fat that has been associated with coronary artery disease and atrial fibrillation. Previous work has revealed that EAT exhibits beige features.
METHODS: First, a new pan-genomic microarray analysis was performed on previously collected paired human EAT and thoracic subcutaneous AT (thSAT) from the EPICAR study (n = 31) to decipher a specific immune signature and its link with browning genes. Then, adaptive (T and B cells) and innate lymphoid cell (ILC1, ILC2, and ILC3) immunophenotyping assay panels, including CD127, CD117, and prostaglandin D2 receptor 2, were performed on prospectively collected paired human multiorgan donors (n = 18; INTERFACE study).
RESULTS: In the EPICAR study, a positive correlation between the T helper cell subtype Th2 immune pathway and browning genes was found in EAT versus thSAT (r = 0.82; p < 0.0001). In the INTERFACE study, this correlation was also observed (r = 0.31; p = 0.017), and a preponderance of CD4[+]T cells, CD8[+]T cells, and a few B cells was observed in all ATs (p < 0.0001). An increase in ILCs was observed in visceral AT (VAT) (i.e., EAT + VAT; 30 ± 5 ILCs per gram of AT) compared with subcutaneous counterparts (i.e., thSAT + abdominal SAT; 8 ± 2 ILCs per gram of AT; p = 0.001), with ILC1 being the most frequent (ILC1 > ILC3 > ILC2). Numbers of ILCs per gram of AT correlated with several Th2 or browning genes (IL-13, TNF receptor superfamily member 9 [TNFRSF9], and alkaline phosphatase, biomineralization associated [ALPL]). Interestingly, a specific increase in EAT-ILC2 compared with other ATs was observed, including a significant proportion expressing CD69 and/or CD25 activation markers (97.9% ± 1.2%; p < 0.0001). Finally, more natural killer cells were observed in EAT + VAT than in thSAT + abdominal SAT (p = 0.01). Exclusion of patients with coronary artery disease in the EPICAR and INTERFACE studies did not modify the main findings. Gene expression phenotyping confirmed specific upregulation of Th2 pathway and browning genes (IL-33 and uncoupling protein 1 [UCP-1]) in EAT.
CONCLUSIONS: This is the first study, to our knowledge, to provide a comparison between innate and adaptive lymphoid cells in human EAT. Further studies are ongoing to decipher whether these cells could be involved in EAT beiging.},
}
@article{pmid38743633,
  year     = {2024},
  author   = {Su, Y. and Yang, X. and Wang, Y. and Li, J. and Long, Q. and Cao, S. and Wang, X. and Liu, Z. and Huang, S. and Chen, Z. and Peng, Y. and Zhang, F. and Xue, H. and Cao, X. and Zhang, M. and Yisilam, G. and Chu, Z. and Gao, Y. and Zhou, Y. and Liu, Z. and Xiao, H. and Tian, X.},
  title    = {Phased Telomere-to-Telomere Reference Genome and Pangenome Reveal an Expansion of Resistance Genes during Apple Domestication.},
  journal  = {Plant physiology},
  doi      = {10.1093/plphys/kiae258},
  pmid     = {38743633},
  issn     = {1532-2548},
  abstract = {The cultivated apple (Malus domestica Borkh.) is a cross-pollinated perennial fruit tree of great economic importance. Previous versions of apple reference genomes were unphased, fragmented, and lacked comprehensive insights into the highly heterozygous genome, which impeded genetic studies and breeding programs in apple. In this study, we assembled a haplotype-resolved telomere-to-telomere reference genome for the diploid apple cultivar Golden Delicious. Subsequently, we constructed a pangenome based on twelve assemblies from wild and cultivated apples to investigate different types of resistance gene analogs (RGAs). Our results revealed the dynamics of the gene gain and loss events during apple domestication. Compared with cultivated species, more gene families in wild species were significantly enriched in oxidative phosphorylation, pentose metabolic process, responses to salt, and abscisic acid biosynthesis process. Interestingly, our analyses demonstrated a higher prevalence of RGAs in cultivated apples than their wild relatives, partially attributed to segmental and tandem duplication events in certain RGAs classes. Other types of structural variations, mainly deletions and insertions, have affected the presence and absence of TIR-NB-ARC-LRR (TNL), NB-ARC-LRR (NL), and CC-NB-ARC-LRR (CNL) genes.
Additionally, hybridization/introgression from wild species has also contributed to the expansion of resistance genes in domesticated apples. Our haplotype-resolved T2T genome and pangenome provide important resources for genetic studies of apples, emphasizing the need to study the evolutionary mechanisms of resistance genes in apple breeding programs.},
}
@article{pmid38743630,
  year     = {2024},
  author   = {Dewar, A. E. and Hao, C. and Belcher, L. J. and Ghoul, M. and West, S. A.},
  title    = {Bacterial lifestyle shapes pangenomes.},
  journal  = {Proceedings of the National Academy of Sciences of the United States of America},
  volume   = {121},
  number   = {21},
  pages    = {e2320170121},
  doi      = {10.1073/pnas.2320170121},
  pmid     = {38743630},
  issn     = {1091-6490},
  support  = {834164//EC | ERC | HORIZON EUROPE European Research Council (ERC)/ ; },
  mesh     = {*Genome, Bacterial ; *Bacteria/genetics/classification ; *Phylogeny ; },
  abstract = {Pangenomes vary across bacteria. Some species have fluid pangenomes, with a high proportion of genes varying between individual genomes. Other species have less fluid pangenomes, with different genomes tending to contain the same genes. Two main hypotheses have been suggested to explain this variation: differences in species' bacterial lifestyle and effective population size. However, previous studies have not been able to test between these hypotheses because the different features of lifestyle and effective population size are highly correlated with each other, and phylogenetically conserved, making it hard to disentangle their relative importance. We used phylogeny-based analyses, across 126 bacterial species, to tease apart the causal role of different factors. We found that pangenome fluidity was lower in i) host-associated compared with free-living species and ii) host-associated species that are obligately dependent on a host, live inside cells, and are more pathogenic and less motile. In contrast, we found no support for the competing hypothesis that larger effective population sizes lead to more fluid pangenomes. Effective population size appears to correlate with pangenome variation because it is also driven by bacterial lifestyle, rather than because of a causal relationship.},
}
@article{pmid38741743,
  year     = {2024},
  author   = {Singh, R. P. and Sinha, A. and Deb, S. and Kumari, K.},
  title    = {First report on in-depth genome and comparative genome analysis of a metal-resistant bacterium {Acinetobacter} pittii {S-30}, isolated from environmental sample.},
  journal  = {Frontiers in microbiology},
  volume   = {15},
  pages    = {1351161},
  doi      = {10.3389/fmicb.2024.1351161},
  pmid     = {38741743},
  issn     = {1664-302X},
  abstract = {A newly isolated bacterium Acinetobacter pittii S-30 was recovered from waste-contaminated soil in Ranchi, India. The isolated bacterium belongs to the ESKAPE organisms which represent the major nosocomial pathogens that exhibit high antibiotic resistance. Furthermore, average nucleotide identity (ANI) analysis also showed its closest match (>95%) to other A. pittii genomes. The isolate showed metal-resistant behavior and was able to survive up to 5 mM of ZnSO4. Whole genome sequencing and annotations revealed the occurrence of various genes involved in stress protection, motility, and metabolism of aromatic compounds. Moreover, genome annotation identified the gene clusters involved in secondary metabolite production (biosynthetic gene clusters) such as arylpolyene, acinetobactin like NRP-metallophore, betalactone, and hserlactone-NRPS cluster. The metabolic potential of A. pittii S-30 based on cluster of orthologous, and Kyoto Encyclopedia of Genes and Genomes indicated a high number of genes related to stress protection, metal resistance, and multiple drug-efflux systems etc., which is relatively rare in A. pittii strains. Additionally, the presence of various carbohydrate-active enzymes such as glycoside hydrolases (GHs), glycosyltransferases (GTs), and other genes associated with lignocellulose breakdown suggests that strain S-30 has strong biomass degradation potential. Furthermore, an analysis of genetic diversity and recombination in A. pittii strains was performed to understand the population expansion hypothesis of A.
pittii strains. To our knowledge, this is the first report demonstrating the detailed genomic characterization of a heavy metal-resistant bacterium belonging to A. pittii. Therefore, the A. pittii S-30 could be a good candidate for the promotion of plant growth and other biotechnological applications.},
}
@article{pmid38741009,
  year    = {2024},
  key     = {Panoply of pangenomes},
  title   = {A panoply of pangenomes.},
  journal = {Nature ecology \& evolution},
  volume  = {8},
  number  = {5},
  pages   = {833},
  pmid    = {38741009},
  issn    = {2397-334X},
}
@article{pmid38740859,
  year     = {2024},
  author   = {Khan, M. F. and Ali, A. and Rehman, H. M. and Noor Khan, S. and Hammad, H. M. and Waseem, M. and Wu, Y. and Clark, T. G. and Jabbar, A.},
  title    = {Exploring optimal drug targets through subtractive proteomics analysis and pangenomic insights for tailored drug design in tuberculosis.},
  journal  = {Scientific reports},
  volume   = {14},
  number   = {1},
  pages    = {10904},
  pmid     = {38740859},
  issn     = {2045-2322},
  mesh     = {*Mycobacterium tuberculosis/drug effects/genetics/metabolism ; *Antitubercular Agents/pharmacology ; Humans ; *Drug Design ; *Tuberculosis/drug therapy/microbiology ; *Proteomics/methods ; Genome, Bacterial ; Bacterial Proteins/genetics/metabolism ; Phylogeny ; Molecular Docking Simulation ; Molecular Dynamics Simulation ; Genomics/methods ; },
  abstract = {Tuberculosis (TB), caused by Mycobacterium tuberculosis, ranks among the top causes of global human mortality, as reported by the World Health Organization's 2022 TB report. The prevalence of M. tuberculosis strains that are multiple and extensive-drug resistant represents a significant barrier to TB eradication. Fortunately, having many completely sequenced M. tuberculosis genomes available has made it possible to investigate the species pangenome, conduct a pan-phylogenetic investigation, and find potential new drug targets. The 442 complete genome dataset was used to estimate the pangenome of M. tuberculosis. This study involved phylogenomic classification and in-depth analyses. Sequential filters were applied to the conserved core genome containing 2754 proteins. These filters assessed non-human homology, virulence, essentiality, physiochemical properties, and pathway analysis. Through these intensive filtering approaches, promising broad-spectrum therapeutic targets were identified. These targets were docked with FDA-approved compounds readily available on the ZINC database.
Selected highly ranked ligands with inhibitory potential include dihydroergotamine and abiraterone acetate. The effectiveness of the ligands has been supported by molecular dynamics simulation of the ligand-protein complexes, instilling optimism that the identified lead compounds may serve as a robust basis for the development of safe and efficient drugs for TB treatment, subject to further lead optimization and subsequent experimental validation.},
}
@article{pmid38739115,
  year     = {2024},
  author   = {Truong, N. H. M. and Nguyen, Q. and Voong, P. V. and Chau, V. and Nguyen, N. H. T. and Nguyen, T. H. M. and Vo, P. H. and Nguyen, L. T. and Ha, T. T. P. and Nguyen, L. P. H. and Le, P. H. and Thanh, D. P. and Nguyen, H. D.},
  title    = {Genomic characterization of {Aeromonas} spp. isolates from striped catfish with motile {Aeromonas} septicemia and human bloodstream infections in {Vietnam}.},
  journal  = {Microbial genomics},
  volume   = {10},
  number   = {5},
  doi      = {10.1099/mgen.0.001248},
  pmid     = {38739115},
  issn     = {2057-5858},
  mesh     = {Animals ; *Catfishes/microbiology ; Vietnam/epidemiology ; *Aeromonas/genetics/isolation & purification/classification/pathogenicity ; *Gram-Negative Bacterial Infections/microbiology/veterinary/epidemiology ; Humans ; *Sepsis/microbiology/veterinary/epidemiology ; *Fish Diseases/microbiology ; Phylogeny ; Genomics ; Genome, Bacterial ; Virulence Factors/genetics ; Anti-Bacterial Agents/pharmacology ; },
  abstract = {Aeromonas spp. are commonly found in the aquatic environment and have been responsible for motile Aeromonas septicemia (MAS) in striped catfish, resulting in significant economic loss. These organisms also cause a range of opportunistic infections in humans with compromised immune systems. Here, we conducted a genomic investigation of 87 Aeromonas isolates derived from diseased catfish, healthy catfish and environmental water in catfish farms affected by MAS outbreaks in eight provinces in Mekong Delta (years: 2012-2022), together with 25 isolates from humans with bloodstream infections (years: 2010-2020). Genomics-based typing method precisely delineated Aeromonas species while traditional methods such as aerA PCR and MALDI-TOF were unable identify A. dhakensis. A. dhakensis was found to be more prevalent than A. hydrophila in both diseased catfish and human infections. A. dhakensis sequence type (ST) 656 followed by A.
hydrophila ST251 were the predominant virulent species-lineages in diseased catfish (43.7 and 20.7 %, respectively), while diverse STs were found in humans with bloodstream infections. There was evidence of widespread transmission of ST656 and ST251 on striped catfish in the Mekong Delta region. ST656 and ST251 isolates carried a significantly higher number of acquired antimicrobial resistance (AMR) genes and virulence factors in comparison to other STs. They, however, exhibited several distinctions in key virulence factors (i.e. lack of type IV pili and enterotoxin ast in A. dhakensis), AMR genes (i.e. presence of imiH carbapenemase in A. dhakensis), and accessory gene content. To uncover potential conserved proteins of Aeromonas spp. for vaccine development, pangenome analysis has unveiled 2202 core genes between ST656 and ST251, of which 78 proteins were in either outer membrane or extracellular proteins. Our study represents one of the first genomic investigations of the species distribution, genetic landscape, and epidemiology of Aeromonas in diseased catfish and human infections in Vietnam. The emergence of antimicrobial resistant and virulent A. dhakensis strains underscores the needs of enhanced genomic surveillance and strengthening vaccine research and development in preventing Aeromonas diseases in catfish and humans, and the search for potential vaccine candidates could focus on Aeromonas core genes encoded for membrane and secreted proteins.},
}
@article{pmid38737752,
  year     = {2024},
  author   = {Omidiran, O. and Patel, A. and Usman, S. and Mhatre, I. and Abdelhalim, H. and DeGroat, W. and Narayanan, R. and Singh, K. and Mendhe, D. and Ahmed, Z.},
  title    = {{GWAS} advancements to investigate disease associations and biological mechanisms.},
  journal  = {Clinical and translational discovery},
  volume   = {4},
  number   = {3},
  pmid     = {38737752},
  issn     = {2768-0622},
  abstract = {Genome-wide association studies (GWAS) have been instrumental in elucidating the genetic architecture of various traits and diseases. Despite the success of GWAS, inherent limitations such as identifying rare and ultra-rare variants, the potential for spurious associations, and in pinpointing causative agents can undermine diagnostic capabilities. This review provides an overview of GWAS and highlights recent advances in genetics that employ a range of methodologies, including Whole Genome Sequencing (WGS), Mendelian Randomization (MR), the Pangenome's high-quality T2T-CHM13 panel, and the Human BioMolecular Atlas Program (HuBMAP), as potential enablers of current and future GWAS research. State of the literature demonstrate the capabilities of these techniques in enhancing the statistical power of GWAS. WGS, with its comprehensive approach, captures the entire genome, surpassing the capabilities of the traditional GWAS technique focused on predefined Single Nucleotide Polymorphism (SNP) sites. The Pangenome's T2T-CHM13 panel, with its holistic approach, aids in the analysis of regions with high sequence identity, such as segmental duplications (SDs). Mendelian Randomization has advanced causative inference, improving clinical diagnostics and facilitating definitive conclusions. Furthermore, spatial biology techniques like HuBMAP, enable 3D molecular mapping of tissues at single-cell resolution, offering insights into pathology of complex traits.
This study aims to elucidate and advocate for the increased application of these technologies, highlighting their potential to shape the future of GWAS research.},
}
@article{pmid38737571,
  author   = {Calvo-Silveria, S. and González-Díaz, A. and Grau, I. and Marimón, J. M. and Cercenado, E. and Quesada, M. D. and Casabella, A. and Larrosa, N. and Yuste, J. and Berbel, D. and Alonso, M. and Tubau, F. and Belman, S. and Cadenas-Jiménez, I. and Martín-Galiano, A. J. and Domínguez, M. Á. and Martí, S. and Liñares, J. and Pallarés, R. and Càmara, J. and Ardanuy, C.},
  title    = {Evolution of invasive pneumococcal disease by serotype 3 in adults: a {Spanish} three-decade retrospective study},
  journal  = {The Lancet Regional Health. Europe},
  year     = {2024},
  volume   = {41},
  pages    = {100913},
  pmid     = {38737571},
  issn     = {2666-7762},
  abstract = {BACKGROUND: Invasive pneumococcal disease due to serotype 3 (S3-IPD) is associated with high mortality rates and long-term adverse effects. The introduction of the 13-valent pneumococcal conjugate vaccine (PCV13) into the Spanish paediatric immunisation programme has not led to a decrease in the adult S3-IPD. We aimed to analyse the incidence, clinical characteristics and genomics of S3-IPD in adults in Spain.
METHODS: Adult IPD episodes hospitalized in a Southern Barcelona hospital were prospectively collected (1994-2020). For genomic comparison, S3-IPD isolates from six Spanish hospitals (2008-2020) and historical isolates (1989-1993) were analysed by WGS (Illumina and/or MinION).
FINDINGS: From 1994 to 2020, 270 S3-IPD episodes were detected. When comparing pre-PCV (1994-2001) and late-PCV13 (2016-2020) periods, only modest changes in S3-IPD were observed (from 1.58 to 1.28 episodes per 100,000 inhabitants year). In this period, the incidence of the two main lineages shifted from 0.38 to 0.67 (CC180-GPSC12) and from 1.18 to 0.55 (CC260-GPSC83). The overall 30-day mortality remained high (24.1\%), though a decrease was observed between the pre-PCV (32.4\%; 95.0\% CI, 22.0-45.0) and the late-PCV13 period (16.7\%; 95.0\% CI, 7.5-32.0) (p = 0.06). At the same time, comorbidities increased from 77.3\% (95.0\% CI, 65.0-86.0) to 85.7\% (95.0\% CI, 71.0-94.0) (p = 0.69). There were no differences in clinical characteristics or 30-day mortality between the two S3 lineages. Although both lineages were genetically homogeneous, the CC180-GPSC12 lineage presented a higher SNP density, a more open pan-genome, and a major presence of prophages and mobile genetic elements carrying resistance genes.
INTERPRETATION: Adult S3-IPD remained stable in our area over the study period despite PCV13 introduction in children. However, a clonal shift was observed. The decrease in mortality rates and the increase in comorbidities suggest a change in clinical management and overall population characteristics. The low genetic variability and absence of clinical differences between lineages highlight the role of the S3 capsule in the disease severity.
FUNDING: This study has been funded by Instituto de Salud Carlos III (ISCIII) "PI18/00339", "PI21/01000", "INT22/00096", "FI22/00279", CIBER "CIBERES-CB06/06/0037", "CIBERINFEC-CB21/13/00009" and MSD grant "IISP 60168".},
}
@article{pmid38734623,
  author   = {Wang, Q. and Zhang, Y. and Chen, R. and Zhang, L. and Fu, M. and Zhang, L.},
  title    = {Comparative genomic analyses provide insight into the pathogenicity of three {Pseudomonas} syringae pv. actinidiae strains from {Anhui} {Province}, {China}},
  journal  = {BMC Genomics},
  year     = {2024},
  volume   = {25},
  number   = {1},
  pages    = {461},
  pmid     = {38734623},
  issn     = {1471-2164},
  support  = {32072378//the National Natural Science Foundation of China/ ; 32072378//the National Natural Science Foundation of China/ ; 32072378//the National Natural Science Foundation of China/ ; 32072378//the National Natural Science Foundation of China/ ; 2021023//Innovation Research and Experiment Program for Youth Scholar/ ; 2021023//Innovation Research and Experiment Program for Youth Scholar/ ; 23JCQNJC01040//the Natural Science Foundation of Tianjin/ ; 23JCQNJC01040//the Natural Science Foundation of Tianjin/ ; rc342216//the Development Fund for Talent Personnel of Anhui Agricultural University/ ; rc342216//the Development Fund for Talent Personnel of Anhui Agricultural University/ ; },
  mesh     = {*Pseudomonas syringae/genetics/pathogenicity ; *Phylogeny ; *Genome, Bacterial ; China ; *Actinidia/microbiology ; Virulence/genetics ; *Plant Diseases/microbiology ; *Genomics ; },
  abstract = {BACKGROUND: Pseudomonas syringae pv. actinidiae (Psa) is an important bacterial plant pathogen that causes severe damage to the kiwifruit industry worldwide. Three Psa strains were recently obtained from different kiwifruit orchards in Anhui Province, China. The present study mainly focused on the variations in virulence and genome characteristics of these strains based on the pathogenicity assays and comparative genomic analyses.
RESULTS: Three strains were identified as biovar 3 (Psa3), along with strain QSY6 showing higher virulence than JZY2 and YXH1 in pathogenicity assays. The whole genome assembly revealed that each of the three strains had a circular chromosome and a complete plasmid. The chromosome sizes ranged from 6.5 to 6.6 Mb with a GC content of approximately 58.39 to 58.46\%, and a predicted number of protein-coding sequences ranging from 5,884 to 6,019. The three strains clustered tightly with 8 Psa3 reference strains in terms of average nucleotide identity (ANI), whole-genome-based phylogenetic analysis, and pangenome analysis, while they were evolutionarily distinct from other biovars (Psa1 and Psa5). Variations were observed in the repertoire of effectors of the type III secretion system among all 15 strains. Moreover, synteny analysis of the three sequenced strains revealed eight genomic regions containing 308 genes exclusively present in the highly virulent strain QSY6. Further investigation of these genes showed that 16 virulence-related genes highlight several key factors, such as effector delivery systems (type III secretion systems) and adherence (type IV pilus), which might be crucial for the virulence of QSY6.
CONCLUSION: Three Psa strains were identified and showed variant virulence in kiwifruit plant. Complete genome sequences and comparative genomic analyses further provided a theoretical basis for the potential pathogenic factors responsible for kiwifruit bacterial canker.},
}
@article{pmid38732070,
  author   = {Zhang, J. and Liu, Q. and Dai, L. and Zhang, Z. and Wang, Y.},
  title    = {Pan-Genome Analysis of {Wolbachia}, Endosymbiont of {Diaphorina} citri, Reveals Independent Origin in {Asia} and {North} {America}},
  journal  = {International Journal of Molecular Sciences},
  year     = {2024},
  volume   = {25},
  number   = {9},
  doi      = {10.3390/ijms25094851},
  pmid     = {38732070},
  issn     = {1422-0067},
  support  = {2021YFD1400805//Nation Key R & D Program of China/ ; 31672031//National Natural Science Foundation of China/ ; 32272537//National Natural Science Foundation of China/ ; },
  mesh     = {*Wolbachia/genetics/classification ; *Symbiosis/genetics ; *Genome, Bacterial ; Animals ; *Phylogeny ; Asia ; North America ; Hemiptera/microbiology/genetics ; Diptera/microbiology/genetics ; Polymorphism, Single Nucleotide ; },
  abstract = {Wolbachia, a group of Gram-negative symbiotic bacteria, infects nematodes and a wide range of arthropods. Diaphorina citri Kuwayama, the vector of Candidatus Liberibacter asiaticus (CLas) that causes citrus greening disease, is naturally infected with Wolbachia (wDi). However, the interaction between wDi and D. citri remains poorly understood. In this study, we performed a pan-genome analysis using 65 wDi genomes to gain a comprehensive understanding of wDi. Based on average nucleotide identity (ANI) analysis, we classified the wDi strains into Asia and North America strains. The ANI analysis, principal coordinates analysis (PCoA), and phylogenetic tree analysis supported that the D. citri in Florida did not originate from China. Furthermore, we found that a significant number of core genes were associated with metabolic pathways. Pathways such as thiamine metabolism, type I secretion system, biotin transport, and phospholipid transport were highly conserved across all analyzed wDi genomes.
The variation analysis between Asia and North America wDi showed that there were 39,625 single-nucleotide polymorphisms (SNPs), 2153 indels, 10 inversions, 29 translocations, 65 duplications, 10 SV-based insertions, and 4 SV-based deletions. The SV-based insertions and deletions involved genes encoding transposase, phage tail tube protein, ankyrin repeat (ANK) protein, and group II intron-encoded protein. Pan-genome analysis of wDi contributes to our understanding of the geographical population of wDi, the origin of hosts of D. citri, and the interaction between wDi and its host, thus facilitating the development of strategies to control the insects and huanglongbing (HLB).},
}
@article{pmid38729094,
  author   = {Feng, H. and Wu, K. and Yuan, Y. and Fang, M. and Wang, J. and Li, R. and Zhang, R. and Wang, X. and Ye, D. and Yang, Z.},
  title    = {Genomic analysis of {Clostridium} perfringens type {D} isolates from goat farms},
  journal  = {Veterinary Microbiology},
  year     = {2024},
  volume   = {294},
  pages    = {110105},
  doi      = {10.1016/j.vetmic.2024.110105},
  pmid     = {38729094},
  issn     = {1873-2542},
  abstract = {C. perfringens type D strains are the leading cause of enterotoxaemia in ruminants such as goats, sheep, and cattle. However, there has been no prior research on the genomic characteristics of C. perfringens type D strains from various regions in China. Here, we investigated the antibiotic resistance, genomic characteristics, and phylogenetic relationship of C. perfringens type D isolates recovered from goat farms in Shaanxi, Gansu, and Ningxia provinces. The antibiotic resistance test indicated that the isolates displayed high minimum inhibitory concentration (MIC) values to sulfafurazole, whereas the other antibiotics tested, such as penicillin, enrofloxacin, and florfenicol, worked well on them. Additionally, only tetracycline resistance genes [tetA(P) and tetB(P)] were identified from the isolates. A collective of 13 toxin genes, including etx and cpe were detected among the isolates. Sequence comparison revealed that the etx and cpe genes shared high sequence identities, and they could coexist on a pCW3-like plasmid, representing a potential risk to both animal breeding and public health. Phylogenetic analysis using core genome multi-locus sequence typing (cgMLST) and core genome single nucleotide polymorphisms (SNPs) revealed the close genetic relationship and potential regional/transregional transmission of the C. perfringens type D isolates in Shaanxi and Gansu provinces.
Furthermore, pan-genomic analysis suggested the functional differences at the protein-coding gene level, although isolates from the same source shared a close genetic relationship. In conclusion, this study indicated the antibiotic resistance, virulence markers, potential transregional transmission, and genomic diversity of C. perfringens type D strains from various regions in China, which could provide references for the prevention of C. perfringens foodborne diseases and further research.},
}
@article{pmid38723792,
  author   = {Tang, J. and Jiang, Y. and Hu, Z. and Zhou, H. and You, D. and Daroch, M.},
  title    = {Genomic and phenotypic characterization of {Thermosynechococcus}-like strains reveals eight species within the genus {Thermosynechococcus} and a novel genus {Parathermosynechococcus} gen. nov.},
  journal  = {Molecular Phylogenetics and Evolution},
  year     = {2024},
  pages    = {108094},
  doi      = {10.1016/j.ympev.2024.108094},
  pmid     = {38723792},
  issn     = {1095-9513},
  abstract = {Thermophilic unicellular cyanobacteria of the family Thermosynechococcaceae are essential primary producers and integral components of many microbial mats found in hot springs of Asia and North America. Historically, based on their simple morphology, these organisms, along with members of taxonomically unrelated thermophilic Thermostichaceae have been described with a generic term, "Synechococcus", used for elongated unicellular cyanobacteria. This has created significant misperception in the scientific literature regarding the taxonomic status of these essential thermophilic primary producers and their relationship with Synechococcus sensu stricto. In this manuscript, we attempted a genome-driven taxonomic reevaluation of the family Thermosynechococcaceae. Application of genomic analyses such as GTDB classification, ANI/AAI and phylogenomics support the delineation of eight species within genus Thermosynechococcus. Two subspecies were further identified within T. taiwanensis by dDDH and phylogenomics. Moreover, the results also suggest the presence of two putative new genera phylogenetically alongside genus Thermosynechococcus, a thermophilic genus Parathermosynechococcus represented by PCC 6715 and a non-thermophilic genus represented by PCC 6312. The proposed genospecies and new genera were further integrated with morphological and/or ecological information.
Interestingly, the phylogeny of 16S-23S ITS achieved a better taxonomic relationship than that of 16S rRNA and supported the genome-based classification of Thermosynechococcus spp. Finally, the pan-genome analysis indicated a conserved pattern of genomic core among known members of Thermosynechococcus.},
}
@article{pmid38721599,
  author   = {Yang, Z. and Chai, Z. and Wang, X. and Zhang, Z. and Zhang, F. and Kang, F. and Liu, W. and Ren, H. and Jin, Y. and Yue, J.},
  title    = {Comparative genomic analysis provides insights into the genetic diversity and pathogenicity of the genus {Brucella}},
  journal  = {Frontiers in Microbiology},
  year     = {2024},
  volume   = {15},
  pages    = {1389859},
  pmid     = {38721599},
  issn     = {1664-302X},
  abstract = {Some Brucella spp. are important pathogens. According to the latest prokaryotic taxonomy, the Brucella genus consists of facultative intracellular parasitic Brucella species and extracellular opportunistic or environmental Brucella species. Intracellular Brucella species include classical and nonclassical types, with different species generally exhibiting host preferences. Some classical intracellular Brucella species can cause zoonotic brucellosis, including B. melitensis, B. abortus, B. suis, and B. canis. Extracellular Brucella species comprise opportunistic or environmental species which belonged formerly to the genus Ochrobactrum and thus nowadays renamed as for example Brucella intermedia or Brucella anthropi, which are the most frequent opportunistic human pathogens within the recently expanded genus Brucella. The cause of the diverse phenotypic characteristics of different Brucella species is still unclear. To further investigate the genetic evolutionary characteristics of the Brucella genus and elucidate the relationship between its genomic composition and prediction of phenotypic traits, we collected the genomic data of Brucella from the NCBI Genome database and conducted a comparative genomics study.
We found that classical and nonclassical intracellular Brucella species and extracellular Brucella species exhibited differences in phylogenetic relationships, horizontal gene transfer and distribution patterns of mobile genetic elements, virulence factor genes, and antibiotic resistance genes, showing the close relationship between the genetic variations and prediction of phenotypic traits of different Brucella species. Furthermore, we found significant differences in horizontal gene transfer and the distribution patterns of mobile genetic elements, virulence factor genes, and antibiotic resistance genes between the two chromosomes of Brucella, indicating that the two chromosomes had distinct dynamics and plasticity and played different roles in the survival and evolution of Brucella. These findings provide new directions for exploring the genetic evolutionary characteristics of the Brucella genus and could offer new clues to elucidate the factors influencing the phenotypic diversity of the Brucella genus.},
}
@article{pmid38718091,
  author   = {Rahman, M. S. and Shimul, M. E. K. and Parvez, M. A. K.},
  title    = {Comprehensive analysis of genomic variation, pan-genome and biosynthetic potential of {Corynebacterium} glutamicum strains},
  journal  = {PLOS ONE},
  year     = {2024},
  volume   = {19},
  number   = {5},
  pages    = {e0299588},
  doi      = {10.1371/journal.pone.0299588},
  pmid     = {38718091},
  issn     = {1932-6203},
  mesh     = {*Corynebacterium glutamicum/genetics/metabolism ; *Genome, Bacterial ; *Phylogeny ; *Genetic Variation ; Multigene Family ; Genomics/methods ; },
  abstract = {Corynebacterium glutamicum is a non-pathogenic species of the Corynebacteriaceae family. It has been broadly used in industrial biotechnology for the production of valuable products. Though it is widely accepted at the industrial level, knowledge about the genomic diversity of the strains is limited. Here, we investigated the comparative genomic features of the strains and pan-genomic characteristics. We also observed phylogenetic relationships among the strains based on average nucleotide identity (ANI). We found diversity between strains at the genomic and pan-genomic levels. Less than one-third of the C. glutamicum pan-genome consists of core genes and soft-core genes. Whereas, a large number of strain-specific genes covered about half of the total pan-genome. Besides, C. glutamicum pan-genome is open and expanding, which indicates the possible addition of new gene families to the pan-genome. We also investigated the distribution of biosynthetic gene clusters (BGCs) among the strains. We discovered slight variations of BGCs at the strain level. Several BGCs with the potential to express novel bioactive secondary metabolites have been identified. Therefore, by utilizing the characteristic advantages of C. glutamicum, different strains can be potential applicants for natural drug discovery.},
}
@article{pmid38709734,
  author   = {Yahara, H. and Yanamoto, S. and Takahashi, M. and Hamada, Y. and Asaka, T. and Kitagawa, Y. and Moridera, K. and Noguchi, K. and Maruoka, Y. and Yahara, K.},
  title    = {Shotgun metagenomic analysis of saliva microbiome suggests {Mogibacterium} as a factor associated with chronic bacterial osteomyelitis},
  journal  = {PLOS ONE},
  year     = {2024},
  volume   = {19},
  number   = {5},
  pages    = {e0302569},
  doi      = {10.1371/journal.pone.0302569},
  pmid     = {38709734},
  issn     = {1932-6203},
  mesh     = {Humans ; *Saliva/microbiology ; *Osteomyelitis/microbiology ; Female ; *Microbiota/genetics ; Male ; Middle Aged ; *Metagenomics/methods ; Chronic Disease ; Adult ; Metagenome ; Aged ; },
  abstract = {Osteomyelitis of the jaw is a severe inflammatory disorder that affects bones, and it is categorized into two main types: chronic bacterial and nonbacterial osteomyelitis. Although previous studies have investigated the association between these diseases and the oral microbiome, the specific taxa associated with each disease remain unknown. In this study, we conducted shotgun metagenome sequencing (≥10 Gb from ≥66,395,670 reads per sample) of bulk DNA extracted from saliva obtained from patients with chronic bacterial osteomyelitis (N = 5) and chronic nonbacterial osteomyelitis (N = 10). We then compared the taxonomic composition of the metagenome in terms of both taxonomic and sequence abundances with that of healthy controls (N = 5). Taxonomic profiling revealed a statistically significant increase in both the taxonomic and sequence abundance of Mogibacterium in cases of chronic bacterial osteomyelitis; however, such enrichment was not observed in chronic nonbacterial osteomyelitis. We also compared a previously reported core saliva microbiome (59 genera) with our data and found that out of the 74 genera detected in this study, 47 (including Mogibacterium) were not included in the previous meta-analysis.
Additionally, we analyzed a core-genome tree of Mogibacterium from chronic bacterial osteomyelitis and healthy control samples along with a reference complete genome and found that Mogibacterium from both groups was indistinguishable at the core-genome and pan-genome levels. Although limited by the small sample size, our study provides novel evidence of a significant increase in Mogibacterium abundance in the chronic bacterial osteomyelitis group. Moreover, our study presents a comparative analysis of the taxonomic and sequence abundances of all genera detected using deep salivary shotgun metagenome data. The distinct enrichment of Mogibacterium suggests its potential as a marker to distinguish between patients with chronic nonbacterial osteomyelitis and chronic bacterial osteomyelitis, particularly at the early stages when differences are unclear.},
}
@article{pmid38707536,
  author   = {Do, D. T. and Yang, M. R. and Vo, T. N. S. and Le, N. Q. K. and Wu, Y. W.},
  title    = {Unitig-centered pan-genome machine learning approach for predicting antibiotic resistance and discovering novel resistance genes in bacterial strains},
  journal  = {Computational and Structural Biotechnology Journal},
  year     = {2024},
  volume   = {23},
  pages    = {1864--1876},
  pmid     = {38707536},
  issn     = {2001-0370},
  abstract = {In current genomic research, the widely used methods for predicting antimicrobial resistance (AMR) often rely on prior knowledge of known AMR genes or reference genomes. However, these methods have limitations, potentially resulting in imprecise predictions owing to incomplete coverage of AMR mechanisms and genetic variations. To overcome these limitations, we propose a pan-genome-based machine learning approach to advance our understanding of AMR gene repertoires and uncover possible feature sets for precise AMR classification. By building compacted de Bruijn graphs (cDBGs) from thousands of genomes and collecting the presence/absence patterns of unique sequences (unitigs) for Pseudomonas aeruginosa, we determined that using machine learning models on unitig-centered pan-genomes showed significant promise for accurately predicting the antibiotic resistance or susceptibility of microbial strains. Applying a feature-selection-based machine learning algorithm led to satisfactory predictive performance for the training dataset (with an area under the receiver operating characteristic curve (AUC) of > 0.929) and an independent validation dataset (AUC, approximately 0.77). Furthermore, the selected unitigs revealed previously unidentified resistance genes, allowing for the expansion of the resistance gene repertoire to those that have not previously been described in the literature on antibiotic resistance.
These results demonstrate that our proposed unitig-based pan-genome feature set was effective in constructing machine learning predictors that could accurately identify AMR pathogens. Gene sets extracted using this approach may offer valuable insights into expanding known AMR genes and forming new hypotheses to uncover the underlying mechanisms of bacterial AMR.},
}
@article{pmid38706000,
  author   = {Lan, D. and Fu, W. and Ji, W. and Mipam, T. D. and Xiong, X. and Ying, S. and Xiong, Y. and Sheng, P. and Ni, J. and Bai, L. and Shan, T. and Kong, X. and Li, J.},
  title    = {Pangenome and multi-tissue gene atlas provide new insights into the domestication and highland adaptation of yaks},
  journal  = {Journal of Animal Science and Biotechnology},
  year     = {2024},
  volume   = {15},
  number   = {1},
  pages    = {64},
  pmid     = {38706000},
  issn     = {1674-9782},
  support  = {2021YFD1600200//National Key R&D Program of China/ ; CARS-37//Program of National Beef Cattle and Yak Industrial Technology System/ ; 24NSFSC0581//Natural Science Foundation of Sichuan Province (General Program)/ ; 2024CXTD02//the Scientific and Technological Innovation Team for Qinghai-Tibetan Plateau Research in Southwest Minzu University/ ; },
  abstract = {BACKGROUND: The genetic diversity of yak, a key domestic animal on the Qinghai-Tibetan Plateau (QTP), is a vital resource for domestication and breeding efforts. This study presents the first yak pangenome obtained through the de novo assembly of 16 yak genomes.
RESULTS: We discovered 290 Mb of nonreference sequences and 504 new genes. Our pangenome-wide presence and absence variation (PAV) analysis revealed 5,120 PAV-related genes, highlighting a wide range of variety-specific genes and genes with varying frequencies across yak populations. Principal component analysis (PCA) based on binary gene PAV data classified yaks into three new groups: wild, domestic, and Jinchuan. Moreover, we proposed a 'two-haplotype genomic hybridization model' for understanding the hybridization patterns among breeds by integrating gene frequency, heterozygosity, and gene PAV data. A gene PAV-GWAS identified a novel gene (BosGru3G009179) that may be associated with the multirib trait in Jinchuan yaks. Furthermore, an integrated transcriptome and pangenome analysis highlighted the significant differences in the expression of core genes and the mutational burden of differentially expressed genes between yaks from high and low altitudes. Transcriptome analysis across multiple species revealed that yaks have the most unique differentially expressed mRNAs and lncRNAs (between high- and low-altitude regions), especially in the heart and lungs, when comparing high- and low-altitude adaptations.
CONCLUSIONS: The yak pangenome offers a comprehensive resource and new insights for functional genomic studies, supporting future biological research and breeding strategies.},
}
@article{pmid38704417,
  author   = {Achakkagari, S. R. and Bozan, I. and Camargo-Tavares, J. C. and McCoy, H. J. and Portal, L. and Soto, J. and Bizimungu, B. and Anglin, N. L. and Manrique-Carpintero, N. and Lindqvist-Kreuze, H. and Tai, H. H. and Strömvik, M. V.},
  title    = {The phased {Solanum} okadae genome and {Petota} pangenome analysis of 23 other potato wild relatives and hybrids},
  journal  = {Scientific Data},
  year     = {2024},
  volume   = {11},
  number   = {1},
  pages    = {454},
  pmid     = {38704417},
  issn     = {2052-4463},
  support  = {International Collaboration Program: "Genome sequencing of wild Solanum diploids"//Gouvernement du Canada | Agriculture and Agri-Food Canada (Agriculture et Agroalimentaire Canada)/ ; International Collaboration Program: "Genome sequencing of wild Solanum diploids"//Gouvernement du Canada | Agriculture and Agri-Food Canada (Agriculture et Agroalimentaire Canada)/ ; International Collaboration Program: Genome sequencing of wild Solanum diploids//Gouvernement du Canada | Agriculture and Agri-Food Canada (Agriculture et Agroalimentaire Canada)/ ; },
  mesh     = {*Genome, Plant ; *Solanum/genetics ; *Phylogeny ; Solanum tuberosum/genetics ; Hybridization, Genetic ; },
  abstract = {Potato is an important crop in the genus Solanum section Petota. Potatoes are susceptible to multiple abiotic and biotic stresses and have undergone constant improvement through breeding programs worldwide. Introgression of wild relatives from section Petota with potato is used as a strategy to enhance the diversity of potato germplasm. The current dataset contributes a phased genome assembly for diploid S. okadae, and short read sequences and de novo assemblies for the genomes of 16 additional wild diploid species in section Petota that were noted for stress resistance and were of interest to potato breeders. Genome sequence data for three additional genomes representing polyploid hybrids with cultivated potato, and an additional genome from non-tuberizing S.
etuberosum, which is outside of section Petota, were also included. High quality short reads assemblies were achieved with genome sizes ranging from 575 to 795 Mbp and annotations were performed utilizing transcriptome sequence data. Genomes were compared for presence/absence of genes and phylogenetic analyses were carried out using plastome and nuclear sequences.},
}
@article{pmid38703968,
  author   = {Yang, Y. and Shao, Y. and Pei, C. and Liu, Y. and Zhang, M. and Zhu, X. and Li, J. and Feng, L. and Li, G. and Li, K. and Liang, Y. and Li, Y.},
  title    = {Pangenome analyses of {Clostridium} butyricum provide insights into its genetic characteristics and industrial application},
  journal  = {Genomics},
  year     = {2024},
  pages    = {110855},
  doi      = {10.1016/j.ygeno.2024.110855},
  pmid     = {38703968},
  issn     = {1089-8646},
  abstract = {Clostridium butyricum is a Gram-positive anaerobic bacterium known for its ability to produce butyrate. In this study, we conducted whole-genome sequencing and assembly of 14 C. butyricum industrial strains collected from various parts of China. We performed a pan-genome comparative analysis of the 14 assembled strains and 139 strains downloaded from NCBI. We found that the genes related to critical industrial production pathways were primarily present in the core and soft-core gene categories. The phylogenetic analysis revealed that strains from the same clade of the phylogenetic tree possessed similar antibiotic resistance and virulence factors, with most of these genes present in the shell and cloud gene categories. Finally, we predicted the genes producing bacteriocins and botulinum toxins as well as CRISPR systems responsible for host defense. In conclusion, our research provides a desirable pan-genome database for the industrial production, food application, and genetic research of C. butyricum.},
}
@article{pmid38698240,
  author   = {Mertz, P. and Costedoat-Chalumeau, N. and Ferrada, M. A. and Moulis, G. and Mekinian, A. and Grayson, P. C. and Arnaud, L.},
  title    = {Relapsing polychondritis: clinical updates and new differential diagnoses},
  journal  = {Nature Reviews. Rheumatology},
  year     = {2024},
  pmid     = {38698240},
  issn     = {1759-4804},
  abstract = {Relapsing polychondritis is a rare inflammatory disease characterized by recurrent inflammation of cartilaginous structures, mainly of the ears, nose and respiratory tract, with a broad spectrum of accompanying systemic features. Despite its rarity, prompt recognition and accurate diagnosis of relapsing polychondritis is crucial for appropriate management and optimal outcomes. Our understanding of relapsing polychondritis has changed markedly in the past couple of years with the identification of three distinct patient clusters that have different clinical manifestations and prognostic outcomes. With the progress of pangenomic sequencing and the discovery of new somatic and monogenic autoinflammatory diseases, new differential diagnoses have emerged, notably the vacuoles, E1 enzyme, X-linked, autoinflammatory, somatic (VEXAS) syndrome, autoinflammatory diseases and immune checkpoint inhibitor-related adverse events. In this Review, we present a detailed update of the newly identified clusters and highlight red flags that should raise suspicion of these alternative diagnoses. The identification of these different clusters and mimickers has a direct impact on the management, follow-up and prognosis of patients with relapsing polychondritis and autoinflammatory syndromes.},
}
@article{pmid38698002,
  author   = {Da Silva Morais, E. and Grimaud, G. M. and Warda, A. and Stanton, C. and Ross, P.},
  title    = {Genome plasticity shapes the ecology and evolution of {Phocaeicola} dorei and {Phocaeicola} vulgatus},
  journal  = {Scientific Reports},
  year     = {2024},
  volume   = {14},
  number   = {1},
  pages    = {10109},
  pmid     = {38698002},
  issn     = {2045-2322},
  support  = {BACtheWINNER, Project No. 101054719/ERC_/European Research Council/International ; },
  mesh     = {*Phylogeny ; *Genome, Bacterial ; Humans ; Gastrointestinal Microbiome/genetics ; Gene Transfer, Horizontal ; Evolution, Molecular ; Genomics/methods ; Bacteroidetes/genetics ; },
  abstract = {Phocaeicola dorei and Phocaeicola vulgatus are very common and abundant members of the human gut microbiome and play an important role in the infant gut microbiome. These species are closely related and often confused for one another; yet, their genome comparison, interspecific diversity, and evolutionary relationships have not been studied in detail so far. Here, we perform phylogenetic analysis and comparative genomic analyses of these two Phocaeicola species. We report that P. dorei has a larger genome yet a smaller pan-genome than P. vulgatus. We found that this is likely because P. vulgatus is more plastic than P. dorei, with a larger repertoire of genetic mobile elements and fewer anti-phage defense systems. We also found that P. dorei directly descends from a clade of P. vulgatus, and experienced genome expansion through genetic drift and horizontal gene transfer. Overall, P. dorei and P. vulgatus have very different functional and carbohydrate utilisation profiles, hinting at different ecological strategies, yet they present similar antimicrobial resistance profiles.},
}
@article {pmid38695578, year = {2024}, author = {Samanta, D and Rauniyar, S and Saxena, P and Sani, RK}, title = {From genome to evolution: investigating type II methylotrophs using a pangenomic analysis.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0024824}, doi = {10.1128/msystems.00248-24}, pmid = {38695578}, issn = {2379-5077}, abstract = {UNLABELLED: A comprehensive pangenomic approach was employed to analyze the genomes of 75 type II methylotrophs spanning various genera. Our investigation revealed 256 exact core gene families shared by all 75 organisms, emphasizing their crucial role in the survival and adaptability of these organisms. Additionally, we predicted the functionality of 12 hypothetical proteins. The analysis unveiled a diverse array of genes associated with key metabolic pathways, including methane, serine, glyoxylate, and ethylmalonyl-CoA (EMC) metabolic pathways. While all selected organisms possessed essential genes for the serine pathway, Methylooceanibacter marginalis lacked serine hydroxymethyltransferase (SHMT), and Methylobacterium variabile exhibited both isozymes of SHMT, suggesting its potential to utilize a broader range of carbon sources. Notably, Methylobrevis sp. displayed a unique serine-glyoxylate transaminase isozyme not found in other organisms. Only nine organisms featured anaplerotic enzymes (isocitrate lyase and malate synthase) for the glyoxylate pathway, with the rest following the EMC pathway. Methylovirgula sp. 4M-Z18 stood out by acquiring genes from both glyoxylate and EMC pathways, and Methylocapsa sp. S129 featured an A-form malate synthase, unlike the G-form found in the remaining organisms. Our findings also revealed distinct phylogenetic relationships and clustering patterns among type II methylotrophs, leading to the proposal of a separate genus for Methylovirgula sp. 4M-Z18 and Methylocapsa sp. S129. 
This pangenomic study unveils remarkable metabolic diversity, unique gene characteristics, and distinct clustering patterns of type II methylotrophs, providing valuable insights for future carbon sequestration and biotechnological applications.
IMPORTANCE: Methylotrophs have played a significant role in methane-based product production for many years. However, a comprehensive investigation into the diverse genetic architectures across different genera of methylotrophs has been lacking. This study fills this knowledge gap by enhancing our understanding of core hypothetical proteins and unique enzymes involved in methane oxidation, serine, glyoxylate, and ethylmalonyl-CoA pathways. These findings provide a valuable reference for researchers working with other methylotrophic species. Furthermore, this study not only unveils distinctive gene characteristics and phylogenetic relationships but also suggests a reclassification for Methylovirgula sp. 4M-Z18 and Methylocapsa sp. S129 into separate genera due to their unique attributes within their respective genus. Leveraging the synergies among various methylotrophic organisms, the scientific community can potentially optimize metabolite production, increasing the yield of desired end products and overall productivity.}, }
@article {pmid38693490, year = {2024}, author = {Lu, Y and Liu, D and Kong, X and Song, Y and Jing, L}, title = {Pangenome characterization and analysis of the NAC gene family reveals genes for Sclerotinia sclerotiorum resistance in sunflower (Helianthus annuus).}, journal = {BMC genomic data}, volume = {25}, number = {1}, pages = {39}, pmid = {38693490}, issn = {2730-6844}, support = {32160642 and 32060598//National Natural Science Foundation of China/ ; 32160642 and 32060598//National Natural Science Foundation of China/ ; 32160642 and 32060598//National Natural Science Foundation of China/ ; 32160642 and 32060598//National Natural Science Foundation of China/ ; 32160642 and 32060598//National Natural Science Foundation of China/ ; NMGIRT2320//Program for Innovative Research Team in Universities of Inner Mongolia Autonomous Region/ ; NMGIRT2320//Program for Innovative Research Team in Universities of Inner Mongolia Autonomous Region/ ; NMGIRT2320//Program for Innovative Research Team in Universities of Inner Mongolia Autonomous Region/ ; NMGIRT2320//Program for Innovative Research Team in Universities of Inner Mongolia Autonomous Region/ ; NMGIRT2320//Program for Innovative Research Team in Universities of Inner Mongolia Autonomous Region/ ; }, abstract = {BACKGROUND: Sunflower (Helianthus annuus) is one of the most important economic crops in oilseed production worldwide. The different cultivars exhibit variability in their resistance genes. The NAC transcription factor (TF) family plays diverse roles in plant development and stress responses. With the completion of the H. annuus genome sequence, the entire complement of genes coding for NACs has been identified. However, the reference genome of a single individual cannot cover all the genetic information of the species.
RESULTS: Considering only a single reference genome to study gene families will miss many meaningful genes. A pangenome-wide survey and characterization of the NAC genes in sunflower species were conducted. In total, 139 HaNAC genes are identified, of which 114 are core and 25 are variable. Phylogenetic analysis of sunflower NAC proteins categorizes these proteins into 16 subgroups. 138 HaNACs are randomly distributed on 17 chromosomes. SNP-based haplotype analysis shows haplotype diversity of the HaNAC genes in wild accessions is richer than in landraces and modern cultivars. Ten HaNAC genes in the basal stalk rot (BSR) resistance quantitative trait loci (QTL) are found. A total of 26 HaNAC genes are differentially expressed in response to Sclerotinia head rot (SHR). A total of 137 HaNAC genes are annotated in Gene Ontology (GO) and are classified into 24 functional groups. GO functional enrichment analysis reveals that HaNAC genes are involved in various functions of the biological process.
CONCLUSIONS: We identified NAC genes in H. annuus (HaNAC) on a pangenome-wide scale and analyzed S. sclerotiorum resistance-related NACs. This study provided a theoretical basis for further genomic improvement targeting resistance-related NAC genes in sunflowers.}, }
@article {pmid38693487, year = {2024}, author = {Gangurde, SS and Korani, W and Bajaj, P and Wang, H and Fountain, JC and Agarwal, G and Pandey, MK and Abbas, HK and Chang, PK and Holbrook, CC and Kemerait, RC and Varshney, RK and Dutta, B and Clevenger, JP and Guo, B}, title = {Aspergillus flavus pangenome (AflaPan) uncovers novel aflatoxin and secondary metabolite associated gene clusters.}, journal = {BMC plant biology}, volume = {24}, number = {1}, pages = {354}, pmid = {38693487}, issn = {1471-2229}, abstract = {BACKGROUND: Aspergillus flavus is an important agricultural and food safety threat due to its production of carcinogenic aflatoxins. It has high level of genetic diversity that is adapted to various environments. Recently, we reported two reference genomes of A. flavus isolates, AF13 (MAT1-2 and highly aflatoxigenic isolate) and NRRL3357 (MAT1-1 and moderate aflatoxin producer). Where, an insertion of 310 kb in AF13 included an aflatoxin producing gene bZIP transcription factor, named atfC. Observations of significant genomic variants between these isolates of contrasting phenotypes prompted an investigation into variation among other agricultural isolates of A. flavus with the goal of discovering novel genes potentially associated with aflatoxin production regulation. Present study was designed with three main objectives: (1) collection of large number of A. flavus isolates from diverse sources including maize plants and field soils; (2) whole genome sequencing of collected isolates and development of a pangenome; and (3) pangenome-wide association study (Pan-GWAS) to identify novel secondary metabolite cluster genes.
RESULTS: Pangenome analysis of 346 A. flavus isolates identified a total of 17,855 unique orthologous gene clusters, with mere 41% (7,315) core genes and 59% (10,540) accessory genes indicating accumulation of high genomic diversity during domestication. 5,994 orthologous gene clusters in accessory genome not annotated in either the A. flavus AF13 or NRRL3357 reference genomes. Pan-genome wide association analysis of the genomic variations identified 391 significant associated pan-genes associated with aflatoxin production. Interestingly, most of the significantly associated pan-genes (94%; 369 associations) belonged to accessory genome indicating that genome expansion has resulted in the incorporation of new genes associated with aflatoxin and other secondary metabolites.
CONCLUSION: In summary, this study provides complete pangenome framework for the species of Aspergillus flavus along with associated genes for pathogen survival and aflatoxin production. The large accessory genome indicated large genome diversity in the species A. flavus, however AflaPan is a closed pangenome represents optimum diversity of species A. flavus. Most importantly, the newly identified aflatoxin producing gene clusters will be a new source for seeking aflatoxin mitigation strategies and needs new attention in research.}, }
@article {pmid38689698, year = {2024}, author = {Tan, W and Zhou, P and Huang, X and Liao, R and Wang, X and Wu, Y and Ni, Z and Shi, T and Yu, X and Zhang, H and Ma, C and Gao, F and Ma, Y and Bai, Y and Hayat, F and Omondi, OK and Coulibaly, D and Gao, Z}, title = {Haplotype-resolved genome of Prunus zhengheensis provides insight into its evolution and low temperature adaptation in apricot.}, journal = {Horticulture research}, volume = {11}, number = {4}, pages = {uhae103}, doi = {10.1093/hr/uhae103}, pmid = {38689698}, issn = {2662-6810}, abstract = {Prunus zhengheensis, an extremely rare population of apricots, originated in warm South-East China and is an excellent material for genetic breeding. However, most apricots and two related species (P. sibirica, P. mandshurica) are found in the cold northern regions in China and the mechanism of their distribution is still unclear. In addition, the classification status of P. zhengheensis is controversial. Thus, we generated a high-quality haplotype-resolved genome for P. zhengheensis, exploring key genetic variations in its adaptation and the causes of phylogenetic incongruence. We found extensive phylogenetic discordances between the nuclear and organelle phylogenies of P. zhengheensis, which could be explained by incomplete lineage sorting. A 242.22-Mb pan-genome of the Armeniaca section was developed with 13 chromosomal genomes. Importantly, we identified a 566-bp insertion in the promoter of the HSFA1d gene in apricot and showed that the activity of the HSFA1d promoter increased under low temperatures. In addition, HSFA1d overexpression in Arabidopsis thaliana indicated that HSFA1d positively regulated plant growth under chilling. Therefore, we hypothesized that the insertion in the promoter of HSFA1d in apricot improved its low-temperature adaptation, allowing it to thrive in relatively cold locations. The findings help explain the weather adaptability of Armeniaca plants.}, }
@article {pmid38689648, year = {2024}, author = {Ma, Y and Sun, J and Zhang, X and Sadaqat, M and Tahir Ul Qamar, M and Liu, T}, title = {Comparative genomics analysis of pheophorbide a oxygenase (PAO) genes in eight pyrus genomes and their regulatory role in multiple stress responses in Chinese pear (Pyrus bretschneideri).}, journal = {Frontiers in genetics}, volume = {15}, number = {}, pages = {1396744}, doi = {10.3389/fgene.2024.1396744}, pmid = {38689648}, issn = {1664-8021}, abstract = {Pyrus (pear) is among the most nutritious fruits and contains fibers that have great health benefits to humans. It is mostly cultivated in temperate regions globally and is highly subjected to biotic and abiotic stresses which affect its yield. Pheophorbide a oxygenase (PAO) is an essential component of the chlorophyll degradation system and contributes to the senescence of leaves. It is responsible for opening the pheophorbide a porphyrin macrocycle and forming the main fluorescent chlorophyll catabolite. However, this gene family and its members have not been explored in Pyrus genomes. Here we report a pangenome-wide investigation has been conducted on eight Pyrus genomes: Cuiguan, Shanxi Duli, Zhongai 1, Nijisseiki, Yunhong No.1, d'Anjou, Bartlett v2.0, and Dangshansuli v.1.1. The phylogenetic history, their gene structure, conservation patterns of motifs, their distribution on chromosomes, and gene duplication are studied in detail which shows the intraspecific structural conservation as well as evolutionary patterns of Pyrus PAOs. Cis-elements, protein-protein interactions (PPI), and the Gene Ontology (GO) enrichment analyses show their potential biological functions. Furthermore, their expression in various tissues, fruit hardening conditions, and drought stress conditions is also studied. Based on phylogenetics, the identified PAOs were divided into four groups. The expansion of this gene family in Pyrus is caused by both tandem and segmental duplication. 
Moreover, positive and negative selection pressure equally directed the gene's duplication process. The Pyrus PAO genes were enriched in hormones-related, light, development, and stress-related elements. RNA-seq data analysis showed that PAOs have varied levels of expression under diseased and abiotic stress conditions. The 3D structures of PAOs are also predicted to get more insights into functional conservation. Our research can be used further to get a deeper knowledge of the PAO gene family in Pyrus and to guide future research on improving the genetic composition of Pyrus to enhance stress tolerance.}, }
@article {pmid38686794, year = {2024}, author = {Nuhamunada, M and Mohite, OS and Phaneuf, PV and Palsson, BO and Weber, T}, title = {BGCFlow: systematic pangenome workflow for the analysis of biosynthetic gene clusters across large genomic datasets.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae314}, pmid = {38686794}, issn = {1362-4962}, support = {NNF20CC0035580//Novo Nordisk Foundation/ ; CeMiSt//Danish National Research Foundation/ ; NNF20SA0035588//Novo Nordisk Foundation Copenhagen Bioscience PhD program/ ; }, abstract = {Genome mining is revolutionizing natural products discovery efforts. The rapid increase in available genomes demands comprehensive computational platforms to effectively extract biosynthetic knowledge encoded across bacterial pangenomes. Here, we present BGCFlow, a novel systematic workflow integrating analytics for large-scale genome mining of bacterial pangenomes. BGCFlow incorporates several genome analytics and mining tools grouped into five common stages of analysis such as: (i) data selection, (ii) functional annotation, (iii) phylogenetic analysis, (iv) genome mining, and (v) comparative analysis. Furthermore, BGCFlow provides easy configuration of different projects, parallel distribution, scheduled job monitoring, an interactive database to visualize tables, exploratory Jupyter Notebooks, and customized reports. Here, we demonstrate the application of BGCFlow by investigating the phylogenetic distribution of various biosynthetic gene clusters detected across 42 genomes of the Saccharopolyspora genus, known to produce industrially important secondary/specialized metabolites. The BGCFlow-guided analysis predicted more accurate dereplication of BGCs and guided the targeted comparative analysis of selected RiPPs. 
The scalable, interoperable, adaptable, re-entrant, and reproducible nature of the BGCFlow will provide an effective novel way to extract the biosynthetic knowledge from the ever-growing genomic datasets of biotechnologically relevant bacterial species.}, }
@article {pmid38682181, year = {2024}, author = {Aziz, T and Naveed, M and Shabbir, MA and Sarwar, A and Khan, AA and Hasnain, A and Haq, TU and Yang, Z and Zinedine, A and Rocha, JM and Alharbi, M}, title = {Whole Genome Analysis of Tibetan Kefir-Derived Lactiplantibacillus Plantarum 12-3 Elucidates Its Genomic Architecture, Antimicrobial and Drug Resistance, Potential Probiotic Functionality and Safety.}, journal = {Frontiers in bioscience (Landmark edition)}, volume = {29}, number = {4}, pages = {147}, doi = {10.31083/j.fbl2904147}, pmid = {38682181}, issn = {2768-6698}, support = {32272296//National Natural Science Foundation of China/ ; IFKSUOR3- 619-3//Deputyship for Research and Innovation/ ; }, mesh = {*Probiotics ; Tibet ; *Genome, Bacterial ; *Phylogeny ; *Kefir/microbiology ; Drug Resistance, Bacterial/genetics ; Lactobacillus plantarum/genetics ; Anti-Bacterial Agents/pharmacology ; Whole Genome Sequencing ; CRISPR-Cas Systems ; }, abstract = {BACKGROUND: Lactiplantibacillus plantarum 12-3 holds great promise as a probiotic bacterial strain, yet its full potential remains untapped. This study aimed to better understand this potential therapeutic strain by exploring its genomic landscape, genetic diversity, CRISPR-Cas mechanism, genotype, and mechanistic perspectives for probiotic functionality and safety applications.
METHODS: L. plantarum 12-3 was isolated from Tibetan kefir grains and, subsequently, Illumina and Single Molecule Real-Time (SMRT) technologies were used to extract and sequence genomic DNA from this organism. After performing pan-genomic and phylogenetic analysis, Average Nucleotide Identity (ANI) was used to confirm the taxonomic identity of the strain. Antibiotic resistance gene analysis was conducted using the Comprehensive Antibiotic Resistance Database (CARD). Antimicrobial susceptibility testing, and virulence gene identification were also included in our genomic analysis to evaluate food safety. Prophage, genomic islands, insertion sequences, and CRISPR-Cas sequence analyses were also carried out to gain insight into genetic components and defensive mechanisms within the bacterial genome.
RESULTS: The 3.4 Mb genome of L. plantarum 12-3, was assembled with 99.1% completeness and low contamination. A total of 3234 genes with normal length and intergenic spacing were found using gene prediction tools. Pan-genomic studies demonstrated gene diversity and provided functional annotation, whereas phylogenetic analysis verified taxonomic identity. Our food safety study revealed a profile of antibiotic resistance that is favorable for use as a probiotic. Analysis of insertional sequences, genomic islands, and prophage within the genome provided information regarding genetic components and their possible effects on evolution.
CONCLUSIONS: Pivotal genetic elements uncovered in this study play a crucial role in bacterial defense mechanisms and offer intriguing prospects for future genome engineering efforts. Moreover, our findings suggest further in vitro and in vivo studies are warranted to validate the functional attributes and probiotic potential of L. plantarum 12-3. Expanding the scope of the research to encompass a broader range of L. plantarum 12-3 strains and comparative analyses with other probiotic species would enhance our understanding of this organism's genetic diversity and functional properties.}, }
@article {pmid38676570, year = {2024}, author = {Avila Cartes, J and Bonizzoni, P and Ciccolella, S and Della Vedova, G and Denti, L and Didelot, X and Monti, D and Pirola, Y}, title = {RecGraph: recombination-aware alignment of sequences to variation graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae292}, pmid = {38676570}, issn = {1367-4811}, abstract = {MOTIVATION: Bacterial genomes present more variability than human genomes, which requires important adjustments in computational tools that are developed for human data. In particular, bacteria exhibit a mosaic structure due to homologous recombinations, but this fact is not sufficiently captured by standard read mappers that align against linear reference genomes. The recent introduction of pangenomics provides some insights in that context, as a pangenome graph can represent the variability within a species. However, the concept of sequence-to-graph alignment that captures the presence of recombinations has not been previously investigated.
RESULTS: In this paper, we present the extension of the notion of sequence-to-graph alignment to a variation graph that incorporates a recombination, so that the latter are explicitly represented and evaluated in an alignment. Moreover, we present a dynamic programming approach for the special case where there is at most a recombination—we implement this case as RecGraph. From a modeling point of view, a recombination corresponds to identifying a new path of the variation graph, where the new arc is composed of two halves, each extracted from an original path, possibly joined by a new arc. Our experiments show that RecGraph accurately aligns simulated recombinant bacterial sequences that have at most a recombination, providing evidence for the presence of recombination events.
AVAILABILITY: Our implementation is open source and available at https://github.com/AlgoLab/RecGraph.
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, }
@article {pmid38674443, year = {2024}, author = {Du, X and Sun, Y and Fu, T and Gao, T and Zhang, T}, title = {Research Progress and Applications of Bovine Genome in the Tribe Bovini.}, journal = {Genes}, volume = {15}, number = {4}, pages = {}, doi = {10.3390/genes15040509}, pmid = {38674443}, issn = {2073-4425}, support = {CARS-36//the National Dairy Industry Technology System of China/ ; 2023ZW04048//The Science and Technology Innovation 2030/ ; }, mesh = {Animals ; Cattle/genetics ; *Genome/genetics ; *Genomics/methods ; Buffaloes/genetics ; Phenotype ; High-Throughput Nucleotide Sequencing ; Breeding ; }, abstract = {Various bovine species have been domesticated and bred for thousands of years, and they provide adequate animal-derived products, including meat, milk, and leather, to meet human requirements. Despite the review studies on economic traits in cattle, the genetic basis of traits has only been partially explained by phenotype and pedigree breeding methods, due to the complexity of genomic regulation during animal development and growth. With the advent of next-generation sequencing technology, genomics projects, such as the 1000 Bull Genomes Project, Functional Annotation of Animal Genomes project, and Bovine Pangenome Consortium, have advanced bovine genomic research. These large-scale genomics projects gave us a comprehensive concept, technology, and public resources. In this review, we summarize the genomics research progress of the main bovine species during the past decade, including cattle (Bos taurus), yak (Bos grunniens), water buffalo (Bubalus bubalis), zebu (Bos indicus), and gayal (Bos frontalis). We mainly discuss the development of genome sequencing and functional annotation, focusing on how genomic analysis reveals genetic variation and its impact on phenotypes in several bovine species.}, }
@article {pmid38674043, year = {2024}, author = {Sáez, LP and Rodríguez-Caballero, G and Olaya-Abril, A and Cabello, P and Moreno-Vivián, C and Roldán, MD and Luque-Almagro, VM}, title = {Genomic Insights into Cyanide Biodegradation in the Pseudomonas Genus.}, journal = {International journal of molecular sciences}, volume = {25}, number = {8}, pages = {}, doi = {10.3390/ijms25084456}, pmid = {38674043}, issn = {1422-0067}, support = {P18-RT-3048//Junta de Andalucía/ ; PPIT_2022E_025814//University of Córdoba/ ; Program Frontiers in Science//Fundación Torres Gutiérrez/ ; }, mesh = {*Cyanides/metabolism ; *Biodegradation, Environmental ; *Pseudomonas/genetics/metabolism ; *Phylogeny ; *Genome, Bacterial ; Genomics/methods ; Bacterial Proteins/genetics/metabolism ; Aminohydrolases/genetics/metabolism ; Pseudomonas pseudoalcaligenes/metabolism/genetics ; }, abstract = {Molecular studies about cyanide biodegradation have been mainly focused on the hydrolytic pathways catalyzed by the cyanide dihydratase CynD or the nitrilase NitC. In some Pseudomonas strains, the assimilation of cyanide has been linked to NitC, such as the cyanotrophic model strain Pseudomonas pseudoalcaligenes CECT 5344, which has been recently reclassified as Pseudomonas oleovorans CECT 5344. In this work, a phylogenomic approach established a more precise taxonomic position of the strain CECT 5344 within the species P. oleovorans. Furthermore, a pan-genomic analysis of P. oleovorans and other species with cyanotrophic strains, such as P. fluorescens and P. monteilii, allowed for the comparison and identification of the cioAB and mqoAB genes involved in cyanide resistance, and the nitC and cynS genes required for the assimilation of cyanide or cyanate, respectively. While cyanide resistance genes presented a high frequency among the analyzed genomes, genes responsible for cyanide or cyanate assimilation were identified in a considerably lower proportion. 
According to the results obtained in this work, an in silico approach based on a comparative genomic approach can be considered as an agile strategy for the bioprospection of putative cyanotrophic bacteria and for the identification of new genes putatively involved in cyanide biodegradation.}, }
@article {pmid38671539, year = {2024}, author = {Goff, JL and Szink, EG and Durrence, KL and Lui, LM and Nielsen, TN and Kuehl, JV and Hunt, KA and Chandonia, JM and Huang, J and Thorgersen, MP and Poole, FL and Stahl, DA and Chakraborty, R and Deutschbauer, AM and Arkin, AP and Adams, MWW}, title = {Genomic and environmental controls on Castellaniella biogeography in an anthropogenically disturbed subsurface.}, journal = {Environmental microbiome}, volume = {19}, number = {1}, pages = {26}, pmid = {38671539}, issn = {2524-6372}, support = {DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; DE-AC02-05CH11231//U.S. Department of Energy/ ; }, abstract = {Castellaniella species have been isolated from a variety of mixed-waste environments including the nitrate and multiple metal-contaminated subsurface at the Oak Ridge Reservation (ORR). Previous studies examining microbial community composition and nitrate removal at ORR during biostimulation efforts reported increased abundances of members of the Castellaniella genus concurrent with increased denitrification rates. Thus, we asked how genomic and abiotic factors control the Castellaniella biogeography at the site to understand how these factors may influence nitrate transformation in an anthropogenically impacted setting. 
We report the isolation and characterization of several Castellaniella strains from the ORR subsurface. Five of these isolates match at 100% identity (at the 16S rRNA gene V4 region) to two Castellaniella amplicon sequence variants (ASVs), ASV1 and ASV2, that have persisted in the ORR subsurface for at least 2 decades. However, ASV2 has consistently higher relative abundance in samples taken from the site and was also the dominant blooming denitrifier population during a prior biostimulation effort. We found that the ASV2 representative strain has greater resistance to mixed metal stress than the ASV1 representative strains. We attribute this resistance, in part, to the large number of unique heavy metal resistance genes identified on a genomic island in the ASV2 representative genome. Additionally, we suggest that the relatively lower fitness of ASV1 may be connected to the loss of the nitrous oxide reductase (nos) operon (and associated nitrous oxide reductase activity) due to the insertion at this genomic locus of a mobile genetic element carrying copper resistance genes. This study demonstrates the value of integrating genomic, environmental, and phenotypic data to characterize the biogeography of key microorganisms in contaminated sites.}, }
@article {pmid38669452, year = {2024}, author = {Hu, H and Li, R and Zhao, J and Batley, J and Edwards, D}, title = {Technological development and advances for constructing and analysing plant pangenomes.}, journal = {Genome biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/gbe/evae081}, pmid = {38669452}, issn = {1759-6653}, abstract = {A pangenome captures the genomic diversity for a species, derived from a collection of genetic sequences of diverse populations. Advances in sequencing technologies have given rise to three primary methods for pangenome construction and analysis: de novo assembly and comparison, reference genome-based iterative assembly and graph-based pangenome construction. Each method presents advantages and challenges in processing varying amounts and structures of DNA sequencing data. With the emergence of high-quality genome assemblies and advanced bioinformatics tools, the graph-based pangenome is emerging as an advanced reference for exploring the biological and functional implications of genetic variations.}, }
@article {pmid38669259, year = {2024}, author = {Duchen, D and Clipman, SJ and Vergara, C and Thio, CL and Thomas, DL and Duggal, P and Wojcik, GL}, title = {A hepatitis B virus (HBV) sequence variation graph improves alignment and sample-specific consensus sequence construction.}, journal = {PloS one}, volume = {19}, number = {4}, pages = {e0301069}, doi = {10.1371/journal.pone.0301069}, pmid = {38669259}, issn = {1932-6203}, mesh = {*Hepatitis B virus/genetics ; Humans ; *Genome, Viral ; *Consensus Sequence/genetics ; Phylogeny ; Sequence Alignment/methods ; Genetic Variation ; Hepatitis B, Chronic/virology ; DNA, Viral/genetics ; Sequence Analysis, DNA/methods ; }, abstract = {Nearly 300 million individuals live with chronic hepatitis B virus (HBV) infection (CHB), for which no curative therapy is available. As viral diversity is associated with pathogenesis and immunological control of infection, improved methods to characterize this diversity could aid drug development efforts. Conventionally, viral sequencing data are mapped/aligned to a reference genome, and only the aligned sequences are retained for analysis. Thus, reference selection is critical, yet selecting the most representative reference a priori remains difficult. We investigate an alternative pangenome approach which can combine multiple reference sequences into a graph which can be used during alignment. Using simulated short-read sequencing data generated from publicly available HBV genomes and real sequencing data from an individual living with CHB, we demonstrate alignment to a phylogenetically representative 'genome graph' can improve alignment, avoid issues of reference ambiguity, and facilitate the construction of sample-specific consensus sequences more genetically similar to the individual's infection. Graph-based methods can, therefore, improve efforts to characterize the genetics of viral pathogens, including HBV, and have broader implications in host-pathogen research.}, }
% Che et al. (2024), Antibiotics 13(4): IncK-blaCMY-2 plasmids in ESC-resistant E. coli from poultry and humans.
% Braces in the title protect the plasmid name, genus and country names from sentence-casing styles.
% No page/article number is recorded, so the pages field is omitted rather than left empty.
@article{pmid38667025,
  author   = {Che, M and Fresno, AH and Calvo-Fernandez, C and Hasman, H and Kurittu, PE and Heikinheimo, A and Hansen, LT},
  title    = {Comparison of {IncK-blaCMY-2} Plasmids in Extended-Spectrum Cephalosporin-Resistant {Escherichia} coli Isolated from Poultry and Humans in {Denmark}, {Finland}, and {Germany}.},
  journal  = {Antibiotics (Basel, Switzerland)},
  year     = {2024},
  volume   = {13},
  number   = {4},
  doi      = {10.3390/antibiotics13040349},
  pmid     = {38667025},
  issn     = {2079-6382},
  support  = {Fødevareforlig 3//Danish Veterinary and Food Administration/ ; Ph.D. stipend//Chinese Scholarship Council/ ; BG22/00150 - Beatriz Galindo program//Ministerio de Universidades, Spain/ ; },
  abstract = {Escherichia coli carrying IncK-blaCMY-2 plasmids mediating resistance to extended-spectrum cephalosporins (ESC) has been frequently described in food-producing animals and in humans. This study aimed to characterize IncK-blaCMY-2-positive ESC-resistant E. coli isolates from poultry production systems in Denmark, Finland, and Germany, as well as from Danish human blood infections, and further compare their plasmids. Whole-genome sequencing (Illumina) of all isolates (n = 46) confirmed the presence of the blaCMY-2 gene. Minimum inhibitory concentration (MIC) testing revealed a resistant phenotype to cefotaxime as well as resistance to ≥3 antibiotic classes. Conjugative transfer of the blaCMY-2 gene confirmed the resistance being on mobile plasmids. Pangenome analysis showed only one-third of the genes being in the core with the remainder being in the large accessory gene pool. Single nucleotide polymorphism (SNP) analysis on sequence type (ST) 429 and 1286 isolates showed between 0-60 and 13-90 SNP differences, respectively, indicating vertical transmission of closely related clones in the poultry production, including among Danish, Finnish, and German ST429 isolates.
A comparison of 22 ST429 isolates from this study with 80 ST429 isolates in Enterobase revealed the widespread geographical occurrence of related isolates associated with poultry production. Long-read sequencing of a representative subset of isolates (n = 28) allowed further characterization and comparison of the IncK-blaCMY-2 plasmids with publicly available plasmid sequences. This analysis revealed the presence of highly similar plasmids in ESC-resistant E. coli from Denmark, Finland, and Germany pointing to the existence of common sources. Moreover, the analysis presented evidence of global plasmid transmission and evolution. Lastly, our results indicate that IncK-blaCMY-2 plasmids and their carriers had been circulating in the Danish production chain with an associated risk of spreading to humans, as exemplified by the similarity of the clinical ST429 isolate to poultry isolates. Its persistence may be driven by co-selection since most IncK-blaCMY-2 plasmids harbor resistance factors to drugs used in veterinary medicine.},
}
% Taylor et al. (2024), Annual Review of Genomics and Human Genetics: complete human genomes and pangenome references.
% Braces in the title protect the proper name "Human Genome Project" from sentence-casing styles.
% No volume/issue/pages are recorded, so those fields are omitted rather than left empty.
@article{pmid38663087,
  author   = {Taylor, DJ and Eizenga, JM and Li, Q and Das, A and Jenike, KM and Kenny, EE and Miga, KH and Monlong, J and McCoy, RC and Paten, B and Schatz, MC},
  title    = {Beyond the {Human Genome Project}: The Age of Complete Human Genome Sequences and Pangenome References.},
  journal  = {Annual review of genomics and human genetics},
  year     = {2024},
  doi      = {10.1146/annurev-genom-021623-081639},
  pmid     = {38663087},
  issn     = {1545-293X},
  abstract = {The Human Genome Project was an enormous accomplishment, providing a foundation for countless explorations into the genetics and genomics of the human species. Yet for many years, the human genome reference sequence remained incomplete and lacked representation of human genetic diversity. Recently, two major advances have emerged to address these shortcomings: complete gap-free human genome sequences, such as the one developed by the Telomere-to-Telomere Consortium, and high-quality pangenomes, such as the one developed by the Human Pangenome Reference Consortium. Facilitated by advances in long-read DNA sequencing and genome assembly algorithms, complete human genome sequences resolve regions that have been historically difficult to sequence, including centromeres, telomeres, and segmental duplications. In parallel, pangenomes capture the extensive genetic diversity across populations worldwide. Together, these advances usher in a new era of genomics research, enhancing the accuracy of genomic analysis, paving the path for precision medicine, and contributing to deeper insights into human biology.},
}
% Schloissnig et al. (2024), bioRxiv preprint: long-read SV characterization in 1,019 samples of the 1000 Genomes Project.
% Braces in the title protect the project name from sentence-casing styles.
% No volume/issue/pages are recorded, so those fields are omitted rather than left empty.
@article{pmid38659906,
  author   = {Schloissnig, S and Pani, S and Rodriguez-Martin, B and Ebler, J and Hain, C and Tsapalou, V and Söylev, A and Hüther, P and Ashraf, H and Prodanov, T and Asparuhova, M and Hunt, S and Rausch, T and Marschall, T and Korbel, JO},
  title    = {Long-read sequencing and structural variant characterization in 1,019 samples from the 1000 {Genomes Project}.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2024},
  doi      = {10.1101/2024.04.18.590093},
  pmid     = {38659906},
  abstract = {Structural variants (SVs) contribute significantly to human genetic diversity and disease [1-4] . Previously, SVs have remained incompletely resolved by population genomics, with short-read sequencing facing limitations in capturing the whole spectrum of SVs at nucleotide resolution [5-7] . Here we leveraged nanopore sequencing [8] to construct an intermediate coverage resource of 1,019 long-read genomes sampled within 26 human populations from the 1000 Genomes Project. By integrating linear and graph-based approaches for SV analysis via pangenome graph-augmentation, we uncover 167,291 sequence-resolved SVs in these samples, considerably advancing SV characterization compared to population-wide short-read sequencing studies [3,4] . Our analysis details diverse SV classes-deletions, duplications, insertions, and inversions-at population-scale. LINE-1 and SVA retrotransposition activities frequently mediate transductions [9,10] of unique sequences, with both mobile element classes transducing sequences at either the 3'- or 5'-end, depending on the source element locus. Furthermore, analyses of SV breakpoint junctions suggest a continuum of homology-mediated rearrangement processes are integral to SV formation, and highlight evidence for SV recurrence involving repeat sequences.
Our open-access dataset underscores the transformative impact of long-read sequencing in advancing the characterisation of polymorphic genomic architectures, and provides a resource for guiding variant prioritisation in future long-read sequencing-based disease studies.},
}
% Liu et al. (2024), BMC Genomics 25(1):405: the PPanG pangenome browser.
% Braces in the title protect the camel-case tool name from sentence-casing styles.
@article{pmid38658835,
  author   = {Liu, M and Zhang, F and Lu, H and Xue, H and Dong, X and Li, Z and Xu, J and Wang, W and Wei, C},
  title    = {{PPanG}: a precision pangenome browser enabling nucleotide-level analysis of genomic variations in individual genomes and their graph-based pangenome.},
  journal  = {BMC genomics},
  year     = {2024},
  volume   = {25},
  number   = {1},
  pages    = {405},
  pmid     = {38658835},
  issn     = {1471-2164},
  support  = {B23CJ0208//the Hainan Yazhou Bay Seed Lab Project/ ; B23CJ0208//the Hainan Yazhou Bay Seed Lab Project/ ; B23CJ0208//the Hainan Yazhou Bay Seed Lab Project/ ; YBXM//Institute of Crop Sciences, Chinese Academy of Agricultural Sciences/ ; 2022ZD0401703//Scientific Innovation 2030 Program/ ; 2022ZD0401703//Scientific Innovation 2030 Program/ ; SQ2023YFF1000094//National Key Research and Development Program of China/ ; 32170643//National Natural Science Foundation of China/ ; 20ZR1428200//Natural Science Foundation of Shanghai Municipality/ ; },
  abstract = {Graph-based pangenome is gaining more popularity than linear pangenome because it stores more comprehensive information of variations. However, traditional linear genome browser has its own advantages, especially the tremendous resources accumulated historically. With the fast-growing number of individual genomes and their annotations available, the demand for a genome browser to visualize genome annotation for many individuals together with a graph-based pangenome is getting higher and higher. Here we report a new pangenome browser PPanG, a precise pangenome browser enabling nucleotide-level comparison of individual genome annotations together with a graph-based pangenome. Nine rice genomes with annotations were provided by default as potential references, and any individual genome can be selected as the reference. Our pangenome browser provides unprecedented insights on genome variations at different levels from base to gene, and reveals how the structures of a gene could differ for individuals.
PPanG can be applied to any species with multiple individual genomes available and it is available at https://cgm.sjtu.edu.cn/PPanG .},
}
% Chuang et al. (2024), Frontiers in Microbiology 15:1356025: three new Xanthomonas/Stenotrophomonas species from Araceae.
% Braces in the title protect genus and family names from sentence-casing styles.
% In the abstract, the percent sign is escaped and the tilde is written as \textasciitilde so both survive typesetting.
% No issue number is recorded, so the number field is omitted rather than left empty.
@article{pmid38655077,
  author   = {Chuang, SC and Dobhal, S and Alvarez, AM and Arif, M},
  title    = {Three new species, {Xanthomonas} hawaiiensis sp. nov., {Stenotrophomonas} aracearum sp. nov., and {Stenotrophomonas} oahuensis sp. nov., isolated from the {Araceae} family.},
  journal  = {Frontiers in microbiology},
  year     = {2024},
  volume   = {15},
  pages    = {1356025},
  pmid     = {38655077},
  issn     = {1664-302X},
  abstract = {Xanthomonas and Stenotrophomonas are closely related genera in the family Lysobacteraceae. In our previous study of aroid-associated bacterial strains, most strains isolated from anthurium and other aroids were reclassified as X. phaseoli and other Xanthomonas species. However, two strains isolated from Spathiphyllum and Colocasia were phylogenetically distant from other strains in the Xanthomonas clade and two strains isolated from Anthurium clustered within the Stenotrophomonas clade. Phylogenetic trees based on 16S rRNA and nine housekeeping genes placed the former strains with the type strain of X. sacchari from sugarcane and the latter strains with the type strain of S. bentonitica from bentonite. In pairwise comparisons with type strains, the overall genomic relatedness indices required delineation of new species; digital DNA-DNA hybridization and average nucleotide identity values were lower than 70 and 95\%, respectively. Hence, three new species are proposed: S. aracearum sp. nov. and S. oahuensis sp. nov. for two strains from anthurium and X. hawaiiensis sp. nov. for the strains from spathiphyllum and colocasia, respectively. The genome size of X. hawaiiensis sp. nov. is {\textasciitilde}4.88 Mbp and higher than S. aracearum sp. nov. (4.33 Mbp) and S. oahuensis sp. nov. (4.68 Mbp). Gene content analysis revealed 425 and 576 core genes present in 40 xanthomonads and 25 stenotrophomonads, respectively. The average number of unique genes in Stenotrophomonas spp. was higher than in Xanthomonas spp., implying higher genetic diversity in Stenotrophomonas.},
}
% He et al. (2024), BMC Biology 22(1):92: cotton pangenome and transposon-driven genome evolution.
% The percent sign in the abstract is escaped so it is not read as a TeX comment when the field is typeset.
@article{pmid38654264,
  author   = {He, X and Qi, Z and Liu, Z and Chang, X and Zhang, X and Li, J and Wang, M},
  title    = {Pangenome analysis reveals transposon-driven genome evolution in cotton.},
  journal  = {BMC biology},
  year     = {2024},
  volume   = {22},
  number   = {1},
  pages    = {92},
  pmid     = {38654264},
  issn     = {1741-7007},
  support  = {32001595//National Natural Science Foundation of China/ ; 32170645//National Natural Science Foundation of China/ ; 31830062//National Natural Science Foundation of China/ ; },
  abstract = {BACKGROUND: Transposable elements (TEs) have a profound influence on the trajectory of plant evolution, driving genome expansion and catalyzing phenotypic diversification. The pangenome, a comprehensive genetic pool encompassing all variations within a species, serves as an invaluable tool, unaffected by the confounding factors of intraspecific diversity. This allows for a more nuanced exploration of plant TE evolution.
RESULTS: Here, we constructed a pangenome for diploid A-genome cotton using 344 accessions from representative geographical regions, including 223 from China as the main component. We found 511 Mb of non-reference sequences (NRSs) and revealed the presence of 5479 previously undiscovered protein-coding genes. Our comprehensive approach enabled us to decipher the genetic underpinnings of the distinct geographic distributions of cotton. Notably, we identified 3301 presence-absence variations (PAVs) that are closely tied to gene expression patterns within the pangenome, among which 2342 novel expression quantitative trait loci (eQTLs) were found residing in NRSs. Our investigation also unveiled contrasting patterns of transposon proliferation between diploid and tetraploid cotton, with long terminal repeat (LTR) retrotransposons exhibiting a synchronized surge in polyploids. Furthermore, the invasion of LTR retrotransposons from the A subgenome to the D subgenome triggered a substantial expansion of the latter following polyploidization. In addition, we found that TE insertions were responsible for the loss of 36.2\% of species-specific genes, as well as the generation of entirely new species-specific genes.
CONCLUSIONS: Our pangenome analyses provide new insights into cotton genomics and subgenome dynamics after polyploidization and demonstrate the power of pangenome approaches for elucidating transposon impacts and genome evolution.},
}
% Kavanova et al. (2024), Probiotics and Antimicrobial Proteins: probiotic Limosilactobacillus mucosae from wild boars.
% Braces in the title protect the country name from sentence-casing styles.
% No volume/issue/pages are recorded, so those fields are omitted rather than left empty.
@article{pmid38652229,
  author   = {Kavanova, K and Kostovova, I and Moravkova, M and Kubasova, T and Babak, V and Crhanova, M},
  title    = {Comparative Genome Analysis and Characterization of the Probiotic Properties of Lactic Acid Bacteria Isolated from the Gastrointestinal Tract of Wild Boars in the {Czech Republic}.},
  journal  = {Probiotics and antimicrobial proteins},
  year     = {2024},
  pmid     = {38652229},
  issn     = {1867-1314},
  support  = {QK1910351, MZE-RO0523//Ministerstvo Zemědělství/ ; QK1910351, MZE-RO0523//Ministerstvo Zemědělství/ ; QK1910351, MZE-RO0523//Ministerstvo Zemědělství/ ; QK1910351, MZE-RO0523//Ministerstvo Zemědělství/ ; QK1910351, MZE-RO0523//Ministerstvo Zemědělství/ ; QK1910351, MZE-RO0523//Ministerstvo Zemědělství/ ; },
  abstract = {Probiotics are crucial components for maintaining a healthy gut microbiota in pigs, especially during the weaning period. Lactic acid bacteria (LAB) derived from the gastrointestinal tract of wild boars can serve as an abundant source of beneficial probiotic strains with suitable properties for use in pig husbandry. In this study, we analyzed and characterized 15 strains of Limosilactobacillus mucosae obtained from the gut contents of wild boars to assess their safety and suitability as probiotic candidates. The strains were compared using pan-genomic analysis with 49 L. mucosae strains obtained from the NCBI database. All isolated strains demonstrated their safety by showing an absence of transferrable antimicrobial resistance genes and hemolysin activity. Based on the presence of beneficial genes, five candidates with probiotic properties were selected and subjected to phenotypic profiling. These five selected isolates exhibited the ability to survive conditions mimicking passage through the host's digestive tract, such as low pH and the presence of bile salts.
Furthermore, five selected strains demonstrated the presence of corresponding carbohydrate-active enzymes and the ability to utilize various carbohydrate substrates. These strains can enhance the digestibility of oligosaccharide or polysaccharide substrates found in food or feed, specifically resistant starch, α-galactosides, cellobiose, gentiobiose, and arabinoxylans. Based on the results obtained, the L. mucosae isolates tested in this study appear to be promising candidates for use as probiotics in pigs.},
}
% Recuerda and Campagna (2024), Molecular Ecology e17364: review of structural variants and avian phenotypes.
% No volume/issue are recorded, so those fields are omitted rather than left empty.
@article{pmid38651830,
  author   = {Recuerda, M and Campagna, L},
  title    = {How structural variants shape avian phenotypes: Lessons from model systems.},
  journal  = {Molecular ecology},
  year     = {2024},
  pages    = {e17364},
  doi      = {10.1111/mec.17364},
  pmid     = {38651830},
  issn     = {1365-294X},
  support  = {NSF-DEB-2232929//Division of Environmental Biology/ ; },
  abstract = {Despite receiving significant recent attention, the relevance of structural variation (SV) in driving phenotypic diversity remains understudied, although recent advances in long-read sequencing, bioinformatics and pangenomic approaches have enhanced SV detection. We review the role of SVs in shaping phenotypes in avian model systems, and identify some general patterns in SV type, length and their associated traits. We found that most of the avian SVs so far identified are short indels in chickens, which are frequently associated with changes in body weight and plumage colouration. Overall, we found that relatively short SVs are more frequently detected, likely due to a combination of their prevalence compared to large SVs, and a detection bias, stemming primarily from the widespread use of short-read sequencing and associated analytical methods. SVs most commonly involve non-coding regions, especially introns, and when patterns of inheritance were reported, SVs associated primarily with dominant discrete traits. We summarise several examples of phenotypic convergence across different species, mediated by different SVs in the same or different genes and different types of changes in the same gene that can lead to various phenotypes. Complex rearrangements and supergenes, which can simultaneously affect and link several genes, tend to have pleiotropic phenotypic effects. Additionally, SVs commonly co-occur with single-nucleotide polymorphisms, highlighting the need to consider all types of genetic changes to understand the basis of phenotypic traits.
We end by summarising expectations for when long-read technologies become commonly implemented in non-model birds, likely leading to an increase in SV discovery and characterisation. The growing interest in this subject suggests an increase in our understanding of the phenotypic effects of SVs in upcoming years.},
}
% Pettersen et al. (2024), NAR Genomics and Bioinformatics 6(2):lqae039: pneumococcal two-component system regulons.
@article{pmid38650915,
  author   = {Pettersen, JS and Nielsen, FD and Andreassen, PR and Møller-Jensen, J and Jørgensen, MG},
  title    = {A comprehensive analysis of pneumococcal two-component system regulatory networks.},
  journal  = {NAR genomics and bioinformatics},
  year     = {2024},
  volume   = {6},
  number   = {2},
  pages    = {lqae039},
  pmid     = {38650915},
  issn     = {2631-9268},
  abstract = {Two-component systems are key signal-transduction systems that enable bacteria to respond to a wide variety of environmental stimuli. The human pathogen, Streptococcus pneumoniae (pneumococcus) encodes 13 two-component systems and a single orphan response regulator, most of which are significant for pneumococcal pathogenicity. Mapping the regulatory networks governed by these systems is key to understand pneumococcal host adaptation. Here we employ a novel bioinformatic approach to predict the regulons of each two-component system based on publicly available whole-genome sequencing data. By employing pangenome-wide association studies (panGWAS) to predict genotype-genotype associations for each two-component system, we predicted regulon genes of 11 of the pneumococcal two-component systems. Through validation via next-generation RNA-sequencing on response regulator overexpression mutants, several top candidate genes predicted by the panGWAS analysis were confirmed as regulon genes. The present study presents novel details on multiple pneumococcal two-component systems, including an expansion of regulons, identification of candidate response regulator binding motifs, and identification of candidate response regulator-regulated small non-coding RNAs. We also demonstrate a use for panGWAS as a complementary tool in target gene identification via identification of genotype-to-genotype links. Expanding our knowledge on two-component systems in pathogens is crucial to understanding how these bacteria sense and respond to their host environment, which could prove useful in future drug development.},
}
% Wei et al. (2024), National Science Review 11(4):nwae043: eQTL + GWAS on the rice super pan-genome for salt tolerance.
% Braces in the title protect the mixed-case acronyms from sentence-casing styles.
@article{pmid38650829,
  author   = {Wei, H and Wang, X and Zhang, Z and Yang, L and Zhang, Q and Li, Y and He, H and Chen, D and Zhang, B and Zheng, C and Leng, Y and Cao, X and Cui, Y and Shi, C and Liu, Y and Lv, Y and Ma, J and He, W and Liu, X and Xu, Q and Yuan, Q and Yu, X and Wang, T and Qian, H and Li, X and Zhang, B and Zhang, H and Chen, W and Guo, M and Dai, X and Wang, Y and Zheng, X and Guo, L and Xie, X and Qian, Q and Shang, L},
  title    = {Uncovering key salt-tolerant regulators through a combined {eQTL} and {GWAS} analysis using the super pan-genome in rice.},
  journal  = {National science review},
  year     = {2024},
  volume   = {11},
  number   = {4},
  pages    = {nwae043},
  pmid     = {38650829},
  issn     = {2053-714X},
  abstract = {For sessile plants, gene expression plays a pivotal role in responding to salinity stress by activating or suppressing specific genes. However, our knowledge of genetic variations governing gene expression in response to salt stress remains limited in natural germplasm. Through transcriptome analysis of the Global Mini-Core Rice Collection consisting of a panel of 202 accessions, we identified 22 345 and 27 610 expression quantitative trait loci associated with the expression of 7787 and 9361 eGenes under normal and salt-stress conditions, respectively, leveraging the super pan-genome map. Notably, combined with genome-wide association studies, we swiftly pinpointed the potential candidate gene STG5-a major salt-tolerant locus known as qSTS5. Intriguingly, STG5 is required for maintaining Na[+]/K[+] homeostasis by directly regulating the transcription of multiple members of the OsHKT gene family. Our study sheds light on how genetic variants influence the dynamic changes in gene expression responding to salinity stress and provides a valuable resource for the mining of salt-tolerant genes in the future.},
}
% Shivute et al. (2024), Frontiers in Plant Science 15:1345708: NBS-LRR gene variants in a wild rice line from Oryza alta.
% Braces in the title protect the gene-family acronym, genus and authority name from sentence-casing styles.
% No issue number is recorded, so the number field is omitted rather than left empty.
@article{pmid38650702,
  author   = {Shivute, FN and Zhong, Y and Wu, J and Bao, Y and Wang, W and Liu, X and Lu, Z and Yu, H},
  title    = {Genome-wide and pan-genomic analysis reveals rich variants of {NBS-LRR} genes in a newly developed wild rice line from {Oryza} alta {Swallen}.},
  journal  = {Frontiers in plant science},
  year     = {2024},
  volume   = {15},
  pages    = {1345708},
  pmid     = {38650702},
  issn     = {1664-462X},
  abstract = {INTRODUCTION: Oryza alta Swallen is an allotetraploid perennial wild rice and contains CCDD genome, which may harbor favorable genes for the enrichment of genetic resource.
METHODS: A new wild rice line, Huaye 5, was developed from Oryza alta Swallen in our lab. Whole genome re-sequencing and pan-genomic analysis were employed to analyze its genomic variations and novel genes.
RESULTS AND DISCUSSION: More than ten million genomic variations were detected when compared with Asian cultivar. Among the variational genes, 724, 197 and 710 genes coded protein kinase, synthetase and transcription factor, respectively. A total of 353, 131 and 135 variational genes were associated with morphological trait, physiological trait, resistance or tolerance, respectively. A total of 62 were NBS-LRR genes were detected, in which 11 NBS-LRR genes expressed in sheath and mature stem, and 26 expressed in young and mature roots expressed. The pan-genome sequences of wild rice species with CCDD genome were constructed by integrating 8 Oryza alta (OA), 2 Oryza grandiglumis (OG) and 18 Oryza latifolia (OL) accessions. A total of 28 non-reference NBS-LRR genes were revealed, and 7 of which were mainly expressed in mature roots. This research demonstrated rich DNA variation in the Oryza alta Swallen that may provide a new germplasm for rice resistance breeding.},
}
% Lai et al. (2024), Computational and Structural Biotechnology Journal 23:1562--1571: HLA genotyping framework.
% The page range uses the BibTeX double hyphen; braces protect the acronym in the title from sentence-casing styles.
% The percent sign in the abstract is escaped so it is not read as a TeX comment when the field is typeset.
% No issue number is recorded, so the number field is omitted rather than left empty.
@article{pmid38650588,
  author   = {Lai, SK and Luo, AC and Chiu, IH and Chuang, HW and Chou, TH and Hung, TK and Hsu, JS and Chen, CY and Yang, WS and Yang, YC and Chen, PL},
  title    = {A novel framework for human leukocyte antigen ({HLA}) genotyping using probe capture-based targeted next-generation sequencing and computational analysis.},
  journal  = {Computational and structural biotechnology journal},
  year     = {2024},
  volume   = {23},
  pages    = {1562--1571},
  pmid     = {38650588},
  issn     = {2001-0370},
  abstract = {Human leukocyte antigen (HLA) genes play pivotal roles in numerous immunological applications. Given the immense number of polymorphisms, achieving accurate high-throughput HLA typing remains challenging. This study aimed to harness the human pan-genome reference consortium (HPRC) resources as a potential benchmark for HLA reference materials. We meticulously annotated specific four field-resolution alleles for 11 HLA genes (HLA-A, -B, -C, -DPA1, -DPB1, -DQA1, -DQB1, -DRB1, -DRB3, -DRB4 and -DRB5) from 44 high-quality HPRC personal genome assemblies. For sequencing, we crafted HLA-specific probes and conducted capture-based targeted sequencing of the genomic DNA of the HPRC cohort, ensuring focused and comprehensive coverage of the HLA region of interest. We used publicly available short-read whole-genome sequencing (WGS) data from identical samples to offer a comparative perspective. To decipher the vast amount of sequencing data, we employed seven distinct software tools: OptiType, HLA-VBseq, HISAT genotype, SpecHLA, T1K, QzType, and DRAGEN. Each tool offers unique capabilities and algorithms for HLA genotyping, allowing comprehensive analysis and validation of the results. We then compared these results with benchmarks derived from personal genome assemblies. Our findings present a comprehensive four-field-resolution HLA allele annotation for 44 HPRC samples.
Significantly, our innovative targeted next-generation sequencing (NGS) approach for HLA genes showed superior accuracy compared with conventional short-read WGS. An integrated analysis involving QzType, T1K, and DRAGEN was developed, achieving 100\% accuracy for all 11 HLA genes. In conclusion, our study highlighted the combination of targeted short-read sequencing and astute computational analysis as a robust approach for HLA genotyping. Furthermore, the HPRC cohort has emerged as a valuable assembly-based reference in this realm.},
}
% Kogay, Wolf and Koonin (2024), Environmental Microbiology 26(4):e16630: defence systems and horizontal gene transfer.
@article{pmid38643972,
  author   = {Kogay, R and Wolf, YI and Koonin, EV},
  title    = {Defence systems and horizontal gene transfer in bacteria.},
  journal  = {Environmental microbiology},
  year     = {2024},
  volume   = {26},
  number   = {4},
  pages    = {e16630},
  doi      = {10.1111/1462-2920.16630},
  pmid     = {38643972},
  issn     = {1462-2920},
  support  = {//Intramural Research Program of the National Institutes of Health (National Library of Medicine)/ ; },
  abstract = {Horizontal gene transfer (HGT) is a fundamental process in prokaryotic evolution, contributing significantly to diversification and adaptation. HGT is typically facilitated by mobile genetic elements (MGEs), such as conjugative plasmids and phages, which often impose fitness costs on their hosts. However, a considerable number of bacterial genes are involved in defence mechanisms that limit the propagation of MGEs, suggesting they may actively restrict HGT. In our study, we investigated whether defence systems limit HGT by examining the relationship between the HGT rate and the presence of 73 defence systems across 12 bacterial species. We discovered that only six defence systems, three of which were different CRISPR-Cas subtypes, were associated with a reduced gene gain rate at the species evolution scale. Hosts of these defence systems tend to have a smaller pangenome size and fewer phage-related genes compared to genomes without these systems. This suggests that these defence mechanisms inhibit HGT by limiting prophage integration. We hypothesize that the restriction of HGT by defence systems is species-specific and depends on various ecological and genetic factors, including the burden of MGEs and the fitness effect of HGT in bacterial populations.},
}
% Lin et al. (2024), Genome Biology 25(1):101: biastools for measuring and diagnosing reference bias.
@article{pmid38641647,
  author   = {Lin, MJ and Iyer, S and Chen, NC and Langmead, B},
  title    = {Measuring, visualizing, and diagnosing reference bias with biastools.},
  journal  = {Genome biology},
  year     = {2024},
  volume   = {25},
  number   = {1},
  pages    = {101},
  pmid     = {38641647},
  issn     = {1474-760X},
  support  = {R01HG011392/HG/NHGRI NIH HHS/United States ; },
  abstract = {Many bioinformatics methods seek to reduce reference bias, but no methods exist to comprehensively measure it. Biastools analyzes and categorizes instances of reference bias. It works in various scenarios: when the donor's variants are known and reads are simulated; when donor variants are known and reads are real; and when variants are unknown and reads are real. Using biastools, we observe that more inclusive graph genomes result in fewer biased sites. We find that end-to-end alignment reduces bias at indels relative to local aligners. Finally, we use biastools to characterize how T2T references improve large-scale bias.},
}
% Arisan et al. (2024), Frontiers in Microbiology 15:1360268: description of the novel genus 'Igneacidithiobacillus'.
% Braces in the title protect the place name and genus name from sentence-casing styles.
% No issue number is recorded, so the number field is omitted rather than left empty.
@article{pmid38633703,
  author   = {Arisan, D and Moya-Beltrán, A and Rojas-Villalobos, C and Issotta, F and Castro, M and Ulloa, R and Chiacchiarini, PA and Díez, B and Martín, AJM and Ñancucheo, I and Giaveno, A and Johnson, DB and Quatrini, R},
  title    = {Acidithiobacillia class members originating at sites within the {Pacific Ring of Fire} and other tectonically active locations and description of the novel genus '{Igneacidithiobacillus}'.},
  journal  = {Frontiers in microbiology},
  year     = {2024},
  volume   = {15},
  pages    = {1360268},
  pmid     = {38633703},
  issn     = {1664-302X},
  abstract = {Recent studies have expanded the genomic contours of the Acidithiobacillia, highlighting important lacunae in our comprehension of the phylogenetic space occupied by certain lineages of the class. One such lineage is 'Igneacidithiobacillus', a novel genus-level taxon, represented by 'Igneacidithiobacillus copahuensis' VAN18-1[T] as its type species, along with two other uncultivated metagenome-assembled genomes (MAGs) originating from geothermally active sites across the Pacific Ring of Fire. In this study, we investigate the genetic and genomic diversity, and the distribution patterns of several uncharacterized Acidithiobacillia class strains and sequence clones, which are ascribed to the same 16S rRNA gene sequence clade. By digging deeper into this data and contributing to novel MAGs emerging from environmental studies in tectonically active locations, the description of this novel genus has been consolidated. Using state-of-the-art genomic taxonomy methods, we added to already recognized taxa, an additional four novel Candidate (Ca.) species, including 'Ca. Igneacidithiobacillus chanchocoensis' (mCHCt20-1[TS]), 'Igneacidithiobacillus siniensis' (S30A2[T]), 'Ca. Igneacidithiobacillus taupoensis' (TVZ-G3 [TS]), and 'Ca. Igneacidithiobacillus waiarikiensis' (TVZ-G4 [TS]).
Analysis of published data on the isolation, enrichment, cultivation, and preliminary microbiological characterization of several of these unassigned or misassigned strains, along with the type species of the genus, plus the recoverable environmental data from metagenomic studies, allowed us to identify habitat preferences of these taxa. Commonalities and lineage-specific adaptations of the seven species of the genus were derived from pangenome analysis and comparative genomic metabolic reconstruction. The findings emerging from this study lay the groundwork for further research on the ecology, evolution, and biotechnological potential of the novel genus 'Igneacidithiobacillus'.},
}
% Sterzi et al. (2024), Communications Biology 7(1):468: population structure of Serratia marcescens.
% Braces in the title protect the genus name from sentence-casing styles.
@article{pmid38632370,
  author   = {Sterzi, L and Nodari, R and Di Marco, F and Ferrando, ML and Saluzzo, F and Spitaleri, A and Allahverdi, H and Papaleo, S and Panelli, S and Rimoldi, SG and Batisti Biffignandi, G and Corbella, M and Cavallero, A and Prati, P and Farina, C and Cirillo, DM and Zuccotti, G and Bandi, C and Comandatore, F},
  title    = {Genetic barriers more than environmental associations explain {Serratia} marcescens population structure.},
  journal  = {Communications biology},
  year     = {2024},
  volume   = {7},
  number   = {1},
  pages    = {468},
  pmid     = {38632370},
  issn     = {2399-3642},
  abstract = {Bacterial species often comprise well-separated lineages, likely emerged and maintained by genetic isolation and/or ecological divergence. How these two evolutionary actors interact in the shaping of bacterial population structure is currently not fully understood. In this study, we investigate the genetic and ecological drivers underlying the evolution of Serratia marcescens, an opportunistic pathogen with high genomic flexibility and able to colonise diverse environments. Comparative genomic analyses reveal a population structure composed of five deeply-demarcated genetic clusters with open pan-genome but limited inter-cluster gene flow, partially explained by Restriction-Modification (R-M) systems incompatibility. Furthermore, a large-scale research on hundred-thousands metagenomic datasets reveals only a partial habitat separation of the clusters. Globally, two clusters only show a separate gene composition coherent with ecological adaptations. These results suggest that genetic isolation has preceded ecological adaptations in the shaping of the species diversity, an evolutionary scenario coherent with the Evolutionary Extended Synthesis.},
}
@article {pmid38632127, year = {2024}, author = {Jiménez-Edeza, M and Galván-Gordillo, SV and Pacheco-Arjona, R and Castañeda-Ruelas, GM}, title = {Genomic Approach of Listeria monocytogenes Strains Isolated from Deli-Meats in Mexico.}, journal = {Current microbiology}, volume = {81}, number = {6}, pages = {145}, pmid = {38632127}, issn = {1432-0991}, support = {PROFAPI2022/PRO_A7_069//Universidad Autónoma de Sinaloa/ ; }, abstract = {Listeria monocytogenes is a foodborne pathogen that causes listeriosis worldwide. In México, L. monocytogenes has been identified as a hazard of deli-meats. However, the genomic analysis that supports the transmission of L. monocytogenes strains via deli-meats and its role as a source for virulence and resistance genes is lacking. Here, we present four high-quality genome drafts of L. monocytogenes strains isolated from deli-meats in Mexico. In silico typing was used to determine the serotype, lineage, clonal complexes (CC), and multilocus sequence type (ST). Also, comparative genomics were performed to explore the diversity, virulence, mobile elements, antimicrobial resistant and stress survival traits. The genome sequence size of these strains measured 3.05 ± 0.07 Mb with a mean value of 37.9\% G+C. All strains belonged to lineage I, which was divided into two groups: 4b, CC2, ST1 (n = 3) and 1/2b, CC5, ST5 (n = 1). The pangenome and core genome contained 3493 and 2625 genes, respectively. The strains harbor the L. monocytogenes pathogenicity island-1 (LIPI-1) and the same multidrug resistance pattern (fosX, norB, mprF, lin) via in silico analysis. Comparative analysis delineated the genomes as essentially syntenic, whose genomic differences were due to phage insertion. These results expand what is known about the biology of the L. monocytogenes strains isolated from deli-meats in Mexico and warns of the risk that these strains belong to epidemic lineage and harbor virulence genes linked to human disease.}, }
@article {pmid38632097, year = {2024}, author = {Yu-Cheng, L and Yan-Ting, S and Zhi-Xi, T}, title = {Frontiers of soybean pan-genome studies.}, journal = {Yi chuan = Hereditas}, volume = {46}, number = {3}, pages = {183--198}, doi = {10.16288/j.yczz.23-321}, pmid = {38632097}, issn = {0253-9772}, abstract = {Artificial domestication provided the original motivation to the blooming of agriculture, following with the dramatic change of the genetic background of crops and livestock. According to theory and technology upgradation that contributing to the omics, we appreciate using the pan-genome instead of single reference genome for crop study. By comparison and integration of multiple genomes under the guidance of pan-genome theory, we can estimate the genomic information range of a species, leading to a global understanding of its genetic diversity. Combining pan-genome with large size chromosomal structural variations, high throughput population resequencing, and multi-omics data, we can profoundly study the genetic basis behind species traits we focus on. Soybean is one of the most important commercial crops over the world. It is also essential to our food security. Dissecting the formation of genetic diversity and the causal loci of key agricultural traits of soybean will make the modern soybean breeding more efficiently. In this review, we summarize the core idea of pan-genome and clarified the characteristics of construction strategies of pan-genome such as de novo/mapping assembly, iterative assembly and graph-based genome. Then we used the soybean pan-genome work as a case study to introduce the general way to study pan-genome. We highlighted the contribution of structural variation (SV) to the evolution/domestication of soybean and its value in understanding the genetic bases of agronomy traits. By those, we approved the value of graph-based pan-genome for data integration and SV calculation. 
Future research directions are also discussed for crop genomics and data science.}, }
@article {pmid38628868, year = {2024}, author = {Narsing Rao, MP and Singh, RN and Sani, RK and Banerjee, A}, title = {Genome-based approach to evaluate the metabolic potentials and exopolysaccharides production of Bacillus paralicheniformis CamBx3 isolated from a Chilean hot spring.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1377965}, pmid = {38628868}, issn = {1664-302X}, abstract = {In the present study, a thermophilic strain designated CamBx3 was isolated from the Campanario hot spring, Chile. Based on 16S rRNA gene sequence, phylogenomic, and average nucleotide identity analysis the strain CamBx3 was identified as Bacillus paralicheniformis. Genome analysis of B. paralicheniformis CamBx3 revealed the presence of genes related to heat tolerance, exopolysaccharides (EPS), dissimilatory nitrate reduction, and assimilatory sulfate reduction. The pangenome analysis of strain CamBx3 with eight Bacillus spp. resulted in 26,562 gene clusters, 7,002 shell genes, and 19,484 cloud genes. The EPS produced by B. paralicheniformis CamBx3 was extracted, partially purified, and evaluated for its functional activities. B. paralicheniformis CamBx3 EPS with concentration 5 mg mL$^{-1}$ showed an optimum 92 mM ferrous equivalent FRAP activity, while the same concentration showed a maximum 91\% of Fe$^{2+}$ chelating activity. B. paralicheniformis CamBx3 EPS (0.2 mg mL$^{-1}$) demonstrated β-glucosidase inhibition. The EPS formed a viscoelastic gel at 45°C with a maximum instantaneous viscosity of 315 Pa.s at acidic pH 5. The present study suggests that B. paralicheniformis CamBx3 could be a valuable resource for biopolymers and bioactive molecules for industrial applications.}, }
@article {pmid38619241, year = {2024}, author = {Feng, Y and Arsenault, D and Louyakis, AS and Altman-Price, N and Gophna, U and Papke, RT and Gogarten, JP}, title = {Using the pan-genomic framework for the discovery of genomic islands in the haloarchaeon Halorubrum ezzemoulense.}, journal = {mBio}, volume = {}, number = {}, pages = {e0040824}, doi = {10.1128/mbio.00408-24}, pmid = {38619241}, issn = {2150-7511}, abstract = {In this study, we use pan-genomics to characterize the genomic variability of the widely dispersed halophilic archaeal species Halorubrum ezzemoulense (Hez). We include a multi-regional sampling of newly sequenced, high-quality draft genomes. The pan-genome graph of the species reveals 50 genomic islands that represent rare accessory genetic capabilities available to members. Most notably, we observe rearrangements that have led to the insertion/recombination/replacement of mutually exclusive genomic islands in equivalent genome positions (``homeocassettes''). These conflicting islands encode for similar functions, but homologs from islands located between the same core genes exhibit high divergence on the amino acid level, while the neighboring core genes are nearly identical. Both islands of a homeocassette often coexist in the same geographic location, suggesting that either island may be beyond the reach of selective sweeps and that these loci of divergence between Hez members are maintained and persist long term. This implies that subsections of the population have different niche preferences and rare metabolic capabilities. After an evaluation of the gene content in the homeocassettes, we speculate that these islands may play a role in the speciation, niche adaptability, and group selection dynamics in Hez. 
Though homeocassettes are first described in this study, similar replacements and divergence of genes on genomic islands have been previously reported in other Haloarchaea and distantly related Archaea, suggesting that homeocassettes may be a feature in a wide range of organisms outside of Hez. IMPORTANCE: This study catalogs the rare genes discovered in strains of the species Halorubrum ezzemoulense (Hez), an obligate halophilic archaeon, through the perspective of its pan-genome. These rare genes are often found to be arranged on islands that confer metabolic and transport functions and contain genes that have eluded previous studies. The discovery of divergent, but homologous islands occupying equivalent genome positions (``homeocassettes'') in different genomes, reveals significant new information on genome evolution in Hez. Homeocassette pairs encode for similar functions, but their dissimilarity and distribution imply high rates of recombination, different specializations, and niche preferences in Hez. The coexistence of both islands of a homeocassette pair in multiple environments demonstrates that both islands are beyond the reach of selective sweeps and that these genome content differences between strains persist long term. The switch between islands through recombination under different environmental conditions may lead to a greater range of niche adaptability in Hez.}, }
@article {pmid38613721, year = {2024}, author = {Zhang, H and Su, X and Zheng, X and Liu, M and Zhao, C and Liu, X and Ma, Z and Zhang, S and Zhang, W}, title = {vB\_EcoM-P896 coliphage isolated from duck sewage can lyse both intestinal pathogenic Escherichia coli and extraintestinal pathogenic E. coli.}, journal = {International microbiology : the official journal of the Spanish Society for Microbiology}, volume = {}, number = {}, pages = {}, pmid = {38613721}, issn = {1618-1905}, support = {2022AH052192//Natural Science Foundation of Anhui Province/ ; gxgnfx2022135//The Project of Cultivating Outstanding Talents in Colleges/ ; KJ2021ZD0153//The Natural Science Research Project of Anhui Province/ ; wzykjtd202002//Wuhu Institute of Technology level science and technology team/ ; }, abstract = {Pathogenic Escherichia coli strains cause diseases in both humans and animals. The limiting factors to prevent as well as control infections from pathogenic E. coli strains are their pathotypes, serotypes, and drug resistance. Herein, a bacteriophage (vB\_EcoM-P896) has been isolated from duck sewage. Furthermore, aside from targeting intestinal pathogenic E. coli strains like enteropathogenic E. coli, Shiga toxin-producing E. coli, entero-invasive E. coli, and enteroaggregative E. coli, vB\_EcoM-P896 can cause lysis in extraintestinal pathogenic E. coli strains such as avian pathogenic E. coli. 
Stability analysis revealed that vB\_EcoM-P896 was stable under the following conditions: temperature, 4°C-50°C; pH, 3-11. The sequencing of the vB\_EcoM-P896 genome was conducted utilizing an HiSeq system (Illumina, San Diego, CA) and subjected to de novo assembling with the aid of Spades 3.11.1. The characteristics of the DNA genome were as follows: size, 170,656 bp; GC content, 40.4\%; the number of putative coding regions, 294. Transmission electron microscopy analysis of morphology and genome analysis revealed that the phage vB\_EcoM-P896 belonged to the order Caudovirales and the family Myoviridae. The pan-genome analysis of vB\_EcoM-P896 was divided into two levels. The first level involved the analysis of 91 strains of muscle tail phages, which were mainly divided into 5 groups. The second level involved the analysis of 24 strains of myophage with high homology. Of the 1480 gene clusters, 23 were shared core genes. Neighbor-joining phylogenetic trees were constructed using the Poisson model with MEGA6.0 based on the conserved sequences of phage proteins, the amino acid sequence of the terminase large subunit, and tail fibrin. Further analysis revealed that vB\_EcoM-P896 was a typical T4-like potent phage with potential clinical applications.}, }
@article{pmid38613617,
  year     = {2024},
  author   = {Aziz, T and Hangyu, H and Naveed, M and Shabbir, MA and Sarwar, A and Nasbeeb, J and Zhennai, Y and Alharbi, M},
  title    = {Genotypic Profiling, Functional Analysis, Cholesterol-Lowering Ability, and Angiotensin I-Converting Enzyme (ACE) Inhibitory Activity of Probiotic Lactiplantibacillus plantarum K25 via Different Approaches.},
  journal  = {Probiotics and antimicrobial proteins},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {38613617},
  issn     = {1867-1314},
  support  = {2017YFE0131800//National Natural Science Foundation of China/ ; },
  abstract = {Due to its alleged health advantages, several uses in biotechnology and food safety, the well-known probiotic strain Lactiplantibacillus plantarum K25 has drawn interest. This in-depth investigation explores the genetic diversity, makeup, and security characteristics of the microbial genome of L. plantarum K25, providing insightful knowledge about its genotypic profile and functional characteristics. Utilizing cutting-edge bioinformatics techniques like comparative genomics, pan-genomics, and genotypic profiling was carried out to reveal the strain's multidimensional potential in various fields. The results not only add to our understanding of the genetic makeup of L. plantarum K25 but also show off its acceptability in various fields, notably in biotechnology and food safety. The explanation of evolutionary links, which highlights L. plantarum K25's aptitude as a probiotic, is one notable finding from this research. Its safety profile, which is emphasized by the absence of genes linked to antibiotic resistance, is crucial and supports its status as a promising probiotic option.},
}
@article {pmid38612691, year = {2024}, author = {Chen, L and Chen, K and Xi, X and Du, X and Zou, X and Ma, Y and Song, Y and Luo, C and Weining, S}, title = {The Evolution, Expression Patterns, and Domestication Selection Analysis of the Annexin Gene Family in the Barley Pan-Genome.}, journal = {International journal of molecular sciences}, volume = {25}, number = {7}, pages = {3883}, doi = {10.3390/ijms25073883}, pmid = {38612691}, issn = {1422-0067}, abstract = {Plant annexins constitute a conserved protein family that plays crucial roles in regulating plant growth and development, as well as in responses to both biotic and abiotic stresses. In this study, a total of 144 annexin genes were identified in the barley pan-genome, comprising 12 reference genomes, including cultivated barley, landraces, and wild barley. Their chromosomal locations, physical-chemical characteristics, gene structures, conserved domains, and subcellular localizations were systematically analyzed to reveal the certain differences between wild and cultivated populations. Through a cis-acting element analysis, co-expression network, and large-scale transcriptome analysis, their involvement in growth, development, and responses to various stressors was highlighted. It is worth noting that HvMOREXann5 is only expressed in pistils and anthers, indicating its crucial role in reproductive development. Based on the resequencing data from 282 barley accessions worldwide, genetic variations in the annexin family were investigated, and the results showed that 5 out of the 12 identified HvMOREXanns were affected by selection pressure. Genetic diversity and haplotype frequency showed notable reductions between wild and domesticated barley, suggesting that a genetic bottleneck occurred on the annexin family during the barley domestication process. Finally, qRT-PCR analysis confirmed the up-regulation of HvMOREXann7 under drought stress, along with significant differences between wild accessions and varieties. 
This study provides some insights into the genome organization and genetic characteristics of the annexin gene family in barley at the pan-genome level, which will contribute to better understanding its evolution and function in barley and other crops.}, }
@article {pmid38608279, year = {2024}, author = {Wong, B and Ferguson, JM and Do, JY and Gamaarachchi, H and Deveson, IW}, title = {Streamlining remote nanopore data access with slow5curl.}, journal = {GigaScience}, volume = {13}, number = {}, pages = {giae016}, doi = {10.1093/gigascience/giae016}, pmid = {38608279}, issn = {2047-217X}, support = {MRF1173594//Australian Medical Research Futures Fund/ ; DE230100178//Australian Research Council/ ; }, abstract = {BACKGROUND: As adoption of nanopore sequencing technology continues to advance, the need to maintain large volumes of raw current signal data for reanalysis with updated algorithms is a growing challenge. Here we introduce slow5curl, a software package designed to streamline nanopore data sharing, accessibility, and reanalysis.
RESULTS: Slow5curl allows a user to fetch a specified read or group of reads from a raw nanopore dataset stored on a remote server, such as a public data repository, without downloading the entire file. Slow5curl uses an index to quickly fetch specific reads from a large dataset in SLOW5/BLOW5 format and highly parallelized data access requests to maximize download speeds. Using all public nanopore data from the Human Pangenome Reference Consortium (>22 TB), we demonstrate how slow5curl can be used to quickly fetch and reanalyze raw signal reads corresponding to a set of target genes from each individual in large cohort dataset (n = 91), minimizing the time, egress costs, and local storage requirements for their reanalysis.
CONCLUSIONS: We provide slow5curl as a free, open-source package that will reduce frictions in data sharing for the nanopore community: https://github.com/BonsonW/slow5curl.}, }
@article{pmid38607544,
  year     = {2024},
  author   = {Wang, J and Hu, H and Jiang, X and Zhang, S and Yang, W and Dong, J and Yang, T and Ma, Y and Zhou, L and Chen, J and Nie, S and Liu, C and Ning, Y and Zhu, X and Liu, B and Yang, J and Zhao, J},
  title    = {Pangenome-Wide Association Study and Transcriptome Analysis Reveal a Novel QTL and Candidate Genes Controlling both Panicle and Leaf Blast Resistance in Rice.},
  journal  = {Rice (New York, N.Y.)},
  volume   = {17},
  number   = {1},
  pages    = {27},
  pmid     = {38607544},
  issn     = {1939-8425},
  support  = {2020A1515111025//the GuangDong Basic and Applied Basic Research Foundation/ ; 2022NPY00005//Seed industry revitalization project of special fund for rural revitalization strategy in Guangdong Province/ ; 32161143009//the National Natural Science Foundation of China/ ; G2022030024L//the National Project/ ; 2023B1212060042//Guangdong Key Laboratory of New Technology in Rice Breeding/ ; 2023KJ106//the Innovation Team Project of Guangdong Modern Agricultural Industrial System/ ; },
  abstract = {Cultivating rice varieties with robust blast resistance is the most effective and economical way to manage the rice blast disease. However, rice blast disease comprises leaf and panicle blast, which are different in terms of resistance mechanisms. While many blast resistant rice cultivars were bred using genes conferring resistance to only leaf or panicle blast, mining durable and effective quantitative trait loci (QTLs) for both panicle and leaf blast resistance is of paramount importance. In this study, we conducted a pangenome-wide association study (panGWAS) on 9 blast resistance related phenotypes using 414 international diverse rice accessions from an international rice panel. This approach led to the identification of 74 QTLs associated with rice blast resistance. 
One notable locus, qPBR1, validated in a F4:5 population and fine-mapped in a Heterogeneous Inbred Family (HIF), exhibited broad-spectrum, major and durable blast resistance throughout the growth period. Furthermore, we performed transcriptomic analysis of 3 resistant and 3 sensitive accessions at different time points after infection, revealing 3,311 differentially expressed genes (DEGs) potentially involved in blast resistance. Integration of the above results identified 6 candidate genes within the qPBR1 locus, with no significant negative effect on yield. The results of this study provide valuable germplasm resources, QTLs, blast response genes and candidate functional genes for developing rice varieties with enduring and broad-spectrum blast resistance. The qPBR1, in particular, holds significant potential for breeding new rice varieties with comprehensive and durable resistance throughout their growth period.},
}
@article {pmid38604355, year = {2024}, author = {Yin, Z and Liang, J and Zhang, M and Chen, B and Yu, Z and Tian, X and Deng, X and Peng, L}, title = {Pan-genome insights into adaptive evolution of bacterial symbionts in mixed host-microbe symbioses represented by human gut microbiota Bacteroides cellulosilyticus.}, journal = {The Science of the total environment}, volume = {}, number = {}, pages = {172251}, doi = {10.1016/j.scitotenv.2024.172251}, pmid = {38604355}, issn = {1879-1026}, abstract = {Animal hosts harbor diverse assemblages of microbial symbionts that play crucial roles in the host's lifestyle. The link between microbial symbiosis and host development remains poorly understood. In particular, little is known about the adaptive evolution of gut bacteria in host-microbe symbioses. Recently, symbiotic relationships have been categorized as open, closed, or mixed, reflecting their modes of inter-host transmission and resulting in distinct genomic features. Members of the genus Bacteroides are the most abundant human gut microbiota and possess both probiotic and pathogenic potential, providing an excellent model for studying pan-genome evolution in symbiotic systems. Here, we determined the complete genome of a novel clinical strain PL2022, which was isolated from a blood sample and performed pan-genome analyses on a representative set of Bacteroides cellulosilyticus strains to quantify the influence of the symbiotic relationship on the evolutionary dynamics. B. cellulosilyticus exhibited correlated genomic features with both open and closed symbioses, suggesting a mixed symbiosis. An open pan-genome is characterized by abundant accessory gene families, potential horizontal gene transfer (HGT), and diverse mobile genetic elements (MGEs), indicating an innovative gene pool, mainly associated with genomic islands and plasmids. 
However, massive parallel gene loss, weak purifying selection, and accumulation of positively selected mutations were the main drivers of genome reduction in B. cellulosilyticus. Metagenomic read recruitment analyses showed that B. cellulosilyticus members are globally distributed and active in human gut habitats, in line with predominant vertical transmission in the human gut. However, existence and/or high abundance were also detected in non-intestinal tissues, other animal hosts, and non-host environments, indicating occasional horizontal transmission to new niches, thereby creating arenas for the acquisition of novel genes. This case study of adaptive evolution under a mixed host-microbe symbiosis advances our understanding of symbiotic pan-genome evolution. Our results highlight the complexity of genetic evolution in this unusual intestinal symbiont.}, }
@article {pmid38605417, year = {2024}, author = {Roder, T and Pimentel, G and Fuchsmann, P and Stern, MT and von Ah, U and Vergères, G and Peischl, S and Brynildsrud, O and Bruggmann, R and Bär, C}, title = {Scoary2: rapid association of phenotypic multi-omics data with microbial pan-genomes.}, journal = {Genome biology}, volume = {25}, number = {1}, pages = {93}, pmid = {38605417}, issn = {1474-760X}, support = {GRS-070/17//Gebert Rüf Stiftung/ ; }, abstract = {Unraveling bacterial gene function drives progress in various areas, such as food production, pharmacology, and ecology. While omics technologies capture high-dimensional phenotypic data, linking them to genomic data is challenging, leaving 40-60\% of bacterial genes undescribed. To address this bottleneck, we introduce Scoary2, an ultra-fast microbial genome-wide association studies (mGWAS) software. With its data exploration app and improved performance, Scoary2 is the first tool to enable the study of large phenotypic datasets using mGWAS. As proof of concept, we explore the metabolome of yogurts, each produced with a different Propionibacterium freudenreichii strain and discover two genes affecting carnitine metabolism.}, }
@article {pmid38605254, year = {2024}, author = {Cunha-Ferreira, IC and Vizzotto, CS and Freitas, MAM and Peixoto, J and Carvalho, LS and Tótola, MR and Thompson, FL and Krüger, RH}, title = {Genomic and physiological characterization of Kitasatospora sp. nov., an actinobacterium with potential for biotechnological application isolated from Cerrado soil.}, journal = {Brazilian journal of microbiology : [publication of the Brazilian Society for Microbiology]}, volume = {}, number = {}, pages = {}, pmid = {38605254}, issn = {1678-4405}, support = {Conselho Nacional de Desenvolvimento Científico e Tecnológico//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; Coordenação de Aperfeiçoamento de Pessoal de Nível Superior//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; Fundação de Apoio à Pesquisa do Distrito Federal//Fundação de Apoio à Pesquisa do Distrito Federal/ ; Fundação Carlos Chagas Filho de Amparo à Pesquisa do Estado do Rio de Janeiro//Fundação Carlos Chagas Filho de Amparo à Pesquisa do Estado do Rio de Janeiro/ ; }, abstract = {An Actinobacteria - Kitasatospora sp. K002 - was isolated from the soil of Cerrado, a savanna-like Brazilian biome. Herein, we conducted a phylogenetic, phenotypic and physiological characterization, revealing its potential for biotechnological applications. Kitasatospora sp. K002 is an aerobic, non-motile, Gram-positive bacteria that forms grayish-white mycelium on solid cultures and submerged spores with vegetative mycelia on liquid cultures. The strain showed antibacterial activity against Bacillus subtilis, Pseudomonas aeruginosa and Escherichia coli. Genomic analysis indicated that Kitasatospora xanthocidica JCM 4862 is the closest strain to K002, with a dDDH of 32.8-37.8\% and an ANI of 86.86\% and the pangenome investigations identified a high number of rare genes. 
A total of 60 gene clusters of 22 different types were detected by AntiSMASH, and 22 gene clusters showed low similarity (< 10\%) with known compounds, which suggests the potential production of novel bioactive compounds. In addition, phylogenetic analysis and morphophysiological characterization clearly distinguished Kitasatospora sp. K002 from other related species. Therefore, we propose that Kitasatospora sp. K002 should be recognized as a new species of the genus Kitasatospora - Kitasatospora brasiliensis sp. nov. (type strains = K002).}, }
@article {pmid38605175, year = {2024}, author = {Lian, Q and Huettel, B and Walkemeier, B and Mayjonade, B and Lopez-Roques, C and Gil, L and Roux, F and Schneeberger, K and Mercier, R}, title = {A pan-genome of 69 Arabidopsis thaliana accessions reveals a conserved genome structure throughout the global species range.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {38605175}, issn = {1546-1718}, support = {TRR 341/1-456082119//Deutsche Forschungsgemeinschaft (German Research Foundation)/ ; }, abstract = {Although originally primarily a system for functional biology, Arabidopsis thaliana has, owing to its broad geographical distribution and adaptation to diverse environments, developed into a powerful model in population genomics. Here we present chromosome-level genome assemblies of 69 accessions from a global species range. We found that genomic colinearity is very conserved, even among geographically and genetically distant accessions. Along chromosome arms, megabase-scale rearrangements are rare and typically present only in a single accession. This indicates that the karyotype is quasi-fixed and that rearrangements in chromosome arms are counter-selected. Centromeric regions display higher structural dynamics, and divergences in core centromeres account for most of the genome size variations. Pan-genome analyses uncovered 32,986 distinct gene families, 60\% being present in all accessions and 40\% appearing to be dispensable, including 18\% private to a single accession, indicating unexplored genic diversity. These 69 new Arabidopsis thaliana genome assemblies will empower future genetic research.}, }
@article {pmid38600518, year = {2024}, author = {Hong, A and Oliva, M and Köppl, D and Bannai, H and Boucher, C and Gagie, T}, title = {PFP-FM: an accelerated FM-index.}, journal = {Algorithms for molecular biology : AMB}, volume = {19}, number = {1}, pages = {15}, pmid = {38600518}, issn = {1748-7188}, support = {R01AI14180/HG/NHGRI NIH HHS/United States ; }, abstract = {FM-indexes are crucial data structures in DNA alignment, but searching with them usually takes at least one random access per character in the query pattern. Ferragina and Fischer [1] observed in 2007 that word-based indexes often use fewer random accesses than character-based indexes, and thus support faster searches. Since DNA lacks natural word-boundaries, however, it is necessary to parse it somehow before applying word-based FM-indexing. In 2022, Deng et al. [2] proposed parsing genomic data by induced suffix sorting, and showed that the resulting word-based FM-indexes support faster counting queries than standard FM-indexes when patterns are a few thousand characters or longer. In this paper we show that using prefix-free parsing---which takes parameters that let us tune the average length of the phrases---instead of induced suffix sorting, gives a significant speedup for patterns of only a few hundred characters. We implement our method and demonstrate it is between 3 and 18 times faster than competing methods on queries to GRCh38, and is consistently faster on queries made to 25,000, 50,000 and 100,000 SARS-CoV-2 genomes. Hence, it seems our method accelerates the performance of count over all state-of-the-art methods with a moderate increase in the memory. The source code for PFP-FM is available at https://github.com/AaronHong1024/afm .}, }
@article {pmid38592762, year = {2024}, author = {Lazaridi, E and Kapazoglou, A and Gerakari, M and Kleftogianni, K and Passa, K and Sarri, E and Papasotiropoulos, V and Tani, E and Bebeli, PJ}, title = {Crop Landraces and Indigenous Varieties: A Valuable Source of Genes for Plant Breeding.}, journal = {Plants (Basel, Switzerland)}, volume = {13}, number = {6}, pages = {758}, doi = {10.3390/plants13060758}, pmid = {38592762}, issn = {2223-7747}, abstract = {Landraces and indigenous varieties comprise valuable sources of crop species diversity. Their utilization in plant breeding may lead to increased yield and enhanced quality traits, as well as resilience to various abiotic and biotic stresses. Recently, new approaches based on the rapid advancement of genomic technologies such as deciphering of pangenomes, multi-omics tools, marker-assisted selection (MAS), genome-wide association studies (GWAS), and CRISPR/Cas9 gene editing greatly facilitated the exploitation of landraces in modern plant breeding. In this paper, we present a comprehensive overview of the implementation of new genomic technologies and highlight their importance in pinpointing the genetic basis of desirable traits in landraces and indigenous varieties of annual, perennial herbaceous, and woody crop species cultivated in the Mediterranean region. The need for further employment of advanced -omic technologies to unravel the full potential of landraces and indigenous varieties underutilized genetic diversity is also indicated. Ultimately, the large amount of genomic data emerging from the investigation of landraces and indigenous varieties reveals their potential as a source of valuable genes and traits for breeding. The role of landraces and indigenous varieties in mitigating the ongoing risks posed by climate change in agriculture and food security is also highlighted.}, }
@article{pmid38591882,
  author   = {Xing, Y. and Clark, J. R. and Chang, J. D. and Zulk, J. J. and Chirman, D. M. and Piedra, F.-A. and Vaughan, E. E. and Hernandez Santos, H. J. and Patras, K. A. and Maresso, A. W.},
  title    = {Progress toward a vaccine for extraintestinal pathogenic {E. coli} ({ExPEC}) {II}: efficacy of a toxin-autotransporter dual antigen approach},
  journal  = {Infection and immunity},
  year     = {2024},
  pages    = {e0044023},
  doi      = {10.1128/iai.00440-23},
  pmid     = {38591882},
  issn     = {1098-5522},
  abstract = {Extraintestinal pathogenic Escherichia coli (ExPEC) is a leading cause of worldwide morbidity and mortality, the top cause of antimicrobial-resistant (AMR) infections, and the most frequent cause of life-threatening sepsis and urinary tract infections (UTI) in adults. The development of an effective and universal vaccine is complicated by this pathogen's pan-genome, its ability to mix and match virulence factors and AMR genes via horizontal gene transfer, an inability to decipher commensal from pathogens, and its intimate association and co-evolution with mammals. Using a pan virulome analysis of >20,000 sequenced E. coli strains, we identified the secreted cytolysin α-hemolysin (HlyA) as a high priority target for vaccine exploration studies. We demonstrate that a catalytically inactive pure form of HlyA, expressed in an autologous host using its own secretion system, is highly immunogenic in a murine host, protects against several forms of ExPEC infection (including lethal bacteremia), and significantly lowers bacterial burdens in multiple organ systems. Interestingly, the combination of a previously reported autotransporter (SinH) with HlyA was notably effective, inducing near complete protection against lethal challenge, including commonly used infection strains ST73 (CFT073) and ST95 (UTI89), as well as a mixture of 10 of the most highly virulent sequence types and strains from our clinical collection.
Both HlyA and HlyA-SinH combinations also afforded some protection against UTI89 colonization in a murine UTI model. These findings suggest recombinant, inactive hemolysin and/or its combination with SinH warrant investigation in the development of an E. coli vaccine against invasive disease.},
}
@article{pmid38585972,
  author   = {Marin, M. G. and Wippel, C. and Quinones-Olvera, N. and Behruznia, M. and Jeffrey, B. M. and Harris, M. and Mann, B. C. and Rosenthal, A. and Jacobson, K. R. and Warren, R. M. and Li, H. and Meehan, C. J. and Farhat, M. R.},
  title    = {Analysis of the limited {M. tuberculosis} accessory genome reveals potential pitfalls of pan-genome analysis approaches},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2024},
  doi      = {10.1101/2024.03.21.586149},
  pmid     = {38585972},
  abstract = {Pan-genome analysis is a fundamental tool in the study of bacterial genome evolution. Benchmarking the accuracy of pan-genome analysis methods is challenging, because it can be significantly influenced by both the methodology used to compare genomes, as well as differences in the accuracy and representativeness of the genomes analyzed. In this work, we curated a collection of 151 Mycobacterium tuberculosis (Mtb) isolates to evaluate sources of variability in pan-genome analysis. Mtb is characterized by its clonal evolution, absence of horizontal gene transfer, and limited accessory genome, making it an ideal test case for this study. Using a state-of-the-art graph-genome approach, we found that a majority of the structural variation observed in Mtb originates from rearrangement, deletion, and duplication of redundant nucleotide sequences. In contrast, we found that pan-genome analyses that focus on comparison of coding sequences (at the amino acid level) can yield surprisingly variable results, driven by differences in assembly quality and the softwares used. Upon closer inspection, we found that coding sequence annotation discrepancies were a major contributor to inflated Mtb accessory genome estimates. To address this, we developed panqc, a software that detects annotation discrepancies and collapses nucleotide redundancy in pan-genome estimates.
We characterized the effect of the panqc adjustment on both pan-genome analysis of Mtb and E. coli genomes, and highlight how different levels of genomic diversity are prone to unique biases. Overall, this study illustrates the need for careful methodological selection and quality control to accurately map the evolutionary dynamics of a bacterial species.},
}
@article{pmid38584940,
  author   = {Carhuaricra-Huaman, D. and Gonzalez, I. H. L. and Ramos, P. L. and da Silva, A. M. and Setubal, J. C.},
  title    = {Analysis of twelve genomes of the bacterium {Kerstersia} gyiorum from brown-throated sloths ({Bradypus} variegatus), the first from a non-human host},
  journal  = {PeerJ},
  year     = {2024},
  volume   = {12},
  pages    = {e17206},
  pmid     = {38584940},
  issn     = {2167-8359},
  abstract = {Kerstersia gyiorum is a Gram-negative bacterium found in various animals, including humans, where it has been associated with various infections. Knowledge of the basic biology of K. gyiorum is essential to understand the evolutionary strategies of niche adaptation and how this organism contributes to infectious diseases; however, genomic data about K. gyiorum is very limited, especially from non-human hosts. In this work, we sequenced 12 K. gyiorum genomes isolated from healthy free-living brown-throated sloths (Bradypus variegatus) in the Parque Estadual das Fontes do Ipiranga (São Paulo, Brazil), and compared them with genomes from isolates of human origin, in order to gain insights into genomic diversity, phylogeny, and host specialization of this species. Phylogenetic analysis revealed that these K. gyiorum strains are structured according to host. Despite the fact that sloth isolates were sampled from a single geographic location, the intra-sloth K. gyiorum diversity was divided into three clusters, with differences of more than 1,000 single nucleotide polymorphisms between them, suggesting the circulation of various K. gyiorum lineages in sloths. Genes involved in mobilome and defense mechanisms against mobile genetic elements were the main source of gene content variation between isolates from different hosts. Sloth-specific K. gyiorum genome features include an IncN2 plasmid, a phage sequence, and a CRISPR-Cas system. The broad diversity of defense elements in K.
gyiorum (14 systems) may prevent further mobile element flow and explain the low amount of mobile genetic elements in K. gyiorum genomes. Gene content variation may be important for the adaptation of K. gyiorum to different host niches. This study furthers our understanding of diversity, host adaptation, and evolution of K. gyiorum, by presenting and analyzing the first genomes of non-human isolates.},
}
% The ampersand in the journal name is escaped so the field typesets under LaTeX.
@article{pmid38580497,
  author   = {Wolfe, J. M.},
  title    = {Pangenomes at the limits of evolution},
  journal  = {Trends in ecology \& evolution},
  year     = {2024},
  doi      = {10.1016/j.tree.2024.03.008},
  pmid     = {38580497},
  issn     = {1872-8383},
  abstract = {Evolutionary pathways can be random or deterministic. In a recent article, Beavan et al. investigate this balance by applying machine learning models to microbial pangenomes. The presence of almost one-third of genes can be reliably inferred, indicating a surprising amount of predictable evolution.},
}
@article{pmid38577974,
  author   = {Cannon, E. K. and Portwood, J. L. and Hayford, R. K. and Haley, O. C. and Gardiner, J. M. and Andorf, C. M. and Woodhouse, M. R.},
  title    = {Enhanced pan-genomic resources at the maize genetics and genomics database},
  journal  = {Genetics},
  year     = {2024},
  doi      = {10.1093/genetics/iyae036},
  pmid     = {38577974},
  issn     = {1943-2631},
  support  = {//US Department of Agriculture/ ; 5030-21000-068-00-D//Agricultural Research Service/ ; 440229//Corn Insects and Crop Genetics Research Unit in Ames, Iowa/ ; },
  abstract = {Pan-genomes, encompassing the entirety of genetic sequences found in a collection of genomes within a clade, are more useful than single reference genomes for studying species diversity. This is especially true for a species like Zea mays, which has a particularly diverse and complex genome. Presenting pan-genome data, analyses, and visualization is challenging, especially for a diverse species, but more so when pan-genomic data is linked to extensive gene model and gene data, including classical gene information, markers, insertions, expression and proteomic data, and protein structures as is the case at MaizeGDB. Here, we describe MaizeGDB's expansion to include the genic subset of the Zea pan-genome in a pan-gene data center featuring the maize genomes hosted at MaizeGDB, and the outgroup teosinte Zea genomes from the Pan-Andropoganeae project. The new data center offers a variety of browsing and visualization tools, including sequence alignment visualization, gene trees and other tools, to explore pan-genes in Zea that were calculated by the pipeline Pandagma. Combined, these data will help maize researchers study the complexity and diversity of Zea, and to use the comparative functions to validate pan-gene relationships for a selected gene model.},
}
@article{pmid38576793,
  author   = {Borowska-Beszta, M. and Smoktunowicz, M. and Horoszkiewicz, D. and Jonca, J. and Waleron, M. M. and Gawor, J. and Mika, A. and Sledzinski, T. and Waleron, K. and Waleron, M.},
  title    = {Comparative genomics, pangenomics, and phenomic studies of {Pectobacterium} betavasculorum strains isolated from sugar beet, potato, sunflower, and artichoke: insights into pathogenicity, virulence determinants, and adaptation to the host plant},
  journal  = {Frontiers in plant science},
  year     = {2024},
  volume   = {15},
  pages    = {1352318},
  pmid     = {38576793},
  issn     = {1664-462X},
  abstract = {INTRODUCTION: Bacteria of genus Pectobacterium, encompassing economically significant pathogens affecting various plants, includes the species P. betavasculorum, initially associated with beetroot infection. However, its host range is much broader. It causes diseases of sunflower, potato, tomato, carrots, sweet potato, radish, squash, cucumber, and chrysanthemum. To explain this phenomenon, a comprehensive pathogenomic and phenomic characterisation of P. betavasculorum species was performed.
METHODS: Genomes of P. betavasculorum strains isolated from potato, sunflower, and artichoke were sequenced and compared with those from sugar beet isolates. Metabolic profiling and pathogenomic analyses were conducted to assess virulence determinants and adaptation potential. Pathogenicity assays were performed on potato tubers and chicory leaves to confirm in silico predictions of disease symptoms. Phenotypic assays were also conducted to assess the strains ability to synthesise homoserine lactones and siderophores.
RESULTS: The genome size ranged from 4.675 to 4.931 kbp, and GC % was between 51.0% and 51.2%. The pangenome of P. betavasculorum is open and comprises, on average, 4,220 gene families. Of these, 83% of genes are the core genome, and 2% of the entire pangenome are unique genes. Strains isolated from sugar beet have a smaller pangenome size and a higher number of unique genes than those from other plants. Interestingly, genomes of strains from artichoke and sunflower share 391 common CDS that are not present in the genomes of other strains from sugar beet or potato. Those strains have only one unique gene. All strains could use numerous sugars as building materials and energy sources and possessed a high repertoire of virulence determinants in the genomes. P. betavasculorum strains were able to cause disease symptoms on potato tubers and chicory leaves. They were also able to synthesise homoserine lactones and siderophores.
DISCUSSION: The findings underscore the adaptability of P. betavasculorum to diverse hosts and environments. Strains adapted to plants with high sugar content in tissues have a different composition of fatty acids in membranes and a different mechanism of replenishing nitrogen in case of deficiency of this compound than strains derived from other plant species. Extensive phenomics and genomic analyses performed in this study have shown that P. betavasculorum species is an agronomically relevant pathogen.},
}
@article{pmid38506894,
  author   = {Baek, J. and Lawson, J. and Rahimzadeh, V.},
  title    = {Investigating the Roles and Responsibilities of Institutional Signing Officials After Data Sharing Policy Reform for Federally Funded Research in the {United States}: National Survey},
  journal  = {JMIR formative research},
  year     = {2024},
  volume   = {8},
  pages    = {e49822},
  pmid     = {38506894},
  issn     = {2561-326X},
  abstract = {BACKGROUND: New federal policies along with rapid growth in data generation, storage, and analysis tools are together driving scientific data sharing in the United States. At the same, triangulating human research data from diverse sources can also create situations where data are used for future research in ways that individuals and communities may consider objectionable. Institutional gatekeepers, namely, signing officials (SOs), are therefore at the helm of compliant management and sharing of human data for research. Of those with data governance responsibilities, SOs most often serve as signatories for investigators who deposit, access, and share research data between institutions. Although SOs play important leadership roles in compliant data sharing, we know surprisingly little about their scope of work, roles, and oversight responsibilities.
OBJECTIVE: The purpose of this study was to describe existing institutional policies and practices of US SOs who manage human genomic data access, as well as how these may change in the wake of new Data Management and Sharing requirements for National Institutes of Health-funded research in the United States.
METHODS: We administered an anonymous survey to institutional SOs recruited from biomedical research institutions across the United States. Survey items probed where data generated from extramurally funded research are deposited, how researchers outside the institution access these data, and what happens to these data after extramural funding ends.
RESULTS: In total, 56 institutional SOs participated in the survey. We found that SOs frequently approve duplicate data deposits and impose stricter access controls when data use limitations are unclear or unspecified. In addition, 21% (n=12) of SOs knew where data from federally funded projects are deposited after project funding sunsets. As a consequence, most investigators deposit their scientific data into "a National Institutes of Health-funded repository" to meet the Data Management and Sharing requirements but also within the "institution's own repository" or a third-party repository.
CONCLUSIONS: Our findings inform 5 policy recommendations and best practices for US SOs to improve coordination and develop comprehensive and consistent data governance policies that balance the need for scientific progress with effective human data protections.},
}
@article{pmid38573185,
  author   = {Hall, M. B. and Coin, L. J. M.},
  title    = {Pangenome databases improve host removal and mycobacteria classification from clinical metagenomic data},
  journal  = {GigaScience},
  year     = {2024},
  volume   = {13},
  doi      = {10.1093/gigascience/giae010},
  pmid     = {38573185},
  issn     = {2047-217X},
  support  = {2020/MRF1200856//Australian Government Medical Research Future Fund/ ; //Genomics Health Futures Mission/ ; },
  abstract = {BACKGROUND: Culture-free real-time sequencing of clinical metagenomic samples promises both rapid pathogen detection and antimicrobial resistance profiling. However, this approach introduces the risk of patient DNA leakage. To mitigate this risk, we need near-comprehensive removal of human DNA sequences at the point of sequencing, typically involving the use of resource-constrained devices. Existing benchmarks have largely focused on the use of standardized databases and largely ignored the computational requirements of depletion pipelines as well as the impact of human genome diversity.
RESULTS: We benchmarked host removal pipelines on simulated and artificial real Illumina and Nanopore metagenomic samples. We found that construction of a custom kraken database containing diverse human genomes results in the best balance of accuracy and computational resource usage. In addition, we benchmarked pipelines using kraken and minimap2 for taxonomic classification of Mycobacterium reads using standard and custom databases. With a database representative of the Mycobacterium genus, both tools obtained improved specificity and sensitivity, compared to the standard databases for classification of Mycobacterium tuberculosis. Computational efficiency of these custom databases was superior to most standard approaches, allowing them to be executed on a laptop device.
CONCLUSIONS: Customized pangenome databases provide the best balance of accuracy and computational efficiency when compared to standard databases for the task of human read removal and M. tuberculosis read classification from metagenomic samples. Such databases allow for execution on a laptop, without sacrificing accuracy, an especially important consideration in low-resource settings. We make all customized databases and pipelines freely available.},
}
@article{pmid38567256,
  author   = {Olbrich, M. and Bartels, L. and Wohlers, I.},
  title    = {Sequencing technologies and hardware-accelerated parallel computing transform computational genomics research},
  journal  = {Frontiers in bioinformatics},
  year     = {2024},
  volume   = {4},
  pages    = {1384497},
  pmid     = {38567256},
  issn     = {2673-7647},
}
@article{pmid38567138,
  author   = {Jiang, M. and Chen, M. and Zeng, J. and Du, Z. and Xiao, J.},
  title    = {A comprehensive evaluation of the potential of three next-generation short-read-based plant pan-genome construction strategies for the identification of novel non-reference sequence},
  journal  = {Frontiers in plant science},
  year     = {2024},
  volume   = {15},
  pages    = {1371222},
  pmid     = {38567138},
  issn     = {1664-462X},
  abstract = {Pan-genome studies are important for understanding plant evolution and guiding the breeding of crops by containing all genomic diversity of a certain species. Three short-read-based strategies for plant pan-genome construction include iterative individual, iteration pooling, and map-to-pan. Their performance is very different under various conditions, while comprehensive evaluations have yet to be conducted nowadays. Here, we evaluate the performance of these three pan-genome construction strategies for plants under different sequencing depths and sample sizes. Also, we indicate the influence of length and repeat content percentage of novel sequences on three pan-genome construction strategies. Besides, we compare the computational resource consumption among the three strategies. Our findings indicate that map-to-pan has the greatest recall but the lowest precision. In contrast, both two iterative strategies have superior precision but lower recall. Factors of sample numbers, novel sequence length, and the percentage of novel sequences' repeat content adversely affect the performance of all three strategies. Increased sequencing depth improves map-to-pan's performance, while not affecting the other two iterative strategies. For computational resource consumption, map-to-pan demands considerably more than the other two iterative strategies. Overall, the iterative strategy, especially the iterative pooling strategy, is optimal when the sequencing depth is less than 20X.
Map-to-pan is preferable when the sequencing depth exceeds 20X despite its higher computational resource consumption.},
}
@article{pmid38564163,
  author   = {Marlin, R. and Loger, J. S. and Joachim, C. and Ebring, C. and Robert-Siegwald, G. and Pennont, S. and Rose, M. and Raguette, K. and Suez-Panama, V. and Ulric-Gervaise, S. and Lusbec, S. and Bera, O. and Vallard, A. and Aline-Fardin, A. and Colomba, E. and Jean-Laurent, M.},
  title    = {Copy number signatures and {CCNE1} amplification reveal the involvement of replication stress in high-grade endometrial tumors oncogenesis},
  journal  = {Cellular oncology (Dordrecht)},
  year     = {2024},
  pmid     = {38564163},
  issn     = {2211-3436},
  abstract = {PURPOSE: Managing high-grade endometrial cancer in Martinique poses significant challenges. The diversity of copy number alterations in high-grade endometrial tumors, often associated with a TP53 mutation, is a key factor complicating treatment. Due to the high incidence of high-grade tumors with poor prognosis, our study aimed to characterize the molecular signature of these tumors within a cohort of 25 high-grade endometrial cases.
METHODS: We conducted a comprehensive pangenomic analysis to categorize the copy number alterations involved in these tumors. Whole-Exome Sequencing (WES) and Homologous Recombination (HR) analysis were performed. The alterations obtained from the WES were classified into various signatures using the Copy Number Signatures tool available in COSMIC.
RESULTS: We identified several signatures that correlated with tumor stage and disctinct prognoses. These signatures all seem to be linked to replication stress, with CCNE1 amplification identified as the primary driver of oncogenesis in over 70% of tumors analyzed.
CONCLUSION: The identification of CCNE1 amplification, which is currently being explored as a therapeutic target in clinical trials, suggests new treatment strategies for high-grade endometrial cancer. This finding holds particular significance for Martinique, where access to care is challenging.},
}
% The underscore in the strain name is escaped in the title so the field typesets under LaTeX.
@article{pmid38560215,
  author   = {Comba-González, N. B. and Chaves-Moreno, D. and Santamaría-Vanegas, J. and Montoya-Castaño, D.},
  title    = {A pan-genomic assessment: Delving into the genome of the marine epiphyte {Bacillus} altitudinis strain {19\_A} and other very close {Bacillus} strains from multiple environments},
  journal  = {Heliyon},
  year     = {2024},
  volume   = {10},
  number   = {7},
  pages    = {e27820},
  doi      = {10.1016/j.heliyon.2024.e27820},
  pmid     = {38560215},
  issn     = {2405-8440},
  abstract = {Marine macroalgae are the habitat of epiphytic bacteria and provide several conditions for a beneficial biological interaction to thrive. Although Bacillus is one of the most abundant epiphytic genera, genomic information on marine macroalgae-associated Bacillus species remains scarce. In this study, we further investigated our previously published genome of the epiphytic strain Bacillus altitudinis 19_A to find features that could be translated to potential metabolites produced by this microorganism, as well as genes that play a role in its interaction with its macroalgal host. To achieve this goal, we performed a pan-genome analysis of Bacillus sp. and a codon bias assessment, including the genome of the strain Bacillus altitudinis 19_A and 29 complete genome sequences of closely related Bacillus strains isolated from soil, marine environments, plants, extreme environments, air, and food. This genomic analysis revealed that Bacillus altitudinis 19_A possessed unique genes encoding proteins involved in horizontal gene transfer, DNA repair, transcriptional regulation, and bacteriocin biosynthesis. In this comparative analysis, codon bias was not associated with the habitat of the strains studied. Some accessory genes were identified in the Bacillus altitudinis 19_A genome that could be related to its epiphytic lifestyle, as well as gene clusters for the biosynthesis of a sporulation-killing factor and a bacteriocin, showing their potential as a source of antimicrobial peptides.
Our results provide a comprehensive view of the Bacillus altitudinis 19_A genome to understand its adaptation to the marine environment and its potential as a producer of bioactive compounds.},
}
@article{pmid38559026,
  author   = {Marini, S. and Barquero, A. and Wadhwani, A. A. and Bian, J. and Ruiz, J. and Boucher, C. and Prosperi, M.},
  title    = {{OCTOPUS}: Disk-based, Multiplatform, Mobile-friendly Metagenomics Classifier},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2024},
  doi      = {10.1101/2024.03.15.585215},
  pmid     = {38559026},
  abstract = {Portable genomic sequencers such as Oxford Nanopore's MinION enable real-time applications in both clinical and environmental health, e.g., detection of bacterial outbreaks. However, there is a bottleneck in the downstream analytics when bioinformatics pipelines are unavailable, e.g., when cloud processing is unreachable due to absence of Internet connection, or only low-end computing devices can be carried on site. For instance, metagenomics classifiers usually require a large amount of memory or specific operating systems/libraries. In this work, we present a platform-friendly software for portable metagenomic analysis of Nanopore data, the Oligomer-based Classifier of Taxonomic Operational and Pan-genome Units via Singletons (OCTOPUS). OCTOPUS is written in Java, reimplements several features of the popular Kraken2 and KrakenUniq software, with original components for improving metagenomics classification on incomplete/sampled reference databases (e.g., selection of bacteria of public health priority), making it ideal for running on smartphones or tablets. We indexed both OCTOPUS and Kraken2 on a bacterial database with ∼4,000 reference genomes, then simulated a positive (bacterial genomes from the same species, but different genomes) and two negative (viral, mammalian) Nanopore test sets. On the bacterial test set OCTOPUS yielded sensitivity and precision comparable to Kraken2 (94.4% and 99.8% versus 94.5% and 99.1%, respectively).
On non-bacterial sequences (mammals and viral), OCTOPUS dramatically decreased (4-to 16-fold) the false positive rate when compared to Kraken2 (2.1% and 0.7% versus 8.2% and 11.2%, respectively). We also developed customized databases including viruses, and the World Health Organization's set of bacteria of concern for drug resistance, tested with real Nanopore data on an Android smartphone. OCTOPUS is publicly available at https://github.com/DataIntellSystLab/OCTOPUS and https://github.com/Ruiz-HCI-Lab/OctopusMobile .},
}
@article{pmid38558270,
  author   = {Montecillo, J. A. V.},
  title    = {Comparative genomics of the genus {Halioglobus} reveals the genetic basis for the reclassification of {Halioglobus} pacificus as {Parahalioglobus} pacificus gen. nov. comb. nov.},
  journal  = {International microbiology : the official journal of the Spanish Society for Microbiology},
  year     = {2024},
  pmid     = {38558270},
  issn     = {1618-1905},
  abstract = {The genus Halioglobus is one of the environmentally relevant members of the family Halieaceae, class Gammaproteobacteria. At present, the genus is composed of three validly published species. However, in the recent study of the family Halieaceae, the species Halioglobus pacificus was observed to branch outside of the main clade formed by the members of Halioglobus, suggesting its distinct taxonomic placement within the family. In the present study, the taxonomic placement of H. pacificus was reassessed using comparative genomics. Phylogenomic analysis revealed the paraphyletic relationship of H. pacificus with the type species of the genus Halioglobus, and further demonstrated its genus-level placement. This phylogenetic relationship was reinforced by the average nucleotide and amino acid identity values shared by H. pacificus with the members of the family Halieaceae. Moreover, the results of the pan-genome analysis, together with the phenotype data, further supported the exclusion of H. pacificus from the genus Halioglobus. Based on these findings, the species H. pacificus is thereby assigned to a new genus Parahalioglobus gen. nov. as Parahalioglobus pacificus comb. nov.},
}
@article{pmid38555312,
  author   = {Yang, Y. and Wang, H. and Tu, J. and Li, Y. and Guan, H.},
  title    = {Comprehensive genomic analysis of {Burkholderia} arboris {PN-1} reveals its biocontrol potential against {Fusarium} solani-induced root rot in {Panax} notoginseng},
  journal  = {Current genetics},
  year     = {2024},
  volume   = {70},
  number   = {1},
  pages    = {4},
  pmid     = {38555312},
  issn     = {1432-0983},
  support  = {YJSJJ22-A15//Postgraduate research Innovation Fund Project in Yunnan Normal University/ ; 202202AE090025//Major Science and Technology Special Project of Yunnan Province/ ; 202305AF150027//Yunnan Academician Expert Workstation/ ; YSZJGZZ-2021062//Yunnan (Kunming) Academician Expert Workstation/ ; },
  abstract = {Panax notoginseng (Burkill) F.H. Chen, a valuable traditional Chinese medicine, faces significant yield and quality challenges stemming from root rot primarily caused by Fusarium solani. Burkholderia arboris PN-1, isolated from the rhizosphere soil of P. notoginseng, demonstrated a remarkable ability to inhibit the growth of F. solani. This study integrates phenotypic, phylogenetic, and genomic analyses to enhance our understanding of the biocontrol mechanisms employed by B. arboris PN-1. Phenotype analysis reveals that B. arboris PN-1 effectively suppresses P. notoginseng root rot both in vitro and in vivo. The genome of B. arboris PN-1 comprises three circular chromosomes (contig 1: 3,651,544 bp, contig 2: 1,355,460 bp, and contig 3: 3,471,056 bp), with a 66.81% GC content, housing 7,550 protein-coding genes. Notably, no plasmids were detected. Phylogenetic analysis places PN-1 in close relation to B. arboris AU14372, B. arboris LMG24066, and B. arboris MEC_B345. Average nucleotide identity (ANI) values confirm the PN-1 classification as B. arboris. Comparative analysis with seven other B. arboris strains identified 4,628 core genes in B. arboris PN-1. The pan-genome of B. arboris appears open but may approach closure.
Whole-genome sequencing revealed 265 carbohydrate-active enzymes and identified 9 gene clusters encoding secondary metabolites. This comprehensive investigation enhances our understanding of B. arboris genomes, paving the way for their potential as effective biocontrol agents against fungal plant pathogens in the future.},
}
% Corporate author: double braces keep the journal name from being parsed as a personal name.
@article{pmid38550074,
  author   = {{BioMed Research International}},
  title    = {Retracted: {Epi-Gene}: An {R}-Package for Easy Pan-Genome Analysis},
  journal  = {BioMed research international},
  year     = {2024},
  volume   = {2024},
  pages    = {9830450},
  doi      = {10.1155/2024/9830450},
  pmid     = {38550074},
  issn     = {2314-6141},
  abstract = {[This retracts the article DOI: 10.1155/2021/5585586.].},
}
@article{pmid38543563,
  author   = {Grizon, A. and Theil, S. and Helinck, S. and Gerber, P. and Bonnarme, P. and Chassard, C.},
  title    = {Genomic Characterization of Wild {Lactobacillus} delbrueckii Strains Reveals Low Diversity but Strong Typicity},
  journal  = {Microorganisms},
  year     = {2024},
  volume   = {12},
  number   = {3},
  doi      = {10.3390/microorganisms12030512},
  pmid     = {38543563},
  issn     = {2076-2607},
  support  = {convention CIFRE N° 2020/0839//Association Nationale de la Recherche et de la Technologie/ ; },
  abstract = {Investigating the diversity of a given species could give clues for the development of autochthonous starter cultures. However, few studies have focused on the intraspecies diversity of Lactobacillus delbrueckii strains, a technologically important lactic acid bacterium for the dairy industry. For this reason, Lactobacillus delbrueckii strains from the Saint-Nectaire Protected Designation of Origin (PDO) area were isolated and characterized. Genetic diversity was determined based on core genome phylogenetic reconstruction and pangenome analysis, while phenotypic assessments encompassed proteolysis and volatile compound production potential. A total of 15 L. delbrueckii ssp. lactis unique new strains were obtained. The genetic analysis and further proteolytic activities measurement revealed low variability among these Saint-Nectaire strains, while substantial genetic variability was observed within the L. delbrueckii ssp. lactis subspecies as a whole. The volatile compound profiles slightly differed among strains, and some strains produced volatile compounds that could be of particular interest for cheese flavor development. While the genetic diversity among Saint-Nectaire strains was relatively modest compared to overall subspecies diversity, their distinct characteristics and pronounced differentiation from publicly available genomes position them as promising candidates for developing autochthonous starter cultures for cheese production.},
}
@article{pmid38541621,
  author   = {Park, S and Kim, I and Chhetri, G and Jung, Y and Woo, H and Seo, T},
  title    = {Draft Genome Sequence Analyses of Two Novel {Marinobacter} suadae sp. nov. and {Wenyingzhuangia} gilva sp. nov. Isolated from the Root of {Suaeda} japonica {Makino}.},
  journal  = {Life (Basel, Switzerland)},
  year     = {2024},
  volume   = {14},
  number   = {3},
  doi      = {10.3390/life14030296},
  pmid     = {38541621},
  issn     = {2075-1729},
  support  = {2022R1F1A1070108//National Research Foundation of Korea/ ; NIBR202304204//National Institute of Biological Resources/ ; },
  abstract = {Gram-negative, rod-shaped, and aerobic bacteria designated chi1[T] and chi5[T] were isolated from the root of Suaeda japonica Makino. Phylogenetics utilizing 16S rRNA and whole-genome sequences of the two novel strains chi1[T] and chi5[T] confirmed that they were related to the genera Marinobacter and Wenyingzhuangia, respectively. For the novel strains chi1[T] and chi5[T], the digital DNA-DNA hybridization values (19-20% and 22.1-36.6%, respectively) and average nucleotide identity values (74.4-76.5% and 79.1-88.9%, respectively) fell within the range for the genera Marinobacter and Wenyingzhuangia, respectively. Pangenome analyses of the novel strains chi1[T] and chi5[T] revealed 357 and 368 singletons genes, respectively. The genomic DNA G + C contents of the strains chi1[T] and chi5[T] were 57.2% and 31.5%, respectively. The major fatty acids of strain chi1[T] were C12:0, C16:0, and summed feature 3 (C16:1ω6c and/or C16:1ω7c), while those of the strain chi5[T] were iso-C15:0 3OH, iso-C17:0 3OH, and iso-C15:0. Data from the phylogenetic, phylogenomic, pangenome, genomic, physiological, and biochemical analyses indicated that the novel strains were distinct. Therefore, we propose the names Marinobacter suadae (type strain chi1[T] = KACC 23259[T] = TBRC 17652[T]) and Wenyingzhangia gilva (type strain chi5[T] = KACC 23262[T] = TBRC 17900[T]) for the studied bacterial strains.},
}
@article{pmid38537199,
  author   = {Meng, T and Jiao, H and Zhang, Y and Zhou, Y and Chen, S and Wang, X and Yang, B and Sun, J and Geng, X and Ayhan, DH and Guo, L},
  title    = {{FoPGDB}: a pangenome database of {Fusarium} oxysporum, a cross-kingdom fungal pathogen.},
  journal  = {Database : the journal of biological databases and curation},
  year     = {2024},
  volume   = {2024},
  doi      = {10.1093/database/baae017},
  pmid     = {38537199},
  issn     = {1758-0463},
  support  = {//National Natural Science Foundation of China/ ; //Taishan Scholars Program of Shandong Province/ ; //Shandong Provincial Science and Technology Innovation Fund/ ; },
  abstract = {Pangenomes, capturing the genetic diversity of a species or genus, are essential to understanding the ecology, pathobiology and evolutionary mechanisms of fungi that cause infection in crops and humans. However, fungal pangenome databases remain unavailable. Here, we report the first fungal pangenome database, specifically for Fusarium oxysporum species complex (FOSC), a group of cross-kingdom pathogens causing devastating vascular wilt to over 100 plant species and life-threatening fusariosis to immunocompromised humans. The F. oxysporum Pangenome Database (FoPGDB) is a comprehensive resource integrating 35 high-quality FOSC genomes, coupled with robust analytical tools. FoPGDB allows for both gene-based and graph-based exploration of the F. oxysporum pangenome. It also curates a large repository of putative effector sequences, crucial for understanding the mechanisms of FOSC pathogenicity. With an assortment of functionalities including gene search, genomic variant exploration and tools for functional enrichment, FoPGDB provides a platform for in-depth investigations of the genetic diversity and adaptability of F. oxysporum. The modular and user-friendly interface ensures efficient data access and interpretation. FoPGDB promises to be a valuable resource for F. oxysporum research, contributing to our understanding of this pathogen's pangenomic landscape and aiding in the development of novel disease management strategies. Database URL: http://www.fopgdb.site.},
}
@article{pmid38534692,
  author   = {Karampatakis, T and Tsergouli, K and Behzadi, P},
  title    = {Pan-Genome Plasticity and Virulence Factors: A Natural Treasure Trove for {Acinetobacter} baumannii.},
  journal  = {Antibiotics (Basel, Switzerland)},
  year     = {2024},
  volume   = {13},
  number   = {3},
  doi      = {10.3390/antibiotics13030257},
  pmid     = {38534692},
  issn     = {2079-6382},
  abstract = {Acinetobacter baumannii is a Gram-negative pathogen responsible for a variety of community- and hospital-acquired infections. It is recognized as a life-threatening pathogen among hospitalized individuals and, in particular, immunocompromised patients in many countries. A. baumannii, as a member of the ESKAPE group, encompasses high genomic plasticity and simultaneously is predisposed to receive and exchange the mobile genetic elements (MGEs) through horizontal genetic transfer (HGT). Indeed, A. baumannii is a treasure trove that contains a high number of virulence factors. In accordance with these unique pathogenic characteristics of A. baumannii, the authors aim to discuss the natural treasure trove of pan-genome and virulence factors pertaining to this bacterial monster and try to highlight the reasons why this bacterium is a great concern in the global public health system.},
}
@article{pmid38532808,
  author   = {Bundhoo, E and Ghoorah, AW and Jaufeerally-Fakim, Y},
  title    = {Large-scale Pan Genomic Analysis of {Mycobacterium} tuberculosis Reveals Key Insights Into Molecular Evolutionary Rate of Specific Processes and Functions.},
  journal  = {Evolutionary bioinformatics online},
  year     = {2024},
  volume   = {20},
  pages    = {11769343241239463},
  pmid     = {38532808},
  issn     = {1176-9343},
  abstract = {Mycobacterium tuberculosis (Mtb) is the causative agent of tuberculosis (TB), an infectious disease that is a major killer worldwide. Due to selection pressure caused by the use of antibacterial drugs, Mtb is characterised by mutational events that have given rise to multi drug resistant (MDR) and extensively drug resistant (XDR) phenotypes. The rate at which mutations occur is an important factor in the study of molecular evolution, and it helps understand gene evolution. Within the same species, different protein-coding genes evolve at different rates. To estimate the rates of molecular evolution of protein-coding genes, a commonly used parameter is the ratio dN/dS, where dN is the rate of non-synonymous substitutions and dS is the rate of synonymous substitutions. Here, we determined the estimated rates of molecular evolution of select biological processes and molecular functions across 264 strains of Mtb. We also investigated the molecular evolutionary rates of core genes of Mtb by computing the dN/dS values, and estimated the pan genome of the 264 strains of Mtb. Our results show that the cellular amino acid metabolic process and the kinase activity function evolve at a significantly higher rate, while the carbohydrate metabolic process evolves at a significantly lower rate for M. tuberculosis. These high rates of evolution correlate well with Mtb physiology and pathogenicity. We further propose that the core genome of M. tuberculosis likely experiences varying rates of molecular evolution which may drive an interplay between core genome and accessory genome during M. tuberculosis evolution.},
}
@article{pmid38529905,
  author   = {Crowley, C and Selvaraj, A and Hariharan, A and Healy, CM and Moran, GP},
  title    = {{Fusobacterium} nucleatum subsp. polymorphum recovered from malignant and potentially malignant oral disease exhibit heterogeneity in adhesion phenotypes and adhesin gene copy number, shaped by inter-subspecies horizontal gene transfer and recombination-derived mosaicism.},
  journal  = {Microbial genomics},
  year     = {2024},
  volume   = {10},
  number   = {3},
  doi      = {10.1099/mgen.0.001217},
  pmid     = {38529905},
  issn     = {2057-5858},
  abstract = {Fusobacterium nucleatum is an anaerobic commensal of the oral cavity associated with periodontitis and extra-oral diseases, including colorectal cancer. Previous studies have shown an increased relative abundance of this bacterium associated with oral dysplasia or within oral tumours. Using direct culture, we found that 75 % of Fusobacterium species isolated from malignant or potentially malignant oral mucosa were F. nucleatum subsp. polymorphum. Whole genome sequencing and pangenome analysis with Panaroo was carried out on 76 F. nucleatum subsp. polymorphum genomes. F. nucleatum subsp. polymorphum was shown to possesses a relatively small core genome of 1604 genes in a pangenome of 7363 genes. Phylogenetic analysis based on the core genome shows the isolates can be separated into three main clades with no obvious genotypic associations with disease. Isolates recovered from healthy and diseased sites in the same patient are generally highly related. A large repertoire of adhesins belonging to the type V secretion system (TVSS) could be identified with major variation in repertoire and copy number between strains. Analysis of intergenic recombination using fastGEAR showed that adhesin complement is shaped by horizontal gene transfer and recombination. Recombination events at TVSS adhesin genes were not only common between lineages of subspecies polymorphum, but also between different subspecies of F. nucleatum. Strains of subspecies polymorphum with low copy numbers of TVSS adhesin encoding genes tended to have the weakest adhesion to oral keratinocytes. This study highlights the genetic heterogeneity of F. nucleatum subsp. polymorphum and provides a new framework for defining virulence in this organism.},
}
% The export contained an empty author slot ("and , and") between Holley and
% Mirabello, which breaks BibTeX name parsing. It is removed here; it may stand
% for a dropped group/consortium author -- TODO confirm against the preprint.
@article{pmid38529502,
  author   = {Zhou, Q and Ghezelji, M and Hari, A and Ford, MKB and Holley, C and Mirabello, L and Chanock, S and Sahinalp, SC and Numanagić, I},
  title    = {Geny: A Genotyping Tool for Allelic Decomposition of Killer Cell Immunoglobulin-Like Receptor Genes.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2024},
  doi      = {10.1101/2024.02.27.582413},
  pmid     = {38529502},
  abstract = {Accurate genotyping of Killer cell Immunoglobulin-like Receptor (KIR) genes plays a pivotal role in enhancing our understanding of innate immune responses, disease correlations, and the advancement of personalized medicine. However, due to the high variability of the KIR region and high level of sequence similarity among different KIR genes, the currently available genotyping methods are unable to accurately infer copy numbers, genotypes and haplotypes of individual KIR genes from next-generation sequencing data. Here we introduce Geny, a new computational tool for precise genotyping of KIR genes. Geny utilizes available KIR haplotype databases and proposes a novel combination of expectation-maximization filtering schemes and integer linear programming-based combinatorial optimization models to resolve ambiguous reads, provide accurate copy number estimation and estimate the haplotype of each copy for the genes within the KIR region. We evaluated Geny on a large set of simulated short-read datasets covering the known validated KIR region assemblies and a set of Illumina short-read samples sequenced from 25 validated samples from the Human Pangenome Reference Consortium collection and showed that it outperforms the existing genotyping tools in terms of accuracy, precision and recall. We envision Geny becoming a valuable resource for understanding immune system response and consequently advancing the field of patient-centric medicine.},
}
@article{pmid38524570,
  author   = {Nwabor, LC and Chukamnerd, A and Nwabor, OF and Surachat, K and Pomwised, R and Jeenkeawpiam, K and Chusri, S},
  title    = {Genotypic and phenotypic mechanisms underlying antimicrobial resistance and synergistic efficacy of rifampicin-based combinations against carbapenem-resistant {Acinetobacter} baumannii.},
  journal  = {Heliyon},
  year     = {2024},
  volume   = {10},
  number   = {6},
  pages    = {e27326},
  pmid     = {38524570},
  issn     = {2405-8440},
  abstract = {PURPOSE: Carbapenem-resistant Acinetobacter baumannii (CRAB) is an urgent concern to public health. This study focuses on exploring the resistance mechanisms and the in vitro results of using rifampicin in combination with conventional antibiotics for the management of CRAB.
METHODS: The synergistic and bactericidal effects of rifampicin with conventional antibiotics were evaluated using chequerboard assay and time-kill assay, while the phenotypic and genotypic characteristics of resistant determinants were performed by efflux pump detection and whole genome sequencing on 29 isolates from ICU patients with underlying health diseases.
RESULTS: The isolates showed multidrug resistance, with over 60% showing addictive responses to rifampicin-based combinations at FICI ranging from 0.6 to 0.8. The time-kill assay revealed 99 % killing for rifampicin and minocycline combination in one isolate at 1/4 MIC rifampicin plus 1/4 MIC minocycline, while a bacteriostatic effect was observed at 1/2 MIC rifampici plus 1/2 MIC for a second isolate. Combination with tigecycline resulted in a 99% killing in two out of three isolates with a 2.5-3 log reduction in CFU at 1/4 MIC rifampicin plus 1/4 MIC tigecycline. Rifampicin plus colistin exhibited bactericidal activity against three out of four isolates. The combinations of rifampicin with ciprofloxacin, chloramphenicol, and trimethoprim-sulfamethoxazole were ineffective against the isolates. In addition, a 4-fold reduction in rifampicin MIC was observed in 2 out of 14 isolates in the presence of an efflux pump inhibitor. The pan-genome study demonstrated a progressive evolution with an accessory genome estimated to cover 58% of the matrix. Seven of the ten sequenced isolates belong to sequence type 2 (ST2), while one isolate each was assigned to ST164, ST16, and ST25. Furthermore, 11 plasmids, 34 antimicrobial resistance (AMR) genes, and 65 virulence-associated genes were predicted from the whole genome data. The blaOXA-23blaADC-25, blaOXA-66, blaPER-7, aph(6)-Id, armA, and arr-3 were prevalent among the isolates. Sequence alignment of the bacteria genome to the reference strain revealed a deleterious mutation in the rpoB gene of 4 isolates.
CONCLUSION: The study suggests that rifampicin in combination with either minocycline, tigecycline, or colistin might be a treatment option for CRAB clinical isolates. In addition, genotypic analysis of the bacteria isolates may inform the clinician of the suitable drug regimen for the management of specific bacteria variants.},
}
@article{pmid38521963,
  author   = {Sengupta, P and Muthamilselvi Sivabalan, SK and Singh, NK and Raman, K and Venkateswaran, K},
  title    = {Genomic, functional, and metabolic enhancements in multidrug-resistant {Enterobacter} bugandensis facilitating its persistence and succession in the {International Space Station}.},
  journal  = {Microbiome},
  year     = {2024},
  volume   = {12},
  number   = {1},
  pages    = {62},
  pmid     = {38521963},
  issn     = {2049-2618},
  support  = {19-12829-26//2012 Space Biology NNH12ZTT001N/ ; MTR/2020/000490//Science and Engineering Board (SERB) MATRICS/ ; },
  abstract = {BACKGROUND: The International Space Station (ISS) stands as a testament to human achievement in space exploration. Despite its highly controlled environment, characterised by microgravity, increased CO 2 levels, and elevated solar radiation, microorganisms occupy a unique niche. These microbial inhabitants play a significant role in influencing the health and well-being of astronauts on board. One microorganism of particular interest in our study is Enterobacter bugandensis, primarily found in clinical specimens including the human gastrointestinal tract, and also reported to possess pathogenic traits, leading to a plethora of infections.
RESULTS: Distinct from their Earth counterparts, ISS E. bugandensis strains have exhibited resistance mechanisms that categorise them within the ESKAPE pathogen group, a collection of pathogens recognised for their formidable resistance to antimicrobial treatments. During the 2-year Microbial Tracking 1 mission, 13 strains of multidrug-resistant E. bugandensis were isolated from various locations within the ISS. We have carried out a comprehensive study to understand the genomic intricacies of ISS-derived E. bugandensis in comparison to terrestrial strains, with a keen focus on those associated with clinical infections. We unravel the evolutionary trajectories of pivotal genes, especially those contributing to functional adaptations and potential antimicrobial resistance. A hypothesis central to our study was that the singular nature of the stresses of the space environment, distinct from any on Earth, could be driving these genomic adaptations. Extending our investigation, we meticulously mapped the prevalence and distribution of E. bugandensis across the ISS over time. This temporal analysis provided insights into the persistence, succession, and potential patterns of colonisation of E. bugandensis in space. Furthermore, by leveraging advanced analytical techniques, including metabolic modelling, we delved into the coexisting microbial communities alongside E. bugandensis in the ISS across multiple missions and spatial locations. This exploration revealed intricate microbial interactions, offering a window into the microbial ecosystem dynamics within the ISS.
CONCLUSIONS: Our comprehensive analysis illuminated not only the ways these interactions sculpt microbial diversity but also the factors that might contribute to the potential dominance and succession of E. bugandensis within the ISS environment. The implications of these findings are twofold. Firstly, they shed light on microbial behaviour, adaptation, and evolution in extreme, isolated environments. Secondly, they underscore the need for robust preventive measures, ensuring the health and safety of astronauts by mitigating risks associated with potential pathogenic threats. Video Abstract.},
}
@article{pmid38517169,
  author   = {Paulay, A and Grimaud, GM and Caballero, R and Laroche, B and Leclerc, M and Labarthe, S and Maguin, E},
  title    = {Design of a proteolytic module for improved metabolic modeling of {Bacteroides} caccae.},
  journal  = {mSystems},
  year     = {2024},
  pages    = {e0015324},
  doi      = {10.1128/msystems.00153-24},
  pmid     = {38517169},
  issn     = {2379-5077},
  abstract = {The gut microbiota plays a crucial role in health and is significantly modulated by human diets. In addition to Western diets which are rich in proteins, high-protein diets are used for specific populations or indications, mainly weight loss. In this study, we investigated the effect of protein supplementation on Bacteroides caccae, a Gram-negative gut symbiont. The supplementation with whey proteins led to a significant increase in growth rate, final biomass, and short-chain fatty acids production. A comprehensive genomic analysis revealed that B. caccae possesses a set of 156 proteases with putative intracellular and extracellular localization and allowed to identify amino acid transporters and metabolic pathways. We developed a fully curated genome-scale metabolic model of B. caccae that incorporated its proteolytic activity and simulated its growth and production of fermentation-related metabolites in response to the different growth media. We validated the model by comparing the predicted phenotype to experimental data. The model accurately predicted B. caccae's growth and metabolite production (R[2] = 0.92 for the training set and R[2] = 0.89 for the validation set). We found that accounting for both ATP consumption related to proteolysis, and whey protein accessibility is necessary for accurate predictions of metabolites production. These results provide insights into B. caccae's adaptation to a high-protein diet and its ability to utilize proteins as a source of nutrition. The proposed model provides a useful tool for understanding the feeding mechanism of B. caccae in the gut microbiome.
IMPORTANCE: Microbial proteolysis is understudied despite the availability of dietary proteins for the gut microbiota. Here, the proteolytic potential of the gut symbiont Bacteroides caccae was analyzed for the first time using pan-genomics. This sketches a well-equipped bacteria for protein breakdown, capable of producing 156 different proteases with a broad spectrum of cleavage targets. This functional potential was confirmed by the enhancement of growth and metabolic activities at high protein levels. Proteolysis was included in a B. caccae metabolic model which was fitted with the experiments and validated on external data. This model pinpoints the links between protein availability and short-chain fatty acids production, and the importance for B. caccae to gain access to glutamate and asparagine to promote growth. This integrated approach can be generalized to other symbionts and upscaled to complex microbiota to get insights into the ecological impact of proteins on the gut microbiota.},
}
@article{pmid38514651,
  author   = {Xie, S and Cui, L and Liao, Z and Zhu, J and Ren, S and Niu, K and Li, H and Jiang, F and Wu, J and Wang, J and Wu, J and Song, B and Wu, W and Peng, C},
  title    = {Genomic analysis of lumpy skin disease virus asian variants and evaluation of its cellular tropism.},
  journal  = {NPJ vaccines},
  year     = {2024},
  volume   = {9},
  number   = {1},
  pages    = {65},
  pmid     = {38514651},
  issn     = {2059-0105},
  support  = {32172822//National Natural Science Foundation of China (National Science Foundation of China)/ ; },
  abstract = {Lumpy skin disease virus (LSDV) is a poxvirus that mainly affects cattle and can lead to symptoms such as severe reduction in milk production as well as infertility and mortality, which has resulted in dramatic economic loss in affected countries in Africa, Europe, and Asia. In this study, we successfully isolated two strains of LSDV from different geographical regions in China. Comparative genomic analyses were performed by incorporating additional LSDV whole genome sequences reported in other areas of Asia. Our analyses revealed that LSDV exhibited an 'open' pan-genome. Phylogenetic analysis unveiled distinct branches of LSDV evolution, signifying the prevalence of multiple lineages of LSDV across various regions in Asia. In addition, a reporter LSDV expressing eGFP directed by a synthetic poxvirus promoter was generated and used to evaluate the cell tropism of LSDV in various mammalian and avian cell lines. Our results demonstrated that LSDV replicated efficiently in several mammalian cell lines, including human A549 cells. In conclusion, our results underscore the necessity for strengthening LSD outbreak control measures and continuous epidemiological surveillance.},
}
@article{pmid38509359,
  author   = {Zepeda-Rivera, M and Minot, SS and Bouzek, H and Wu, H and Blanco-Míguez, A and Manghi, P and Jones, DS and LaCourse, KD and Wu, Y and McMahon, EF and Park, SN and Lim, YK and Kempchinsky, AG and Willis, AD and Cotton, SL and Yost, SC and Sicinska, E and Kook, JK and Dewhirst, FE and Segata, N and Bullman, S and Johnston, CD},
  title    = {A distinct {Fusobacterium} nucleatum clade dominates the colorectal cancer niche.},
  journal  = {Nature},
  year     = {2024},
  pmid     = {38509359},
  issn     = {1476-4687},
  abstract = {Fusobacterium nucleatum (Fn), a bacterium present in the human oral cavity and rarely found in the lower gastrointestinal tract of healthy individuals[1], is enriched in human colorectal cancer (CRC) tumours[2-5]. High intratumoural Fn loads are associated with recurrence, metastases and poorer patient prognosis[5-8]. Here, to delineate Fn genetic factors facilitating tumour colonization, we generated closed genomes for 135 Fn strains; 80 oral strains from individuals without cancer and 55 unique cancer strains cultured from tumours from 51 patients with CRC. Pangenomic analyses identified 483 CRC-enriched genetic factors. Tumour-isolated strains predominantly belong to Fn subspecies animalis (Fna). However, genomic analyses reveal that Fna, considered a single subspecies, is instead composed of two distinct clades (Fna C1 and Fna C2). Of these, only Fna C2 dominates the CRC tumour niche. Inter-Fna analyses identified 195 Fna C2-associated genetic factors consistent with increased metabolic potential and colonization of the gastrointestinal tract. In support of this, Fna C2-treated mice had an increased number of intestinal adenomas and altered metabolites. Microbiome analysis of human tumour tissue from 116 patients with CRC demonstrated Fna C2 enrichment. Comparison of 62 paired specimens showed that only Fna C2 is tumour enriched compared to normal adjacent tissue. This was further supported by metagenomic analysis of stool samples from 627 patients with CRC and 619 healthy individuals. Collectively, our results identify the Fna clade bifurcation, show that specifically Fna C2 drives the reported Fn enrichment in human CRC and reveal the genetic underpinnings of pathoadaptation of Fna C2 to the CRC niche.},
}
@article{pmid38509222,
  author   = {Xie, L and Gong, X and Yang, K and Huang, Y and Zhang, S and Shen, L and Sun, Y and Wu, D and Ye, C and Zhu, QH and Fan, L},
  title    = {Technology-enabled great leap in deciphering plant genomes.},
  journal  = {Nature plants},
  year     = {2024},
  pmid     = {38509222},
  issn     = {2055-0278},
  support  = {2022C02032//Hainan Provincial Department of Science and Technology (Department of Science and Technology of Hainan Province)/ ; },
  abstract = {Plant genomes provide essential and vital basic resources for studying many aspects of plant biology and applications (for example, breeding). From 2000 to 2020, 1,144 genomes of 782 plant species were sequenced. In the past three years (2021-2023), 2,373 genomes of 1,031 plant species, including 793 newly sequenced species, have been assembled, representing a great leap. The 2,373 newly assembled genomes, of which 63 are telomere-to-telomere assemblies and 921 have been generated in pan-genome projects, cover the major phylogenetic clades. Substantial advances in read length, throughput, accuracy and cost-effectiveness have notably simplified the achievement of high-quality assemblies. Moreover, the development of multiple software tools using different algorithms offers the opportunity to generate more complete and complex assemblies. A database named N3: plants, genomes, technologies has been developed to accommodate the metadata associated with the 3,517 genomes that have been sequenced from 1,575 plant species since 2000. We also provide an outlook for emerging opportunities in plant genome sequencing.},
}
@article{pmid38503725,
  author   = {Zhao, X and Yu, J and Chanda, B and Zhao, J and Wu, S and Zheng, Y and Sun, H and Levi, A and Ling, KS and Fei, Z},
  title    = {Genomic and pangenomic analyses provide insights into the population history and genomic diversification of bottle gourd.},
  journal  = {The New phytologist},
  year     = {2024},
  doi      = {10.1111/nph.19673},
  pmid     = {38503725},
  issn     = {1469-8137},
  support  = {2015-51181-24285//National Institute of Food and Agriculture/ ; 2020-51181-32139//National Institute of Food and Agriculture/ ; },
  abstract = {Bottle gourd (Lagenaria siceraria (Mol.) Strandl.) is an economically important vegetable crop and one of the earliest domesticated crops. However, the population history and genomic diversification of bottle gourd have not been extensively studied. We generated a comprehensive bottle gourd genome variation map from genome sequences of 197 world-wide representative accessions, which enables a genome-wide association study for identifying genomic loci associated with resistance to zucchini yellow mosaic virus, and constructed a bottle gourd pangenome that harbors 1534 protein-coding genes absent in the reference genome. Demographic analyses uncover that domesticated bottle gourd originated in Southern Africa c. 12 000 yr ago, and subsequently radiated to the New World via the Atlantic drift and to Eurasia through the efforts of early farmers in the initial Holocene. The identified highly differentiated genomic regions among different bottle gourd populations harbor many genes contributing to their local adaptations such as those related to disease resistance and stress tolerance. Presence/absence variation analysis of genes in the pangenome reveals numerous genes including those involved in abiotic/biotic stress responses that have been under selection during the world-wide expansion of bottle gourds. The bottle gourd variation map and pangenome provide valuable resources for future functional studies and genomics-assisted breeding.},
}
@article{pmid38502137,
  author   = {Guo, X and Zhang, Z and Chen, Q and Wang, L and Xu, X and Wei, Z and Zhang, Y and Chen, K and Wang, Z and Lu, X and Liang, Q},
  title    = {Whole Genome Sequencing Highlights the Pathogenic Profile in {Nocardia} Keratitis.},
  journal  = {Investigative ophthalmology \& visual science},
  year     = {2024},
  volume   = {65},
  number   = {3},
  pages    = {26},
  doi      = {10.1167/iovs.65.3.26},
  pmid     = {38502137},
  issn     = {1552-5783},
  abstract = {PURPOSE: Nocardia keratitis is a serious and sight-threatening condition. This study aims to reveal the virulence and antimicrobial resistance gene profile of Nocardia strains using whole genome sequencing.
METHODS: Whole-genome sequencing was performed on 23 cornea-derived Nocardia strains. Together with genomic data from the respiratory tract and the environment, 141 genomes were then utilized for phylogenetic and pan-genome analyses, followed by virulence and antibiotic resistance analysis. The correlations between virulence genes and pathogenicity were experimentally validated, including the characteristics of Nocardia colonies and clinical and histopathological evaluations of Nocardia keratitis mice models.
RESULTS: Whole-genome sequencing of 141 Nocardia strains revealed a mean of 220 virulence genes contributed to bacterial pathogenesis. The mce gene family analysis led to the categorization of strains from the cornea into groups A, B, and C. The colonies of group C had the largest diameter, height, and fastest growth rate. The size of corneal ulcers and the clinical scores showed a significant increase in mouse models induced by group C. The relative expression levels of pro-inflammatory cytokines (CD4, IFN-γ, IL-6Rα, and TNF-α) in the lesion area exhibited an increasing trend from group A to group C. Antibiotic resistance genes (ARGs) spanned nine distinct drug classes, four resistance mechanisms, and seven primary antimicrobial resistance gene families.
CONCLUSIONS: Whole genome sequencing highlights the pathogenic role of mce gene family in Nocardia keratitis. Its distribution pattern may contribute to the distinct characteristics of the growth of Nocardia colonies and the clinical severity of the mice models.},
}
@article{pmid38501935,
  author   = {Piper, K. R. and Ikhimiukor, O. O. and Souza, S. S. R. and Garcia-Aroca, T. and Andam, C. P.},
  title    = {Evolutionary dynamics of the accessory genomes of {Staphylococcus} aureus.},
  journal  = {mSphere},
  year     = {2024},
  pages    = {e0075123},
  doi      = {10.1128/msphere.00751-23},
  pmid     = {38501935},
  issn     = {2379-5042},
  abstract = {Staphylococcus aureus is a ubiquitous commensal and opportunistic bacterial pathogen that can cause a wide gamut of infections, which are exacerbated by the presence of multidrug-resistant and methicillin-resistant S. aureus. S. aureus is genetically heterogeneous and consists of numerous distinct lineages. Using 558 complete genomes of S. aureus, we aim to determine how the accessory genome content among phylogenetic lineages of S. aureus is structured and has evolved. Bayesian hierarchical clustering identified 10 sequence clusters, of which seven contained major sequence types (ST 1, 5, 8, 30, 59, 239, and 398). The seven sequence clusters differed in their accessory gene content, including genes associated with antimicrobial resistance and virulence. Focusing on the two largest clusters, BAPS8 and BAPS10, and each consisting mostly of ST5 and ST8, respectively, we found that the structure and connected components in the co-occurrence networks of accessory genomes varied between them. These differences are explained, in part, by the variation in the rates at which the two sequence clusters gained and lost accessory genes, with the highest rate of gene accumulation occurring recently in their evolutionary histories. We also identified a divergent group within BAPS10 that has experienced high gene gain and loss early in its history. Together, our results show highly variable and dynamic accessory genomes in S. aureus that are structured by the history of the specific lineages that carry them.
IMPORTANCE: Staphylococcus aureus is an opportunistic, multi-host pathogen that can cause a variety of benign and life-threatening infections. Our results revealed considerable differences in the structure and evolution of the accessory genomes of major lineages within S. aureus. Such genomic variation within a species can have important implications on disease epidemiology, pathogenesis of infection, and interactions with the vertebrate host. Our findings provide important insights into the underlying genetic basis for the success of S. aureus as a highly adaptable and resistant pathogen, which will inform current efforts to control and treat staphylococcal diseases.},
}
@article{pmid38500021,
  author   = {Zouagui, R. and Zouagui, H. and Aurag, J. and Ibrahimi, A. and Sbabou, L.},
  title    = {Functional analysis and comparative genomics of {Rahnella} perminowiae {S11P1} and {Variovorax} sp. {S12S4}, two plant growth-promoting rhizobacteria isolated from {Crocus} sativus {L.} (saffron) rhizosphere.},
  journal  = {BMC genomics},
  year     = {2024},
  volume   = {25},
  number   = {1},
  pages    = {289},
  pmid     = {38500021},
  issn     = {1471-2164},
  abstract = {BACKGROUND: Rahnella perminowiae S11P1 and Variovorax sp. S12S4 are two plant growth-promoting rhizobacteria that were previously isolated from the rhizosphere of Crocus sativus L. (saffron), and have demonstrated interesting PGP activities and promising results when used as inoculants in field trials. To further elucidate the molecular mechanisms underlying their beneficial effects on plant growth, comprehensive genome mining of S11P1 and S12S4 and comparative genomic analysis with closely related strains were conducted.
RESULTS: Functional annotation of the two strains predicted a large number of genes involved in auxin and siderophore production, nitrogen fixation, sulfur metabolism, organic acid biosynthesis, pyrroloquinoline quinone production, 1-aminocyclopropane-1-carboxylate (ACC) deaminase activity, volatile organic compounds production, and polyamine biosynthesis. In addition, numerous genes implicated in plant-bacteria interactions, such as those involved in chemotaxis and quorum sensing, were predicted. Moreover, the two strains carried genes involved in bacterial fitness under abiotic stress conditions. Comparative genomic analysis revealed an open pan-genomic structure for the two strains. COG annotation showed that higher fractions of core and accessory genes were involved in the metabolism and transport of carbohydrates and amino acids, suggesting the metabolic versatility of the two strains as effective rhizosphere colonizers. Furthermore, this study reports the first comparison of Multilocus sequence analysis (MLSA) and core-based phylogenies of the Rahnella and Variovorax genera.
CONCLUSIONS: The present study unveils the molecular mechanisms underlying plant growth promotion and biocontrol activity of S11P1 and S12S4, and provides a basis for their further biotechnological application in agriculture.},
}
@article{pmid38496489,
  author   = {Rinker, D. C. and Sauters, T. J. C. and Steffen, K. and Gumilang, A. and Raja, H. A. and Rangel-Grimaldo, M. and Pinzan, C. F. and de Castro, P. A. and Dos Reis, T. F. and Delbaje, E. and Houbraken, J. and Goldman, G. H. and Oberlies, N. H. and Rokas, A.},
  title    = {Strain heterogeneity in a non-pathogenic fungus highlights factors contributing to virulence.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2024},
  doi      = {10.1101/2024.03.08.583994},
  pmid     = {38496489},
  abstract = {Fungal pathogens exhibit extensive strain heterogeneity, including variation in virulence. Whether closely related non-pathogenic species also exhibit strain heterogeneity remains unknown. Here, we comprehensively characterized the pathogenic potentials (i.e., the ability to cause morbidity and mortality) of 16 diverse strains of Aspergillus fischeri, a non-pathogenic close relative of the major pathogen Aspergillus fumigatus. In vitro immune response assays and in vivo virulence assays using a mouse model of pulmonary aspergillosis showed that A. fischeri strains varied widely in their pathogenic potential. Furthermore, pangenome analyses suggest that A. fischeri genomic and phenotypic diversity is even greater. Genomic, transcriptomic, and metabolomic profiling identified several pathways and secondary metabolites associated with variation in virulence. Notably, strain virulence was associated with the simultaneous presence of the secondary metabolites hexadehydroastechrome and gliotoxin. We submit that examining the pathogenic potentials of non-pathogenic close relatives is key for understanding the origins of fungal pathogenicity.},
}
@article{pmid38495945,
  author   = {Lecomte, L. and Árnyasi, M. and Ferchaud, A. L. and Kent, M. and Lien, S. and Stenløkk, K. and Sylvestre, F. and Bernatchez, L. and Mérot, C.},
  title    = {Investigating structural variant, indel and single nucleotide polymorphism differentiation between locally adapted {Atlantic} salmon populations.},
  journal  = {Evolutionary applications},
  year     = {2024},
  volume   = {17},
  number   = {3},
  pages    = {e13653},
  pmid     = {38495945},
  issn     = {1752-4571},
  abstract = {Genomic structural variants (SVs) are now recognized as an integral component of intraspecific polymorphism and are known to contribute to evolutionary processes in various organisms. However, they are inherently difficult to detect and genotype from readily available short-read sequencing data, and therefore remain poorly documented in wild populations. Salmonid species displaying strong interpopulation variability in both life history traits and habitat characteristics, such as Atlantic salmon (Salmo salar), offer a prime context for studying adaptive polymorphism, but the contribution of SVs to fine-scale local adaptation has yet to be explored. Here, we performed a comparative analysis of SVs, single nucleotide polymorphisms (SNPs) and small indels (<50 bp) segregating in the Romaine and Puyjalon salmon, two putatively locally adapted populations inhabiting neighboring rivers (Québec, Canada) and showing pronounced variation in life history traits, namely growth, fecundity, and age at maturity and smoltification. We first catalogued polymorphism using a hybrid SV characterization approach pairing both short- (16X) and long-read sequencing (20X) for variant discovery with graph-based genotyping of SVs across 60 salmon genomes, along with characterization of SNPs and small indels from short reads. We thus identified 115,907 SVs, 8,777,832 SNPs and 1,089,321 short indels, with SVs covering 4.8 times more base pairs than SNPs. All three variant types revealed a highly congruent population structure and similar patterns of $F_{\mathrm{ST}}$ and density variation along the genome. Finally, we performed outlier detection and redundancy analysis (RDA) to identify variants of interest in the putative local adaptation of Romaine and Puyjalon salmon. Genes located near these variants were enriched for biological processes related to nervous system function, suggesting that observed variation in traits such as age at smoltification could arise from differences in neural development. This study therefore demonstrates the feasibility of large-scale SV characterization and highlights its relevance for salmonid population genomics.},
}
@article{pmid38492232,
  author   = {Poretsky, E. and Cagirici, H. B. and Andorf, C. M. and Sen, T. Z.},
  title    = {Harnessing the predicted maize pan-interactome for putative gene function prediction and prioritization of candidate genes for important traits.},
  journal  = {G3 (Bethesda, Md.)},
  year     = {2024},
  doi      = {10.1093/g3journal/jkae059},
  pmid     = {38492232},
  issn     = {2160-1836},
  abstract = {The recent assembly and annotation of the 26 maize nested association mapping (NAM) population founder inbreds have enabled large-scale pan-genomic comparative studies. These studies have expanded our understanding of agronomically important traits by integrating pan-transcriptomic data with trait-specific gene candidates from previous association mapping results. In contrast to the availability of pan-transcriptomic data, obtaining reliable protein-protein interaction (PPI) data has remained a challenge due to its high cost and complexity. We generated predicted PPI networks for each of the 26 genomes using the established STRING database. The individual genome-interactomes were then integrated to generate core- and pan-interactomes. We deployed the PPI clustering algorithm ClusterONE to identify numerous PPI clusters that were functionally annotated using gene ontology (GO) functional enrichment, demonstrating a diverse range of enriched GO terms across different clusters. Additional cluster annotations were generated by integrating gene co-expression data and gene description annotations, providing additional useful information. We show that the functionally annotated PPI clusters establish a useful framework for protein function prediction and prioritization of candidate genes of interest. Our study not only provides a comprehensive resource of predicted PPI networks for 26 maize genomes, but also offers annotated interactome clusters for predicting protein functions and prioritizing gene candidates. The source code for the Python implementation of the analysis workflow and a standalone web application for accessing the analysis results are available at https://github.com/eporetsky/PanPPI.},
}
@article{pmid38491145,
  author   = {Wang, Y. and Tang, H. and Wang, X. and Sun, Y. and Joseph, P. V. and Paterson, A. H.},
  title    = {Detection of colinear blocks and synteny and evolutionary analyses based on utilization of {MCScanX}.},
  journal  = {Nature protocols},
  year     = {2024},
  pmid     = {38491145},
  issn     = {1750-2799},
  abstract = {As different taxa evolve, gene order often changes slowly enough that chromosomal 'blocks' with conserved gene orders (synteny) are discernible. The MCScanX toolkit (https://github.com/wyp1125/MCScanX) was published in 2012 as freely available software for the detection of such 'colinear blocks' and subsequent synteny and evolutionary analyses based on genome-wide gene location and protein sequence information. Owing to its simplicity and high efficiency for colinear block detection, MCScanX provides a powerful tool for conducting diverse synteny and evolutionary analyses. Moreover, the detection of colinear blocks has been embraced as an integral step for pangenome graph construction. Here, new application trends of MCScanX are explored, striving to better connect this increasingly used tool to other tools and accelerate insight generation from exponentially growing sequence data. We provide a detailed protocol that covers how to install MCScanX on diverse platforms, tune parameters, prepare input files from data from the National Center for Biotechnology Information, run MCScanX and its visualization and evolutionary analysis tools, and connect MCScanX with external tools, including MCScanX-transposed, Circos and SynVisio. This protocol is easily implemented by users with minimal computational background and is adaptable to new data of interest to them. The data and utility programs for this protocol can be obtained from http://bdx-consulting.com/mcscanx-protocol.},
}
@article{pmid38488860,
  author   = {Freddi, S. and Rajabal, V. and Tetu, S. G. and Gillings, M. R. and Penesyan, A.},
  title    = {Microbial biofilms on macroalgae harbour diverse integron gene cassettes.},
  journal  = {Microbiology (Reading, England)},
  year     = {2024},
  volume   = {170},
  number   = {3},
  doi      = {10.1099/mic.0.001446},
  pmid     = {38488860},
  issn     = {1465-2080},
  abstract = {Integrons are genetic platforms that capture, rearrange and express mobile modules called gene cassettes. The best characterized gene cassettes encode antibiotic resistance, but the function of most integron gene cassettes remains unknown. Functional predictions suggest that many gene cassettes could encode proteins that facilitate interactions with other cells and with the extracellular environment. Because cell interactions are essential for biofilm stability, we sequenced gene cassettes from biofilms growing on the surface of the marine macroalgae Ulva australis and Sargassum linearifolium. Algal samples were obtained from coastal rock platforms around Sydney, Australia, using seawater as a control. We demonstrated that integrons in microbial biofilms did not sample genes randomly from the surrounding seawater, but harboured specific functions that potentially provided an adaptive advantage to both the bacterial cells in biofilm communities and their macroalgal host. Further, integron gene cassettes had a well-defined spatial distribution, suggesting that each bacterial biofilm acquired these genetic elements via sampling from a large but localized pool of gene cassettes. These findings suggest two forms of filtering: a selective acquisition of different integron-containing bacterial species into the distinct biofilms on Ulva and Sargassum surfaces, and a selective retention of unique populations of gene cassettes at each sampling location.},
}
@article{pmid38488392,
  author   = {Wang, M. and Li, X. and Liu, X. and Hou, X. and He, Y. and Yu, J.-H. and Hu, S. and Yin, H. and Xie, B.-B.},
  title    = {Annotation of 2,507 {Saccharomyces} cerevisiae genomes.},
  journal  = {Microbiology spectrum},
  year     = {2024},
  pages    = {e0358223},
  doi      = {10.1128/spectrum.03582-23},
  pmid     = {38488392},
  issn     = {2165-0497},
  abstract = {Saccharomyces cerevisiae (baker's yeast, budding yeast) is one of the most important model organisms for biological research and is a crucial microorganism in industry. Currently, a huge number of Saccharomyces cerevisiae genome sequences are available at the public domain. However, these genomes are distributed at different websites and a large number of them are released without annotation information. To provide one complete annotated genome data resource, we collected 2,507 Saccharomyces cerevisiae genome assemblies and re-annotated 2,506 assemblies using a custom annotation pipeline, producing a total of 15,407,164 protein-coding gene models. With a custom pipeline, all these gene sequences were clustered into families. A total of 1,506 single-copy genes were selected as marker genes, which were then used to evaluate the genome completeness and base qualities of all assemblies. Pangenomic analyses were performed based on a selected subset of 847 medium-high-quality genomes. Statistical comparisons revealed a number of gene families showing copy number variations among different organism sources. To the authors' knowledge, this study represents the largest genome annotation project of S. cerevisiae so far, providing rich genomic resources for the future studies of the model organism S. cerevisiae and its relatives.
IMPORTANCE: Saccharomyces cerevisiae (baker's yeast, budding yeast) is one of the most important model organisms for biological research and is a crucial microorganism in industry. Though a huge number of Saccharomyces cerevisiae genome sequences are available at the public domain, these genomes are distributed at different websites and most are released without annotation, hindering the efficient reuse of these genome resources. Here, we collected 2,507 genomes for Saccharomyces cerevisiae, performed genome annotation, and evaluated the genome qualities. All the obtained data have been deposited at public repositories and are freely accessible to the community. This study represents the largest genome annotation project of S. cerevisiae so far, providing one complete annotated genome data set for S. cerevisiae, an important workhorse for fundamental biology, biotechnology, and industry.},
}
@article{pmid38488280,
  author   = {Giacomini, J. J. and Torres-Morales, J. and Tang, J. and Dewhirst, F. E. and Borisy, G. G. and Mark Welch, J. L.},
  title    = {Spatial ecology of {Haemophilus} and {Aggregatibacter} in the human oral cavity.},
  journal  = {Microbiology spectrum},
  year     = {2024},
  pages    = {e0401723},
  doi      = {10.1128/spectrum.04017-23},
  pmid     = {38488280},
  issn     = {2165-0497},
  abstract = {Haemophilus and Aggregatibacter are two of the most common bacterial genera in the human oral cavity, encompassing both commensals and pathogens of substantial ecological and medical significance. In this study, we conducted a metapangenomic analysis of oral Haemophilus and Aggregatibacter species to uncover genomic diversity, phylogenetic relationships, and habitat specialization within the human oral cavity. Using three metrics---pangenomic gene content, phylogenomics, and average nucleotide identity (ANI)---we first identified distinct species and sub-species groups among these genera. Mapping of metagenomic reads then revealed clear patterns of habitat specialization, such as Aggregatibacter species predominantly in dental plaque, a distinctive Haemophilus parainfluenzae sub-species group on the tongue dorsum, and H. sp. HMT-036 predominantly in keratinized gingiva and buccal mucosa. In addition, we found that supragingival plaque samples contained predominantly only one out of the three taxa, H. parainfluenzae, Aggregatibacter aphrophilus, and A. sp. HMT-458, suggesting independent niches or a competitive relationship. Functional analyses revealed the presence of key metabolic genes, such as oxaloacetate decarboxylase, correlated with habitat specialization, suggesting metabolic versatility as a driving force. Additionally, heme synthesis distinguishes H. sp. HMT-036 from closely related Haemophilus haemolyticus, suggesting that the availability of micronutrients, particularly iron, was important in the evolutionary ecology of these species. Overall, our study exemplifies the power of metapangenomics to identify factors that may affect ecological interactions within microbial communities, including genomic diversity, habitat specialization, and metabolic versatility.
IMPORTANCE: Understanding the microbial ecology of the mouth is essential for comprehending human physiology. This study employs metapangenomics to reveal that various Haemophilus and Aggregatibacter species exhibit distinct ecological preferences within the oral cavity of healthy individuals, thereby supporting the site-specialist hypothesis. Additionally, it was observed that the gene pool of different Haemophilus species correlates with their ecological niches. These findings shed light on the significance of key metabolic functions in shaping microbial distribution patterns and interspecies interactions in the oral ecosystem.},
}
@article{pmid38487210,
  author   = {Grizon, A. and Theil, S. and Callon, C. and Gerber, P. and Helinck, S. and Dugat-Bony, E. and Bonnarme, P. and Chassard, C.},
  title    = {Genetic and technological diversity of {Streptococcus} thermophilus isolated from the {Saint-Nectaire} {PDO} cheese-producing area.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1245510},
  doi      = {10.3389/fmicb.2023.1245510},
  pmid     = {38487210},
  issn     = {1664-302X},
  abstract = {Streptococcus thermophilus is of major importance for cheese manufacturing to ensure rapid acidification; however, studies indicate that intensive use of commercial strains leads to the loss of typical characteristics of the products. To strengthen the link between the product and its geographical area and improve the sensory qualities of cheeses, cheese-producing protected designations of origin (PDO) are increasingly interested in the development of specific autochthonous starter cultures. The present study is therefore investigating the genetic and functional diversity of S. thermophilus strains isolated from a local cheese-producing PDO area. Putative S. thermophilus isolates were isolated and identified from milk collected in the Saint-Nectaire cheese-producing PDO area and from commercial starters. Whole genomes of isolates were sequenced, and a comparative analysis based on their pan-genome was carried out. Important functional properties were studied, including acidifying and proteolytic activities. Twenty-two isolates representative of the diversity of the geographical area and four commercial strains were selected for comparison. The resulting phylogenetic trees do not correspond to the geographical distribution of isolates. The clustering based on the pan-genome analysis indicates that isolates are divided into five distinct groups. A Kyoto Encyclopedia of Genes and Genomes (KEGG) functional annotation of the accessory genes indicates that the accessory gene contents of isolates are involved in different functional categories. High variability in acidifying activities and less diversity in proteolytic activities were also observed. These results indicate that high genetic and functional variabilities of the species S. thermophilus may arise from a small (1,800 km\textsuperscript{2}) geographical area and may be exploited to meet demand for use as autochthonous starters.},
}
@article{pmid38486452,
  author   = {Shi, T. and Zhang, X. and Hou, Y. and Jia, C. and Dan, X. and Zhang, Y. and Jiang, Y. and Lai, Q. and Feng, J. and Feng, J. and Ma, T. and Wu, J. and Liu, S. and Zhang, L. and Long, Z. and Chen, L. and Street, N. R. and Ingvarsson, P. K. and Liu, J. and Yin, T. and Wang, J.},
  title    = {The super-pangenome of {Populus} unveil genomic facets for its adaptation and diversification in widespread forest trees.},
  journal  = {Molecular plant},
  year     = {2024},
  doi      = {10.1016/j.molp.2024.03.009},
  pmid     = {38486452},
  issn     = {1752-9867},
  abstract = {Understanding the underlying mechanisms and links between genome evolution and adaptive innovations stands as a key goal in evolutionary studies. Poplars, among the world's most widely distributed and cultivated trees, exhibit extensive phenotypic diversity and environmental adaptability. In this study, we present a genus-level super-pangenome comprising 19 Populus genomes, revealing the likely pivotal role of private genes in facilitating local environmental and climate adaptation. Through the integration of pan-genomes with transcriptomes, methylomes and chromatin accessibility mapping, we unveil that the evolutionary trajectories of pan-genes and duplicated genes are closely linked to local genomic landscapes of regulatory and epigenetic architectures, notably CG methylation in gene-body regions. Further comparative genomic analyses have enabled the identification of 142,202 structural variants (SVs) across species, which intersect with a significant number of genes and contribute substantially to both phenotypic and adaptive divergence. We have experimentally validated a $\sim$180 bp presence/absence variant impacting the expression of the CUC2 gene, crucial for leaf serration formation. Finally, we developed a user-friendly web-based tool encompassing the multi-omics resources associated with the Populus super-pangenome (http://www.populus-superpangenome.com/). Together, the present pioneering super-pangenome resource in forest trees not only aid in the advancement of breeding efforts of this globally important tree genus but also offer valuable insights into potential avenues for comprehending tree biology.},
}
@article{pmid38478130,
  author   = {Wisal, A. and Saeed, N. and Aurongzeb, M. and Shafique, M. and Sohail, S. and Anwar, W. and Basharat, Z. and Irfan, M. and Ullah, A. and Hassan, S. S.},
  title    = {Bridging drug discovery through hierarchical subtractive genomics against asd, {trpG}, and {secY} of pneumonia causing {MDR} {Staphylococcus} aureus.},
  journal  = {Molecular genetics and genomics : MGG},
  year     = {2024},
  volume   = {299},
  number   = {1},
  pages    = {34},
  pmid     = {38478130},
  issn     = {1617-4623},
  abstract = {Staphylococcus aureus (S. aureus) is an opportunistic gram-positive, non-motile, and non-sporulating bacteria that induces pneumonia, a provocative lung infection affecting mainly the terminal bronchioles and the small air sacs known as alveoli. Recently, it has developed antibiotic resistance to the available consortium as per the WHO reports; thereby, novel remedial targets and resilient medications to forestall and cure this illness are desperately needed. Here, using pan-genomics, a total of 1,387 core proteins were identified. Subtractive proteome analyses further identified 12 proteins that are vital for bacteria. One membrane protein (secY) and two cytoplasmic proteins (asd and trpG) were chosen as possible therapeutic targets concerning minimum \% host identity, essentiality, and other cutoff values, such as high resistance in the MDR S. aureus. The UniProt AA sequences of the selected targets were modelled and docked against 3 drug-like chemical libraries. The top-ranked compounds i.e., ZINC82049692, ZINC85492658 and 3a of Isosteviol derivative for Aspartate-semialdehyde dehydrogenase (asd); ZINC38222743, ZINC70455378, and 5 m Isosteviol derivative for Anthranilate synthase component II (trpG); and finally, ZINC72292296, ZINC85632684, and 7 m Isosteviol derivative for Protein translocase subunit secY (secY), were further subjected to molecular dynamics studies for thermodynamic stability and energy calculation. Our study proposes new therapeutic targets in S. aureus, some of which have previously been reported in other pathogenic microorganisms. Owing to further experimental validation, we anticipate that the adapted methodology and the predicted results in this work could make major contributions towards novel drug discovery and their targets in S. aureus caused pneumonia.},
}
@article{pmid38472486,
  author   = {Martínez-Gallardo, M. J. and Villicaña, C. and Yocupicio-Monroy, M. and Alcaraz-Estrada, S. L. and Salazar-Salinas, J. and Mendoza-Vázquez, O. F. and Damazo-Hernández, G. and León-Félix, J.},
  title    = {Comparative genomic analysis of {Pseudomonas} aeruginosa strains susceptible and resistant to carbapenems and aztreonam isolated from patients with healthcare-associated infections in a {Mexican} hospital.},
  journal  = {Molecular genetics and genomics : MGG},
  year     = {2024},
  volume   = {299},
  number   = {1},
  pages    = {29},
  pmid     = {38472486},
  issn     = {1617-4623},
  support  = {E05//Ciencia y Tecnología ISSSTE/ ; },
  abstract = {Pseudomonas aeruginosa (PA) is an important opportunistic pathogen that causes different infections on immunocompromised patients. Within PA accessory genome, differences in virulence, antibiotic resistance and biofilm formation have been described between strains, leading to the emergence of multidrug-resistant strains. The genome sequences of 17 strains isolated from patients with healthcare-associated infections in a Mexican hospital were genomically and phylogenetically analyzed and antibiotic resistance genes, virulence genes, and biofilm formation genes were detected. Fifteen of the 17 strains were resistant to at least two of the carbapenems meropenem, imipenem, and the monobactam aztreonam. The antibiotic resistance (mexA, mexB, and oprM) and the biofilm formation (pslA and pslD) genes were detected in all strains. Differences were found between strains in accessory genome size. The strains had different sequence types, and seven strains had sequence types associated with global high risk epidemic PA clones. All strains were represented in two groups among PA global strains. In the 17 strains, horizontally acquired resistance genes to aminoglycosides and beta-lactams were found, mainly, and between 230 and 240 genes that encode virulence factors. The strains under study were variable in terms of their accessory genome, antibiotic resistance, and virulence genes. With these characteristics, we provide information about the genomic diversity of clinically relevant PA strains.},
}
@article{pmid38470044,
  author   = {Liu, D. and Xie, L.-S. and Lian, S. and Li, K. and Yang, Y. and Wang, W.-Z. and Hu, S. and Liu, S.-J. and Liu, C. and He, Z.},
  title    = {Anaerostipes hadrus, a butyrate-producing bacterium capable of metabolizing 5-fluorouracil.},
  journal  = {mSphere},
  year     = {2024},
  pages    = {e0081623},
  doi      = {10.1128/msphere.00816-23},
  pmid     = {38470044},
  issn     = {2379-5042},
  abstract = {Anaerostipes hadrus (A. hadrus) is a dominant species in the human gut microbiota and considered a beneficial bacterium for producing probiotic butyrate. However, recent studies have suggested that A. hadrus may negatively affect the host through synthesizing fatty acid and metabolizing the anticancer drug 5-fluorouracil, indicating that the impact of A. hadrus is complex and unclear. Therefore, comprehensive genomic studies on A. hadrus need to be performed. We integrated 527 high-quality public A. hadrus genomes and five distinct metagenomic cohorts. We analyzed these data using the approaches of comparative genomics, metagenomics, and protein structure prediction. We also performed validations with culture-based in vitro assays. We constructed the first large-scale pan-genome of A. hadrus (n = 527) and identified 5-fluorouracil metabolism genes as ubiquitous in A. hadrus genomes as butyrate-producing genes. Metagenomic analysis revealed the wide and stable distribution of A. hadrus in healthy individuals, patients with inflammatory bowel disease, and patients with colorectal cancer, with healthy individuals carrying more A. hadrus. The predicted high-quality protein structure indicated that A. hadrus might metabolize 5-fluorouracil by producing bacterial dihydropyrimidine dehydrogenase (encoded by the preTA operon). Through in vitro assays, we validated the short-chain fatty acid production and 5-fluorouracil metabolism abilities of A. hadrus. We observed for the first time that A. hadrus can convert 5-fluorouracil to α-fluoro-β-ureidopropionic acid, which may result from the combined action of the preTA operon and adjacent hydA (encoding bacterial dihydropyrimidinase). Our results offer novel understandings of A. hadrus, exceptionally functional features, and potential applications.
IMPORTANCE: This work provides new insights into the evolutionary relationships, functional characteristics, prevalence, and potential applications of Anaerostipes hadrus.},
}
@article {pmid38469580, year = {2024}, author = {Yakubu, B and Appiah, EM and Adu, AF}, title = {Pangenome Analysis of Helicobacter pylori Isolates from Selected Areas of Africa Indicated Diverse Antibiotic Resistance and Virulence Genes.}, journal = {International journal of genomics}, volume = {2024}, number = {}, pages = {5536117}, pmid = {38469580}, issn = {2314-4378}, abstract = {The challenge facing Helicobacter pylori (H. pylori) infection management in some parts of Africa is the evolution of drug-resistant species, the lack of gold standard in diagnostic methods, and the ineffectiveness of current vaccines against the bacteria. It is being established that even though clinical consequences linked to the bacteria vary geographically, there is rather a generic approach to treatment. This situation has remained problematic in the successful fight against the bacteria in parts of Africa. As a result, this study compared the genomes of selected H. pylori isolates from selected areas of Africa and evaluated their virulence and antibiotic drug resistance, those that are highly pathogenic and are associated with specific clinical outcomes and those that are less virulent and rarely associated with clinical outcomes. 146 genomes of H. pylori isolated from selected locations of Africa were sampled, and bioinformatic tools such as Abricate, CARD RGI, MLST, Prokka, Roary, Phandango, Google Sheets, and iTOLS were used to compare the isolates and their antibiotic resistance or susceptibility. Over 20 k virulence and AMR genes were observed. About 95% of the isolates were genetically diverse, 90% of the isolates harbored shell genes, and 50% harbored cloud and core genes. Some isolates did not retain the cagA and vacA genes. Clarithromycin, metronidazole, amoxicillin, and tinidazole were resistant to most AMR genes (vacA, cagA, oip, and bab). Conclusion. This study found both virulence and AMR genes in all H. 
pylori strains in all the selected geographies around Africa with differing quantities. MLST, Pangenome, and ORF analyses showed disparities among the isolates. This in general could imply diversities in terms of genetics, evolution, and protein production. Therefore, generic administration of antibiotics such as clarithromycin, amoxicillin, and erythromycin as treatment methods in the African subregion could be contributing to the spread of the bacterium's antibiotic resistance.}, }
@article {pmid38463963, year = {2024}, author = {Young, MG and Straub, TJ and Worby, CJ and Metsky, HC and Gnirke, A and Bronson, RA and van Dijk, LR and Desjardins, CA and Matranga, C and Qu, J and Dodson, K and Schreiber, HL and Manson, AL and Hultgren, SJ and Earl, AM}, title = {Distinct Escherichia coli transcriptional profiles in the guts of recurrent UTI sufferers revealed by pan-genome hybrid selection.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.02.29.582780}, pmid = {38463963}, abstract = {Low-abundance members of microbial communities are difficult to study in their native habitat. This includes Escherichia coli, a minor, but common inhabitant of the gastrointestinal tract and opportunistic pathogen, including of the urinary tract, where it causes most infections. While our understanding of the interactions between uropathogenic Escherichia coli (UPEC) and the bladder is increasing, comparatively little is known about UPEC in its pre-infection reservoir, partly due to its low abundance there (<1% relative abundance). In order to specifically and sensitively explore the genomes and transcriptomes of diverse E. coli from gastrointestinal communities, we developed E. coli PanSelect, a set of probes designed to enrich E. coli's broad pangenome. First we demonstrated the ability of PanSelect to enrich diverse strains in an unbiased way using a mock community of known composition. Then we enriched E. coli DNA and RNA from human stool microbiomes by 158 and 30-fold, respectively. We also used E. coli PanSelect to explore the gene content and transcriptome of E. coli within the gut microbiomes of women with history of recurrent urinary tract infection (rUTI), finding differential regulation of pathways that suggests that the rUTI gut environment promotes respiratory vs fermentative metabolism. E. 
coli PanSelect technology holds promise for investigations of native in vivo biology of diverse E. coli in the gut and other environments, where it is a minor component of the microbial community, using unbiased, culture-free shotgun sequencing. This method could also be generally applied to other highly diverse, low abundance bacteria.}, }
@article {pmid38463499, year = {2024}, author = {Li, H and Marin, M and Farhat, MR}, title = {Exploring gene content with pangenome gene graphs.}, journal = {ArXiv}, volume = {}, number = {}, pages = {}, pmid = {38463499}, issn = {2331-8422}, abstract = {MOTIVATION: The gene content regulates the biology of an organism. It varies between species and between individuals of the same species. Although tools have been developed to identify gene content changes in bacterial genomes, none is applicable to collections of large eukaryotic genomes such as the human pangenome.
RESULTS: We developed pangene, a computational tool to identify gene orientation, gene order and gene copy-number changes in a collection of genomes. Pangene aligns a set of input protein sequences to the genomes, resolves redundancies between protein sequences and constructs a gene graph with each genome represented as a walk in the graph. It additionally finds subgraphs that encodes gene content changes. Applied to the human pangenome, pangene identifies known gene-level variations and reveals complex haplotypes that are not well studied before. Pangene also works with high-quality bacterial pangenome and reports similar numbers of core and accessory genes in comparison to existing tools.
Source code at https://github.com/lh3/pangene; pre-built pangene graphs can be downloaded from https://zenodo.org/records/8118576 and visualized at https://pangene.bioinweb.org.}, }
@article {pmid38461665, year = {2024}, author = {Feng, NX and Li, DW and Zhang, F and Bin, H and Huang, YT and Xiang, L and Liu, BL and Cai, QY and Li, YW and Xu, DL and Xie, Y and Mo, CH}, title = {Biodegradation of phthalate acid esters and whole-genome analysis of a novel Streptomyces sp. FZ201 isolated from natural habitats.}, journal = {Journal of hazardous materials}, volume = {469}, number = {}, pages = {133972}, doi = {10.1016/j.jhazmat.2024.133972}, pmid = {38461665}, issn = {1873-3336}, abstract = {Di-n-butyl phthalate (DBP) is one of the most extensively used phthalic acid esters (PAEs) and is considered to be an emerging, globally concerning pollutant. The genus Streptomyces holds promise as a degrader of various organic pollutants, but PAE biodegradation mechanisms by Streptomyces species remain unsolved. In this study, a novel PAE-degrading Streptomyces sp. FZ201 isolated from natural habitats efficiently degraded various PAEs. FZ201 had strong resilience against DBP and exhibited immediate degradation, with kinetics adhering to a first-order model. The comprehensive biodegradation of DBP involves de-esterification, β-oxidation, trans-esterification, and aromatic ring cleavage. FZ201 contains numerous catabolic genes that potentially facilitate PAE biodegradation. The DBP metabolic pathway was reconstructed by genome annotation and intermediate identification. Streptomyces species have an open pangenome with substantial genome expansion events during the evolutionary process, enabling extensive genetic diversity and highly plastic genomes within the Streptomyces genus. FZ201 had a diverse array of highly expressed genes associated with the degradation of PAEs, potentially contributing significantly to its adaptive advantage and efficiency of PAE degradation. Thus, FZ201 is a promising candidate for remediating highly PAE-contaminated environments. 
These findings enhance our preliminary understanding of the molecular mechanisms employed by Streptomyces for the removal of PAEs.}, }
@article {pmid38459435, year = {2024}, author = {Zhu, L and Liu, H and Li, X and Shi, Y and Yin, X and Pi, X}, title = {Whole-genome sequencing and analysis of Chryseobacterium arthrosphaerae from Rana nigromaculata.}, journal = {BMC microbiology}, volume = {24}, number = {1}, pages = {80}, pmid = {38459435}, issn = {1471-2180}, support = {2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; }, abstract = {Chryseobacterium arthrosphaerae strain FS91703 was isolated from Rana nigromaculata in our previous study. To investigate the genomic characteristics, pathogenicity-related genes, antimicrobial resistance, and phylogenetic relationship of this strain, PacBio RS II and Illumina HiSeq 2000 platforms were used for the whole genome sequencing. The genome size of strain FS91703 was 5,435,691 bp and GC content was 37.78%. A total of 4,951 coding genes were predicted; 99 potential virulence factors homologs were identified. Analysis of antibiotic resistance genes revealed that strain FS91703 harbored 10 antibiotic resistance genes in 6 categories and 2 multidrug-resistant efflux pump genes, including adeG and farA. 
Strain FS91703 was sensitive to β-lactam combination drugs, cephem, monobactam and carbapenems, intermediately resistant to phenicol, and resistant to penicillin, aminoglycosides, tetracycline, fluoroquinolones, and folate pathway inhibitors. Phylogenetic analysis revealed that strain FS91703 and C. arthrosphaerae CC-VM-7[T] were on the same branch of the phylogenetic tree based on 16S rRNA; the ANI value between them was 96.99%; and the DDH values were 80.2, 72.2 and 81.6% by three default calculation formulae. These results suggested that strain FS91703 was a species of C. arthrosphaerae. Pan-genome analysis showed FS91703 had 566 unique genes compared with 13 other C. arthrosphaerae strains, and had a distant phylogenetic relationship with the other C. arthrosphaerae strains of the same branch in phylogenetic tree based on orthologous genes. The results of this study suggest that strain FS91703 is a multidrug-resistant and highly virulent bacterium, that differs from other C. arthrosphaerae strains at the genomic level. The knowledge about the genomic characteristics and antimicrobial resistance of strain FS91703 provides valuable insights into this rare species, as well as guidance for the treatment of the disease caused by FS91703 in Rana nigromaculata.}, }
@article {pmid38450165, year = {2024}, author = {Zhou, Y and Tu, T and Yao, X and Luo, Y and Yang, Z and Ren, M and Zhang, G and Yu, Y and Lu, A and Wang, Y}, title = {Pan-genome analysis of Streptococcus suis serotype 2 highlights genes associated with virulence and antibiotic resistance.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1362316}, pmid = {38450165}, issn = {1664-302X}, abstract = {Streptococcus suis serotype 2 (SS2) is a Gram-positive bacterium. It is a common and significant pathogen in pigs and a common cause of zoonotic meningitis in humans. It can lead to sepsis, endocarditis, arthritis, and pneumonia. If not diagnosed and treated promptly, it has a high mortality rate. The pan-genome of SS2 is open, and with an increasing number of genes, the core genome and accessory genome may exhibit more pronounced differences. Due to the diversity of SS2, the genes related to its virulence and resistance are still unclear. In this study, a strain of SS2 was isolated from a pig farm in Sichuan Province, China, and subjected to whole-genome sequencing and characterization. Subsequently, we conducted a Pan-Genome-Wide Association Study (Pan-GWAS) on 230 strains of SS2. Our analysis indicates that the core genome is composed of 1,458 genes related to the basic life processes of the bacterium. The accessory genome, consisting of 4,337 genes, is highly variable and a major contributor to the genetic diversity of SS2. Furthermore, we identified important virulence and resistance genes in SS2 through pan-GWAS. The virulence genes of SS2 are mainly associated with bacterial adhesion. In addition, resistance genes in the core genome may confer natural resistance of SS2 to fluoroquinolone and glycopeptide antibiotics. This study lays the foundation for further research on the virulence and resistance of SS2, providing potential new drug and vaccine targets against SS2.}, }
@article {pmid38448140, year = {2024}, author = {Mathur, S and Singh, D and Ranjan, R}, title = {Recent advances in plant translational genomics for crop improvement.}, journal = {Advances in protein chemistry and structural biology}, volume = {139}, number = {}, pages = {335--382}, doi = {10.1016/bs.apcsb.2023.11.009}, pmid = {38448140}, issn = {1876-1631}, abstract = {The growing population, climate change, and limited agricultural resources put enormous pressure on agricultural systems. A plateau in crop yields is occurring and extreme weather events and urbanization threaten the livelihood of farmers. It is imperative that immediate attention is paid to addressing the increasing food demand, ensuring resilience against emerging threats, and meeting the demand for more nutritious, safer food. Under uncertain conditions, it is essential to expand genetic diversity and discover novel crop varieties or variations to develop higher and more stable yields. Genomics plays a significant role in developing abundant and nutrient-dense food crops. An alternative to traditional breeding approach, translational genomics is able to improve breeding programs in a more efficient and precise manner by translating genomic concepts into practical tools. Crop breeding based on genomics offers potential solutions to overcome the limitations of conventional breeding methods, including improved crop varieties that provide more nutritional value and are protected from biotic and abiotic stresses. Genetic markers, such as SNPs and ESTs, contribute to the discovery of QTLs controlling agronomic traits and stress tolerance. In order to meet the growing demand for food, there is a need to incorporate QTLs into breeding programs using marker-assisted selection/breeding and transgenic technologies. 
This chapter primarily focuses on the recent advances that are made in translational genomics for crop improvement and various omics techniques including transcriptomics, metagenomics, pangenomics, single cell omics etc. Numerous genome editing techniques including CRISPR Cas technology and their applications in crop improvement had been discussed.}, }
@article {pmid38439049, year = {2024}, author = {Chen, C and Wu, S and Sun, Y and Zhou, J and Chen, Y and Zhang, J and Birchler, JA and Han, F and Yang, N and Su, H}, title = {Three near-complete genome assemblies reveal substantial centromere dynamics from diploid to tetraploid in Brachypodium genus.}, journal = {Genome biology}, volume = {25}, number = {1}, pages = {63}, pmid = {38439049}, issn = {1474-760X}, support = {2021YFF1000800//National Key Research and Development Program of China/ ; 32170571//National Natural Science Foundation of China/ ; 2021ZKPY008//Fundamental Research Funds for the Central Universities/ ; No. B21HJ0504//Hainan Yazhou Bay Seed Laboratory/ ; }, abstract = {BACKGROUND: Centromeres are critical for maintaining genomic stability in eukaryotes, and their turnover shapes genome architectures and drives karyotype evolution. However, the co-evolution of centromeres from different species in allopolyploids over millions of years remains largely unknown.
RESULTS: Here, we generate three near-complete genome assemblies, a tetraploid Brachypodium hybridum and its two diploid ancestors, Brachypodium distachyon and Brachypodium stacei. We detect high degrees of sequence, structural, and epigenetic variations of centromeres at base-pair resolution between closely related Brachypodium genomes, indicating the appearance and accumulation of species-specific centromere repeats from a common origin during evolution. We also find that centromere homogenization is accompanied by local satellite repeats bursting and retrotransposon purging, and the frequency of retrotransposon invasions drives the degree of interspecies centromere diversification. We further investigate the dynamics of centromeres during alloploidization process, and find that dramatic genetics and epigenetics architecture variations are associated with the turnover of centromeres between homologous chromosomal pairs from diploid to tetraploid. Additionally, our pangenomes analysis reveals the ongoing variations of satellite repeats and stable evolutionary homeostasis within centromeres among individuals of each Brachypodium genome with different polyploidy levels.
CONCLUSIONS: Our results provide unprecedented information on the genomic, epigenomic, and functional diversity of highly repetitive DNA between closely related species and their allopolyploid genomes at both coarse and fine scale.}, }
@article {pmid38438804, year = {2024}, author = {Niu, D and Feng, N and Xi, S and Xu, J and Su, Y}, title = {Genomics-based analysis of four porcine-derived lactic acid bacteria strains and their evaluation as potential probiotics.}, journal = {Molecular genetics and genomics : MGG}, volume = {299}, number = {1}, pages = {24}, pmid = {38438804}, issn = {1617-4623}, support = {2022YFD1300402//Key Technologies Research and Development Program/ ; 31872362//National Natural Science Foundation of China/ ; 32072688//National Natural Science Foundation of China/ ; }, abstract = {The search for probiotics and exploration of their functions are crucial for livestock farming. Recently, porcine-derived lactic acid bacteria (LAB) have shown great potential as probiotics. However, research on the evaluation of porcine-derived LAB as potential probiotics through genomics-based analysis is relatively limited. The present study analyzed four porcine-derived LAB strains (Lactobacillus johnsonii L16, Latilactobacillus curvatus ZHA1, Ligilactobacillus salivarius ZSA5 and Ligilactobacillus animalis ZSB1) using genomic techniques and combined with in vitro tests to evaluate their potential as probiotics. The genome sizes of the four strains ranged from 1,897,301 bp to 2,318,470 bp with the GC contents from 33.03 to 41.97%. Pan-genomic analysis and collinearity analysis indicated differences among the genomes of four strains. Carbohydrate active enzymes analysis revealed that L. johnsonii L16 encoded more carbohydrate active enzymes than other strains. KEGG pathway analysis and in vitro tests confirmed that L. johnsonii L16 could utilize a wide range of carbohydrates and had good utilization capacity for each carbohydrate. The four strains had genes related to acid tolerance and were tolerant to low pH, with L. johnsonii L16 showing the greatest tolerance. The four strains contained genes related to bile salt tolerance and were able to tolerate 0.1% bile salt. 
Four strains had antioxidant related genes and exhibited antioxidant activity in in vitro tests. They contained the genes linked with organic acid biosynthesis and exhibited antibacterial activity against enterotoxigenic Escherichia coli K88 (ETEC K88) and Salmonella 6,7:c:1,5, wherein, L. johnsonii L16 and L. salivarius ZSA5 had gene clusters encoding bacteriocin. Results suggest that genome analysis combined with in vitro tests is an effective approach for evaluating different strains as probiotics. The findings of this study indicate that L. johnsonii L16 has the potential as a probiotic strain among the four strains and provide theoretical basis for the development of probiotics in swine production.}, }
@article {pmid38421269, year = {2024}, author = {Deery, J and Carmody, M and Flavin, R and Tomanek, M and O'Keeffe, M and McGlacken, GP and Reen, FJ}, title = {Comparative genomics reveals distinct diversification patterns among LysR-type transcriptional regulators in the ESKAPE pathogen Pseudomonas aeruginosa.}, journal = {Microbial genomics}, volume = {10}, number = {2}, pages = {}, doi = {10.1099/mgen.0.001205}, pmid = {38421269}, issn = {2057-5858}, mesh = {Humans ; Pseudomonas aeruginosa/genetics ; Genomics ; *Pseudomonas Infections ; Pseudomonas ; *Cystic Fibrosis/genetics ; }, abstract = {Pseudomonas aeruginosa, a harmful nosocomial pathogen associated with cystic fibrosis and burn wounds, encodes for a large number of LysR-type transcriptional regulator proteins. To understand how and why LTTR proteins evolved with such frequency and to establish whether any relationships exist within the distribution we set out to identify the patterns underpinning LTTR distribution in P. aeruginosa and to uncover cluster-based relationships within the pangenome. Comparative genomic studies revealed that in the JGI IMG database alone ~86 000 LTTRs are present across the sequenced genomes (n=699). They are widely distributed across the species, with core LTTRs present in >93 % of the genomes and accessory LTTRs present in <7 %. Analysis showed that subsets of core LTTRs can be classified as either variable (typically specific to P. aeruginosa) or conserved (and found to be distributed in other Pseudomonas species). Extending the analysis to the more extensive Pseudomonas database, PA14 rooted analysis confirmed the diversification patterns and revealed PqsR, the receptor for the Pseudomonas quinolone signal (PQS) and 2-heptyl-4-quinolone (HHQ) quorum-sensing signals, to be amongst the most variable in the dataset. 
Successful complementation of the PAO1 pqsR [-] mutant using representative variant pqsR sequences suggests a degree of structural promiscuity within the most variable of LTTRs, several of which play a prominent role in signalling and communication. These findings provide a new insight into the diversification of LTTR proteins within the P. aeruginosa species and suggests a functional significance to the cluster, conservation and distribution patterns identified.}, }
@article {pmid38421062, year = {2024}, author = {Ji, G and Long, Y and Cai, G and Wang, A and Yan, G and Li, H and Gao, G and Xu, K and Huang, Q and Chen, B and Li, L and Li, F and Nishio, T and Shen, J and Wu, X}, title = {The chromosome-scale genome of wild Brassica oleracea provides insights into the domestication of Brassica plants.}, journal = {Journal of experimental botany}, volume = {}, number = {}, pages = {}, doi = {10.1093/jxb/erae079}, pmid = {38421062}, issn = {1460-2431}, abstract = {The cultivated diploid Brassica oleracea is an important vegetable crop, but the genetic basis of domestication remains largely unclear without high-quality reference genomes of wild B. oleracea. Here, we report the first chromosome-level assembly of the wild Brassica oleracea L. W03 genome, (total genome size, 630.7 Mb; scaffold N50, 64.6 Mb). Using newly assembled W03 genome, we constructed a gene-based B. oleracea pangenome and identified 29,744 core genes, 23,306 dispensable genes, and 1,896 private genes. We resequenced 53 accessions, which represent six potential wild B. oleracea progenitor species. The results of the population genomic analysis showed that wild B. oleracea population had the highest level of diversity and represented the more closely related population of horticultural B. oleracea. Additionally, the WUSCHEL gene was found to play a decisive role in domestication and to be involved in cauliflower and broccoli curd formation. We also illustrate the loss of disease resistance genes during domestication selection. Our results provide deep insights into B. oleracea domestication and will facilitate Brassica crop genetic improvement.}, }
@article {pmid38418560, year = {2024}, author = {Chao, P and Zhang, X and Zhang, L and Yang, A and Wang, Y and Chen, X}, title = {Proteomics-based vaccine targets annotation and design of multi-epitope vaccine against antibiotic-resistant Streptococcus gallolyticus.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {4836}, pmid = {38418560}, issn = {2045-2322}, abstract = {Streptococcus gallolyticus is a non-motile, gram-positive bacterium that causes infective endocarditis. S. gallolyticus has developed resistance to existing antibiotics, and no vaccine is currently available. Therefore, it is essential to develop an effective S. gallolyticus vaccine. Core proteomics was used in this study together with subtractive proteomics and reverse vaccinology approach to find antigenic proteins that could be utilized for the design of the S. gallolyticus multi-epitope vaccine. The pipeline identified two antigenic proteins as potential vaccine targets: penicillin-binding protein and the ATP synthase subunit. T and B cell epitopes from the specific proteins were forecasted employing several immunoinformatics and bioinformatics resources. A vaccine (360 amino acids) was created using a combination of seven cytotoxic T cell lymphocyte (CTL), three helper T cell lymphocyte (HTL), and five linear B cell lymphocyte (LBL) epitopes. To increase immune responses, the vaccine was paired with a cholera enterotoxin subunit B (CTB) adjuvant. The developed vaccine was highly antigenic, non-allergenic, and stable for human use. The vaccine's binding affinity and molecular interactions with the human immunological receptor TLR4 were studied using molecular mechanics/generalized Born surface area (MMGBSA), molecular docking, and molecular dynamic (MD) simulation analyses. Escherichia coli (strain K12) plasmid vector pET-28a (+) was used to examine the ability of the vaccine to be expressed. 
According to the outcomes of these computer experiments, the vaccine is quite promising in terms of developing a protective immunity against diseases. However, in vitro and animal research are required to validate our findings.}, }
@article {pmid38417638, year = {2024}, author = {Banerjee, R and Robinson, SM and Lahiri, A and Verma, P and Banerjee, AK and Basak, S and Basak, K and Paul, S}, title = {Exploring the resistome and virulome in major sequence types of Acinetobacter baumannii genomes: Correlations with genome divergence and sequence types.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {}, number = {}, pages = {105579}, doi = {10.1016/j.meegid.2024.105579}, pmid = {38417638}, issn = {1567-7257}, abstract = {The increasing global prevalence of antimicrobial resistance in Acinetobacter baumannii has led to concerns regarding the effectiveness of infection treatment. Moreover, the critical role of virulence factor genes in A. baumannii's pathogenesis and its propensity to cause severe disease is of particular importance. Comparative genomics, including multi-locus sequence typing (MLST), enhances our understanding of A. baumannii epidemiology. While there is substantial documentation on A. baumannii, a comprehensive study of the antibiotic-resistant mechanisms and the virulence factors contributing to pathogenesis, and their correlation with Sequence Types (STs) remains incompletely elucidated. In this study, we aim to explore the relationship between antimicrobial resistance genes, virulence factor genes, and STs using genomic data from 223 publicly available A. baumannii strains. The core phylogeny analysis revealed five predominant STs in A. baumannii genomes, linked to their geographical sources of isolation. Furthermore, the resistome and virulome of A. baumannii followed an evolutionary pattern consistent with their pan-genome evolution. Among the major STs, we observed significant variations in resistant genes against "aminoglycoside" and "sulphonamide" antibiotics, highlighting the role of genotypic variations in determining resistance profiles. 
Furthermore, the presence of virulence factor genes, particularly exotoxin and nutritional / metabolic factor genes, played a crucial role in distinguishing the major STs, suggesting a potential link between genetic makeup and pathogenicity. Understanding these associations can provide valuable insights into A. baumannii's virulence potential and clinical outcomes, enabling the development of effective strategies to combat infections caused by this opportunistic pathogen.}, }
@article {pmid38415665, year = {2024}, author = {Guillén, R and Salinas, C and Mendoza-Álvarez, A and Rubio Rodríguez, LA and Díaz-de Usera, A and Lorenzo-Salazar, JM and González-Montelongo, R and Flores, C and Rodríguez, F}, title = {Genomic epidemiology of the primary methicillin-resistant Staphylococcus aureus clones causing invasive infections in Paraguayan children.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0301223}, doi = {10.1128/spectrum.03012-23}, pmid = {38415665}, issn = {2165-0497}, abstract = {UNLABELLED: Methicillin-resistant Staphylococcus aureus (MRSA) is one of the major human pathogens. It could carry numerous resistance genes and virulence factors in its genome, some of which are related to the severity of the infection. An observational, descriptive, cross-sectional study was designed to molecularly analyze MRSA isolates that cause invasive infections in Paraguayan children from 2009 to 2013. Ten representative MRSA isolates of the main clonal complex identified were analyzed with short-read paired-end sequencing and assessed for the virulome, resistome, and phylogenetic relationships. All the genetically linked MRSA isolates were recovered from diverse clinical sources, patients, and hospitals at broad gap periods. The pan-genomic analysis of these clones revealed three major and different clonal complexes (CC30, CC5, and CC8), each composed of clones closely related to each other. The CC30 genomes prove to be a successful clone, strongly installed and disseminated throughout our country, and closely related to other CC30 public genomes from the region and the world. The CC5 shows the highest genetic variability, and the CC8 carried the complete arginine catabolic mobile element (ACME), closely related to the USA300-NAE-ACME+, identified as the major cause of CA-MRSA infections in North America. 
Multiple virulence and resistance genes were identified for the first time in this study, highlighting the complex virulence profiles of MRSA circulating in the country. This study opens a wide range of new possibilities for future projects and trials to improve the existing knowledge on the epidemiology of MRSA circulating in Paraguay.
IMPORTANCE: The increasing prevalence of methicillin-resistant Staphylococcus aureus (MRSA) is a public health problem worldwide. The most frequent MRSA clones identified in Paraguay in previous studies (including community and hospital acquired) were the Pediatric (CC5-ST5-IV), the Cordobes-Chilean (CC5-ST5-I), the SouthWest Pacific (CC30-ST30-IV), and the Brazilian (CC8-ST239-III) clones. In this study, the pan-genomic analysis of the most representative MRSA clones circulating in invasive infection in Paraguayan children over the years 2009-2013, such as the CC30-ST30-IV, CC5-ST5-IV, and CC8-ST8-IV, was carried out to evaluate their genetic diversity, their repertoire of virulence factors, and antimicrobial resistance determinants. This revealed multiple virulence and resistance genes, highlighting the complex virulence profiles of MRSA circulating in Paraguay. Our work is the first genomic study of MRSA in Paraguay and will contribute to the development of genomic surveillance in the region and our understanding of the global epidemiology of this pathogen.}, }
@article{pmid38413855,
  author   = {Wang, H and Xia, F and Xia, Y and Li, J and Hu, Y and Deng, Y and Zou, M},
  title    = {Pangenome analysis of {Shewanella} xiamenensis revealed important genetic traits concerning genetic diversity, pathogenicity and antibiotic resistance.},
  journal  = {BMC genomics},
  year     = {2024},
  volume   = {25},
  number   = {1},
  pages    = {216},
  pmid     = {38413855},
  issn     = {1471-2164},
  support  = {No. 2023JJ30942//Natural Science Foundation of Hunan Province/ ; },
  abstract = {BACKGROUND: Shewanella xiamenensis, widely distributed in natural environments, has long been considered as opportunistic pathogen. Recently, significant changes in the resistance spectrum have been observed in S. xiamenensis, due to acquired antibiotic resistance genes. Therefore, a pan-genome analysis was conducted to illuminate the genomic changes in S. xiamenensis.
RESULTS: Phylogenetic analysis revealed three major clusters and three singletons, among which close relationship between several strains was discovered, regardless of their host and niches. The "open" genomes with diversity of accessory and strain-specific genomes took advantage towards diversity environments. The purifying selection pressure was the main force on genome evolution, especially in conservative genes. Only 53 gene families were under positive selection pressure. Phenotypic resistance analysis revealed 21 strains were classified as multi-drug resistance (MDR). Ten types of antibiotic resistance genes and two heavy metal resistance operons were discovered in S. xiamenensis. Mobile genetic elements and horizontal gene transfer increased genome diversity and were closely related to MDR strains. S. xiamenensis carried a variety of virulence genes and macromolecular secretion systems, indicating their important roles in pathogenicity and adaptability. Type IV secretion system was discovered in 15 genomes with various sequence structures, indicating it was originated from different donors through horizontal gene transfer.
CONCLUSIONS: This study provided with a detailed insight into the changes in the pan-genome of S. xiamenensis, highlighting its capability to acquire new mobile genetic elements and resistance genes for its adaptation to environment and pathogenicity to human and animals.},
}
@article{pmid38413611,
  author   = {Go, S and Koo, H and Jung, M and Hong, S and Yi, G and Kim, YM},
  title    = {Pan-chloroplast genomes for accession-specific marker development in {Hibiscus} syriacus.},
  journal  = {Scientific data},
  year     = {2024},
  volume   = {11},
  number   = {1},
  pages    = {246},
  pmid     = {38413611},
  issn     = {2052-4463},
  abstract = {Hibiscus syriacus L. is a renowned ornamental plant. We constructed 95 chloroplast genomes of H. syriacus L. cultivars using a short-read sequencing platform (Illumina) and a long-read sequencing platform (Oxford Nanopore Technology). The following genome assembly, we delineate quadripartite structures encompassing large single-copy, small single-copy, and inverted repeat (IRa and IRb) regions, from 160,231 bp to 161,041 bp. Our comprehensive analyses confirmed the presence of 79 protein-coding genes, 30 tRNA genes, and 4 rRNA genes in the pan-chloroplast genome, consistent with prior research on the H. syriacus chloroplast genome. Subsequent pangenome analysis unveiled widespread genome sequence conservation alongside unique cultivar-specific variant patterns consisting of 193 single-nucleotide polymorphisms and 61 insertions or deletions. The region containing intra-species variant patterns, as identified in this study, has the potential to develop accession-specific molecular markers, enhancing precision in cultivar classification. These findings are anticipated to drive advancements in breeding strategies, augment biodiversity, and unlock the agricultural potential inherent in H. syriacus.},
}
@article{pmid38412041,
  author   = {Dong, X and Jia, H and Yu, Y and Xiang, Y and Zhang, Y},
  title    = {Genomic revisitation and reclassification of the genus {Providencia}.},
  journal  = {mSphere},
  year     = {2024},
  pages    = {e0073123},
  doi      = {10.1128/msphere.00731-23},
  pmid     = {38412041},
  issn     = {2379-5042},
  abstract = {Members of Providencia, although typically opportunistic, can cause severe infections in immunocompromised hosts. Recent advances in genome sequencing provide an opportunity for more precise study of this genus. In this study, we first identified and characterized a novel species named Providencia zhijiangensis sp. nov. It has ≤88.23\% average nucleotide identity (ANI) and ≤31.8\% in silico DNA-DNA hybridization (dDDH) values with all known Providencia species, which fall significantly below the species-defining thresholds. Interestingly, we found that Providencia stuartii and Providencia thailandensis actually fall under the same species, evidenced by an ANI of 98.59\% and a dDDH value of 90.4\%. By fusing ANI with phylogeny, we have reclassified 545 genomes within this genus into 20 species, including seven unnamed taxa (provisionally titled Taxon 1-7), which can be further subdivided into 23 lineages. Pangenomic analysis identified 1,550 genus-core genes in Providencia, with coenzymes being the predominant category at 10.56\%, suggesting significant intermediate metabolism activity. Resistance analysis revealed that most lineages of the genus (82.61\%, 19/23) carry a high number of antibiotic-resistance genes (ARGs) and display diverse resistance profiles. Notably, the majority of ARGs are located on plasmids, underscoring the significant role of plasmids in the resistance evolution within this genus. Three species or lineages (P. stuartii, Taxon 3, and Providencia hangzhouensis L12) that possess the highest number of carbapenem-resistance genes suggest their potential influence on clinical treatment. These findings underscore the need for continued surveillance and study of this genus, particularly due to their role in harboring antibiotic-resistance genes.
IMPORTANCE: The Providencia genus, known to harbor opportunistic pathogens, has been a subject of interest due to its potential to cause severe infections, particularly in vulnerable individuals. Our research offers groundbreaking insights into this genus, unveiling a novel species, Providencia zhijiangensis sp. nov., and highlighting the need for a re-evaluation of existing classifications. Our comprehensive genomic assessment offers a detailed classification of 545 genomes into distinct species and lineages, revealing the rich biodiversity and intricate species diversity within the genus. The substantial presence of antibiotic-resistance genes in the Providencia genus underscores potential challenges for public health and clinical treatments. Our study highlights the pressing need for increased surveillance and research, enriching our understanding of antibiotic resistance in this realm.},
}
@article{pmid38412007,
  author   = {Kim, M and Kim, W and Park, Y and Jung, J and Park, W},
  title    = {Lineage-specific evolution of {Aquibium}, a close relative of {Mesorhizobium}, during habitat adaptation.},
  journal  = {Applied and environmental microbiology},
  year     = {2024},
  pages    = {e0209123},
  doi      = {10.1128/aem.02091-23},
  pmid     = {38412007},
  issn     = {1098-5336},
  abstract = {The novel genus Aquibium that lacks nitrogenase was recently reclassified from the Mesorhizobium genus. The genomes of Aquibium species isolated from water were smaller and had higher GC contents than those of Mesorhizobium species. Six Mesorhizobium species lacking nitrogenase were found to exhibit low similarity in the average nucleotide identity values to the other 24 Mesorhizobium species. Therefore, they were classified as the non-N2-fixing Mesorhizobium lineage (N-ML), an evolutionary intermediate species. The results of our phylogenomic analyses and the loss of Rhizobiales-specific fur/mur indicated that Mesorhizobium species may have evolved from Aquibium species through an ecological transition. Halotolerant and alkali-resistant Aquibium and Mesorhizobium microcysteis belonging to N-ML possessed many tripartite ATP-independent periplasmic transporter and sodium/proton antiporter subunits composed of seven genes (mrpABCDEFG). These genes were not present in the N2-fixing Mesorhizobium lineage (ML), suggesting that genes acquired for adaptation to highly saline and alkaline environments were lost during the evolution of ML as the habitat changed to soil. Land-to-water habitat changes in Aquibium species, close relatives of Mesorhizobium species, could have influenced their genomic evolution by the gain and loss of genes. Our study indicated that lineage-specific evolution could have played a significant role in shaping their genome architecture and conferring their ability to thrive in different habitats.
IMPORTANCE: Phylogenetic analyses revealed that the Aquibium lineage (AL) and non-N2-fixing Mesorhizobium lineage (N-ML) were monophyletically grouped into distinct clusters separate from the N2-fixing Mesorhizobium lineage (ML). The N-ML, an evolutionary intermediate species having characteristics of both ancestral and descendant species, could provide a genomic snapshot of the genetic changes that occur during adaptation. Genomic analyses of AL, N-ML, and ML revealed that changes in the levels of genes related to transporters, chemotaxis, and nitrogen fixation likely reflect adaptations to different environmental conditions. Our study sheds light on the complex and dynamic nature of the evolution of rhizobia in response to changes in their environment and highlights the crucial role of genomic analysis in understanding these processes.},
}
@article{pmid38411865,
  author   = {Seo, B and Jeon, K and Kim, WK and Jang, YJ and Cha, KH and Ko, G},
  title    = {Strain-Specific Anti-Inflammatory Effects of {Faecalibacterium} prausnitzii Strain {KBL1027} in {Koreans}.},
  journal  = {Probiotics and antimicrobial proteins},
  year     = {2024},
  pmid     = {38411865},
  issn     = {1867-1314},
  support  = {E0170600-07//Korea Food Research Institute/ ; RS-2023-00223831//National Research Foundation of Korea/ ; },
  abstract = {Faecalibacterium prausnitzii is one of the most dominant commensal bacteria in the human gut, and certain anti-inflammatory functions have been attributed to a single microbial anti-inflammatory molecule (MAM). Simultaneously, substantial diversity among F. prausnitzii strains is acknowledged, emphasizing the need for strain-level functional studies aimed at developing innovative probiotics. Here, two distinct F. prausnitzii strains, KBL1026 and KBL1027, were isolated from Korean donors, exhibiting notable differences in the relative abundance of F. prausnitzii. Both strains were identified as the core Faecalibacterium amplicon sequence variant (ASV) within the healthy Korean cohort, and their MAM sequences showed a high similarity of 98.6\%. However, when a single strain was introduced to mice with dextran sulfate sodium (DSS)-induced colitis, KBL1027 showed the most significant ameliorative effects, including alleviation of colonic inflammation and restoration of gut microbial dysbiosis. Moreover, the supernatant from KBL1027 elevated the secretion of IL-10 cytokine more than that of KBL1026 in mouse bone marrow-derived macrophage (BMDM) cells, suggesting that the strain-specific, anti-inflammatory efficacy of KBL1027 might involve effector compounds other than MAM. Through analysis of the Faecalibacterium pan-genome and comparative genomics, strain-specific functions related to extracellular polysaccharide biosynthesis were identified in KBL1027, which could contribute to the observed morphological disparities. Collectively, our findings highlight the strain-specific, anti-inflammatory functions of F. prausnitzii, even within the same core ASV, emphasizing the influence of their human origin.},
}
@article{pmid38410456,
  author   = {Kogay, R and Wolf, YI and Koonin, EV},
  title    = {Defense systems and horizontal gene transfer in bacteria.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2024},
  doi      = {10.1101/2024.02.09.579689},
  pmid     = {38410456},
  abstract = {Horizontal gene transfer (HGT) is a fundamental process in the evolution of prokaryotes, making major contributions to diversification and adaptation. Typically, HGT is facilitated by mobile genetic elements (MGEs), such as conjugative plasmids and phages that generally impose fitness costs on their hosts. However, a substantial fraction of bacterial genes is involved in defense mechanisms that limit the propagation of MGEs, raising the possibility that they can actively restrict HGT. Here we examine whether defense systems curb HGT by exploring the connections between HGT rate and the presence of 73 defense systems in 12 bacterial species. We found that only 6 defense systems, 3 of which are different CRISPR-Cas subtypes, are associated with the reduced gene gain rate on the scale of species evolution. The hosts of such defense systems tend to have a smaller pangenome size and harbor fewer phage-related genes compared to genomes lacking these systems, suggesting that these defense mechanisms inhibit HGT by limiting the integration of prophages. We hypothesize that restriction of HGT by defense systems is species-specific and depends on various ecological and genetic factors, including the burden of MGEs and fitness effect of HGT in bacterial populations.},
}
@article{pmid38408562,
  author   = {Huy, NQ and Linh, NC and Son, NT and Ngoc, DB and Tam, TTT and Hang, LTT and Thuyet, BT and Song, LH and Van Quyen, D and Hayer, J and Bañuls, AL and Sy, BT},
  title    = {Genomic insights into an extensively drug-resistant and hypervirulent {Burkholderia} dolosa {N149} isolate of a novel sequence type ({ST2237}) from a {Vietnamese} patient hospitalized for stroke.},
  journal  = {Journal of global antimicrobial resistance},
  year     = {2024},
  doi      = {10.1016/j.jgar.2024.02.009},
  pmid     = {38408562},
  issn     = {2213-7173},
  abstract = {OBJECTIVES: Burkholderia dolosa is a clinically important opportunistic pathogen in inpatients. Here we characterized an extensively drug-resistant and hypervirulent B. dolosa isolate from a patient hospitalized for stroke.
METHODS: Resistance to 41 antibiotics was tested with the agar disc diffusion, minimum inhibitory concentration, or broth microdilution method. The complete genome was assembled using short-reads and long-reads and the hybrid de novo assembly method. Allelic profiles obtained by multilocus sequence typing were analyzed using the PubMLST database. Antibiotic-resistance and virulence genes were predicted in silico using public databases and the "baargin" workflow. B. dolosa N149 phylogenetic relationships with all available B. dolosa strains and Burkholderia cepacia complex strains were analyzed using the pangenome obtained with Roary.
RESULTS: B. dolosa N149 displayed extensive resistance to 31 antibiotics and intermediate resistance to 4 antibiotics. The complete genome included three circular chromosomes (6,338,630 bp in total) and one plasmid (167,591 bp). Genotypic analysis revealed various gene clusters (acr, amr, amp, emr, ade, bla and tet) associated with resistance to 35 antibiotic classes. The major intrinsic resistance mechanisms were multidrug efflux pump alterations, inactivation and reduced permeability of targeted antibiotics. Moreover, 91 virulence genes (encoding proteins involved in adherence, formation of capsule, biofilm and colony, motility, phagocytosis inhibition, secretion systems, protease secretion, transmission and quorum sensing) were identified. B. dolosa N149 was assigned to a novel sequence type (ST2237) and formed a mono-phylogenetic clade separated from other B. dolosa strains.
CONCLUSION: This study provided insights into the antimicrobial resistance and virulence mechanisms of B. dolosa.},
}
@article{pmid38407244,
  author   = {Selvaraj Anand, S and Wu, CT and Bremer, J and Bhatti, M and Treangen, TJ and Kalia, A and Shelburne, SA and Shropshire, WC},
  title    = {Identification of a novel {CG307} sub-clade in third-generation-cephalosporin-resistant {Klebsiella} pneumoniae causing invasive infections in the {USA}.},
  journal  = {Microbial genomics},
  year     = {2024},
  volume   = {10},
  number   = {2},
  doi      = {10.1099/mgen.0.001201},
  pmid     = {38407244},
  issn     = {2057-5858},
  abstract = {Despite the notable clinical impact, recent molecular epidemiology regarding third-generation-cephalosporin-resistant (3GC-R) Klebsiella pneumoniae in the USA remains limited. We performed whole-genome sequencing of 3GC-R K. pneumoniae bacteraemia isolates collected from March 2016 to May 2022 at a tertiary care cancer centre in Houston, TX, USA, using Illumina and Oxford Nanopore Technologies platforms. A comprehensive comparative genomic analysis was performed to dissect population structure, transmission dynamics and pan-genomic signatures of our 3GC-R K. pneumoniae population. Of the 178 3GC-R K. pneumoniae bacteraemias that occurred during our study time frame, we were able to analyse 153 (86 \%) bacteraemia isolates, 126 initial and 27 recurrent isolates. While isolates belonging to the widely prevalent clonal group (CG) 258 were rarely observed, the predominant CG, 307, accounted for 37 (29 \%) index isolates and displayed a significant correlation (Pearson correlation test P value=0.03) with the annual frequency of 3GC-R K. pneumoniae bacteraemia. Interestingly, only 11 \% (4/37) of CG307 isolates belonged to the commonly detected 'Texas-specific' clade that has been observed in previous Texas-based K. pneumoniae antimicrobial-resistance surveillance studies. We identified nearly half of our CG307 isolates (n=18) belonged to a novel, monophyletic CG307 sub-clade characterized by the chromosomally encoded bla SHV-205 and unique accessory genome content. This CG307 sub-clade was detected in various regions of the USA, with genome sequences from 24 additional strains becoming recently available in the National Center for Biotechnology Information (NCBI) SRA database. Collectively, this study underscores the emergence and dissemination of a distinct CG307 sub-clade that is a prevalent cause of 3GC-R K. pneumoniae bacteraemia among cancer patients seen in Houston, TX, and has recently been isolated throughout the USA.},
}
@article{pmid38402521,
  author   = {van Westerhoven, AC and Aguilera-Galvez, C and Nakasato-Tagami, G and Shi-Kunne, X and {Martinez de la Parte}, E and Chavarro-Carrero, E and Meijer, HJG and Feurtey, A and Maryani, N and Ordóñez, N and Schneiders, H and Nijbroek, K and Wittenberg, AHJ and Hofstede, R and García-Bastidas, F and Sørensen, A and Swennen, R and Drenth, A and Stukenbrock, EH and Kema, GHJ and Seidl, MF},
  title    = {Segmental duplications drive the evolution of accessory regions in a major crop pathogen.},
  journal  = {The New phytologist},
  year     = {2024},
  doi      = {10.1111/nph.19604},
  pmid     = {38402521},
  issn     = {1469-8137},
  support  = {AG - 442//Bill and Melinda Gates Foundation/ ; 20 04 04 02//Stichting Dioraphte/ ; },
  abstract = {Many pathogens evolved compartmentalized genomes with conserved core and variable accessory regions (ARs) that carry effector genes mediating virulence. The fungal plant pathogen Fusarium oxysporum has such ARs, often spanning entire chromosomes. The presence of specific ARs influences the host range, and horizontal transfer of ARs can modify the pathogenicity of the receiving strain. However, how these ARs evolve in strains that infect the same host remains largely unknown. We defined the pan-genome of 69 diverse F. oxysporum strains that cause Fusarium wilt of banana, a significant constraint to global banana production, and analyzed the diversity and evolution of the ARs. Accessory regions in F. oxysporum strains infecting the same banana cultivar are highly diverse, and we could not identify any shared genomic regions and in planta-induced effectors. We demonstrate that segmental duplications drive the evolution of ARs. Furthermore, we show that recent segmental duplications specifically in accessory chromosomes cause the expansion of ARs in F. oxysporum. Taken together, we conclude that extensive recent duplications drive the evolution of ARs in F. oxysporum, which contribute to the evolution of virulence.},
}
@article{pmid38399738,
  author   = {Straková, D and Sánchez-Porro, C and de la Haba, RR and Ventosa, A},
  title    = {Decoding the Genomic Profile of the {Halomicroarcula} Genus: Comparative Analysis and Characterization of Two Novel Species.},
  journal  = {Microorganisms},
  year     = {2024},
  volume   = {12},
  number   = {2},
  pmid     = {38399738},
  issn     = {2076-2607},
  support  = {PID2020-118136GB-I00//MCIN/AEI/10.13039/501100011033/ ; P20_01066 and BIO-213//Junta de Andalucía/ ; },
  abstract = {The genus Halomicroarcula, classified within the family Haloarculaceae, presently comprises eight haloarchaeal species isolated from diverse saline habitats, such as solar salterns, hypersaline soils, marine salt, and marine algae. Here, a detailed taxogenomic study and comparative genomic analysis of the genus Halomicroarcula was carried out. In addition, two strains, designated S1CR25-12[T] and S3CR25-11[T], that were isolated from hypersaline soils located in the Odiel Saltmarshes in Huelva (Spain) were included in this study. The 16S rRNA and rpoB' gene sequence analyses affiliated the two strains to the genus Halomicroarcula. Typically, the species of the genus Halomicroarcula possess multiple heterogeneous copies of the 16S rRNA gene, which can lead to misclassification of the taxa and overestimation of the prokaryotic diversity. In contrast, the application of overall genome relatedness indexes (OGRIs) augments the capacity for the precise taxonomic classification and categorization of prokaryotic organisms. The relatedness indexes of the two new isolates, particularly digital DNA-DNA hybridization (dDDH), orthologous average nucleotide identity (OrthoANI), and average amino acid identity (AAI), confirmed that strains S1CR25-12[T] (= CECT 30620[T] = CCM 9252[T]) and S3CR25-11[T] (= CECT 30621[T] = CCM 9254[T]) constitute two novel species of the genus Halomicroarcula. The names Halomicroarcula saliterrae sp. nov. and Halomicroarcula onubensis sp. nov. are proposed for S1CR25-12[T] and S3CR25-11[T], respectively. Metagenomic fragment recruitment analysis, conducted using seven shotgun metagenomic datasets, revealed that the species belonging to the genus Halomicroarcula were predominantly recruited from hypersaline soils found in the Odiel Saltmarshes and the ponds of salterns with high salt concentrations. This reinforces the understanding of the extreme halophilic characteristics associated with the genus Halomicroarcula. Finally, comparing pan-genomes across the twenty Halomicroarcula and Haloarcula species allowed for the identification of commonalities and differences between the species of these two related genera.},
}
@article{pmid38399654,
  author   = {Rhoads, DD and Pummill, J and Alrubaye, AAK},
  title    = {Molecular Genomic Analyses of {Enterococcus} cecorum from Sepsis Outbreaks in Broilers.},
  journal  = {Microorganisms},
  year     = {2024},
  volume   = {12},
  number   = {2},
  doi      = {10.3390/microorganisms12020250},
  pmid     = {38399654},
  issn     = {2076-2607},
  support  = {none//Arkansas Biosciences Institute/ ; },
  abstract = {Extensive genomic analyses of Enterococcus cecorum isolates from sepsis outbreaks in broilers suggest a polyphyletic origin, likely arising from core genome mutations rather than gene acquisition. This species is a normal intestinal flora of avian species with particular isolates associated with osteomyelitis. More recently, this species has been associated with sepsis outbreaks affecting broilers during the first 3 weeks post-hatch. Understanding the genetic and management basis of this new phenotype is critical for developing strategies to mitigate this emerging problem. Phylogenomic analyses of 227 genomes suggest that sepsis isolates are polyphyletic and closely related to both commensal and osteomyelitis isolate genomes. Pangenome analyses detect no gene acquisitions that distinguish all the sepsis isolates. Core genome single nucleotide polymorphism analyses have identified a number of mutations, affecting the protein-coding sequences, that are enriched in sepsis isolates. The analysis of the protein substitutions supports the mutational origins of sepsis isolates.},
}
@article{pmid38397433,
  author   = {Nedashkovskaya, O and Balabanova, L and Otstavnykh, N and Zhukova, N and Detkova, E and Seitkalieva, A and Bystritskaya, E and Noskova, Y and Tekutyeva, L and Isaeva, M},
  title    = {In-Depth Genome Characterization and Pan-Genome Analysis of Strain {KMM} 296, a Producer of Highly Active Alkaline Phosphatase; Proposal for the Reclassification of {Cobetia} litoralis and {Cobetia} pacifica as the Later Heterotypic Synonyms of {Cobetia} amphilecti and {Cobetia} marina, and Emended Description of the Species {Cobetia} amphilecti and {Cobetia} marina.},
  journal  = {Biomolecules},
  year     = {2024},
  volume   = {14},
  number   = {2},
  doi      = {10.3390/biom14020196},
  pmid     = {38397433},
  issn     = {2218-273X},
  support  = {15.BRK.21.0004 (contract no. 075-15-2021-1052)//the Ministry of Science and Higher Education, Russian Federation/ ; },
  abstract = {A strictly aerobic, Gram-stain-negative, rod-shaped, and motile bacterium, designated strain KMM 296, isolated from the coelomic fluid of the mussel Crenomytilus grayanus, was investigated in detail due to its ability to produce a highly active alkaline phosphatase CmAP of the structural family PhoA. A previous taxonomic study allocated the strain to the species Cobetia marina, a member of the family Halomonadaceae of the class Gammaproteobacteria. However, 16S rRNA gene sequencing showed KMM 296's relatedness to Cobetia amphilecti NRIC 0815[T]. The isolate grew with 0.5-19\% NaCl at 4-42 °C and hydrolyzed Tweens 20 and 40 and L-tyrosine. The DNA G+C content was 62.5 mol\%. The prevalent fatty acids were C18:1 ω7c, C12:0 3-OH, C18:1 ω7c, C12:0, and C17:0 cyclo. The polar lipid profile was characterized by the presence of phosphatidylethanolamine, phosphatidylglycerol, phosphatidic acid, and also an unidentified aminolipid, phospholipid, and a few unidentified lipids. The major respiratory quinone was Q-8. According to phylogenomic and chemotaxonomic evidence, and the nearest neighbors, the strain KMM 296 represents a member of the species C. amphilecti. The genome-based analysis of C. amphilecti NRIC 0815[T] and C. litoralis NRIC 0814[T] showed their belonging to a single species. In addition, the high similarity between the C. pacifica NRIC 0813[T] and C. marina LMG 2217[T] genomes suggests their affiliation to one species. Based on the rules of priority, C. litoralis should be reclassified as a later heterotypic synonym of C. amphilecti, and C. pacifica is a later heterotypic synonym of C. marina. The emended descriptions of the species C. amphilecti and C. marina are also proposed.},
}
@article{pmid38396752,
  author   = {Evseev, PV and Shneider, MM and Kolupaeva, LV and Kasimova, AA and Timoshina, OY and Perepelov, AV and Shpirt, AM and Shelenkov, AA and Mikhailova, YV and Suzina, NE and Knirel, YA and Miroshnikov, KA and Popova, AV},
  title    = {New {Obolenskvirus} Phages {Brutus} and {Scipio}: Biology, Evolution, and Phage-Host Interaction.},
  journal  = {International journal of molecular sciences},
  year     = {2024},
  volume   = {25},
  number   = {4},
  doi      = {10.3390/ijms25042074},
  pmid     = {38396752},
  issn     = {1422-0067},
  support  = {20-75-10113//Russian Science Foundation/ ; },
  abstract = {Two novel virulent phages of the genus Obolenskvirus infecting Acinetobacter baumannii, a significant nosocomial pathogen, have been isolated and studied. Phages Brutus and Scipio were able to infect A. baumannii strains belonging to the K116 and K82 capsular types, respectively. The biological properties and genomic organization of the phages were characterized. Comparative genomic, phylogenetic, and pangenomic analyses were performed to investigate the relationship of Brutus and Scipio to other bacterial viruses and to trace the possible origin and evolutionary history of these phages and other representatives of the genus Obolenskvirus. The investigation of enzymatic activity of the tailspike depolymerase encoded in the genome of phage Scipio, the first reported virus infecting A. baumannii of the K82 capsular type, was performed. The study of new representatives of the genus Obolenskvirus and mechanisms of action of depolymerases encoded in their genomes expands knowledge about the diversity of viruses within this taxonomic group and strategies of Obolenskvirus-host bacteria interaction.},
}
@article{pmid38396294,
  author   = {Sepich-Poore, GD and McDonald, D and Kopylova, E and Guccione, C and Zhu, Q and Austin, G and Carpenter, C and Fraraccio, S and Wandro, S and Kosciolek, T and Janssen, S and Metcalf, JL and Song, SJ and Kanbar, J and Miller-Montgomery, S and Heaton, R and Mckay, R and Patel, SP and Swafford, AD and Korem, T and Knight, R},
  title    = {Robustness of cancer microbiome signals over a broad range of methodological variation.},
  journal  = {Oncogene},
  year     = {2024},
  pmid     = {38396294},
  issn     = {1476-5594},
  support  = {U24 CA248454/CA/NCI NIH HHS/United States ; },
  abstract = {In 2020, we identified cancer-specific microbial signals in The Cancer Genome Atlas (TCGA) [1]. Multiple peer-reviewed papers independently verified or extended our findings [2-12]. Given this impact, we carefully considered concerns by Gihawi et al. [13] that batch correction and database contamination with host sequences artificially created the appearance of cancer type-specific microbiomes. (1) We tested batch correction by comparing raw and Voom-SNM-corrected data per-batch, finding predictive equivalence and significantly similar features. We found consistent results with a modern microbiome-specific method (ConQuR [14]), and when restricting to taxa found in an independent, highly-decontaminated cohort. (2) Using Conterminator [15], we found low levels of human contamination in our original databases ({\textasciitilde}1\% of genomes). We demonstrated that the increased detection of human reads in Gihawi et al. [13] was due to using a newer human genome reference. (3) We developed Exhaustive, a method twice as sensitive as Conterminator, to clean RefSeq. We comprehensively host-deplete TCGA with many human (pan)genome references. We repeated all analyses with this and the Gihawi et al. [13] pipeline, and found cancer type-specific microbiomes. These extensive re-analyses and updated methods validate our original conclusion that cancer type-specific microbial signatures exist in TCGA, and show they are robust to methodology.},
}
@article{pmid38389535,
  author   = {Patakova, P and Vasylkivska, M and Sedlar, K and Jureckova, K and Bezdicek, M and Lovecka, P and Branska, B and Kastanek, P and Krofta, K},
  title    = {Whole genome sequencing and characterization of {Pantoea} agglomerans {DBM} 3797, endophyte, isolated from fresh hop ({Humulus} lupulus {L.}).},
  journal  = {Frontiers in microbiology},
  year     = {2024},
  volume   = {15},
  pages    = {1305338},
  pmid     = {38389535},
  issn     = {1664-302X},
  abstract = {BACKGROUND: This paper brings new information about the genome and phenotypic characteristics of Pantoea agglomerans strain DBM 3797, isolated from fresh Czech hop (Humulus lupulus) in the Saaz hop-growing region. Although P. agglomerans strains are frequently isolated from different materials, there are not usually thoroughly characterized even if they have versatile metabolism and those isolated from plants may have a considerable potential for application in agriculture as a support culture for plant growth.
METHODS: P. agglomerans DBM 3797 was cultured under aerobic and anaerobic conditions, its metabolites were analyzed by HPLC and it was tested for plant growth promotion abilities, such as phosphate solubilization, siderophore and indol-3-acetic acid productions. In addition, genomic DNA was extracted, sequenced and de novo assembly was performed. Further, genome annotation, pan-genome analysis and selected genome analyses, such as CRISPR arrays detection, antibiotic resistance and secondary metabolite genes identification were carried out.
RESULTS AND DISCUSSION: The typical appearance characteristics of the strain include the formation of symplasmata in submerged liquid culture and the formation of pale yellow colonies on agar. The genetic information of the strain (in total 4.8 Mb) is divided between a chromosome and two plasmids. The strain lacks any CRISPR-Cas system but is equipped with four restriction-modification systems. The phenotypic analysis focused on growth under both aerobic and anaerobic conditions, as well as traits associated with plant growth promotion. At both levels (genomic and phenotypic), the production of siderophores, indoleacetic acid-derived growth promoters, gluconic acid, and enzyme activities related to the degradation of complex organic compounds were found. Extracellular gluconic acid production under aerobic conditions (up to 8 g/l) is probably the result of glucose oxidation by the membrane-bound pyrroloquinoline quinone-dependent enzyme glucose dehydrogenase. The strain has a number of properties potentially beneficial to the hop plant and its closest relatives include the strains also isolated from the aerial parts of plants, yet its safety profile needs to be addressed in follow-up research.},
}
@article{pmid38389084,
  year     = {2024},
  author   = {Miao, J and Wei, X and Cao, C and Sun, J and Xu, Y and Zhang, Z and Wang, Q and Pan, Y and Wang, Z},
  title    = {Pig pangenome graph reveals functional features of non-reference sequences.},
  journal  = {Journal of animal science and biotechnology},
  volume   = {15},
  number   = {1},
  pages    = {32},
  pmid     = {38389084},
  issn     = {1674-9782},
  support  = {2022YFF1000500//National Key Research and Development Program of China/ ; 31941007//National Natural Science Foundation of China/ ; 2016C02054-2//Zhejiang province agriculture (livestock) varieties breeding Key Technology R\&D Program/ ; },
  abstract = {BACKGROUND: The reliance on a solitary linear reference genome has imposed a significant constraint on our comprehensive understanding of genetic variation in animals. This constraint is particularly pronounced for non-reference sequences (NRSs), which have not been extensively studied.
RESULTS: In this study, we constructed a pig pangenome graph using 21 pig assemblies and identified 23,831 NRSs with a total length of 105 Mb. Our findings revealed that NRSs were more prevalent in breeds exhibiting greater genetic divergence from the reference genome. Furthermore, we observed that NRSs were rarely found within coding sequences, while NRS insertions were enriched in immune-related Gene Ontology terms. Notably, our investigation also unveiled a close association between novel genes and the immune capacity of pigs. We observed substantial differences in terms of frequencies of NRSs between Eastern and Western pigs, and the heat-resistant pigs exhibited a substantial number of NRS insertions in an 11.6 Mb interval on chromosome X. Additionally, we discovered a 665 bp insertion in the fourth intron of the TNFRSF19 gene that may be associated with the ability of heat tolerance in Southern Chinese pigs.
CONCLUSIONS: Our findings demonstrate the potential of a graph genome approach to reveal important functional features of NRSs in pig populations.},
}
@article{pmid38388650,
  year     = {2024},
  author   = {Pena-Fernández, N and Ocejo, M and van der Graaf-van Bloois, L and Lavín, JL and Kortabarria, N and Collantes-Fernández, E and Hurtado, A and Aduriz, G},
  title    = {Comparative pangenomic analysis of {Campylobacter} fetus isolated from {Spanish} bulls and other mammalian species.},
  journal  = {Scientific reports},
  volume   = {14},
  number   = {1},
  pages    = {4347},
  pmid     = {38388650},
  issn     = {2045-2322},
  support  = {Pre2018-086113 funded by MCIN/AEI/ 10.13039/501100011033 and by "ESF Investing in your future"//Ministerio de Ciencia e Innovación/ ; },
  abstract = {Campylobacter fetus comprises two closely related mammal-associated subspecies: Campylobacter fetus subsp. fetus (Cff) and Campylobacter fetus subsp. venerealis (Cfv). The latter causes bovine genital campylobacteriosis, a sexually-transmitted disease endemic in Spain that results in significant economic losses in the cattle industry. Here, 33 C. fetus Spanish isolates were whole-genome sequenced and compared with 62 publicly available C. fetus genomes from other countries. Genome-based taxonomic identification revealed high concordance with in silico PCR, confirming Spanish isolates as Cff (n = 4), Cfv (n = 9) and Cfv biovar intermedius (Cfvi, n = 20). MLST analysis assigned the Spanish isolates to 6 STs, including three novel: ST-76 and ST-77 for Cfv and ST-78 for Cff. Core genome SNP phylogenetic analysis of the 95 genomes identified multiple clusters, revealing associations at subspecies and biovar level between genomes with the same ST and separating the Cfvi genomes from Spain and other countries. A genome-wide association study identified pqqL as a Cfv-specific gene and a potential candidate for more accurate identification methods. Functionality analysis revealed variations in the accessory genome of C. fetus subspecies and biovars that deserve further studies. These results provide valuable information about the regional variants of C. fetus present in Spain and the genetic diversity and predicted functionality of the different subspecies.},
}
@article{pmid38385549,
  year     = {2024},
  author   = {Arizala, D and Arif, M},
  title    = {Impact of homologous recombination on core genome evolution and host adaptation of {Pectobacterium} parmentieri.},
  journal  = {Genome biology and evolution},
  doi      = {10.1093/gbe/evae032},
  pmid     = {38385549},
  issn     = {1759-6653},
  abstract = {Homologous recombination is a major force mechanism driving bacterial evolution, host adaptability and acquisition of novel virulence traits. Pectobacterium parmentieri is a plant bacterial pathogen distributed worldwide, primarily affecting potatoes, by causing soft rot and blackleg diseases. The goal of this investigation was to understand the impact of homologous recombination on the genomic evolution of P. parmentieri. Analysis of P. parmentieri genomes using Roary revealed a dynamic pan-genome with 3,742 core genes and over 55\% accessory genome variability. Bayesian population structure analysis identified seven lineages, indicating species heterogeneity. ClonalFrameML analysis displayed 5,125 recombination events, with the lineage 4 exhibiting the highest events. fastGEAR analysis identified 486 ancestral and 941 recent recombination events ranging 43 bp - 119 kb and 36 bp - 13.96 kb, respectively, suggesting ongoing adaptation. Notably, 11\% (412 genes) of the core genome underwent recent recombination, with lineage 1 as the main donor. The prevalence of recent recombination (double compared to ancient) events implies continuous adaptation, possibly driven by global potato trade. Recombination events were found in genes involved in vital cellular processes (DNA replication, DNA repair, RNA processing, homeostasis, and metabolism), pathogenicity determinants (type secretion systems, cell-wall degrading enzymes, iron scavengers, lipopolysaccharides, flagellum, etc.), antimicrobial compounds (phenazine and colicin) and even CRISPR-Cas genes. Overall, these results emphasize the potential role of homologous recombination in P. parmentieri's evolutionary dynamics, influencing host colonization, pathogenicity, adaptive immunity, and ecological fitness.},
}
@article{pmid38385476,
  year     = {2024},
  author   = {Tariq, DE},
  title    = {Pangenomic analyses of tuberculosis strains to identify resistomes using computational approaches.},
  journal  = {JPMA. The Journal of the Pakistan Medical Association},
  volume   = {74},
  number   = {1 (Supple-2)},
  pages    = {S74--S78},
  doi      = {10.47391/JPMA-DUHS-S15},
  pmid     = {38385476},
  issn     = {0030-9982},
  abstract = {OBJECTIVE: To locate resistomes in tuberculosis strains, to determine the severity of drug resistance, and to infer its implications with respect to high tuberculosis prevalence in a Third World setting.
METHODS: The pangenomic study was conducted from October 2022 to January 2023 in Sir Syed University of Engineering and Technology, Karachi, and comprised 2012-22 data on multiple sequence alignment to assess the genetic evolution of tuberculosis strains. Antibiotic resistance drug classes were identified using the Canadian Antibiotic Resistance Database, which entailed multidrug-resistant and extremely drug-resistant strains. Also, GenBank was used for tuberculosis genome FASTA (fast-all; nucleotide and protein sequence representation) files, prediction of resistome sequences on the basis of Canadian Antibiotic Resistance Database, and multiple sequence alignment was done in Mauve.
RESULTS: Evolutionarily, the 6 strains identified were structurally similar with polymorphisms in their core chromosomal regions. Their resistome genes showed perfect hits for isoniazid, rifamycin, cephalosporin, fluoroquinolone, aminoglycosides, penem, penam and cephamycin.
CONCLUSION: Drugs discovered in antibiotic resistance genes are now less effective in treatment, and have the potential to develop into more dangerous bacteria, if not monitored. For treatment, staying long durations in hospitals for quality healthcare and supervision in third world countries is unaffordable.},
}
@article{pmid38379925,
  year     = {2024},
  author   = {Turco, S and Russo, S and Pietrucci, D and Filippi, A and Milanesi, M and Luzzago, C and Garbarino, C and Palladini, G and Chillemi, G and Ricchi, M},
  title    = {High clonality of {Mycobacterium} avium subsp. paratuberculosis field isolates from red deer revealed by two different methodological approaches of comparative genomic analysis.},
  journal  = {Frontiers in veterinary science},
  volume   = {11},
  pages    = {1301667},
  pmid     = {38379925},
  issn     = {2297-1769},
  abstract = {Mycobacterium avium subsp. paratuberculosis (MAP) is the aetiological agent of paratuberculosis (Johne's disease) in both domestic and wild ruminants. In the present study, using a whole-genome sequence (WGS) approach, we investigated the genetic diversity of 15 Mycobacterium avium field strains isolated in the last 10 years from red deer inhabiting the Stelvio National Park and affected by paratuberculosis. Combining de novo assembly and a reference-based method, followed by a pangenome analysis, we highlight a very close relationship among 13 MAP field isolates, suggesting that a single infecting event occurred in this population. Moreover, two isolates have been classified as Mycobacterium avium subsp. hominissuis, distinct from the other MAPs under comparison but close to each other. This is the first time that this subspecies has been found in Italy in samples without evident epidemiological correlations, having been isolated in two different locations of the Stelvio National Park and in different years. Our study highlights the importance of a multidisciplinary approach incorporating molecular epidemiology and ecology into traditional infectious disease knowledge in order to investigate the nature of infectious disease in wildlife populations.},
}
@article{pmid38378816,
  year     = {2024},
  author   = {Schreiber, M and Jayakodi, M and Stein, N and Mascher, M},
  title    = {Plant pangenomes for crop improvement, biodiversity and evolution.},
  journal  = {Nature reviews. Genetics},
  pmid     = {38378816},
  issn     = {1471-0064},
  abstract = {Plant genome sequences catalogue genes and the genetic elements that regulate their expression. Such inventories further research aims as diverse as mapping the molecular basis of trait diversity in domesticated plants or inquiries into the origin of evolutionary innovations in flowering plants millions of years ago. The transformative technological progress of DNA sequencing in the past two decades has enabled researchers to sequence ever more genomes with greater ease. Pangenomes - complete sequences of multiple individuals of a species or higher taxonomic unit - have now entered the geneticists' toolkit. The genomes of crop plants and their wild relatives are being studied with translational applications in breeding in mind. But pangenomes are applicable also in ecological and evolutionary studies, as they help classify and monitor biodiversity across the tree of life, deepen our understanding of how plant species diverged and show how plants adapt to changing environments or new selection pressures exerted by human beings.},
}
@article{pmid38376942,
  year     = {2024},
  author   = {Truong, TC and Park, H and Kim, JH and Tran, VT and Kim, W},
  title    = {The evolutionary phylodynamics of human parechovirus {A} type 3 reveal multiple recombination events in {South Korea}.},
  journal  = {Journal of medical virology},
  volume   = {96},
  number   = {2},
  pages    = {e29477},
  doi      = {10.1002/jmv.29477},
  pmid     = {38376942},
  issn     = {1096-9071},
  support  = {NRF-2021R1C1C2003223//National Research Foundation of Korea/ ; NRF-2022R1A2C2012209//National Research Foundation of Korea/ ; },
  abstract = {Human parechovirus A (HPeV-A) is a causative agent of respiratory and gastrointestinal illnesses, acute flaccid paralysis encephalitis, meningitis, and neonatal sepsis. To clarify the characteristics of HPeV-A infection in children, 391 fecal specimens were collected from January 2014 to October 2015 from patients with acute gastroenteritis in Seoul, South Korea. Of these, 221/391 (56.5\%) HPeV-A positive samples were found in children less than 2 years old. Three HPeV-A genotypes HPeV-A1 (117/221; 52.94\%), HPeV-A3 (100/221; 45.25\%), and HPeV-A6 (4/221; 1.81\%) were detected, among which HPeV-A3 was predominant with the highest recorded value of 58.6\% in 2015. Moreover, recombination events in the Korean HPeV-A3 strains were detected. Phylogenetic analysis revealed that the capsid-encoding regions and noncapsid gene 2A of the four Korean HPeV-A3 strains are closely related to the HPeV-A3 strains isolated in Canada in 2007 (Can82853-01), Japan in 2008 (A308/99), and Taiwan in 2011 (TW-03067-2011) while noncapsid genes P2 (2B-2C) and P3 (3A-3D) are closely related to those of HPeV-A1 strains BNI-788St (Germany in 2008) and TW-71594-2010 (Taiwan in 2010). This first report on the whole-genome analysis of HPeV-A3 in Korea provides insight into the evolving status and pathogenesis of HPeVs in children.},
}
@article{pmid38376382,
  year     = {2024},
  author   = {Cooper, HB and Vezina, B and Hawkey, J and Passet, V and López-Fernández, S and Monk, JM and Brisse, S and Holt, KE and Wyres, KL},
  title    = {A validated pangenome-scale metabolic model for the {Klebsiella} pneumoniae species complex.},
  journal  = {Microbial genomics},
  volume   = {10},
  number   = {2},
  doi      = {10.1099/mgen.0.001206},
  pmid     = {38376382},
  issn     = {2057-5858},
  abstract = {The Klebsiella pneumoniae species complex (KpSC) is a major source of nosocomial infections globally with high rates of resistance to antimicrobials. Consequently, there is growing interest in understanding virulence factors and their association with cellular metabolic processes for developing novel anti-KpSC therapeutics. Phenotypic assays have revealed metabolic diversity within the KpSC, but metabolism research has been neglected due to experiments being difficult and cost-intensive. Genome-scale metabolic models (GSMMs) represent a rapid and scalable in silico approach for exploring metabolic diversity, which compile genomic and biochemical data to reconstruct the metabolic network of an organism. Here we use a diverse collection of 507 KpSC isolates, including representatives of globally distributed clinically relevant lineages, to construct the most comprehensive KpSC pan-metabolic model to date, KpSC pan v2. Candidate metabolic reactions were identified using gene orthology to known metabolic genes, prior to manual curation via extensive literature and database searches. The final model comprised a total of 3550 reactions, 2403 genes and can simulate growth on 360 unique substrates. We used KpSC pan v2 as a reference to derive strain-specific GSMMs for all 507 KpSC isolates, and compared these to GSMMs generated using a prior KpSC pan-reference (KpSC pan v1) and two single-strain references. We show that KpSC pan v2 includes a greater proportion of accessory reactions (8.8 \%) than KpSC pan v1 (2.5 \%). GSMMs derived from KpSC pan v2 also generate more accurate growth predictions, with high median accuracies of 95.4 \% (aerobic, n=37 isolates) and 78.8 \% (anaerobic, n=36 isolates) for 124 matched carbon substrates. KpSC pan v2 is freely available at https://github.com/kelwyres/KpSC-pan-metabolic-model, representing a valuable resource for the scientific community, both as a source of curated metabolic information and as a reference to derive accurate strain-specific GSMMs. The latter can be used to investigate the relationship between KpSC metabolism and traits of interest, such as reservoirs, epidemiology, drug resistance or virulence, and ultimately to inform novel KpSC control strategies.},
}
@article{pmid38376357,
  year     = {2024},
  author   = {Benning, S and Pritsch, K and Radl, V and Siani, R and Wang, Z and Schloter, M},
  title    = {{(Pan)genomic} analysis of two {Rhodococcus} isolates and their role in phenolic compound degradation.},
  journal  = {Microbiology spectrum},
  pages    = {e0378323},
  doi      = {10.1128/spectrum.03783-23},
  pmid     = {38376357},
  issn     = {2165-0497},
  abstract = {The genus Rhodococcus is recognized for its potential to degrade a large range of aromatic substances, including plant-derived phenolic compounds. We used comparative genomics in the context of the broader Rhodococcus pan-genome to study genomic traits of two newly described Rhodococcus strains (type-strain Rhodococcus pseudokoreensis R79[T] and Rhodococcus koreensis R85) isolated from apple rhizosphere. Of particular interest was their ability to degrade phenolic compounds as part of an integrated approach to treat apple replant disease (ARD) syndrome. The pan-genome of the genus Rhodococcus based on 109 high-quality genomes was open with a small core (1.3\%) consisting of genes assigned to basic cell functioning. The range of genome sizes in Rhodococcus was high, from 3.7 to 10.9 Mbp. Genomes from host-associated strains were generally smaller compared to environmental isolates which were characterized by exceptionally large genome sizes. Due to large genomic differences, we propose the reclassification of distinct groups of rhodococci like the Rhodococcus equi cluster to new genera. Taxonomic species affiliation was the most important factor in predicting genetic content and clustering of the genomes. Additionally, we found genes that discriminated between the strains based on habitat. All members of the genus Rhodococcus had at least one gene involved in the pathway for the degradation of benzoate, while biphenyl degradation was mainly restricted to strains in close phylogenetic relationships with our isolates. The {\textasciitilde}40\% of genes still unclassified in larger Rhodococcus genomes, particularly those of environmental isolates, need more research to explore the metabolic potential of this genus.
IMPORTANCE: Rhodococcus is a diverse, metabolically powerful genus, with high potential to adapt to different habitats due to the linear plasmids and large genome sizes. The analysis of its pan-genome allowed us to separate host-associated from environmental strains, supporting taxonomic reclassification. It was shown which genes contribute to the differentiation of the genomes based on habitat, which can possibly be used for targeted isolation and screening for desired traits. With respect to apple replant disease (ARD), our isolates showed genome traits that suggest potential for application in reducing plant-derived phenolic substances in soil, which makes them good candidates for further testing against ARD.},
}
@article{pmid38375235,
  year     = {2024},
  author   = {Lagerstrom, KM and Scales, NC and Hadly, EA},
  title    = {Impressive pan-genomic diversity of {E.} coli from a wild animal community near urban development reflects human impacts.},
  journal  = {iScience},
  volume   = {27},
  number   = {3},
  pages    = {109072},
  pmid     = {38375235},
  issn     = {2589-0042},
  abstract = {Human and domesticated animal waste infiltrates global freshwater, terrestrial, and marine environments, widely disseminating fecal microbes, antibiotics, and other chemical pollutants. Emerging evidence suggests that guts of wild animals are being invaded by our microbes, including Escherichia coli, which face anthropogenic selective pressures to gain antimicrobial resistance (AMR) and increase virulence. However, wild animal sources remain starkly under-represented among genomic sequence repositories. We sequenced whole genomes of 145 E. coli isolates from 55 wild and 13 domestic animal fecal samples, averaging 2 (ranging 1-7) isolates per sample, on a preserve imbedded in a human-dominated landscape in California Bay Area, USA, to assess AMR, virulence, and pan-genomic diversity. With single nucleotide polymorphism analyses we predict potential transmission routes. We illustrate the usefulness of E. coli to aid our understanding of and ability to surveil the emergence of zoonotic pathogens created by the mixing of human and wild bacteria in the environment.},
}
@article{pmid38370750,
  year     = {2024},
  author   = {Bolognini, D and Halgren, A and Lou, RN and Raveane, A and Rocha, JL and Guarracino, A and Soranzo, N and Chin, J and Garrison, E and Sudmant, PH},
  title    = {Global diversity, recurrent evolution, and recent selection on amylase structural haplotypes in humans.},
  journal  = {bioRxiv : the preprint server for biology},
  doi      = {10.1101/2024.02.07.579378},
  pmid     = {38370750},
  abstract = {The adoption of agriculture, first documented ∼12,000 years ago in the Fertile Crescent, triggered a rapid shift toward starch-rich diets in human populations. Amylase genes facilitate starch digestion and increased salivary amylase copy number has been observed in some modern human populations with high starch intake, though evidence of recent selection is lacking. Here, using 52 long-read diploid assemblies and short read data from ∼5,600 contemporary and ancient humans, we resolve the diversity, evolutionary history, and selective impact of structural variation at the amylase locus. We find that both salivary and pancreatic amylase genes have higher copy numbers in populations with agricultural subsistence compared to fishing, hunting, and pastoral groups. We identify 28 distinct amylase structural architectures and demonstrate that identical structures have arisen independently multiple times throughout recent human history. Using a pangenome graph-based approach to infer structural haplotypes across thousands of humans, we identify extensively duplicated haplotypes present at higher frequencies in modern agricultural populations. Leveraging 534 ancient human genomes we find that duplication-containing haplotypes have increased in frequency more than seven-fold over the last 12,000 years providing evidence for recent selection in Eurasians at this locus comparable in magnitude to that at lactase. Together, our study highlights the strong impact of the agricultural revolution on human genomes and the importance of long-read sequencing in identifying signatures of selection at structurally complex loci.},
}
@article{pmid38370713,
  year     = {2024},
  author   = {Lypaczewski, P and Chac, D and Dunmire, CN and Tandoc, KM and Chowdhury, F and Khan, AI and Bhuiyan, T and Harris, JB and LaRocque, RC and Calderwood, SB and Ryan, ET and Qadri, F and Shapiro, BJ and Weil, AA},
  title    = {Diversity of {Vibrio} cholerae {O1} through the human gastrointestinal tract during cholera.},
  journal  = {bioRxiv : the preprint server for biology},
  doi      = {10.1101/2024.02.08.579476},
  pmid     = {38370713},
  abstract = {UNLABELLED: Vibrio cholerae O1 causes the diarrheal disease cholera, and the small intestine is the site of active infection. During cholera, cholera toxin is secreted from V. cholerae and induces a massive fluid influx into the small intestine, which causes vomiting and diarrhea. Typically, V. cholerae genomes are sequenced from bacteria passed in stool, but rarely from vomit, a fluid that may more closely represents the site of active infection. We hypothesized that the V. cholerae O1 population bottlenecks along the gastrointestinal tract would result in reduced genetic variation in stool compared to vomit. To test this, we sequenced V. cholerae genomes from ten cholera patients with paired vomit and stool samples. Genetic diversity was low in both vomit and stool, consistent with a single infecting population rather than co-infection with divergent V. cholerae O1 lineages. The number of single nucleotide variants decreased between vomit and stool in four patients, increased in two, and remained unchanged in four. The number of genes encoded in the V. cholerae genome decreased between vomit and stool in eight patients and increased in two. Pangenome analysis of assembled short-read sequencing demonstrated that the toxin-coregulated pilus operon more frequently contained deletions in genomes from vomit compared to stool. However, these deletions were not detected by PCR or long-read sequencing, indicating that interpreting gene presence or absence patterns from short-read data alone may be incomplete. Overall, we found that V. cholerae O1 isolated from stool is genetically similar to V. cholerae recovered from the upper intestinal tract.
IMPORTANCE: Vibrio cholerae O1, the bacterium that causes cholera, is ingested in contaminated food or water and then colonizes the upper small intestine and is excreted in stool. Shed V. cholerae genomes are usually studied, but V. cholerae isolated from vomit may be more representative of where V. cholerae colonizes in the upper intestinal epithelium. V. cholerae may experience bottlenecks, or large reductions in bacterial population sizes or genetic diversity, as it passes through the gut. Passage through the gut may select for distinct V. cholerae mutants that are adapted for survival and gut colonization. We did not find strong evidence for such adaptive mutations, and instead observed that passage through the gut results in modest reductions in V. cholerae genetic diversity, and only in some patients. These results fill a gap in our understanding of the V. cholerae life cycle, transmission, and evolution.},
}
@article{pmid38370577,
  year     = {2023},
  author   = {Yuan, C and An, T and Li, X and Zou, J and Lin, Z and Gu, J and Hu, R and Fang, Z},
  title    = {Genomic analysis of {Ralstonia} pickettii reveals the genetic features for potential pathogenicity and adaptive evolution in drinking water.},
  journal  = {Frontiers in microbiology},
  volume   = {14},
  pages    = {1272636},
  pmid     = {38370577},
  issn     = {1664-302X},
  abstract = {Ralstonia pickettii, the most critical clinical pathogen of the genus Ralstonia, has been identified as a causative agent of numerous harmful infections. Additionally, Ralstonia pickettii demonstrates adaptability to extreme environmental conditions, such as those found in drinking water. In this study, we conducted a comprehensive genomic analysis to investigate the genomic characteristics related to potential pathogenicity and adaptive evolution in drinking water environments of Ralstonia pickettii. Through phylogenetic analysis and population genetic analysis, we divided Ralstonia pickettii into five Groups, two of which were associated with drinking water environments. The open pan-genome with a large and flexible gene repertoire indicated a high genetic plasticity. Significant differences in functional enrichment were observed between the core- and pan-genome of different groups. Diverse mobile genetic elements (MGEs), extensive genomic rearrangements, and horizontal gene transfer (HGT) events played a crucial role in generating genetic diversity. In drinking water environments, Ralstonia pickettii exhibited strong adaptability, and the acquisition of specific adaptive genes was potentially facilitated by genomic islands (GIs) and HGT. Furthermore, environmental pressures drove the adaptive evolution of Ralstonia pickettii, leading to the accumulation of unique mutations in key genes. These mutations may have a significant impact on various physiological functions, particularly carbon metabolism and energy metabolism. The presence of virulence-related elements associated with macromolecular secretion systems, virulence factors, and antimicrobial resistance indicated the potential pathogenicity of Ralstonia pickettii, making it capable of causing multiple nosocomial infections. This study provides comprehensive insights into the potential pathogenicity and adaptive evolution of Ralstonia pickettii in drinking water environments from a genomic perspective.},
}
@article{pmid38365240,
  year     = {2024},
  author   = {Shen, L and Liu, Y and Chen, L and Lei, T and Ren, P and Ji, M and Song, W and Lin, H and Su, W and Wang, S and Rooman, M and Pucci, F},
  title    = {Genomic basis of environmental adaptation in the widespread poly-extremophilic {Exiguobacterium} group.},
  journal  = {The ISME journal},
  volume   = {18},
  number   = {1},
  doi      = {10.1093/ismejo/wrad020},
  pmid     = {38365240},
  issn     = {1751-7370},
  support  = {U21A20176//National Natural Science Foundation of China/ ; 2019QZKK0503//Second Tibetan Plateau Scientific Expedition and Research/ ; 92251304//National Natural Science Foundation of China/ ; swzy202008//Open Project Fund of Anhui Provincial Key Laboratory of Protection and Utilization of Important Biological Resources/ ; 2022AH010012//Anhui Provincial Engineering Research Centre for Molecular Detection and Diagnostics/ ; },
  abstract = {Delineating cohesive ecological units and determining the genetic basis for their environmental adaptation are among the most important objectives in microbiology. In the last decade, many studies have been devoted to characterizing the genetic diversity in microbial populations to address these issues. However, the impact of extreme environmental conditions, such as temperature and salinity, on microbial ecology and evolution remains unclear so far. In order to better understand the mechanisms of adaptation, we studied the (pan)genome of Exiguobacterium, a poly-extremophile bacterium able to grow in a wide range of environments, from permafrost to hot springs. To have the genome for all known Exiguobacterium type strains, we first sequenced those that were not yet available. Using a reverse-ecology approach, we showed how the integration of phylogenomic information, genomic features, gene and pathway enrichment data, regulatory element analyses, protein amino acid composition, and protein structure analyses of the entire Exiguobacterium pangenome allows to sharply delineate ecological units consisting of mesophilic, psychrophilic, halophilic-mesophilic, and halophilic-thermophilic ecotypes. This in-depth study clarified the genetic basis of the defined ecotypes and identified some key mechanisms driving the environmental adaptation to extreme environments. Our study points the way to organizing the vast microbial diversity into meaningful ecologically units, which, in turn, provides insight into how microbial communities adapt and respond to different environmental conditions in a changing world.},
}
@article{pmid38364871,
  year     = {2024},
  author   = {Wu, Z and Li, T and Jiang, Z and Zheng, J and Gu, Y and Liu, Y and Liu, Y and Xie, Z},
  title    = {Human pangenome analysis of sequences missing from the reference genome reveals their widespread evolutionary, phenotypic, and functional roles.},
  journal  = {Nucleic acids research},
  doi      = {10.1093/nar/gkae086},
  pmid     = {38364871},
  issn     = {1362-4962},
  support  = {2019YFA0904400//National Key Research and Development Program of China/ ; 202201020336//Science and Technology Program of Guangzhou, China/ ; },
  abstract = {Nonreference sequences (NRSs) are DNA sequences present in global populations but absent in the current human reference genome. However, the extent and functional significance of NRSs in the human genomes and populations remains unclear. Here, we de novo assembled 539 genomes from five genetically divergent human populations using long-read sequencing technology, resulting in the identification of 5.1 million NRSs. These were merged into 45284 unique NRSs, with 29.7\% being novel discoveries. Among these NRSs, 38.7\% were common across the five populations, and 35.6\% were population specific. The use of a graph-based pangenome approach allowed for the detection of 565 transcript expression quantitative trait loci on NRSs, with 426 of these being novel findings. Moreover, 26 NRS candidates displayed evidence of adaptive selection within human populations. Genes situated in close proximity to or intersecting with these candidates may be associated with metabolism and type 2 diabetes. Genome-wide association studies revealed 14 NRSs to be significantly associated with eight phenotypes. Additionally, 154 NRSs were found to be in strong linkage disequilibrium with 258 phenotype-associated SNPs in the GWAS catalogue. Our work expands the understanding of human NRSs and provides novel insights into their functions, facilitating evolutionary and biomedical researches.},
}
@article {pmid38361606, year = {2024}, author = {Bonnie, JK and Ahmed, OY and Langmead, B}, title = {DandD: Efficient measurement of sequence growth and similarity.}, journal = {iScience}, volume = {27}, number = {3}, pages = {109054}, doi = {10.1016/j.isci.2024.109054}, pmid = {38361606}, issn = {2589-0042}, abstract = {Genome assembly databases are growing rapidly. The redundancy of sequence content between a new assembly and previous ones is neither conceptually nor algorithmically easy to measure. We introduce pertinent methods and DandD, a tool addressing how much new sequence is gained when a sequence collection grows. DandD can describe how much structural variation is discovered in each new human genome assembly and when discoveries will level off in the future. DandD uses a measure called δ ("delta"), developed initially for data compression and chiefly dependent on k-mer counts. DandD rapidly estimates δ using genomic sketches. We propose δ as an alternative to k-mer-specific cardinalities when computing the Jaccard coefficient, thereby avoiding the pitfalls of a poor choice of k. We demonstrate the utility of DandD's functions for estimating δ, characterizing the rate of pangenome growth, and computing all-pairs similarities using k-independent Jaccard.}, }
@article {pmid38356529, year = {2024}, author = {Zhou, L and Liu, D and Zhu, Y and Zhang, Z and Chen, S and Zhao, G and Zheng, H}, title = {Advance typing of Vibrio parahaemolyticus through the mtlA and aer gene: A high-resolution, cost-effective approach.}, journal = {Heliyon}, volume = {10}, number = {3}, pages = {e25642}, pmid = {38356529}, issn = {2405-8440}, abstract = {Vibrio parahaemolyticus is a significant cause of foodborne illness, and its incidence worldwide is on the rise. It is thus imperative to develop a straightforward and efficient method for typing strains of this pathogen. In this study, we conducted a pangenome analysis of 75 complete genomes of V. parahaemolyticus and identified the core gene mtlA with the highest degree of variation, which distinguished 44 strains and outperformed traditional seven-gene-based MLST when combined with aer, another core gene with high degree of variation. The mtlA gene had higher resolution to type strains with a close relationship compared to the traditional MLST genes in the phylogenetic tree built by core genomes. Strong positive selection was also detected in the gene mtlA (ω > 1), representing adaptive and evolution in response to the environment. Therefore, the panel of gene mtlA and aer may serve as a tool for the typing of V. parahaemolyticus, potentially contributing to the prevention and control of this foodborne disease.}, }
@article {pmid38355307, year = {2024}, author = {Leonard, AS and Mapel, XM and Pausch, H}, title = {Pangenome genotyped structural variation improves molecular phenotype mapping in cattle.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.278267.123}, pmid = {38355307}, issn = {1549-5469}, abstract = {Expression and splicing quantitative trait loci (e/sQTL) are large contributors to phenotypic variability. Achieving sufficient statistical power for e/sQTL mapping requires large cohorts with both genotypes and molecular phenotypes, and so the genomic variation is often called from short-read alignments which are unable to comprehensively resolve structural variation. Here we build a pangenome from 16 HiFi haplotype-resolved assemblies to identify small and structural variation and genotype them with PanGenie in 307 short-read samples. We find high (>90%) concordance of PanGenie-genotyped and DeepVariant-called small variation, and confidently genotype close to 21M small and 43k structural variants in the larger population. We validate 85% of these structural variants (with MAF>0.1) directly with a subset of 25 short-read samples that also have medium coverage HiFi reads. We then conduct e/sQTL mapping with this comprehensive variant set in a subset of 117 cattle that have testis transcriptome data and find 92 structural variants as causal candidates for eQTL and 73 for sQTL. We find that roughly half of top associated structural variants affecting expression or splicing are transposable elements, such as SV-eQTLs for STN1 and MYH7 and SV-sQTLs for CEP89 and ASAH2. Extensive linkage disequilibrium between small and structural variation results in only 28 additional eQTL and 17 sQTL discovered when including SVs, although many top associated SVs are compelling candidates.}, }
@article {pmid38352482, year = {2024}, author = {Raghuram, V and Petit, RA and Karol, Z and Mehta, R and Weissman, DB and Read, TD}, title = {Average Nucleotide Identity based Staphylococcus aureus strain grouping allows identification of strain-specific genes in the pangenome.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.01.29.577756}, pmid = {38352482}, abstract = {UNLABELLED: Staphylococcus aureus causes both hospital and community acquired infections in humans worldwide. Due to the high incidence of infection S. aureus is also one of the most sampled and sequenced pathogens today, providing an outstanding resource to understand variation at the bacterial subspecies level. We processed and downsampled 83,383 public S. aureus Illumina whole genome shotgun sequences and 1,263 complete genomes to produce 7,954 representative substrains. Pairwise comparison of core gene Average Nucleotide Identity (ANI) revealed a natural boundary of 99.5% that could be used to define 145 distinct strains within the species. We found that intermediate frequency genes in the pangenome (present in 10-95% of genomes) could be divided into those closely linked to strain background ("strain-concentrated") and those highly variable within strains ("strain-diffuse"). Non-core genes had different patterns of chromosome location; notably, strain-diffuse associated with prophages, strain-concentrated with the vSaβ genome island and rare genes (<10% frequency) concentrated near the origin of replication. Antibiotic genes were enriched in the strain-diffuse class, while virulence genes were distributed between strain-diffuse, strain-concentrated, core and rare classes. This study shows how different patterns of gene movement help create strains as distinct subspecies entities and provide insight into the diverse histories of important S. aureus functions.
IMPORTANCE: We analyzed the genomic diversity of Staphylococcus aureus, a globally prevalent bacterial species that causes serious infections in humans. Our goal was to build a genetic picture of the different strains of S. aureus and which genes may be associated with them. We used a large public dataset (>84,000 genomes) that was re-processed and subsampled to remove redundancy. We found that individual genomes could be grouped into strains by sharing > 99.5% identical nucleotide sequence of the core part of their genome. We also showed that a portion of genes that are present in intermediate frequency in the species are strongly associated with some strains but completely absent from others, suggesting a role in strain-specificity. This work lays the foundation for understanding individual gene histories of the S. aureus species and also outlines strategies for processing large bacterial genomic datasets.}, }
@article {pmid38351383, year = {2024}, author = {Li, X and Wang, Y and Cai, C and Ji, J and Han, F and Zhang, L and Chen, S and Zhang, L and Yang, Y and Tang, Q and Bucher, J and Wang, X and Yang, L and Zhuang, M and Zhang, K and Lv, H and Bonnema, G and Zhang, Y and Cheng, F}, title = {Large-scale gene expression alterations introduced by structural variation drive morphotype diversification in Brassica oleracea.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {38351383}, issn = {1546-1718}, support = {31972411//National Natural Science Foundation of China (National Science Foundation of China)/ ; 31722048//National Natural Science Foundation of China (National Science Foundation of China)/ ; 32172578//National Natural Science Foundation of China (National Science Foundation of China)/ ; 201809110159//China Scholarship Council (CSC)/ ; }, abstract = {Brassica oleracea, globally cultivated for its vegetable crops, consists of very diverse morphotypes, characterized by specialized enlarged organs as harvested products. This makes B. oleracea an ideal model for studying rapid evolution and domestication. We constructed a B. oleracea pan-genome from 27 high-quality genomes representing all morphotypes and their wild relatives. We identified structural variations (SVs) among these genomes and characterized these in 704 B. oleracea accessions using graph-based genome tools. We show that SVs exert bidirectional effects on the expression of numerous genes, either suppressing through DNA methylation or promoting probably by harboring transcription factor-binding elements. The following examples illustrate the role of SVs modulating gene expression: SVs promoting BoPNY and suppressing BoCKX3 in cauliflower/broccoli, suppressing BoKAN1 and BoACS4 in cabbage and promoting BoMYBtf in ornamental kale. These results provide solid evidence for the role of SVs as dosage regulators of gene expression, driving B. 
oleracea domestication and diversification.}, }
@article {pmid38346372, year = {2024}, author = {Chen, Y and Li, X and Liu, Z and Hu, M and Ma, J and Luo, Y and Zhang, Q and Li, L and Zhao, X and Zhao, M and Liu, W and Liu, Y}, title = {Genomic analysis and experimental pathogenic characterization of Riemerella anatipestifer isolates from chickens in China.}, journal = {Poultry science}, volume = {103}, number = {4}, pages = {103497}, doi = {10.1016/j.psj.2024.103497}, pmid = {38346372}, issn = {1525-3171}, abstract = {Waterfowl have a high likelihood of being infected with Riemerella anatipestifer. Although the pathogen is found in domestic ducks, turkeys, geese, and wild birds, there is little information available about the consequences of infection during egg laying and hatching in chickens. Here, we present the first report of a novel sequence type of R. anatipestifer S63 isolated from chickens in China. On the basis of pan-genome analysis, we showed S63's genome occupies a distinct branch with other R. anatipestifer isolates from other hosts. Galleria mellonella larval tests indicated that S63 is less virulent than R. anatipestifer Ra36 isolated from ducks. Ducks and hens are susceptible to S63 infection. There is no mortality rate for chickens or ducks, but adult chickens experience neurological symptoms that reduce egg production and hatching rates. In chickens, S63 might be passed vertically from parents to offspring, resulting in "jelly-like" lifeless embryos. Using quantitative PCR, S63 was detected in the brain, liver, reproductive organs, and embryos. As far as we know, this is the first report of R. anatipestifer in hens, a disease that can reduce egg productivity, lower hatching rates, and produce jelly-like lifeless embryos, and the first report to raise the possibility that hens can be infected by roosters via semen.}, }
@article {pmid38339052, year = {2024}, author = {Zhang, T and Chen, X and Yan, W and Li, M and Huang, W and Liu, Q and Li, Y and Guo, C and Shu, Y}, title = {Comparative Analysis of Chloroplast Pan-Genomes and Transcriptomics Reveals Cold Adaptation in Medicago sativa.}, journal = {International journal of molecular sciences}, volume = {25}, number = {3}, pages = {}, doi = {10.3390/ijms25031776}, pmid = {38339052}, issn = {1422-0067}, support = {LH2022C050//Natural Science Foundation of Heilongjiang Province/ ; HSDSSCX2023-42//the Innovative Project for Postgraduate Students of Harbin Normal University/ ; FKL-202203//the Open Fund of Yunnan Province Flower Breeding Key Laboratory/ ; 202301BD070001-208//Agriculture Joint Special Project of Science and Technology Plan Project of Yunnan Science and Technology Department/ ; 530000210000000013742//the Green Food Brand Build a Special Project (Floriculture) supported by Yunnan Provincial Finance Department/ ; U21A20182//the Natural and Science Foundation of China/ ; Qian Liu//Construction of Tengchong Rural Revitalization Technological Innovation County/ ; }, abstract = {Alfalfa (Medicago sativa) is a perennial forage legume that is widely distributed all over the world; therefore, it has an extremely complex genetic background. Though population structure and phylogenetic studies have been conducted on a large group of alfalfa nuclear genomes, information about the chloroplast genomes is still lacking. Chloroplast genomes are generally considered to be conservative and play an important role in population diversity analysis and species adaptation in plants. Here, 231 complete alfalfa chloroplast genomes were successfully assembled from 359 alfalfa resequencing data, on the basis of which the alfalfa chloroplast pan-genome was constructed. 
We investigated the genetic variations of the alfalfa chloroplast genome through comparative genomic, genetic diversity, phylogenetic, population genetic structure, and haplotype analysis. Meanwhile, the expression of alfalfa chloroplast genes under cold stress was explored through transcriptome analysis. As a result, chloroplast genomes of 231 alfalfa lack an IR region, and the size of the chloroplast genome ranges from 125,192 bp to 126,105 bp. Using population structure, haplotypes, and construction of a phylogenetic tree, it was found that alfalfa populations could be divided into four groups, and multiple highly variable regions were found in the alfalfa chloroplast genome. Transcriptome analysis showed that tRNA genes were significantly up-regulated in the cold-sensitive varieties, while rps7, rpl32, and ndhB were down-regulated, and the editing efficiency of ycf1, ycf2, and ndhF was decreased in the cold-tolerant varieties, which may be due to the fact that chloroplasts store nutrients through photosynthesis to resist cold. The huge number of genetic variants in this study provide powerful resources for molecular markers.}, }
@article {pmid38337024, year = {2024}, author = {Andorf, CM and Haley, OC and Hayford, RK and Portwood, JL and Harding, S and Sen, S and Cannon, EK and Gardiner, JM and Kim, HS and Woodhouse, MR}, title = {PanEffect: a pan-genome visualization tool for variant effects in maize.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae073}, pmid = {38337024}, issn = {1367-4811}, abstract = {UNLABELLED: Understanding the effects of genetic variants is crucial for accurately predicting traits and functional outcomes. Recent approaches have utilized artificial intelligence and protein language models to score all possible missense variant effects at the proteome level for a single genome, but a reliable tool is needed to explore these effects at the pan-genome level. To address this gap, we introduce a new tool called PanEffect. We implemented PanEffect at MaizeGDB to enable a comprehensive examination of the potential effects of coding variants across 50 maize genomes. The tool allows users to visualize over 550 million possible amino acid substitutions in the B73 maize reference genome and to observe the effects of the 2.3 million natural variations in the maize pan-genome. Each variant effect score, calculated from the Evolutionary Scale Modeling (ESM) protein language model, shows the log-likelihood ratio difference between B73 and all variants in the pan-genome. These scores are shown using heatmaps spanning benign outcomes to potential functional consequences. Additionally, PanEffect displays secondary structures and functional domains along with the variant effects, offering additional functional and structural context. Using PanEffect, researchers now have a platform to explore protein variants and identify genetic targets for crop enhancement.
AVAILABILITY: The PanEffect code is freely available on GitHub (https://github.com/Maize-Genetics-and-Genomics-Database/PanEffect). A maize implementation of PanEffect and underlying datasets are available at MaizeGDB (https://www.maizegdb.org/effect/maize/).
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, }
@article {pmid38334660, year = {2024}, author = {Bachari, A and Nassar, N and Telukutla, S and Zomer, R and Piva, TJ and Mantri, N}, title = {Evaluating the Mechanism of Cell Death in Melanoma Induced by the Cannabis Extract PHEC-66.}, journal = {Cells}, volume = {13}, number = {3}, pages = {}, doi = {10.3390/cells13030268}, pmid = {38334660}, issn = {2073-4409}, support = {Not Applicable//MGC Pharmaceuticals Ltd/ ; }, abstract = {Research suggests the potential of using cannabinoid-derived compounds to function as anticancer agents against melanoma cells. Our recent study highlighted the remarkable in vitro anticancer effects of PHEC-66, an extract from Cannabis sativa, on the MM418-C1, MM329, and MM96L melanoma cell lines. However, the complete molecular mechanism behind this action remains to be elucidated. This study aims to unravel how PHEC-66 brings about its antiproliferative impact on these cell lines, utilising diverse techniques such as real-time polymerase chain reaction (qPCR), assays to assess the inhibition of CB1 and CB2 receptors, measurement of reactive oxygen species (ROS), apoptosis assays, and fluorescence-activated cell sorting (FACS) for apoptosis and cell cycle analysis. The outcomes obtained from this study suggest that PHEC-66 triggers apoptosis in these melanoma cell lines by increasing the expression of pro-apoptotic markers (BAX mRNA) while concurrently reducing the expression of anti-apoptotic markers (Bcl-2 mRNA). Additionally, PHEC-66 induces DNA fragmentation, halting cell progression at the G1 cell cycle checkpoint and substantially elevating intracellular ROS levels. These findings imply that PHEC-66 might have potential as an adjuvant therapy in the treatment of malignant melanoma. However, it is essential to conduct further preclinical investigations to delve deeper into its potential and efficacy.}, }
@article {pmid38332778, year = {2024}, author = {Sakurai, A and Suzuki, M and Hayashi, K and Doi, Y}, title = {Taxonomic classification of genus Aeromonas using open reading frame-based binarized structure network analysis.}, journal = {Fujita medical journal}, volume = {10}, number = {1}, pages = {8-15}, doi = {10.20407/fmj.2023-007}, pmid = {38332778}, issn = {2189-7255}, abstract = {OBJECTIVES: Taxonomic assignment based on whole-genome sequencing data facilitates clear demarcation of species within a complex genus. Here, we applied a unique pan-genome phylogenetic method, open reading frame (ORF)-based binarized structure network analysis (OSNA), for taxonomic inference of Aeromonas spp., a complex taxonomic group consisting of 30 species.
METHODS: Data from 335 publicly available Aeromonas genomes, including the reference genomes of 30 species, were used to build a phylogenetic tree using OSNA. In OSNA, whole-genome structures are expressed as binary sequences based on the presence or absence of ORFs, and a tree is generated using neighbor-net, a distance-based method for constructing phylogenetic networks from binary sequences. The tree built by OSNA was compared to that constructed by a core-genome single-nucleotide polymorphism (SNP)-based analysis. Furthermore, the orthologous average nucleotide identity (OrthoANI) values of the sequences that clustered in a single clade in the OSNA-based tree were calculated.
RESULTS: The phylogenetic tree constructed with OSNA successfully delineated the majority of species of the genus Aeromonas forming conspecific clades for individual species, which was corroborated by OrthoANI values. Moreover, the OSNA-based phylogenetic tree demonstrated high compositional similarity to the core-genome SNP-based phylogenetic tree, supported by the Fowlkes-Mallows index.
CONCLUSIONS: We propose that OSNA is a useful tool in predicting the taxonomic classification of complex bacterial genera.}, }
@article {pmid38329369, year = {2024}, author = {Newcomer, EP and Fishbein, SRS and Zhang, K and Hink, T and Reske, KA and Cass, C and Iqbal, ZH and Struttmann, EL and Burnham, C-AD and Dubberke, ER and Dantas, G}, title = {Genomic surveillance of Clostridioides difficile transmission and virulence in a healthcare setting.}, journal = {mBio}, volume = {}, number = {}, pages = {e0330023}, doi = {10.1128/mbio.03300-23}, pmid = {38329369}, issn = {2150-7511}, abstract = {Clostridioides difficile infection (CDI) is a major cause of healthcare-associated diarrhea, despite the widespread implementation of contact precautions for patients with CDI. Here, we investigate strain contamination in a hospital setting and the genomic determinants of disease outcomes. Across two wards over 6 months, we selectively cultured C. difficile from patients (n = 384) and their environments. Whole-genome sequencing (WGS) of 146 isolates revealed that most C. difficile isolates were from clade 1 (131/146, 89.7%), while only one isolate of the hypervirulent ST1 was recovered. Of culture-positive admissions (n = 79), 19 (24%) patients were colonized with toxigenic C. difficile on admission to the hospital. We defined 25 strain networks at ≤2 core gene single nucleotide polymorphisms; two of these networks contain strains from different patients. Strain networks were temporally linked (P < 0.0001). To understand the genomic correlates of the disease, we conducted WGS on an additional cohort of C. difficile (n = 102 isolates) from the same hospital and confirmed that clade 1 isolates are responsible for most CDI cases. We found that while toxigenic C. difficile isolates are associated with the presence of cdtR, nontoxigenic isolates have an increased abundance of prophages. Our pangenomic analysis of clade 1 isolates suggests that while toxin genes (tcdABER and cdtR) were associated with CDI symptoms, they are dispensable for patient colonization. 
These data indicate that toxigenic and nontoxigenic C. difficile contamination persist in a hospital setting and highlight further investigation into how accessory genomic repertoires contribute to C. difficile colonization and disease. IMPORTANCE: Clostridioides difficile infection remains a leading cause of hospital-associated diarrhea, despite increased antibiotic stewardship and transmission prevention strategies. This suggests a changing genomic landscape of C. difficile. Our study provides insight into the nature of prevalent C. difficile strains in a hospital setting and transmission patterns among carriers. Longitudinal sampling of surfaces and patient stool revealed that both toxigenic and nontoxigenic strains of C. difficile clade 1 dominate these two wards. Moreover, quantification of transmission in carriers of these clade 1 isolates underscores the need to revisit infection prevention measures in this patient group. We identified unique genetic signatures associated with virulence in this clade. Our data highlight the complexities of preventing transmission of this pathogen in a hospital setting and the need to investigate the mechanisms of in vivo persistence and virulence of prevalent lineages in the host gut microbiome.}, }
@article {pmid38322985, year = {2024}, author = {Zhong, C and Hu, G and Hu, C and Xu, C and Zhang, Z and Ning, K}, title = {Comparative genomics analysis reveals genetic characteristics and nitrogen fixation profile of Bradyrhizobium.}, journal = {iScience}, volume = {27}, number = {2}, pages = {108948}, doi = {10.1016/j.isci.2024.108948}, pmid = {38322985}, issn = {2589-0042}, abstract = {Bradyrhizobium is a genus of nitrogen-fixing bacteria, with some species producing nodules in leguminous plants. Investigations into Bradyrhizobium have recently revealed its substantial genetic resources and agricultural benefits, but a comprehensive survey of its genetic diversity and functional properties is lacking. Using a panel of various strains (N = 278), this study performed a comparative genomics analysis to anticipate genes linked with symbiotic nitrogen fixation. Bradyrhizobium's pan-genome consisted of 84,078 gene families, containing 824 core genes and 42,409 accessory genes. Core genes were mainly involved in crucial cell processes, while accessory genes served diverse functions, including nitrogen fixation and nodulation. Three distinct genetic profiles were identified based on the presence/absence of gene clusters related to nodulation, nitrogen fixation, and secretion systems. Most Bradyrhizobium strains from soil and non-leguminous plants lacked major nif/nod genes and were evolutionarily more closely related. These findings shed light on Bradyrhizobium's genetic features for symbiotic nitrogen fixation.}, }
@article {pmid38307885, year = {2024}, author = {Zheng, Z and Zhu, M and Zhang, J and Liu, X and Hou, L and Liu, W and Yuan, S and Luo, C and Yao, X and Liu, J and Yang, Y}, title = {A sequence-aware merger of genomic structural variations at population scale.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {960}, pmid = {38307885}, issn = {2041-1723}, abstract = {Merging structural variations (SVs) at the population level presents a significant challenge, yet it is essential for conducting comprehensive genotypic analyses, especially in the era of pangenomics. Here, we introduce PanPop, a tool that utilizes an advanced sequence-aware SV merging algorithm to efficiently merge SVs of various types. We demonstrate that PanPop can merge and optimize the majority of multiallelic SVs into informative biallelic variants. We show its superior precision and lower rates of missing data compared to alternative software solutions. Our approach not only enables the filtering of SVs by leveraging multiple SV callers for enhanced accuracy but also facilitates the accurate merging of large-scale population SVs. These capabilities of PanPop will help to accelerate future SV-related studies.}, }
@article {pmid38304712, year = {2024}, author = {Chen, P and Wang, S and Li, H and Qi, X and Hou, Y and Ma, T}, title = {Comparative genomic analyses of Cutibacterium granulosum provide insights into genomic diversity.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1343227}, doi = {10.3389/fmicb.2024.1343227}, pmid = {38304712}, issn = {1664-302X}, abstract = {Cutibacterium granulosum, a commensal bacterium found on human skin, formerly known as Propionibacterium granulosum, rarely causes infections and is generally considered non-pathogenic. Recent research has revealed the transferability of the multidrug-resistant plasmid pTZC1 between C. granulosum and Cutibacterium acnes, the latter being an opportunistic pathogen in surgical site infections. However, there is a noticeable lack of research on the genome of C. granulosum, and the genetic landscape of this species remains largely uncharted. We investigated the genomic features and evolutionary structure of C. granulosum by analyzing a total of 30 Metagenome-Assembled Genomes (MAGs) and isolate genomes retrieved from public databases, as well as those generated in this study. A pan-genome of 6,077 genes was identified for C. granulosum. Remarkably, the 'cloud genes' constituted 62.38% of the pan-genome. Genes associated with mobilome: prophages, transposons [X], defense mechanisms [V] and replication, recombination and repair [L] were enriched in the cloud genome. Phylogenomic analysis revealed two distinct mono-clades, highlighting the genomic diversity of C. granulosum. The genomic diversity was further confirmed by the distribution of Average Nucleotide Identity (ANI) values. The functional profiles analysis of C. granulosum unveiled a wide range of potential Antibiotic Resistance Genes (ARGs) and virulence factors, suggesting its potential tolerance to various environmental challenges. 
Subtype I-E of the CRISPR-Cas system was the most abundant in these genomes, a feature also detected in C. acnes genomes. Given the widespread distribution of C. granulosum strains within skin microbiome, our findings make a substantial contribution to our broader understanding of the genetic diversity, which may open new avenues for investigating the mechanisms and treatment of conditions such as acne vulgaris.}, }
@article {pmid38302106, year = {2024}, author = {Hayeck, TJ and Li, Y and Mosbruger, TL and Bradfield, JP and Gleason, AG and Damianos, G and Shaw, GT and Duke, JL and Conlin, LK and Turner, TN and Fernández-Viña, MA and Sarmady, M and Monos, DS}, title = {The Impact of Patterns in Linkage Disequilibrium and Sequencing Quality on the Imprint of Balancing Selection.}, journal = {Genome biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/gbe/evae009}, pmid = {38302106}, issn = {1759-6653}, abstract = {Regions under balancing selection are characterized by dense polymorphisms and multiple persistent haplotypes, along with other sequence complexities. Successful identification of these patterns depends on both the statistical approach and the quality of sequencing. To address this challenge, at first, a new statistical method called LD-ABF was developed, employing efficient Bayesian techniques to effectively test for balancing selection. LD-ABF demonstrated the most robust detection of selection in a variety of simulation scenarios, compared against a range of existing tests/tools (Tajima's D, HKA, Dng, BetaScan, and BalLerMix). Furthermore, the impact of the quality of sequencing on detection of balancing selection was explored, as well, using: 1) SNP genotyping and exome data, 2) targeted high-resolution HLA genotyping (IHIW), and 3) whole-genome long-read sequencing data (Pangenome). In the analysis of SNP genotyping and exome data, we identified known targets and 38 new selection signatures in genes not previously linked to balancing selection. To further investigate the impact of sequencing quality on detection of balancing selection, a detailed investigation of the MHC was performed with high-resolution HLA typing data. Higher quality sequencing revealed the HLA-DQ genes consistently demonstrated strong selection signatures otherwise not observed from the sparser SNP array and exome data. 
The HLA-DQ selection signature was also replicated in the Pangenome samples using considerably less samples but, with high quality long-read sequence data. The improved statistical method, coupled with higher quality sequencing, leads to more consistent identification of selection and enhanced localization of variants under selection, particularly in complex regions.}, }
@article {pmid38298071, year = {2024}, author = {Lee, J and Cha, IT and Lee, KE and Son, YK and Cho, S and Seol, D}, title = {Complete genome sequence and potential pathogenic assessment of Flavobacterium plurextorum RSG-18 isolated from the gut of Schlegel's black rockfish, Sebastes schlegelii.}, journal = {Environmental microbiology reports}, volume = {}, number = {}, pages = {}, doi = {10.1111/1758-2229.13226}, pmid = {38298071}, issn = {1758-2229}, support = {NIBR202134204//National Institute of Biological Resources, Ministry of Environment/ ; }, abstract = {Flavobacterium plurextorum is a potential fish pathogen of interest, previously isolated from diseased rainbow trout (Oncorhynchus mykiss) and oomycete-infected chum salmon (Oncorhynchus keta) eggs. We report here the first complete genome sequence of F. plurextorum RSG-18 isolated from the gut of Schlegel's black rockfish (Sebastes schlegelii). The genome of RSG-18 consists of a circular chromosome of 5,610,911 bp with a 33.57% GC content, containing 4858 protein-coding genes, 18 rRNAs, 63 tRNAs and 1 tmRNA. A comparative analysis was conducted on 11 Flavobacterium species previously reported as pathogens or isolated from diseased fish to confirm the potential pathogenicity of RSG-18. In the SEED classification, RSG-18 was found to have 36 genes categorized in 'Virulence, Disease and Defense'. Across all Flavobacterium species, a total of 16 antibiotic resistance genes and 61 putative virulence factors were identified. All species had at least one phage region and type I, III and IX secretion systems. In pan-genomic analysis, core genes consist of genes linked to phages, integrases and matrix-tolerated elements associated with pathology. The complete genome sequence of F. plurextorum RSG-18 will serve as a foundation for future research, enhancing our understanding of Flavobacterium pathogenicity in fish and contributing to the development of effective prevention strategies.}, }
@article{pmid38295902,
  year     = {2024},
  author   = {Chen, Y. and Xiang, G. and Liu, P. and Zhou, X. and Guo, P. and Wu, Z. and Yang, J. and Chen, P. and Huang, J. and Liao, K.},
  title    = {Prevalence and Molecular Characteristics of Ceftazidime-avibactam Resistance among carbapenem-resistant {Pseudomonas} aeruginosa Clinical Isolates},
  journal  = {Journal of global antimicrobial resistance},
  doi      = {10.1016/j.jgar.2024.01.014},
  pmid     = {38295902},
  issn     = {2213-7173},
  abstract = {BACKGROUND: Resistance against ceftazidime-avibactam (CZA) in carbapenem-resistant Pseudomonas aeruginosa (CRPA) is emerging. This study was aimed at detecting the prevalence and molecular characteristics of CZA-resistant CRPA clinical isolates in Guangdong Province, China.
              METHODS: The antimicrobial susceptibility profile of these strains was determined. A subset of sixteen CZA-resistant CRPA isolates was analyzed by whole genome sequencing (WGS). Genetic surroundings of carbapenem resistance genes and pan-genome-wide association analysis were further studied.
              RESULTS: Of the 250 CRPA isolates, CZA resistance rate was 6.4\% (16/250). The minimum inhibitory concentration (MIC) of CZA range was from 0.25 to >256 mg/L. MIC50 and MIC90 were 2/4 and 8/4 mg/L, respectively. Among the sixteen CZA-resistant CRPA strains, 31.3\% (5/16) of them carried class B carbapenem resistance genes including blaIMP-4, blaIMP-45 and blaVIM-2, located on IncP-2 megaplasmids or chromosome, respectively. Pan-genome-wide association analysis of accessory genes for CZA-susceptible or -resistant CRPA isolates showed that PA1874, a hypothetical protein containing BapA prefix-like domain, was enriched in CZA-resistant group significantly.
              CONCLUSIONS: Class B carbapenem resistance genes play important roles in CZA resistance. Meanwhile, PA1874 gene may be a novel mechanism involving in CZA resistance. It is necessary to continually monitor CZA-resistant CRPA isolates.},
}
@article{pmid38293557,
  year     = {2023},
  author   = {Kim, B. and Han, S. R. and Lee, H. and Oh, T. J.},
  title    = {Insights into group-specific pattern of secondary metabolite gene cluster in {Burkholderia} genus},
  journal  = {Frontiers in microbiology},
  volume   = {14},
  pages    = {1302236},
  doi      = {10.3389/fmicb.2023.1302236},
  pmid     = {38293557},
  issn     = {1664-302X},
  abstract = {Burkholderia is a versatile strain that has expanded into several genera. It has been steadily reported that the genome features of Burkholderia exhibit activities ranging from plant growth promotion to pathogenicity across various isolation areas. The objective of this study was to investigate the secondary metabolite patterns of 366 Burkholderia species through comparative genomics. Samples were selected based on assembly quality assessment and similarity below 80\% in average nucleotide identity. Duplicate samples were excluded. Samples were divided into two groups using FastANI analysis. Group A included B. pseudomallei complex. Group B included B. cepacia complex. The limitations of MLST were proposed. The detection of genes was performed, including environmental and virulence-related genes. In the pan-genome analysis, each complex possessed a similar pattern of cluster for orthologous groups. Group A (n = 185) had 14,066 cloud genes, 2,465 shell genes, 682 soft-core genes, and 2,553 strict-core genes. Group B (n = 181) had 39,867 cloud genes, 4,986 shell genes, 324 soft-core genes, 222 core genes, and 2,949 strict-core genes. AntiSMASH was employed to analyze the biosynthetic gene cluster (BGC). The results were then utilized for network analysis using BiG-SCAPE and CORASON. Principal component analysis was conducted and a table was constructed using the results obtained from antiSMASH. The results were divided into Group A and Group B. We expected the various species to show similar patterns of secondary metabolite gene clusters.
              For in-depth analysis, a network analysis of secondary metabolite gene clusters was conducted, exemplified by BiG-SCAPE analysis. Depending on the species and complex, Burkholderia possessed several kinds of siderophore. Among them, ornibactin was possessed in most Burkholderia and was clustered into 4,062 clans. There was a similar pattern of gene clusters depending on the species. NRPS\_04014 belonged to siderophore BGCs including ornibactin and indigoidine. However, it was observed that each family included a similar species. This suggests that, besides siderophores being species-specific, the ornibactin gene cluster itself might also be species-specific. The results suggest that siderophores are associated with environmental adaptation, possessing a similar pattern of siderophore gene clusters among species, which could provide another perspective on species-specific environmental adaptation mechanisms.},
}
@article{pmid38290434,
  year     = {2024},
  author   = {Joubert, P. M. and Krasileva, K. V.},
  title    = {Distinct genomic contexts predict gene presence-absence variation in different pathotypes of {Magnaporthe} oryzae},
  journal  = {Genetics},
  doi      = {10.1093/genetics/iyae012},
  pmid     = {38290434},
  issn     = {1943-2631},
  abstract = {Fungi use the accessory gene content of their pangenomes to adapt to their environments. While gene presence-absence variation (PAV) contributes to shaping accessory gene reservoirs, the genomic contexts that shape these events remain unclear. Since pangenome studies are typically species-wide and do not analyze different populations separately, it is yet to be uncovered whether PAV patterns and mechanisms are consistent across populations. Fungal plant pathogens are useful models for studying PAV because they rely on it to adapt to their hosts, and members of a species often infect distinct hosts. We analyzed gene PAV in the blast fungus, Magnaporthe oryzae (syn. Pyricularia oryzae), and found that PAV genes involved in host-pathogen and microbe-microbe interactions may drive the adaptation of the fungus to its environment. We then analyzed genomic and epigenomic features of PAV and observed that proximity to transposable elements, gene GC content, gene length, expression level in the host, and histone H3K27me3 marks were different between PAV genes and conserved genes. We used these features to construct a model that was able to predict whether a gene is likely to experience PAV with high precision (86.06\%) and recall (92.88\%) in M. oryzae. Finally, we found that PAV genes in the rice and wheat pathotypes of M. oryzae differed in their number and their genomic context. Our results suggest that genomic and epigenomic features of gene PAV can be used to better understand and predict fungal pangenome evolution. We also show that substantial intra-species variation can exist in these features.},
}
@article{pmid38281938,
  year     = {2024},
  author   = {Zaccaron, A. Z. and Stergiopoulos, I.},
  title    = {Analysis of five near-complete genome assemblies of the tomato pathogen {Cladosporium} fulvum uncovers additional accessory chromosomes and structural variations induced by transposable elements effecting the loss of avirulence genes},
  journal  = {BMC biology},
  volume   = {22},
  number   = {1},
  pages    = {25},
  pmid     = {38281938},
  issn     = {1741-7007},
  support  = {1557995//Directorate for Biological Sciences/ ; CA-D-PPA-2185-H//National Institute of Food and Agriculture/ ; },
  abstract = {BACKGROUND: Fungal plant pathogens have dynamic genomes that allow them to rapidly adapt to adverse conditions and overcome host resistance. One way by which this dynamic genome plasticity is expressed is through effector gene loss, which enables plant pathogens to overcome recognition by cognate resistance genes in the host. However, the exact nature of these loses remains elusive in many fungi. This includes the tomato pathogen Cladosporium fulvum, which is the first fungal plant pathogen from which avirulence (Avr) genes were ever cloned and in which loss of Avr genes is often reported as a means of overcoming recognition by cognate tomato Cf resistance genes. A recent near-complete reference genome assembly of C. fulvum isolate Race 5 revealed a compartmentalized genome architecture and the presence of an accessory chromosome, thereby creating a basis for studying genome plasticity in fungal plant pathogens and its impact on avirulence genes.
              RESULTS: Here, we obtained near-complete genome assemblies of four additional C. fulvum isolates. The genome assemblies had similar sizes (66.96 to 67.78 Mb), number of predicted genes (14,895 to 14,981), and estimated completeness (98.8 to 98.9\%). Comparative analysis that included the genome of isolate Race 5 revealed high levels of synteny and colinearity, which extended to the density and distribution of repetitive elements and of repeat-induced point (RIP) mutations across homologous chromosomes. Nonetheless, structural variations, likely mediated by transposable elements and effecting the deletion of the avirulence genes Avr4E, Avr5, and Avr9, were also identified. The isolates further shared a core set of 13 chromosomes, but two accessory chromosomes were identified as well. Accessory chromosomes were significantly smaller in size, and one carried pseudogenized copies of two effector genes. Whole-genome alignments further revealed genomic islands of near-zero nucleotide diversity interspersed with islands of high nucleotide diversity that co-localized with repeat-rich regions. These regions were likely generated by RIP, which generally asymmetrically affected the genome of C. fulvum.
              CONCLUSIONS: Our results reveal new evolutionary aspects of the C. fulvum genome and provide new insights on the importance of genomic structural variations in overcoming host resistance in fungal plant pathogens.},
}
@article{pmid38279113,
  year     = {2024},
  author   = {Rajput, J. and Chandra, G. and Jain, C.},
  title    = {Co-linear chaining on pangenome graphs},
  journal  = {Algorithms for molecular biology : AMB},
  volume   = {19},
  number   = {1},
  pages    = {4},
  pmid     = {38279113},
  issn     = {1748-7188},
  abstract = {Pangenome reference graphs are useful in genomics because they compactly represent the genetic diversity within a species, a capability that linear references lack. However, efficiently aligning sequences to these graphs with complex topology and cycles can be challenging. The seed-chain-extend based alignment algorithms use co-linear chaining as a standard technique to identify a good cluster of exact seed matches that can be combined to form an alignment. Recent works show how the co-linear chaining problem can be efficiently solved for acyclic pangenome graphs by exploiting their small width and how incorporating gap cost in the scoring function improves alignment accuracy. However, it remains open on how to effectively generalize these techniques for general pangenome graphs which contain cycles. Here we present the first practical formulation and an exact algorithm for co-linear chaining on cyclic pangenome graphs. We rigorously prove the correctness and computational complexity of the proposed algorithm. We evaluate the empirical performance of our algorithm by aligning simulated long reads from the human genome to a cyclic pangenome graph constructed from 95 publicly available haplotype-resolved human genome assemblies. While the existing heuristic-based algorithms are faster, the proposed algorithm provides a significant advantage in terms of accuracy. Implementation (https://github.com/at-cg/PanAligner).},
}
@article{pmid38278862,
  year     = {2024},
  author   = {Mondol, S. M. and Islam, I. and Islam, M. R. and Shakil, S. K. and Rakhi, N. N. and Mustary, J. F. and {Amiruzzaman} and Gomes, D. J. and Shahjalal, H. M. and Rahaman, M. M.},
  title    = {Genomic landscape of {NDM-1} producing multidrug-resistant {Providencia} stuartii causing burn wound infections in {Bangladesh}},
  journal  = {Scientific reports},
  volume   = {14},
  number   = {1},
  pages    = {2246},
  pmid     = {38278862},
  issn     = {2045-2322},
  support  = {LS2019935//Ministry of Education, Government of the People's Republic of Bangladesh/ ; },
  abstract = {The increasing antimicrobial resistance in Providencia stuartii (P. stuartii) worldwide, particularly concerning for immunocompromised and burn patients, has raised concern in Bangladesh, where the significance of this infectious opportunistic pathogen had been previously overlooked, prompting a need for investigation. The two strains of P. stuartii (P. stuartii SHNIBPS63 and P. stuartii SHNIBPS71) isolated from wound swab of two critically injured burn patients were found to be multidrug-resistant and P. stuartii SHNIBPS63 showed resistance to all the 22 antibiotics tested as well as revealed the co-existence of blaVEB-6 (Class A), blaNDM-1 (Class B), blaOXA-10 (Class D) beta lactamase genes. Complete resistance to carbapenems through the production of NDM-1, is indicative of an alarming situation as carbapenems are considered to be the last line antibiotic to combat this pathogen. Both isolates displayed strong biofilm-forming abilities and exhibited resistance to copper, zinc, and iron, in addition to carrying multiple genes associated with metal resistance and the formation of biofilms. The study also encompassed a pangenome analysis utilizing a dataset of eighty-six publicly available P. stuartii genomes (n = 86), revealing evidence of an open or expanding pangenome for P. stuartii. Also, an extensive genome-wide analysis of all the P. stuartii genomes revealed a concerning global prevalence of diverse antimicrobial resistance genes, with a particular alarm raised over the abundance of carbapenem resistance gene blaNDM-1. Additionally, this study highlighted the notable genetic diversity within P. stuartii, significant informations about phylogenomic relationships and ancestry, as well as potential for cross-species transmission, raising important implications for public health and microbial adaptation across different environments.},
}
@article{pmid38271481,
  year     = {2024},
  author   = {Barbitoff, Y. A. and Ushakov, M. O. and Lazareva, T. E. and Nasykhova, Y. A. and Glotov, A. S. and Predeus, A. V.},
  title    = {Bioinformatics of germline variant discovery for rare disease diagnostics: current approaches and remaining challenges},
  journal  = {Briefings in bioinformatics},
  volume   = {25},
  number   = {2},
  doi      = {10.1093/bib/bbad508},
  pmid     = {38271481},
  issn     = {1477-4054},
  support  = {075-15-2021-1058//Ministry of Science and Higher Education of Russian Federation/ ; },
  abstract = {Next-generation sequencing (NGS) has revolutionized the field of rare disease diagnostics. Whole exome and whole genome sequencing are now routinely used for diagnostic purposes; however, the overall diagnosis rate remains lower than expected. In this work, we review current approaches used for calling and interpretation of germline genetic variants in the human genome, and discuss the most important challenges that persist in the bioinformatic analysis of NGS data in medical genetics. We describe and attempt to quantitatively assess the remaining problems, such as the quality of the reference genome sequence, reproducible coverage biases, or variant calling accuracy in complex regions of the genome. We also discuss the prospects of switching to the complete human genome assembly or the human pan-genome and important caveats associated with such a switch. We touch on arguably the hardest problem of NGS data analysis for medical genomics, namely, the annotation of genetic variants and their subsequent interpretation. We highlight the most challenging aspects of annotation and prioritization of both coding and non-coding variants. Finally, we demonstrate the persistent prevalence of pathogenic variants in the coding genome, and outline research directions that may enhance the efficiency of NGS-based disease diagnostics.},
}
@article{pmid38270699,
  year     = {2024},
  author   = {Singh, S. and Singh, R. and Priyadarsini, S. and Ola, A. L.},
  title    = {Genomics empowering conservation action and improvement of celery in the face of climate change},
  journal  = {Planta},
  volume   = {259},
  number   = {2},
  pages    = {42},
  pmid     = {38270699},
  issn     = {1432-2048},
  abstract = {Integration of genomic approaches like whole genome sequencing, functional genomics, evolutionary genomics, and CRISPR/Cas9-based genome editing has accelerated the improvement of crop plants including leafy vegetables like celery in the face of climate change. The anthropogenic climate change is a real peril to the existence of life forms on our planet, including human and plant life. Climate change is predicted to be a significant threat to biodiversity and food security in the coming decades and is rapidly transforming global farming systems. To avoid the ghastly future in the face of climate change, the elucidation of shifts in the geographical range of plant species, species adaptation, and evolution is necessary for plant scientists to develop climate-resilient strategies. In the post-genomics era, the increasing availability of genomic resources and integration of multifaceted genomics elements is empowering biodiversity conservation action, restoration efforts, and identification of genomic regions adaptive to climate change. Genomics has accelerated the true characterization of crop wild relatives, genomic variations, and the development of climate-resilient varieties to ensure food security for 10 billion people by 2050. In this review, we have summarized the applications of multifaceted genomic tools, like conservation genomics, whole genome sequencing, functional genomics, genome editing, pangenomics, in the conservation and adaptation of plant species with a focus on celery, an aromatic and medicinal Apiaceae vegetable. We focus on how conservation scientists can utilize genomics and genomic data in conservation and improvement.},
}
@article{pmid38268053,
  year     = {2024},
  author   = {Uruén, C. and Fernandez, A. and Arnal, J. L. and Del Pozo, M. and Amoribieta, M. C. and de Blas, I. and Jurado, P. and Calvo, J. H. and Gottschalk, M. and González-Vázquez, L. D. and Arenas, M. and Marín, C. M. and Arenas, J.},
  title    = {Genomic and phenotypic analysis of invasive {Streptococcus} suis isolated in {Spain} reveals genetic diversification and associated virulence traits},
  journal  = {Veterinary research},
  volume   = {55},
  number   = {1},
  pages    = {11},
  pmid     = {38268053},
  issn     = {1297-9716},
  support  = {PID2020-114617RB-100//Ministerio de Ciencia e Innovación/Agencia Española de investigación/ ; LMP58_21//Departamento de Educación, Cultura y Deporte, Gobierno de Aragón/ ; },
  abstract = {Streptococcus suis is a zoonotic pathogen that causes a major health problem in the pig production industry worldwide. Spain is one of the largest pig producers in the world. This work aimed to investigate the genetic and phenotypic features of invasive S. suis isolates recovered in Spain. A panel of 156 clinical isolates recovered from 13 Autonomous Communities, representing the major pig producers, were analysed. MLST and serotyping analysis revealed that most isolates (61.6\%) were assigned to ST1 (26.3\%), ST123 (18.6\%), ST29 (9.6\%), and ST3 (7.1\%). Interestingly, 34 new STs were identified, indicating the emergence of novel genetic lineages. Serotypes 9 (27.6\%) and 1 (21.8\%) prevailed, followed by serotypes 7 (12.8\%) and 2 (12.2\%). Analysis of 13 virulence-associated genes showed significant associations between ST, serotype, virulence patterns, and clinical features, evidencing particular virulence traits associated with genetic clusters. The pangenome was generated, and the core genome was distributed in 7 Bayesian groups where each group included a variable set of over- and under-represented genes of different categories. The study provides comprehensive data and knowledge to improve the design of new vaccines, antimicrobial treatments, and bacterial typing approaches.},
}
@article{pmid38265421,
  year     = {2024},
  author   = {Kothe, C. I. and Monnet, C. and Irlinger, F. and Virsolvy, M. and Frühling, A. and Neumann-Schaal, M. and Wolf, J. and Renault, P.},
  title    = {{Halomonas} citrativorans sp. nov., {Halomonas} casei sp. nov. and {Halomonas} colorata sp. nov., isolated from {French} cheese rinds},
  journal  = {International journal of systematic and evolutionary microbiology},
  volume   = {74},
  number   = {1},
  doi      = {10.1099/ijsem.0.006234},
  pmid     = {38265421},
  issn     = {1466-5034},
  abstract = {Eight Gram-stain-negative bacterial strains were isolated from cheese rinds sampled in France. On the basis of 16S rRNA gene sequence analysis, all isolates were assigned to the genus Halomonas. Phylogenetic investigations, including 16S rRNA gene studies, multilocus sequence analysis, reconstruction of a pan-genome phylogenetic tree with the concatenated core-genome content and average nucleotide identity (ANI) calculations, revealed that they constituted three novel and well-supported clusters. The closest relative species, determined using the whole-genome sequences of the strains, were Halomonas zhanjiangensis for two groups of cheese strains, sharing 82.4 and 93.1 \% ANI, and another cluster sharing 92.2 \% ANI with the Halomonas profundi type strain. The strains isolated herein differed from the previously described species by ANI values <95 \% and several biochemical, enzymatic and colony characteristics. The results of phenotypic, phylogenetic and chemotaxonomic analyses indicated that the isolates belonged to three novel Halomonas species, for which the names Halomonas citrativorans sp. nov., Halomonas casei sp. nov. and Halomonas colorata sp. nov. are proposed, with isolates FME63[T] (=DSM 113315[T]=CIRM-BIA2430[T]=CIP 111880[T]=LMG 32013[T]), FME64[T] (=DSM 113316[T]=CIRM-BIA2431[T]=CIP 111877[T]=LMG 32015[T]) and FME66[T] (=DSM 113318[T]=CIRM-BIA2433[T]=CIP 111876[T]=LMG 32014[T]) as type strains, respectively.},
}
@article{pmid38261993,
  year     = {2024},
  author   = {Teyssonniere, E. M. and Shichino, Y. and Mito, M. and Friedrich, A. and Iwasaki, S. and Schacherer, J.},
  title    = {Translation variation across genetic backgrounds reveals a post-transcriptional buffering signature in yeast},
  journal  = {Nucleic acids research},
  doi      = {10.1093/nar/gkae030},
  pmid     = {38261993},
  issn     = {1362-4962},
  support  = {772505/ERC_/European Research Council/International ; S10 OD018174/CD/ODCDC CDC HHS/United States ; },
  abstract = {Gene expression is known to vary among individuals, and this variability can impact the phenotypic diversity observed in natural populations. While the transcriptome and proteome have been extensively studied, little is known about the translation process itself. Here, we therefore performed ribosome and transcriptomic profiling on a genetically and ecologically diverse set of natural isolates of the Saccharomyces cerevisiae yeast. Interestingly, we found that the Euclidean distances between each profile and the expression fold changes in each pairwise isolate comparison were higher at the transcriptomic level. This observation clearly indicates that the transcriptional variation observed in the different isolates is buffered through a phenomenon known as post-transcriptional buffering at the translation level. Furthermore, this phenomenon seemed to have a specific signature by preferentially affecting essential genes as well as genes involved in complex-forming proteins, and low transcribed genes. We also explored the translation of the S. cerevisiae pangenome and found that the accessory genes related to introgression events displayed similar transcription and translation levels as the core genome. By contrast, genes acquired through horizontal gene transfer events tended to be less efficiently translated. Together, our results highlight both the extent and signature of the post-transcriptional buffering.},
}
@article{pmid38260597,
  year     = {2024},
  author   = {Villani, F. and Guarracino, A. and Ward, R. R. and Green, T. and Emms, M. and Pravenec, M. and Prins, P. and Garrison, E. and Williams, R. W. and Chen, H. and Colonna, V.},
  title    = {Pangenome reconstruction in rats enhances genotype-phenotype mapping and novel variant discovery},
  journal  = {bioRxiv : the preprint server for biology},
  doi      = {10.1101/2024.01.10.575041},
  pmid     = {38260597},
  abstract = {The HXB/BXH family of recombinant inbred rat strains is a unique genetic resource that has been extensively phenotyped over 25 years, resulting in a vast dataset of quantitative molecular and physiological phenotypes. We built a pangenome graph from 10x Genomics linked-read data for 31 recombinant inbred rats to study genetic variation and association mapping. The pangenome length was on average 2.4 times greater than the corresponding length of the reference mRatBN7.2, confirming the capture of substantial additional variation. We validated variants in challenging regions, including complex structural variants resolving into multiple haplotypes. Phenome-wide association analysis of validated SNPs uncovered variants associated with glucose/insulin levels and hippocampal gene expression. We propose an interaction between Pirl1l1, Cromogranine expression, TNF-α levels, and insulin regulation. This study demonstrates the utility of linked-read pangenomes for comprehensive variant detection and mapping phenotypic diversity in a widely used rat genetic reference panel.},
}
@article{pmid38259089,
  year     = {2024},
  author   = {Chen, F. and Yin, Y. and Chen, H. and Wang, R. and Wang, S. and Wang, H.},
  title    = {Global genetic diversity and {Asian} clades evolution: a phylogeographic study of {Staphylococcus} aureus sequence type 5},
  journal  = {Antimicrobial agents and chemotherapy},
  pages    = {e0117523},
  doi      = {10.1128/aac.01175-23},
  pmid     = {38259089},
  issn     = {1098-6596},
  abstract = {Staphylococcus aureus sequence type (ST) 5 has spread worldwide; however, phylogeographic studies on the evolution of global phylogenetic and Asian clades of ST5 are lacking. This study included 368 ST5 genome sequences, including 111 newly generated sequences. Primary phylogenetic analysis suggested that there are five clades, and geographical clustering of ST5 methicillin-resistant S. aureus (MRSA) was linked to the acquisition of S. aureus pathogenicity islands (SaPIs; enterotoxin gene island) and integration of the prophage φSa3. The most recent common ancestor of global S. aureus ST5 dates back to the mid-1940s, coinciding with the clinical introduction of penicillin. Bayesian phylogeographic inference allowed to ancestrally trace the Asian ST5 MRSA clade to Japan, which may have spread to major cities in China and Korea in the 1990s. Based on a pan-genome-wide association study, the emergence of Asian ST5 clades was attributed to the gain of prophages, SaPIs, and plasmids, as well as the coevolution of resistance genes. Clade IV displayed greater genomic diversity than the Asian MRSA clades. Collectively, our study provides in-depth insights into the global evolution of S. aureus ST5 mainly in China and the United States and reveals that different S. aureus ST5 clades have arisen independently in different parts of the world, with limited geographic dispersal across continents.},
}
@article{pmid38257915,
  year     = {2023},
  author   = {Afordoanyi, D. M. and Akosah, Y. A. and Shnakhova, L. and Saparmyradov, K. and Diabankana, R. G. C. and Validov, S.},
  title    = {Biotechnological Key Genes of the {Rhodococcus} erythropolis {MGMM8} Genome: Genes for Bioremediation, Antibiotics, Plant Protection, and Growth Stimulation},
  journal  = {Microorganisms},
  volume   = {12},
  number   = {1},
  doi      = {10.3390/microorganisms12010088},
  pmid     = {38257915},
  issn     = {2076-2607},
  support  = {RF-1930.61321X0001/15.IP.21.0020//Ministry of Education 362 and Science of the Russian Federation/ ; },
  abstract = {Anthropogenic pollution, including residues from the green revolution initially aimed at addressing food security and healthcare, has paradoxically exacerbated environmental challenges. The transition towards comprehensive green biotechnology and bioremediation, achieved with lower financial investment, hinges on microbial biotechnology, with the Rhodococcus genus emerging as a promising contender. The significance of fully annotating genome sequences lies in comprehending strain constituents, devising experimental protocols, and strategically deploying these strains to address pertinent issues using pivotal genes. This study revolves around Rhodococcus erythropolis MGMM8, an associate of winter wheat plants in the rhizosphere. Through the annotation of its chromosomal genome and subsequent comparison with other strains, its potential applications were explored. Using the antiSMASH server, 19 gene clusters were predicted, encompassing genes responsible for antibiotics and siderophores. Antibiotic resistance evaluation via the Comprehensive Antibiotic Resistance Database (CARD) identified five genes (vanW, vanY, RbpA, iri, and folC) that were parallel to strain CCM2595. Leveraging the NCBI Prokaryotic Genome Annotation Pipeline (PGAP) for biodegradation, heavy metal resistance, and remediation genes, the presence of chlorimuron-ethyl, formaldehyde, benzene-desulfurization degradation genes, and heavy metal-related genes (ACR3, arsC, corA, DsbA, modA, and recG) in MGMM8 was confirmed. Furthermore, quorum-quenching signal genes, critical for curbing biofilm formation and virulence elicited by quorum-sensing in pathogens, were also discerned within MGMM8's genome. In light of these predictions, the novel isolate MGMM8 warrants phenotypic assessment to gauge its potential in biocontrol and bioremediation. This evaluation extends to isolating active compounds for potential antimicrobial activities against pathogenic microorganisms. The comprehensive genome annotation process has facilitated the genetic characterization of MGMM8 and has solidified its potential as a biotechnological strain to address global anthropogenic predicaments.},
}
@article{pmid38257891,
  year     = {2023},
  author   = {Godoy, M. and {Montes de Oca}, M. and Suarez, R. and Martinez, A. and Pontigo, J. P. and Caro, D. and Kusch, K. and Coca, Y. and Bohle, H. and Bayliss, S. and Kibenge, M. and Kibenge, F.},
  title    = {Genomics of Re-Emergent {Aeromonas} salmonicida in {Atlantic} Salmon Outbreaks},
  journal  = {Microorganisms},
  volume   = {12},
  number   = {1},
  doi      = {10.3390/microorganisms12010064},
  pmid     = {38257891},
  issn     = {2076-2607},
  abstract = {Furunculosis, caused by Aeromonas salmonicida, poses a significant threat to both salmonid and non-salmonid fish in diverse aquatic environments. This study explores the genomic intricacies of re-emergent A. salmonicida outbreaks in Atlantic salmon (Salmo salar). Previous clinical cases have exhibited pathological characteristics, such as periorbital hemorrhages and gastrointestinal abnormalities. Genomic sequencing of three Chilean isolates (ASA04, ASA05, and CIBA\_5017) and 25 previously described genomes determined the pan-genome, phylogenomics, insertion sequences, and restriction-modification systems. Unique gene families have contributed to an improved understanding of the psychrophilic and mesophilic clades, while phylogenomic analysis has been used to identify mesophilic and psychrophilic strains, thereby further differentiating between typical and atypical psychrophilic isolates. Diverse insertion sequences and restriction-modification patterns have highlighted genomic structural differences, and virulence factor predictions can emphasize exotoxin disparities, especially between psychrophilic and mesophilic strains. Thus, a novel plasmid was characterized which emphasized the role of plasmids in virulence and antibiotic resistance. The analysis of antibiotic resistance factors revealed resistance against various drug classes in Chilean strains. Overall, this study elucidates the genomic dynamics of re-emergent A. salmonicida and provides novel insights into their virulence, antibiotic resistance, and population structure.},
}
@article{pmid38254124,
  author   = {Fan, J and Khan, J and Singh, NP and Pibiri, GE and Patro, R},
  title    = {Fulgor: a fast and compact k-mer index for large-scale matching and color queries.},
  journal  = {Algorithms for molecular biology : AMB},
  year     = {2024},
  volume   = {19},
  number   = {1},
  pages    = {3},
  pmid     = {38254124},
  issn     = {1748-7188},
  support  = {R01HG009937/NH/NIH HHS/United States ; },
  abstract = {The problem of sequence identification or matching—determining the subset of reference sequences from a given collection that are likely to contain a short, queried nucleotide sequence—is relevant for many important tasks in Computational Biology, such as metagenomics and pangenome analysis. Due to the complex nature of such analyses and the large scale of the reference collections a resource-efficient solution to this problem is of utmost importance. This poses the threefold challenge of representing the reference collection with a data structure that is efficient to query, has light memory usage, and scales well to large collections. To solve this problem, we describe an efficient colored de Bruijn graph index, arising as the combination of a k-mer dictionary with a compressed inverted index. The proposed index takes full advantage of the fact that unitigs in the colored compacted de Bruijn graph are monochromatic (i.e., all k-mers in a unitig have the same set of references of origin, or color). Specifically, the unitigs are kept in the dictionary in color order, thereby allowing for the encoding of the map from k-mers to their colors in as little as 1 + o(1) bits per unitig. Hence, one color per unitig is stored in the index with almost no space/time overhead. By combining this property with simple but effective compression methods for integer lists, the index achieves very small space. We implement these methods in a tool called Fulgor, and conduct an extensive experimental analysis to demonstrate the improvement of our tool over previous solutions. For example, compared to Themisto—the strongest competitor in terms of index space vs. query time trade-off—Fulgor requires significantly less space (up to 43% less space for a collection of 150,000 Salmonella enterica genomes), is at least twice as fast for color queries, and is 2-6× faster to construct.},
}
@article{pmid38253726,
  author   = {Jeong, J and Ahn, S and Truong, TC and Kim, JH and Weerawongwiwat, V and Lee, JS and Yoon, JH and Sukhoom, A and Kim, W},
  title    = {Description of {Mycolicibacterium} arenosum sp. nov. Isolated from Coastal Sand on the {Yellow Sea} Coast.},
  journal  = {Current microbiology},
  year     = {2024},
  volume   = {81},
  number   = {3},
  pages    = {73},
  pmid     = {38253726},
  issn     = {1432-0991},
  support  = {NIBR202102205//National Institute of Biological Resources/ ; NRF-2021R1C1C2003223//National Research Foundation of Korea/ ; 2017//Chung-Ang University/ ; },
  abstract = {A Gram-staining-positive, aerobic, non-spore-forming bacterium was isolated from coastal sand samples from Incheon in the Republic of Korea and designated as strain CAU 1645[T]. The optimum conditions for growth were observed at 30 °C in growth media containing 1% (w/v) NaCl at pH 9.0. The predominant respiratory quinone was MK-9 and the major fatty acids were C16:0, C17:1 w7c, and summed feature 7. Similarly, the 16S rRNA gene sequence exhibited the highest similarity with Mycolicibacterium bacteremicum DSM 45578[T] and Mycolicibacterium neoaurum JCM 6365[T], both of which exhibited similarity rates of 97.2%. The genomic DNA G+C content was 68.2%. The whole genome of strain CAU 1645[T] was obtained and annotated with annotation using RAST server. The pan-genome analysis was determined using Prokka, Roary, and Phandango. In the pan-genome analysis, the strain CAU 1645[T] shared 40 core genes with closely related Mycolicibacterium species, including the AcpM gene, the meromycolate extension acyl carrier protein involved in forming impermeable cell walls in mycobacteria. Therefore, our findings demonstrated that the isolate represents a novel species of the genus Mycolicibacterium, for which we propose the name Mycolicibacterium arenosum sp. nov. The type strain is CAU 1645[T] (= KCTC 49724[T] = MCCC 1K07087[T]).},
}
@article{pmid38253606,
  author   = {Groza, C and Schwendinger-Schreck, C and Cheung, WA and Farrow, EG and Thiffault, I and Lake, J and Rizzo, WB and Evrony, G and Curran, T and Bourque, G and Pastinen, T},
  title    = {Pangenome graphs improve the analysis of structural variants in rare genetic diseases.},
  journal  = {Nature communications},
  year     = {2024},
  volume   = {15},
  number   = {1},
  pages    = {657},
  pmid     = {38253606},
  issn     = {2041-1723},
  abstract = {Rare DNA alterations that cause heritable diseases are only partially resolvable by clinical next-generation sequencing due to the difficulty of detecting structural variation (SV) in all genomic contexts. Long-read, high fidelity genome sequencing (HiFi-GS) detects SVs with increased sensitivity and enables assembling personal and graph genomes. We leverage standard reference genomes, public assemblies (n = 94) and a large collection of HiFi-GS data from a rare disease program (Genomic Answers for Kids, GA4K, n = 574 assemblies) to build a graph genome representing a unified SV callset in GA4K, identify common variation and prioritize SVs that are more likely to cause genetic disease (MAF < 0.01). Using graphs, we obtain a higher level of reproducibility than the standard reference approach. We observe over 200,000 SV alleles unique to GA4K, including nearly 1000 rare variants that impact coding sequence. With improved specificity for rare SVs, we isolate 30 candidate SVs in phenotypically prioritized genes, including known disease SVs. We isolate a novel diagnostic SV in KMT2E, demonstrating use of personal assemblies coupled with pangenome graphs for rare disease genomics. The community may interrogate our pangenome with additional assemblies to discover new SVs within the allele frequency spectrum relevant to genetic diseases.},
}
@article{pmid38249481,
  author   = {Deng, Y and Jiang, ZM and Han, XF and Su, J and Yu, LY and Liu, WH and Zhang, YQ},
  title    = {Corrigendum: Pangenome analysis of the genus {Herbiconiux} and proposal of four new species associated with {Chinese} medicinal plants.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1295710},
  doi      = {10.3389/fmicb.2023.1295710},
  pmid     = {38249481},
  issn     = {1664-302X},
  abstract = {[This corrects the article DOI: 10.3389/fmicb.2023.1119226.].},
}
@article{pmid38246550,
  author   = {Song, Z and Ge, Y and Yu, X and Liu, R and Liu, C and Cheng, K and Guo, L and Yao, S},
  title    = {Development of a {SNP}-based strain-identified method for {Streptococcus} thermophilus {CICC} 6038 and {Lactobacillus} delbrueckii ssp. bulgaricus {CICC} 6047 using pan-genomics analysis.},
  journal  = {Journal of dairy science},
  year     = {2024},
  doi      = {10.3168/jds.2023-23655},
  pmid     = {38246550},
  issn     = {1525-3198},
  abstract = {The health benefits conferred by probiotics is specific to individual probiotic strains, highlighting the importance of identifying specific strains for research and production purposes. Streptococcus thermophilus CICC 6038 and Lactobacillus delbrueckii ssp. bulgaricus CICC 6047 are exceedingly valuable for commercial use with an excellent mixed-culture fermentation. To differentiate these 2 strains from other S. thermophilus and L. delbrueckii ssp. bulgaricus, a specific, sensitive, accurate, rapid, convenient, and cost-effective method is required. In this study, we conducted a pan-genome analysis of S. thermophilus and L. delbrueckii ssp. bulgaricus to identify species-specific core genes, along with strain-specific single-nucleotide polymorphisms (SNPs). These genes were used to develop suitable PCR primers, and the conformity of sequence length and unique SNPs was confirmed by sequencing for qualitative identification at the strain level. The results demonstrated that SNPs analysis of PCR products derived from these primers could distinguish CICC 6038 and CICC 6047 accurately and reproducibly from the other strains of S. thermophilus and L. delbrueckii ssp. bulgaricus, respectively. The strain-specific PCR method based on SNPs herein is universally applicable for probiotics identification. It offers valuable insights into identifying probiotics at the strain level that is fit-for-purpose in quality control and compliance assessment of commercial dairy products.},
}
@article{pmid38238664,
  author   = {Peng, M and Lin, W and Zhou, A and Jiang, Z and Zhou, F and Wang, Z},
  title    = {High genetic diversity and different type {VI} secretion systems in {Enterobacter} species revealed by comparative genomics analysis.},
  journal  = {BMC microbiology},
  year     = {2024},
  volume   = {24},
  number   = {1},
  pages    = {26},
  pmid     = {38238664},
  issn     = {1471-2180},
  support  = {32200094//National Natural Science Foundation of China/ ; PT012201//Hubei Key Laboratory of Biological Resources Protection and Utilization (Hubei Minzu University)/ ; 2022CFB674//Natural Science Foundation of Hubei Province/ ; },
  abstract = {The human-pathogenic Enterobacter species are widely distributed in diverse environmental conditions, however, the understanding of the virulence factors and genetic variations within the genus is very limited. In this study, we performed comparative genomics analysis of 49 strains originated from diverse niches and belonged to eight Enterobacter species, in order to further understand the mechanism of adaption to the environment in Enterobacter. The results showed that they had an open pan-genome and high genomic diversity which allowed adaptation to distinctive ecological niches. We found the number of secretion systems was the highest among various virulence factors in these Enterobacter strains. Three types of T6SS gene clusters including T6SS-A, T6SS-B and T6SS-C were detected in most Enterobacter strains. T6SS-A and T6SS-B shared 13 specific core genes, but they had different gene structures, suggesting they probably have different biological functions. Notably, T6SS-C was restricted to E. cancerogenus. We detected a T6SS gene cluster, highly similar to T6SS-C (91.2%), in the remote related Citrobacter rodentium, suggesting that this unique gene cluster was probably acquired by horizontal gene transfer. The genomes of Enterobacter strains possess high genetic diversity, limited number of conserved core genes, and multiple copies of T6SS gene clusters with differentiated structures, suggesting that the origins of T6SS were not by duplication instead by independent acquisition. These findings provide valuable information for better understanding of the functional features of Enterobacter species and their evolutionary relationships.},
}
@article{pmid38230932,
  author   = {Silva-Pereira, TT and Soler-Camargo, NC and Guimarães, AMS},
  title    = {Diversification of gene content in the {Mycobacterium} tuberculosis complex is determined by phylogenetic and ecological signatures.},
  journal  = {Microbiology spectrum},
  year     = {2024},
  pages    = {e0228923},
  doi      = {10.1128/spectrum.02289-23},
  pmid     = {38230932},
  issn     = {2165-0497},
  abstract = {In this study, we analyzed the gene content of different ecotypes of the Mycobacterium tuberculosis complex (MTBC), the pathogens of tuberculosis. We found that changes in their gene content are associated with their ecological features, such as host preference. Gene loss was identified as the primary driver of these changes, which can vary even among different strains of the same ecotype. Our study also revealed that the gene content relatedness of these bacteria does not always mirror their evolutionary relationships. In addition, some genes of virulence can be variably lost among strains of the same MTBC ecotype, likely helping them to evade the immune system. Overall, our study highlights the importance of understanding how gene loss can lead to new adaptations in these bacteria and how different selective pressures may influence their genetic makeup.},
}
@article{pmid38229335,
  author   = {Venkatachalam, S and Jabir, T and Vipindas, PV and Krishnan, KP},
  title    = {Ecological significance of {Candidatus} {ARS69} and {Gemmatimonadota} in the {Arctic} glacier foreland ecosystems.},
  journal  = {Applied microbiology and biotechnology},
  year     = {2024},
  volume   = {108},
  number   = {1},
  pages    = {128},
  pmid     = {38229335},
  issn     = {1432-0614},
  abstract = {The Gemmatimonadota phylum has been widely detected in diverse natural environments, yet their specific ecological roles in many habitats remain poorly investigated. Similarly, the Candidatus ARS69 phylum has been identified only in a few habitats, and literature on their metabolic functions is relatively scarce. In the present study, we investigated the ecological significance of phyla Ca. ARS69 and Gemmatimonadota in the Arctic glacier foreland (GF) ecosystems through genome-resolved metagenomics. We have reconstructed the first high-quality metagenome-assembled genome (MAG) belonging to Ca. ARS69 and 12 other MAGs belonging to phylum Gemmatimonadota from the three different Arctic GF samples. We further elucidated these two groups phylogenetic lineage and their metabolic function through phylogenomic and pangenomic analysis. The analysis showed that all the reconstructed MAGs potentially belonged to novel species. The MAGs belonged to Ca. ARS69 consist about 8296 gene clusters, of which only about 8% of single-copy core genes (n = 980) were shared among them. The study also revealed the potential ecological role of Ca. ARS69 is associated with carbon fixation, denitrification, sulfite oxidation, and reduction biochemical processes in the GF ecosystems. Similarly, the study demonstrates the widespread distribution of different classes of Gemmatimonadota across wide ranges of ecosystems and their metabolic functions, including in the polar region. KEY POINTS: • Glacier foreland ecosystems act as a natural laboratory to study microbial community structure. • We have reconstructed 13 metagenome-assembled genomes from the soil samples. • All the reconstructed MAGs belonged to novel species with different metabolic processes. • Ca. ARS69 and Gemmatimonadota MAGs were found to participate in carbon fixation and denitrification processes.},
}
@article{pmid38225047,
  author   = {Han, DM and Baek, JH and Choi, DG and Jeon, MS and Eyun, SI and Jeon, CO},
  title    = {Comparative pangenome analysis of {Aspergillus} flavus and {Aspergillus} oryzae reveals their phylogenetic, genomic, and metabolic homogeneity.},
  journal  = {Food microbiology},
  year     = {2024},
  volume   = {119},
  pages    = {104435},
  doi      = {10.1016/j.fm.2023.104435},
  pmid     = {38225047},
  issn     = {1095-9998},
  abstract = {Aspergillus flavus and Aspergillus oryzae are closely related fungal species with contrasting roles in food safety and fermentation. To comprehensively investigate their phylogenetic, genomic, and metabolic characteristics, we conducted an extensive comparative pangenome analysis using complete, dereplicated genome sets for both species. Phylogenetic analyses, employing both the entirety of the identified single-copy orthologous genes and six housekeeping genes commonly used for fungal classification, did not reveal clear differentiation between A. flavus and A. oryzae genomes. Upon analyzing the aflatoxin biosynthesis gene clusters within the genomes, we observed that non-aflatoxin-producing strains were dispersed throughout the phylogenetic tree, encompassing both A. flavus and A. oryzae strains. This suggests that aflatoxin production is not a distinguishing trait between the two species. Furthermore, A. oryzae and A. flavus strains displayed remarkably similar genomic attributes, including genome sizes, gene contents, and G + C contents, as well as metabolic features and pathways. The profiles of CAZyme genes and secondary metabolite biosynthesis gene clusters within the genomes of both species further highlight their similarity. Collectively, these findings challenge the conventional differentiation of A. flavus and A. oryzae as distinct species and highlight their phylogenetic, genomic, and metabolic homogeneity, potentially indicating that they may indeed belong to the same species.},
}
@article{pmid38224489,
  author   = {Wendisch, VF and Brito, LF and Passaglia, LMP},
  title    = {Genome-based analyses to learn from and about {Paenibacillus} sonchi genomovar {Riograndensis} {SBR5T}.},
  journal  = {Genetics and molecular biology},
  year     = {2024},
  volume   = {46},
  number   = {3 Suppl 1},
  pages    = {e20230115},
  doi      = {10.1590/1678-4685-GMB-2023-0115},
  pmid     = {38224489},
  issn     = {1415-4757},
  abstract = {Paenibacillus sonchi genomovar Riograndensis SBR5T is a plant growth-promoting rhizobacterium (PGPR) isolated in the Brazilian state of Rio Grande do Sul from the rhizosphere of Triticum aestivum. It fixes nitrogen, produces siderophores as well as the phytohormone indole-3-acetic acid, solubilizes phosphate and displays antagonist activity against Listeria monocytogenes and Pectobacterium carotovorum. Comprehensive omics analysis and the development of genetic tools are key to characterizing and engineering such non-model microorganisms. Therefore, the complete genome of SBR5T was sequenced, and shown to encode 6,705 proteins, 87 tRNAs, and 27 rRNAs and it enabled a landscape transcriptome analysis that unveiled conserved transcriptional and translational patterns and characterized operon structures and riboswitches. The pangenome of P. sonchi species is open with a stable core pangenome. At the same time, the analysis of genes coding for nitrogenases revealed that the trait of nitrogen fixation is sparse within the Paenibacillaceae family and the presence of Fe-only nitrogenase in the P. sonchi group was exclusive to SBR5T. The development of genetic tools for SBR5T enabled genetic transformation, plasmid construction for constitutive and inducible gene expression, and gene repression using the CRISPRi system. Altogether, the work with P. sonchi can guide the study of non-model bacteria with economic potential.},
}
@article{pmid38217963,
  author   = {Monterrubio-López, GP and Llamas-Monroy, JL and Martínez-Gómez, ÁA and Delgadillo-Gutiérrez, K},
  title    = {Novel vaccine candidates of {Bordetella} pertussis identified by reverse vaccinology.},
  journal  = {Biologicals : journal of the International Association of Biological Standardization},
  year     = {2024},
  volume   = {85},
  pages    = {101740},
  doi      = {10.1016/j.biologicals.2023.101740},
  pmid     = {38217963},
  issn     = {1095-8320},
  abstract = {Whooping cough is a disease caused by Bordetella pertussis, whose morbidity has increased, motivating the improvement of current vaccines. Reverse vaccinology is a strategy that helps identify proteins with good characteristics fast and with fewer resources. In this work, we applied reverse vaccinology to study the B. pertussis proteome and pangenome with several in-silico tools. We analyzed the B. pertussis Tohama I proteome with NERVE software and compared 234 proteins with B. parapertussis, B. bronchiseptica, and B. holmesii. VaxiJen was used to calculate an antigenicity value; our threshold was 0.6, selecting 84 proteins. The candidates were depurated and grouped in eight family proteins to select representative candidates, according to bibliographic information and their immunological response predicted with ABCpred, Bcepred, IgPred, and C-ImmSim. Additionally, a pangenome study was conducted with 603 B. pertussis strains and PanRV software, identifying 3421 core proteins that were analyzed to select the best candidates. Finally, we selected 15 proteins from the proteome study and seven proteins from the pangenome analysis as good vaccine candidates.},
}
@article{pmid38216873,
  author   = {Yang, Z and Yang, X and Wang, M and Jia, R and Chen, S and Liu, M and Zhao, X and Yang, Q and Wu, Y and Zhang, S and Huang, J and Ou, X and Mao, S and Gao, Q and Sun, D and Tian, B and Zhu, D and Cheng, A},
  title    = {Genome-wide association study reveals serovar-associated genetic loci in {Riemerella} anatipestifer.},
  journal  = {BMC genomics},
  year     = {2024},
  volume   = {25},
  number   = {1},
  pages    = {57},
  pmid     = {38216873},
  issn     = {1471-2164},
  abstract = {BACKGROUND: The disease caused by Riemerella anatipestifer (R. anatipestifer, RA) results in large economic losses to the global duck industry every year. Serovar-related genomic variation, such as the O-antigen and capsular polysaccharide (CPS) gene clusters, has been widely used for serotyping in many gram-negative bacteria. RA has been classified into at least 21 serovars based on slide agglutination, but the molecular basis of serotyping is unknown. In this study, we performed a pan-genome-wide association study (Pan-GWAS) to identify the genetic loci associated with RA serovars.
RESULTS: The results revealed a significant association between the putative CPS synthesis gene locus and the serological phenotype. Further characterization of the CPS gene clusters in 11 representative serovar strains indicated that they were highly diverse and serovar-specific. The CPS gene cluster contained the key genes wzx and wzy, which are involved in the Wzx/Wzy-dependent pathway of CPS synthesis. Similar CPS loci have been found in some other species within the family Weeksellaceae. We have also shown that deletion of the wzy gene in RA results in capsular defects and cross-agglutination.
CONCLUSIONS: This study indicates that the CPS synthesis gene cluster of R. anatipestifer is a serotype-specific genetic locus. Importantly, our finding provides a new perspective for the systematic analysis of the genetic basis of the R. anatipestifer serovars and a potential target for establishing a complete molecular serotyping scheme.},
}
% NOTE(review): the source support field repeated identical grant records (one per grant holder); collapsed here to one record per grant.
@article{pmid38216606,
  author   = {Schreiber, M and Wonneberger, R and Haaning, AM and Coulter, M and Russell, J and Himmelbach, A and Fiebig, A and Muehlbauer, GJ and Stein, N and Waugh, R},
  title    = {Genomic resources for a historical collection of cultivated two-row {European} spring barley genotypes.},
  journal  = {Scientific data},
  year     = {2024},
  volume   = {11},
  number   = {1},
  pages    = {66},
  pmid     = {38216606},
  issn     = {2052-4463},
  support  = {BB/S004610/1//RCUK | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; MU 3589/1-1//Deutsche Forschungsgemeinschaft (German Research Foundation)/ ; 1844331//National Science Foundation (NSF)/ ; },
  abstract = {Barley genomic resources are increasing rapidly, with the publication of a barley pangenome as one of the latest developments. Two-row spring barley cultivars are intensely studied as they are the source of high-quality grain for malting and distilling. Here we provide data from a European two-row spring barley population containing 209 different genotypes registered for the UK market between 1830 to 2014. The dataset encompasses RNA-sequencing data from six different tissues across a range of barley developmental stages, phenotypic datasets from two consecutive years of field-grown trials in the United Kingdom, Germany and the USA; and whole genome shotgun sequencing from all cultivars, which was used to complement the RNA-sequencing data for variant calling. The outcomes are a filtered SNP marker file, a phenotypic database and a large gene expression dataset providing a comprehensive resource which allows for downstream analyses like genome wide association studies or expression associations.},
}
@article{pmid38214698,
  author   = {Park, S and Kim, I and Chhetri, G and Jung, Y and Woo, H and Seo, T},
  title    = {Cellulomonas alba sp. nov. and {Cellulomonas} edaphi sp. nov., isolated from wetland soils.},
  journal  = {International journal of systematic and evolutionary microbiology},
  year     = {2024},
  volume   = {74},
  number   = {1},
  doi      = {10.1099/ijsem.0.006235},
  pmid     = {38214698},
  issn     = {1466-5034},
  abstract = {Two novel strains were isolated from wetland soils in Goyang, Republic of Korea. The two Gram-stain-positive, facultatively anaerobic, rod-shaped bacterial-type strains were designated MW4[T] and MW9[T]. Phylogenomic analysis based on whole-genome sequences suggested that both strains belonged to the genus Cellulomonas. The cells of strain MW4[T] were non-motile and grew at 20-40 °C (optimum, 35 °C), at pH 6.0-10.0 (optimum, pH 8.0) and in the presence of 0-1.0% NaCl (optimum, 0 %). The cells of strain MW9[T] were non-motile and grew at 20-40 °C (optimum, 35 °C), at pH 5.0-9.0 (optimum, pH 8.0) and in the presence of 0-1.0% NaCl (optimum, 0 %). The average nucleotide identity (77.1-88.1 %) and digital DNA-DNA hybridization values (21.0-34.8 %) between the two novel strains and with their closely related strains fell within the range for the genus Cellulomonas. The novel strains MW4[T] and MW9[T] and reference strains possessed alkane synthesis gene clusters (oleA, oleB, oleC and oleD). Phylogenomic, phylogenetic, average nucleotide identity, digital DNA-DNA hybridization, physiological and biochemical data indicated that the novel strains were distinct from other members of the family Cellulomonadaceae. We propose the names Cellulomonas alba sp. nov. (type strain MW4[T]=KACC 23260[T]=TBRC 17645[T]) and Cellulomonas edaphi sp. nov. (type strain MW9[T]=KACC 23261[T]=TBRC 17646[T]) for the two strains.},
}
@article{pmid38213027,
  author   = {Ferrero-Serrano, Á and Chakravorty, D and Kirven, KJ and Assmann, SM},
  title    = {Oryza {CLIMtools}: A Genome-Environment Association Resource Reveals Adaptive Roles for Heterotrimeric {G} Proteins in the Regulation of Rice Agronomic Traits.},
  journal  = {Plant communications},
  year     = {2024},
  pages    = {100813},
  doi      = {10.1016/j.xplc.2024.100813},
  pmid     = {38213027},
  issn     = {2590-3462},
  abstract = {Modern crop varieties display a degree of mismatch between their current distributions and the suitability of the local climate for their productivity. To this end, we present Oryza CLIMtools (https://gramene.org/CLIMtools/oryza_v1.0/), the first resource for pan-genome prediction of climate-associated genetic variants in a crop species. Oryza CLIMtools consists of interactive web-based databases that allow the user to: i) explore the local environments of traditional rice varieties (landraces) in South-Eastern Asia, and; ii) investigate the environment by genome associations for 658 Indica and 283 Japonica rice landrace accessions collected from georeferenced local environments and included in the 3K Rice Genomes Project. We exemplify the value of these resources, identifying an interplay between flowering time and temperature in the local environment that is facilitated by adaptive natural variation in OsHD2 and disrupted by a natural variant in OsSOC1. Prior QTL analysis has suggested the importance of heterotrimeric G proteins in the control of agronomic traits. Accordingly, we analyzed the climate associations of natural variants in the different heterotrimeric G protein subunits. We identified a coordinated role of G proteins in adaptation to the prevailing Potential Evapotranspiration gradient and their regulation of key agronomic traits including plant height and seed and panicle length. We conclude by highlighting the prospect of targeting heterotrimeric G proteins to produce crops that are climate resilient.},
}
% NOTE(review): pages = article number read from the DOI suffix (...25010666 -> vol 25, no. 1, art. 666); confirm against the publisher record.
@article{pmid38203838,
  author   = {Bin Hafeez, A and Pełka, K and Worobo, R and Szweda, P},
  title    = {In Silico Safety Assessment of {Bacillus} Isolated from {Polish} Bee Pollen and Bee Bread as Novel Probiotic Candidates.},
  journal  = {International journal of molecular sciences},
  year     = {2024},
  volume   = {25},
  number   = {1},
  pages    = {666},
  doi      = {10.3390/ijms25010666},
  pmid     = {38203838},
  issn     = {1422-0067},
  abstract = {Bacillus species isolated from Polish bee pollen (BP) and bee bread (BB) were characterized for in silico probiotic and safety attributes. A probiogenomics approach was used, and in-depth genomic analysis was performed using a wide array of bioinformatics tools to investigate the presence of virulence and antibiotic resistance properties, mobile genetic elements, and secondary metabolites. Functional annotation and Carbohydrate-Active enZYmes (CAZYme) profiling revealed the presence of genes and a repertoire of probiotics properties promoting enzymes. The isolates BB10.1, BP20.15 (isolated from bee bread), and PY2.3 (isolated from bee pollen) genome mining revealed the presence of several genes encoding acid, heat, cold, and other stress tolerance mechanisms, adhesion proteins required to survive and colonize harsh gastrointestinal environments, enzymes involved in the metabolism of dietary molecules, antioxidant activity, and genes associated with the synthesis of vitamins. In addition, genes responsible for the production of biogenic amines (BAs) and D-/L-lactate, hemolytic activity, and other toxic compounds were also analyzed. Pan-genome analyses were performed with 180 Bacillus subtilis and 204 Bacillus velezensis genomes to mine for any novel genes present in the genomes of our isolates. Moreover, all three isolates also consisted of gene clusters encoding secondary metabolites.},
}
% NOTE(review): pages = article number read from the DOI suffix (...25010185 -> vol 25, no. 1, art. 185); confirm against the publisher record.
@article{pmid38203357,
  author   = {Liu, K and Xu, H and Gao, X and Lu, Y and Wang, L and Ren, Z and Chen, C},
  title    = {Pan-Genome Analysis of {TIFY} Gene Family and Functional Analysis of {CsTIFY} Genes in Cucumber.},
  journal  = {International journal of molecular sciences},
  year     = {2023},
  volume   = {25},
  number   = {1},
  pages    = {185},
  doi      = {10.3390/ijms25010185},
  pmid     = {38203357},
  issn     = {1422-0067},
  support  = {32372703//the National Natural Science Foundation of China/ ; 32172605//the National Natural Science Foundation of China/ ; ZR2022MC084//the Shandong Natural Science Foundation/ ; },
  abstract = {Cucumbers are frequently affected by gray mold pathogen Botrytis cinerea, a pathogen that causes inhibited growth and reduced yield. Jasmonic acid (JA) plays a primary role in plant responses to biotic stresses, and the jasmonate-ZIM-Domain (JAZ) proteins are key regulators of the JA signaling pathway. In this study, we used the pan-genome of twelve cucumber varieties to identify cucumber TIFY genes. Our findings revealed that two CsTIFY genes were present in all twelve cucumber varieties and showed no differences in protein sequence, gene structure, and motif composition. This suggests their evolutionary conservation across different cucumber varieties and implies that they may play a crucial role in cucumber growth. On the other hand, the other fourteen CsTIFY genes exhibited variations in protein sequence and gene structure or conserved motifs, which could be the result of divergent evolution, as these genes adapt to different cultivation and environmental conditions. Analysis of the expression profiles of the CsTIFY genes showed differential regulation by B. cinerea. Transient transfection plants overexpressing CsJAZ2, CsJAZ6, or CsZML2 were found to be more susceptible to B. cinerea infection compared to control plants. Furthermore, these plants infected by the pathogen showed lower levels of the enzymatic activities of POD, SOD and CAT. Importantly, after B. cinerea infection, the content of JA was upregulated in the plants, and cucumber cotyledons pretreated with exogenous MeJA displayed increased resistance to B. cinerea infection compared to those pretreated with water. Therefore, this study explored key TIFY genes in the regulation of cucumber growth and adaptability to different cultivation environments based on bioinformatics analysis and demonstrated that CsJAZs negatively regulate cucumber disease resistance to gray mold via multiple signaling pathways.},
}
@article{pmid38200255,
  author   = {Sosinsky, A and Ambrose, J and Cross, W and Turnbull, C and Henderson, S and Jones, L and Hamblin, A and Arumugam, P and Chan, G and Chubb, D and Noyvert, B and Mitchell, J and Walker, S and Bowman, K and Pasko, D and Buongermino Pereira, M and Volkova, N and Rueda-Martin, A and Perez-Gil, D and Lopez, J and Pullinger, J and Siddiq, A and Zainy, T and Choudhury, T and Yavorska, O and Fowler, T and Bentley, D and Kingsley, C and Hing, S and Deans, Z and Rendon, A and Hill, S and Caulfield, M and Murugaesu, N},
  title    = {Insights for precision oncology from the integration of genomic and clinical data of 13,880 tumors from the 100,000 Genomes Cancer Programme.},
  journal  = {Nature medicine},
  year     = {2024},
  pmid     = {38200255},
  issn     = {1546-170X},
  support  = {C1298/A8362//Cancer Research UK (CRUK)/ ; C17422/A25154.//Cancer Research UK (CRUK)/ ; Barts Biomedical Research Centre//DH | National Institute for Health Research (NIHR)/ ; },
  abstract = {The Cancer Programme of the 100,000 Genomes Project was an initiative to provide whole-genome sequencing (WGS) for patients with cancer, evaluating opportunities for precision cancer care within the UK National Healthcare System (NHS). Genomics England, alongside NHS England, analyzed WGS data from 13,880 solid tumors spanning 33 cancer types, integrating genomic data with real-world treatment and outcome data, within a secure Research Environment. Incidence of somatic mutations in genes recommended for standard-of-care testing varied across cancer types. For instance, in glioblastoma multiforme, small variants were present in 94% of cases and copy number aberrations in at least one gene in 58% of cases, while sarcoma demonstrated the highest occurrence of actionable structural variants (13%). Homologous recombination deficiency was identified in 40% of high-grade serous ovarian cancer cases with 30% linked to pathogenic germline variants, highlighting the value of combined somatic and germline analysis. The linkage of WGS and longitudinal life course clinical data allowed the assessment of treatment outcomes for patients stratified according to pangenomic markers. Our findings demonstrate the utility of linking genomic and real-world clinical data to enable survival analysis to identify cancer genes that affect prognosis and advance our understanding of how cancer genomics impacts patient outcomes.},
}
@article{pmid38191433,
  author   = {Zhang, RY and Wang, YR and Liu, RL and Rhee, SK and Zhao, GP and Quan, ZX},
  title    = {Metagenomic characterization of a novel non-ammonia-oxidizing Thaumarchaeota from hadal sediment.},
  journal  = {Microbiome},
  year     = {2024},
  volume   = {12},
  number   = {1},
  pages    = {7},
  pmid     = {38191433},
  issn     = {2049-2618},
  support  = {2021R1A2C3004015//National Research Foundation of Korea/ ; 2018YFC0310600//the National Key R&D Program of China/ ; 31870109, 31811540398//the National Natural Science Foundation of China (NSFC)/ ; },
  abstract = {BACKGROUND: The hadal sediment, found at an ocean depth of more than 6000 m, is geographically isolated and under extremely high hydrostatic pressure, resulting in a unique ecosystem. Thaumarchaeota are ubiquitous marine microorganisms predominantly present in hadal environments. While there have been several studies on Thaumarchaeota there, most of them have primarily focused on ammonia-oxidizing archaea (AOA). However, systematic metagenomic research specifically targeting heterotrophic non-AOA Thaumarchaeota is lacking.
RESULTS: In this study, we explored the metagenomes of Challenger Deep hadal sediment, focusing on the Thaumarchaeota. Functional analysis of sequence reads revealed the potential contribution of Thaumarchaeota to recalcitrant dissolved organic matter degradation. Metagenome assembly binned one new group of hadal sediment-specific and ubiquitously distributed non-AOA Thaumarchaeota, named Group-3.unk. Pathway reconstruction of this new type of Thaumarchaeota also supports heterotrophic characteristics of Group-3.unk, along with ABC transporters for the uptake of amino acids and carbohydrates and catabolic utilization of these substrates. This new clade of Thaumarchaeota also contains aerobic oxidation of carbon monoxide-related genes. Complete glyoxylate cycle is a distinctive feature of this clade in supplying intermediates of anabolic pathways. The pan-genomic and metabolic analyses of metagenome-assembled genomes belonging to Group-3.unk Thaumarchaeota have highlighted distinctions, including the dihydroxy phthalate decarboxylase gene associated with the degradation of aromatic compounds and the absence of genes related to the synthesis of some types of vitamins compared to AOA. Notably, Group-3.unk shares a common feature with deep ocean AOA, characterized by their high hydrostatic pressure resistance, potentially associated with the presence of V-type ATP and di-myo-inositol phosphate syntheses-related genes. The enrichment of organic matter in hadal sediments might be attributed to the high recruitment of sequence reads of the Group-3.unk clade of heterotrophic Thaumarchaeota in the trench sediment. Evolutionary and genetic dynamic analyses suggest that Group-3 non-AOA consists of mesophilic Thaumarchaeota organisms. These results indicate a potential role in the transition from non-AOA to AOA Thaumarchaeota and from thermophilic to mesophilic Thaumarchaeota, shedding light on recent evolutionary pathways.
CONCLUSIONS: One novel clade of heterotrophic non-AOA Thaumarchaeota was identified through metagenome analysis of sediments from Challenger Deep. Our study provides insight into the ecology and genomic characteristics of the new sub-group of heterotrophic non-AOA Thaumarchaeota, thereby extending the knowledge of the evolution of Thaumarchaeota. Video Abstract.},
}
@article{pmid38189173,
  author   = {Biderre-Petit, C and Courtine, D and Hennequin, C and Galand, PE and Bertilsson, S and Debroas, D and Monjot, A and Lepère, C and Divne, AM and Hochart, C},
  title    = {A pan-genomic approach reveals novel Sulfurimonas clade in the ferruginous meromictic Lake Pavin.},
  journal  = {Molecular ecology resources},
  year     = {2024},
  pages    = {e13923},
  doi      = {10.1111/1755-0998.13923},
  pmid     = {38189173},
  issn     = {1755-0998},
  abstract = {The permanently anoxic waters in meromictic lakes create suitable niches for the growth of bacteria using sulphur metabolisms like sulphur oxidation. In Lake Pavin, the anoxic water mass hosts an active cryptic sulphur cycle that interacts narrowly with iron cycling, however the metabolisms of the microorganisms involved are poorly known. Here we combined metagenomics, single-cell genomics, and pan-genomics to further expand our understanding of the bacteria and the corresponding metabolisms involved in sulphur oxidation in this ferruginous sulphide- and sulphate-poor meromictic lake. We highlighted two new species within the genus Sulfurimonas that belong to a novel clade of chemotrophic sulphur oxidisers exclusive to freshwaters. We moreover conclude that this genus holds a key-role not only in limiting sulphide accumulation in the upper part of the anoxic layer but also constraining carbon, phosphate and iron cycling.},
}
@article{pmid38188626,
  author   = {Karthik, K and Subramanian, S and Vinoli Priyadharshini, M and Jawahar, A and Anbazhagan, S and Kathiravan, RS and Thomas, P and Babu, RPA and Gopalan Tirumurugaan, K and Raj, GD},
  title    = {Whole genome sequencing and comparative genomics of Mycobacterium orygis isolated from different animal hosts to identify specific diagnostic markers.},
  journal  = {Frontiers in cellular and infection microbiology},
  year     = {2023},
  volume   = {13},
  pages    = {1302393},
  pmid     = {38188626},
  issn     = {2235-2988},
  abstract = {INTRODUCTION: Mycobacterium orygis, a member of MTBC has been identified in higher numbers in the recent years from animals of South Asia. Comparative genomics of this important zoonotic pathogen is not available which can provide data on the molecular difference between other MTBC members. Hence, the present study was carried out to isolate, whole genome sequence M. orygis from different animal species (cattle, buffalo and deer) and to identify molecular marker for the differentiation of M. orygis from other MTBC members.
METHODS: Isolation and whole genome sequencing of M. orygis was carried out for 9 samples (4 cattle, 4 deer and 1 buffalo) died due to tuberculosis. Comparative genomics employing 53 genomes (44 from database and 9 newly sequenced) was performed to identify SNPs, spoligotype, pangenome structure, and region of difference.
RESULTS: M. orygis was isolated from water buffalo and sambar deer which is the first of its kind report worldwide. Comparative pangenomics of all M. orygis strains worldwide (n= 53) showed a closed pangenome structure which is also reported for the first time. Pairwise SNP between TANUVAS_2, TANUVAS_4, TANUVAS_5, TANUVAS_7 and NIRTAH144 was less than 15 indicating that the same M. orygis strain may be the cause for infection. Region of difference prediction showed absence of RD7, RD8, RD9, RD10, RD12, RD301, RD315 in all the M. orygis analyzed. SNPs in virulence gene, PE35 was found to be unique to M. orygis which can be used as marker for identification.
CONCLUSION: The present study is yet another supportive evidence that M. orygis is more prevalent among animals in South Asia and the zoonotic potential of this organism needs to be evaluated.},
}
@article{pmid38187556,
  author   = {Oles, RE and Terrazas, MC and Loomis, LR and Hsu, CY and Tribelhorn, C and Ferre, PB and Ea, A and Bryant, M and Young, J and Carrow, HC and Sandborn, WJ and Dulai, P and Sivagnanam, M and Pride, D and Knight, R and Chu, H},
  title    = {Pangenome comparison of Bacteroides fragilis genomospecies unveil genetic diversity and ecological insights.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2023},
  doi      = {10.1101/2023.12.20.572674},
  pmid     = {38187556},
  abstract = {Bacteroides fragilis is a Gram-negative commensal bacterium commonly found in the human colon that differentiates into two genomospecies termed division I and II. We leverage a comprehensive collection of 694 B. fragilis whole genome sequences and report differential gene abundance to further support the recent proposal that divisions I and II represent separate species. In division I strains, we identify an increased abundance of genes related to complex carbohydrate degradation, colonization, and host niche occupancy, confirming the role of division I strains as gut commensals. In contrast, division II strains display an increased prevalence of plant cell wall degradation genes and exhibit a distinct geographic distribution, primarily originating from Asian countries, suggesting dietary influences. Notably, division II strains have an increased abundance of genes linked to virulence, survival in toxic conditions, and antimicrobial resistance, consistent with a higher incidence of these strains in bloodstream infections. This study provides new evidence supporting a recent proposal for classifying divisions I and II B. fragilis strains as distinct species, and our comparative genomic analysis reveals their niche-specific roles.},
}
@article{pmid38183874,
  author   = {Yu, K and Huang, Z and Xiao, Y and Gao, H and Bai, X and Wang, D},
  title    = {Global spread characteristics of CTX-M-type extended-spectrum β-lactamases: A genomic epidemiology analysis.},
  journal  = {Drug resistance updates : reviews and commentaries in antimicrobial and anticancer chemotherapy},
  year     = {2023},
  volume   = {73},
  pages    = {101036},
  doi      = {10.1016/j.drup.2023.101036},
  pmid     = {38183874},
  issn     = {1532-2084},
  abstract = {BACKGROUND: Extended-spectrum β-lactamases (ESBLs) producing bacteria have spread worldwide and become a global public health concern. Plasmid-mediated transfer of ESBLs is an important route for resistance acquisition.
METHODS: We collected 1345 complete sequences of plasmids containing CTX-Ms from public database. The global transmission pattern of plasmids and evolutionary dynamics of CTX-Ms have been inferred. We applied the pan-genome clustering based on plasmid genomes and evolution analysis to demonstrate the transmission events.
FINDINGS: Totally, 48 CTX-Ms genotypes and 186 incompatible types of plasmids were identified. The geographical distribution of CTX-Ms showed significant differences across countries and continents. CTX-M-14 and CTX-M-55 were found to be the dominant genotypes in Asia, while CTX-M-1 played a leading role in Europe. The plasmids can be divided into 12 lineages, some of which forming distinct geographical clusters in Asia and Europe, while others forming hybrid populations. The Inc types of plasmids are lineage-specific, with the CTX-M-1_IncI1-I (Alpha) and CTX-M-65_IncFII (pHN7A8)/R being the dominant patterns of cross-host and cross-regional transmission. The IncI-I (Alpha) plasmids with the highest number, were presumed to form communication groups in Europe-Asia and Asia-America-Oceania, showing the transmission model as global dissemination and regional microevolution. Meanwhile, the main kinetic elements of blaCTX-Ms showed genotypic preferences. ISEcpl and IS26 were most frequently involved in the transfer of CTX-M-14 and CTX-M-65, respectively. IS15 has become a crucial participant in mediating the dissemination of blaCTX-Ms. Interestingly, blaTEM and blaCTX-Ms often coexisted in the same transposable unit. Furthermore, antibiotic resistance genes associated with aminoglycosides, sulfonamides and cephalosporins showed a relatively high frequency of synergistic effects with CTX-Ms.
CONCLUSIONS: We recognized the dominant blaCTX-Ms and mainstream plasmids of different continents. The results of this study provide support for a more effective response to the risks associated with the evolution of blaCTX-Ms-bearing plasmids, and lay the foundation for genotype-specific epidemiological surveillance of resistance, which are of important public health implications.},
}
@article{pmid38181886,
  author   = {Verma, N and Sharma, T and Bhardwaj, A and Ramana, VV},
  title    = {Comparative genomics and characterization of a multidrug-resistant Acinetobacter baumannii VRL-M19 isolated from a crowded setting in India.},
  journal  = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases},
  year     = {2024},
  pages    = {105549},
  doi      = {10.1016/j.meegid.2023.105549},
  pmid     = {38181886},
  issn     = {1567-7257},
  abstract = {A crowded vegetable market serves as a mass gathering, posing a potential risk for infection transmission. In this study, we isolated a multidrug-resistant Acinetobacter baumannii strain, VRL-M19, from the air of such a market and conducted comparative genomics and phenotypic characterization. Antimicrobial susceptibility testing, genome sequencing using Illumina HiSeq X10, and pan-genome analysis with 788 clinical isolates identified core, accessory, and unique drug-resistant determinants. Mutational analysis of drug-resistance genes, virulence factor annotation, in vitro pathogenicity assessment, subsystem analysis, Multilocus sequence typing, and whole genome phylogenetic analysis were performed. VRL-M19 exhibited multidrug resistance with 69 determinants, and analysis across 788 clinical isolates and 350 Indian isolates revealed more accessory genes (52 out of 69) in the Indian isolates. Multiple mutations were observed in drug target modification genes, and the strain was identified as a moderate biofilm-former with 55 virulence factors. Whole genome phylogenetics indicated a close relationship between VRL-M19 and clinical A. baumannii strains. In conclusion, our comprehensive study suggests that VRL-M19 is a multidrug-resistant, potential pathogen with biofilm-forming capabilities, closely associated with clinical A. baumannii strains.},
}
@article{pmid38177691,
  author  = {Domingo-Sananes, MR and Meehan, CJ},
  title   = {The population genetics of prokaryotic pangenomes.},
  journal = {Nature ecology \& evolution},
  year    = {2024},
  pmid    = {38177691},
  issn    = {2397-334X},
}
@article{pmid38177690,
  author   = {Douglas, GM and Shapiro, BJ},
  title    = {Pseudogenes act as a neutral reference for detecting selection in prokaryotic pangenomes.},
  journal  = {Nature ecology \& evolution},
  year     = {2024},
  pmid     = {38177690},
  issn     = {2397-334X},
  abstract = {A long-standing question is to what degree genetic drift and selection drive the divergence in rare accessory gene content between closely related bacteria. Rare genes, including singletons, make up a large proportion of pangenomes (all genes in a set of genomes), but it remains unclear how many such genes are adaptive, deleterious or neutral to their host genome. Estimates of species' effective population sizes (Ne) are positively associated with pangenome size and fluidity, which has independently been interpreted as evidence for both neutral and adaptive pangenome models. We hypothesized that pseudogenes, used as a neutral reference, could be used to distinguish these models. We find that most functional categories are depleted for rare pseudogenes when a genome encodes only a single intact copy of a gene family. In contrast, transposons are enriched in pseudogenes, suggesting they are mostly neutral or deleterious to the host genome. Thus, even if individual rare accessory genes vary in their effects on host fitness, we can confidently reject a model of entirely neutral or deleterious rare genes. We also define the ratio of singleton intact genes to singleton pseudogenes (si/sp) within a pangenome, compare this measure across 668 prokaryotic species and detect a signal consistent with the adaptive value of many rare accessory genes. Taken together, our work demonstrates that comparing with pseudogenes can improve inferences of the evolutionary forces driving pangenome variation.},
}
@article{pmid38173673,
  author   = {Sarr, M and Alou, MT and Padane, A and Diouf, FS and Beye, M and Sokhna, C and Fenollar, F and Mboup, S and Raoult, D and Million, M},
  title    = {A review of the literature of Listeria monocytogenes in Africa highlights breast milk as an overlooked human source.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1213953},
  pmid     = {38173673},
  issn     = {1664-302X},
  abstract = {According to the latest WHO estimates (2015) of the global burden of foodborne diseases, Listeria monocytogenes is responsible for one of the most serious foodborne infections and commonly results in severe clinical outcomes. The 2013 French MONALISA prospective cohort identified that women born in Africa has a 3-fold increase in the risk of maternal neonatal listeriosis. One of the largest L. monocytogenes outbreaks occurred in South Africa in 2017-2018 with over 1,000 cases. Moreover, recent findings identified L. monocytogenes in human breast milk in Mali and Senegal with its relative abundance positively correlated with severe acute malnutrition. These observations suggest that the carriage of L. monocytogenes in Africa should be further explored, starting with the existing literature. For that purpose, we searched the peer-reviewed and grey literature published dating back to 1926 to date using six databases. Ultimately, 225 articles were included in this review. We highlighted that L. monocytogenes is detected in various sample types including environmental samples, food samples as well as animal and human samples. These studies were mostly conducted in five east African countries, four west African countries, four north African countries, and two Southern African countries. Moreover, only ≈ 0.2% of the Listeria monocytogenes genomes available on NCBI were obtained from African samples, contracted with its detection. The pangenome resulting from the African Listeria monocytogenes samples revealed three clusters including two from South-African strains as well as one consisting of the strains isolated from breast milk in Mali and Senegal and, a vaginal post-miscarriage sample. This suggests there was a clonal complex circulating in Mali and Senegal. As this clone has not been associated to infections, further studies should be conducted to confirm its circulation in the region and explore its association with foodborne infections. Moreover, it is apparent that more resources should be allocated to the detection of L. monocytogenes as only 15/54 countries have reported its detection in the literature. It seems paramount to map the presence and carriage of L. monocytogenes in all African countries to prevent listeriosis outbreaks and the related miscarriages and confirm its association with severe acute malnutrition.},
}
@article{pmid38172677,
  author   = {Choi, DG and Baek, JH and Han, DM and Khan, SA and Jeon, CO},
  title    = {Comparative pangenome analysis of Enterococcus faecium and Enterococcus lactis provides new insights into the adaptive evolution by horizontal gene acquisitions.},
  journal  = {BMC genomics},
  year     = {2024},
  volume   = {25},
  number   = {1},
  pages    = {28},
  pmid     = {38172677},
  issn     = {1471-2164},
  support  = {Graduate Research Scholarship in 2018//Chung-Ang University/ ; PJ01710102//Rural Development Administration/ ; 2018R1A5A1025077//Ministry of Science and ICT, South Korea/ ; },
  abstract = {BACKGROUND: Enterococcus faecium and E. lactis are phylogenetically closely related lactic acid bacteria that are ubiquitous in nature and are known to be beneficial or pathogenic. Despite their considerable industrial and clinical importance, comprehensive studies on their evolutionary relationships and genomic, metabolic, and pathogenic traits are still lacking. Therefore, we conducted comparative pangenome analyses using all available dereplicated genomes of these species.
RESULTS: E. faecium was divided into two subclades: subclade I, comprising strains derived from humans, animals, and food, and the more recent phylogenetic subclade II, consisting exclusively of human-derived strains. In contrast, E. lactis strains, isolated from diverse sources including foods, humans, animals, and the environment, did not display distinct clustering based on their isolation sources. Despite having similar metabolic features, noticeable genomic differences were observed between E. faecium subclades I and II, as well as E. lactis. Notably, E. faecium subclade II strains exhibited significantly larger genome sizes and higher gene counts compared to both E. faecium subclade I and E. lactis strains. Furthermore, they carried a higher abundance of antibiotic resistance, virulence, bacteriocin, and mobile element genes. Phylogenetic analysis of antibiotic resistance and virulence genes suggests that E. faecium subclade II strains likely acquired these genes through horizontal gene transfer, facilitating their effective adaptation in response to antibiotic use in humans.
CONCLUSIONS: Our study offers valuable insights into the adaptive evolution of E. faecium strains, enabling their survival as pathogens in the human environment through horizontal gene acquisitions.},
}
@article{pmid38170317,
  author   = {Lin, J and Xiao, Y and Liu, H and Gao, D and Duan, Y and Zhu, X},
  title    = {Combined transcriptomic and pangenomic analyses guide metabolic amelioration to enhance tiancimycins production.},
  journal  = {Applied microbiology and biotechnology},
  year     = {2024},
  volume   = {108},
  number   = {1},
  pages    = {1--11},
  pmid     = {38170317},
  issn     = {1432-0614},
  support  = {2020zzts248//Fundamental Research Funds for Central Universities of the Central South University/ ; 81530092//National Natural Science Foundation of China/ ; B0803420//Chinese Ministry of Education 111/ ; },
  abstract = {Exploration of high-yield mechanism is important for further titer improvement of valuable antibiotics, but how to achieve this goal is challenging. Tiancimycins (TNMs) are anthraquinone-fused enediynes with promising drug development potentials, but their prospective applications are limited by low titers. This work aimed to explore the intrinsic high-yield mechanism in previously obtained TNMs high-producing strain Streptomyces sp. CB03234-S for the further titer amelioration of TNMs. First, the typical ribosomal RpsL(K43N) mutation in CB03234-S was validated to be merely responsible for the streptomycin resistance but not the titer improvement of TNMs. Subsequently, the combined transcriptomic, pan-genomic and KEGG analyses revealed that the significant changes in the carbon and amino acid metabolisms could reinforce the metabolic fluxes of key CoA precursors, and thus prompted the overproduction of TNMs in CB03234-S. Moreover, fatty acid metabolism was considered to exert adverse effects on the biosynthesis of TNMs by shunting and reducing the accumulation of CoA precursors. Therefore, different combinations of relevant genes were respectively overexpressed in CB03234-S to strengthen fatty acid degradation. The resulting mutants all showed the enhanced production of TNMs. Among them, the overexpression of fadD, a key gene responsible for the first step of fatty acid degradation, achieved the highest 21.7 ± 1.1 mg/L TNMs with a 63.2% titer improvement. Our studies suggested that comprehensive bioinformatic analyses are effective to explore metabolic changes and guide rational metabolic reconstitution for further titer improvement of target products. KEY POINTS: • Comprehensive bioinformatic analyses effectively reveal primary metabolic changes. • Primary metabolic changes cause precursor enrichment to enhance TNMs production. • Strengthening of fatty acid degradation further improves the titer of TNMs.},
}
@article{pmid38168881,
  author   = {Triesch, S and Denton, AK and Bouvier, JW and Buchmann, JP and Reichel-Deland, V and Guerreiro, RNFM and Busch, N and Schlüter, U and Stich, B and Kelly, S and Weber, APM},
  title    = {Transposable elements contribute to the establishment of the glycine shuttle in Brassicaceae species.},
  journal  = {Plant biology (Stuttgart, Germany)},
  year     = {2024},
  doi      = {10.1111/plb.13601},
  pmid     = {38168881},
  issn     = {1438-8677},
  support  = {391465903/GRK 2466//Deutsche Forschungsgemeinschaft (German Research Foundation)/ ; 390686111//Germany's Excellence Strategy EXC-2048/1/ ; WE 2231/20-1//ERA-CAPS (European Research Network for Coordinating Action in Plant Sciences)/ ; 456082119//CRC (Collaborative Research Center)/ ; BB/J014427/1//BBSRC/ ; //Royal Society University Research Fellowship/ ; },
  abstract = {C3 -C4 intermediate photosynthesis has evolved at least five times convergently in the Brassicaceae, despite this family lacking bona fide C4 species. The establishment of this carbon concentrating mechanism is known to require a complex suite of ultrastructural modifications, as well as changes in spatial expression patterns, which are both thought to be underpinned by a reconfiguration of existing gene-regulatory networks. However, to date, the mechanisms which underpin the reconfiguration of these gene networks are largely unknown. In this study, we used a pan-genomic association approach to identify genomic features that could confer differential gene expression towards the C3 -C4 intermediate state by analysing eight C3 species and seven C3 -C4 species from five independent origins in the Brassicaceae. We found a strong correlation between transposable element (TE) insertions in cis-regulatory regions and C3 -C4 intermediacy. Specifically, our study revealed 113 gene models in which the presence of a TE within a gene correlates with C3 -C4 intermediate photosynthesis. In this set, genes involved in the photorespiratory glycine shuttle are enriched, including the glycine decarboxylase P-protein whose expression domain undergoes a spatial shift during the transition to C3 -C4 photosynthesis. When further interrogating this gene, we discovered independent TE insertions in its upstream region which we conclude to be responsible for causing the spatial shift in GLDP1 gene expression. Our findings hint at a pivotal role of TEs in the evolution of C3 -C4 intermediacy, especially in mediating differential spatial gene expression.},
}
@article{pmid38168637,
  author   = {Guo, N and Wang, S and Wang, T and Duan, M and Zong, M and Miao, L and Han, S and Wang, G and Liu, X and Zhang, D and Jiao, C and Xu, H and Chen, L and Fei, Z and Li, J and Liu, F},
  title    = {Graph-based Pan-genome of Brassica oleracea Provides New Insights into Its Domestication and Morphotype Diversification.},
  journal  = {Plant communications},
  year     = {2024},
  pages    = {100791},
  doi      = {10.1016/j.xplc.2023.100791},
  pmid     = {38168637},
  issn     = {2590-3462},
  abstract = {The domestication of Brassica oleracea has resulted in diverse morphological types with distinct patterns of organ development. Here we report a graph-based pan-genome of B. oleracea constructed with high-quality genome assemblies of different morphotypes. The pan-genome harbors over 200 structural variant (SV) hotspot regions enriched with auxin and flowering-related genes. Population genomic analyses reveal that early domestication of B. oleracea focused on leaf or stem development. Gene flows resulting from agricultural practices and variety improvement are detected among different morphotypes. Selective sweep and pan-genome analyses identify an auxin-responsive SAUR gene and a CLE family gene as crucial players in the leaf-stem differentiation during the early stage of B. oleracea domestication, and the BoKAN1 gene as instrumental in shaping the leafy heads of cabbage and Brussels sprouts. Our pan-genome and functional analyses further discover that variations in the BoFLC2 gene play key roles in the divergence of vernalization and flowering characteristics among different morphotypes, and variations in the first intron of BoFLC3 are involved in fine-tuning the flowering process in cauliflower. This study provides a comprehensive understanding of the pan-genome of B. oleracea and sheds light on the domestication and differential organ development of this globally important crop species.},
}
@article{pmid38168361,
  author   = {Sirén, J and Eskandar, P and Ungaro, MT and Hickey, G and Eizenga, JM and Novak, AM and Chang, X and Chang, PC and Kolmogorov, M and Carroll, A and Monlong, J and Paten, B},
  title    = {Personalized Pangenome References.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2023},
  doi      = {10.1101/2023.12.13.571553},
  pmid     = {38168361},
  abstract = {Pangenomes, by including genetic diversity, should reduce reference bias by better representing new samples compared to them. Yet when comparing a new sample to a pangenome, variants in the pangenome that are not part of the sample can be misleading, for example, causing false read mappings. These irrelevant variants are generally rarer in terms of allele frequency, and have previously been dealt with using allele frequency filters. However, this is a blunt heuristic that both fails to remove some irrelevant variants and removes many relevant variants. We propose a new approach, inspired by local ancestry inference methods, that imputes a personalized pangenome subgraph based on sampling local haplotypes according to k -mer counts in the reads. Our approach is tailored for the Giraffe short read aligner, as the indexes it needs for read mapping can be built quickly. We compare the accuracy of our approach to state-of-the-art methods using graphs from the Human Pangenome Reference Consortium. The resulting personalized pangenome pipelines provide faster pangenome read mapping than comparable pipelines that use a linear reference, reduce small variant genotyping errors by 4x relative to the Genome Analysis Toolkit (GATK) best-practice pipeline, and for the first time make short-read structural variant genotyping competitive with long-read discovery methods.},
}
% medRxiv preprint (no volume/issue/pages in the source export).
% Percent signs in the abstract are escaped so the field is safe if a style typesets it.
@article{pmid38168234,
  author   = {Qiu, X. and McGee, L. and Hammitt, L. L. and Grant, L. R. and O'Brien, K. L. and Hanage, W. P. and Lipsitch, M.},
  title    = {Prediction of post-{PCV13} pneumococcal evolution using invasive disease data enhanced by inverse-invasiveness weighting},
  journal  = {medRxiv : the preprint server for health sciences},
  year     = {2023},
  doi      = {10.1101/2023.12.10.23299786},
  pmid     = {38168234},
  abstract = {BACKGROUND: After introduction of pneumococcal conjugate vaccines (PCVs), serotype replacement occurred in the population of Streptococcus pneumoniae. Predicting which pneumococcal clones and serotypes will become more common in carriage after vaccination can enhance vaccine design and public health interventions, while also improving our understanding of pneumococcal evolution. We sought to use invasive disease data to assess how well negative frequency-dependent selection (NFDS) models could explain pneumococcal carriage population evolution in the post-PCV13 epoch by weighting invasive data to approximate strain proportions in the carriage population.
METHODS: Invasive pneumococcal isolates were collected and sequenced during 1998-2018 by the Active Bacterial Core surveillance (ABCs) from the Centers for Disease Control and Prevention (CDC). To predict the post-PCV13 population dynamics in the carriage population using a NFDS model, all genomic data were processed under a bioinformatic pipeline of assembly, annotation, and pangenome analysis to define genetically similar sequence clusters (i.e., strains) and a set of accessory genes present in 5\% to 95\% of the isolates. The NFDS model predicted the strain proportion by calculating the post-vaccine strain composition in the weighted invasive disease population that would best match pre-vaccine accessory gene frequencies. To overcome the biases of invasive disease data, serotype-specific inverse-invasiveness weights were defined as the ratio of the proportion of the serotype in the carriage data to the proportion in the invasive data, using data from 1998-2001 in the United States, before conjugate vaccine introduction. The weights were applied to adjust both the observed strain proportion and the accessory gene frequencies.
RESULTS: Inverse-invasiveness weighting increased the correlation of accessory gene frequencies between invasive and carriage data with reduced residuals in linear or logit scale for pre-vaccine, post-PCV7, and post-PCV13. Similarly, weighting increased the correlation of accessory gene frequencies between different time periods in the invasive data. By weighting the invasive data, we were able to use the NFDS model to predict strain proportions in the carriage population in the post-PCV13 epoch, with the adjusted R-squared between predicted and observed strain proportions increasing from 0.176 to 0.544 after weighting.
CONCLUSIONS: The weighting system adjusted the invasive disease surveillance data to better represent the carriage population of S. pneumoniae. The NFDS mechanism predicted the strain proportions in the projected carriage population as estimated from the weighted invasive disease frequencies in the post-PCV13 epoch. Our methods enrich the value of genomic sequences from invasive disease surveillance, which is readily available, easy to collect, and of direct interest to public health.
IMPORTANCE: Streptococcus pneumoniae, a common colonizer in the human nasopharynx, can cause invasive diseases including pneumonia, bacteremia, and meningitis mostly in children under 5 years or older adults. The PCV7 was introduced in 2000 in the United States within the pediatric population to prevent disease and reduce deaths, followed by PCV13 in 2010, PCV15 in 2022, and PCV20 in 2023. After the removal of vaccine serotypes, the prevalence of carriage remained stable as the vacated pediatric ecological niche was filled with certain non-vaccine serotypes. Predicting which pneumococcal clones, and which serotypes, will be most successful in colonization after vaccination can enhance vaccine design and public health interventions, while also improving our understanding of pneumococcal evolution. While carriage data, which are collected from the pneumococcal population that is competing to colonize and transmit, are most directly relevant to evolutionary studies, invasive disease data are often more plentiful. Previously, evolutionary models based on negative frequency-dependent selection (NFDS) on the accessory genome were shown to predict which non-vaccine strains and serotypes were most successful in colonization following the introduction of PCV7. Here, we show that an inverse-invasiveness weighting system applied to invasive disease surveillance data allows the NFDS model to predict strain proportions in the projected carriage population in the post-PCV13/pre-PCV15 and -PCV20 epoch. The significance of our research lies in using a sample of invasive disease surveillance data to extend the use of NFDS as an evolutionary mechanism to predict post-PCV13 population dynamics. This has shown that we can correct for biased sampling that arises from differences in virulence and can enrich the value of genomic data from disease surveillance and advances our understanding of how NFDS impacts carriage population dynamics after both PCV7 and PCV13 vaccination.},
}
% Review article. Volume/issue were empty in the source export (article number only in pages).
@article{pmid38163518,
  author   = {Abondio, P. and Bruno, F. and Passarino, G. and Montesanto, A. and Luiselli, D.},
  title    = {Pangenomics: a new era in the field of neurodegenerative diseases},
  journal  = {Ageing research reviews},
  year     = {2023},
  pages    = {102180},
  doi      = {10.1016/j.arr.2023.102180},
  pmid     = {38163518},
  issn     = {1872-9649},
  abstract = {A pangenome is composed of all the genetic variability of a group of individuals, and its application to the study of neurodegenerative diseases may provide valuable insights into the underlying aspects of genetic heterogeneity for these complex ailments, including gene expression, epigenetics, and translation mechanisms. Furthermore, a reference pangenome allows for the identification of previously undetected structural commonalities and differences among individuals, which may help in the diagnosis of a disease, support the prediction of what will happen over time (prognosis) and aid in developing novel treatments in the perspective of personalized medicine. Therefore, in the present review, the application of the pangenome concept to the study of neurodegenerative diseases will be discussed and analyzed for its potential to enable an improvement in diagnosis and prognosis for these illnesses, leading to the development of tailored treatments for individual patients from the knowledge of the genomic composition of a whole population.},
}
% Journal article. Volume/issue/pages were empty in the source export; locate via DOI.
@article{pmid38158885,
  author   = {Lv, Y. and Liu, C. and Li, X. and Wang, Y. and He, H. and He, W. and Chen, W. and Yang, L. and Dai, X. and Cao, X. and Yu, X. and Liu, J. and Zhang, B. and Wei, H. and Zhang, H. and Qian, H. and Shi, C. and Leng, Y. and Liu, X. and Guo, M. and Wang, X. and Zhang, Z. and Wang, T. and Zhang, B. and Xu, Q. and Cui, Y. and Zhang, Q. and Yuan, Q. and Jahan, N. and Ma, J. and Zheng, X. and Zhou, Y. and Qian, Q. and Guo, L. and Shang, L.},
  title    = {A centromere map based on super pan-genome highlights the structure and function of rice centromeres},
  journal  = {Journal of integrative plant biology},
  year     = {2023},
  doi      = {10.1111/jipb.13607},
  pmid     = {38158885},
  issn     = {1744-7909},
  abstract = {Rice (Oryza sativa) is a significant crop worldwide with a genome shaped by various evolutionary factors. Rice centromeres are crucial for chromosome segregation, and contain some unreported genes. Due to the diverse and complex centromere region, a comprehensive understanding of rice centromere structure and function at the population level is needed. We constructed a high-quality centromere map based on the rice super pan-genome consisting of a 251-accession panel comprising both cultivated and wild species of Asian and African rice. We showed that rice centromeres have diverse satellite repeat CentO, which vary across chromosomes and subpopulations, reflecting their distinct evolutionary patterns. We also revealed that long terminal repeats (LTRs), especially young Gypsy-type LTRs, are abundant in the peripheral CentO-enriched regions (CoERs) and drive rice centromere expansion and evolution. Furthermore, high-quality genome assembly and complete T2T reference genome enable us to obtain more centromeric genome information despite the mapping and cloning of centromere genes is challenging. We investigated the association between structural variations (SVs) and gene expression in the rice centromere.
A centromere gene, OsMAB, that positively regulates rice tiller number, was further confirmed by eQTL, haplotype analysis and CRISPR/Cas9 methods. By revealing the new insights into the evolutionary patterns and biological roles of rice centromeres, our finding will facilitate future research on centromere biology and crop improvement. This article is protected by copyright. All rights reserved.},
}
% Source export carries no DOI, abstract, volume, issue or pages for this record;
% the PMID is the only locator.
@article{pmid38157192,
  author  = {Yu, Y. and Chen, H.},
  title   = {Human pangenome: far-reaching implications in precision medicine},
  journal = {Frontiers of medicine},
  year    = {2023},
  pmid    = {38157192},
  issn    = {2095-0225},
}
% Journal article with full volume/issue; pages holds the article id.
@article{pmid38147560,
  author   = {Beavan, A. and Domingo-Sananes, M. R. and McInerney, J. O.},
  title    = {Contingency, repeatability, and predictability in the evolution of a prokaryotic pangenome},
  journal  = {Proceedings of the National Academy of Sciences of the United States of America},
  year     = {2024},
  volume   = {121},
  number   = {1},
  pages    = {e2304934120},
  doi      = {10.1073/pnas.2304934120},
  pmid     = {38147560},
  issn     = {1091-6490},
  support  = {BB/Y513374/1//UKRI | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; },
  abstract = {Pangenomes exhibit remarkable variability in many prokaryotic species, much of which is maintained through the processes of horizontal gene transfer and gene loss. Repeated acquisitions of near-identical homologs can easily be observed across pangenomes, leading to the question of whether these parallel events potentiate similar evolutionary trajectories, or whether the remarkably different genetic backgrounds of the recipients mean that postacquisition evolutionary trajectories end up being quite different. In this study, we present a machine learning method that predicts the presence or absence of genes in the Escherichia coli pangenome based on complex patterns of the presence or absence of other accessory genes within a genome. Our analysis leverages the repeated transfer of genes through the E. coli pangenome to observe patterns of repeated evolution following similar events. We find that the presence or absence of a substantial set of genes is highly predictable from other genes alone, indicating that selection potentiates and maintains gene-gene co-occurrence and avoidance relationships deterministically over long-term bacterial evolution and is robust to differences in host evolutionary history. We propose that at least part of the pangenome can be understood as a set of genes with relationships that govern their likely cohabitants, analogous to an ecosystem's set of interacting organisms.
Our findings indicate that intragenomic gene fitness effects may be key drivers of prokaryotic evolution, influencing the repeated emergence of complex gene-gene relationships across the pangenome.},
}
% Journal article; pages holds the article id. No DOI was present in the source export.
% Percent signs in the abstract are escaped so the field is safe if a style typesets it.
@article{pmid38145107,
  author   = {Dabbaghie, F. and Srikakulam, S. K. and Marschall, T. and Kalinina, O. V.},
  title    = {{PanPA}: generation and alignment of panproteome graphs},
  journal  = {Bioinformatics advances},
  year     = {2023},
  volume   = {3},
  number   = {1},
  pages    = {vbad167},
  pmid     = {38145107},
  issn     = {2635-0041},
  abstract = {MOTIVATION: Compared to eukaryotes, prokaryote genomes are more diverse through different mechanisms, including a higher mutation rate and horizontal gene transfer. Therefore, using a linear representative reference can cause a reference bias. Graph-based pangenome methods have been developed to tackle this problem. However, comparisons in DNA space are still challenging due to this high diversity. In contrast, amino acid sequences have higher similarity due to evolutionary constraints, whereby a single amino acid may be encoded by several synonymous codons. Coding regions cover the majority of the genome in prokaryotes. Thus, panproteomes present an attractive alternative leveraging the higher sequence similarity while not losing much of the genome in non-coding regions.
RESULTS: We present PanPA, a method that takes a set of multiple sequence alignments of protein sequences, indexes them, and builds a graph for each multiple sequence alignment. In the querying step, it can align DNA or amino acid sequences back to these graphs. We first showcase that PanPA generates correct alignments on a panproteome from 1350 Escherichia coli. To demonstrate that panproteomes allow comparisons at longer phylogenetic distances, we compare DNA and protein alignments from 1073 Salmonella enterica assemblies against E.coli reference genome, pangenome, and panproteome using BWA, GraphAligner, and PanPA, respectively; with PanPA aligning around 22\% more sequences. We also aligned a DNA short-reads whole genome sequencing (WGS) sample from S.enterica against the E.coli reference with BWA and the panproteome with PanPA, where PanPA was able to find alignment for 68\% of the reads compared to 5\% with BWA.
PanPA is available at https://github.com/fawaz-dabbaghieh/PanPA.},
}
% Journal article. The pages/article-number field was empty in the source export; locate via DOI.
@article{pmid38139397,
  author   = {Yin, S. and Zhao, L. and Liu, J. and Sun, Y. and Li, B. and Wang, L. and Ren, Z. and Chen, C.},
  title    = {Pan-genome Analysis of {WOX} Gene Family and Function Exploration of {CsWOX9} in Cucumber},
  journal  = {International journal of molecular sciences},
  year     = {2023},
  volume   = {24},
  number   = {24},
  doi      = {10.3390/ijms242417568},
  pmid     = {38139397},
  issn     = {1422-0067},
  support  = {ZR2022MC084//Shandong Natural Science Foundation/ ; 31701923//National Natural Science Foundation of China/ ; 32372703//National Natural Science Foundation of China/ ; 32172605//National Natural Science Foundation of China/ ; },
  abstract = {Cucumber is an economically important vegetable crop, and the warts (composed of spines and Tubercules) of cucumber fruit are an important quality trait that influences its commercial value. WOX transcription factors are known to have pivotal roles in regulating various aspects of plant growth and development, but their studies in cucumber are limited. Here, genome-wide identification of cucumber WOX genes was performed using the pan-genome analysis of 12 cucumber varieties. Our findings revealed diverse CsWOX genes in different cucumber varieties, with variations observed in protein sequences and lengths, gene structure, and conserved protein domains, possibly resulting from the divergent evolution of CsWOX genes as they adapt to diverse cultivation and environmental conditions. Expression profiles of the CsWOX genes demonstrated that CsWOX9 was significantly expressed in unexpanded ovaries, especially in the epidermis. Additionally, analysis of the CsWOX9 promoter revealed two binding sites for the C2H2 zinc finger protein. We successfully executed a yeast one-hybrid assay (Y1H) and a dual-luciferase (LUC) transaction assay to demonstrate that CsWOX9 can be transcriptionally activated by the C2H2 zinc finger protein Tu, which is crucial for fruit Tubercule formation in cucumber.
Overall, our results indicated that CsWOX9 is a key component of the molecular network that regulates wart formation in cucumber fruits, and provide further insight into the function of CsWOX genes in cucumber.},
}
% Journal article. The pages/article-number field was empty in the source export; locate via DOI.
% Percent signs are escaped, and the ion charge is written as a LaTeX superscript
% (the export had the bracket markup "[M + H][+]").
@article{pmid38138105,
  author   = {Zhang, Y. and Pan, M. and Wang, Q. and Wang, L. and Liao, L.},
  title    = {Complete Genome Sequence and Pan-Genome Analysis of {Shewanella} oncorhynchi {Z-P2}, a Siderophore Putrebactin-Producing Bacterium},
  journal  = {Microorganisms},
  year     = {2023},
  volume   = {11},
  number   = {12},
  doi      = {10.3390/microorganisms11122961},
  pmid     = {38138105},
  issn     = {2076-2607},
  support  = {2022BEC030//Technological innovation Program of Hubei Province(Major project)/ ; },
  abstract = {In this study, we reported the complete genome sequence of Shewanella oncorhynchi for the first time. S. oncorhynchi Z-P2 is a bacterium that produces the siderophore putrebactin. Its genome consists of a circular chromosome of 5,034,612 bp with a G + C content of 45.4\%. A total of 4544 protein-coding genes, 109 tRNAs and 31 rRNAs were annotated by the RAST. Five non-ribosomal peptide synthetase (NRPS) and polyketide synthetase (PKS) gene clusters were identified by the antiSMASH analysis. The pan-genome analysis of Z-P2 and 10 Shewanella putrefaciens revealed 9228 pan-gene clusters and 2681 core gene clusters, with Z-P2 having 618 unique gene clusters. Additionally, the gene cluster involved in putrebactin biosynthesis in Z-P2 was annotated, and the mechanism of putrebactin biosynthesis was analyzed. The putrebactin produced by Z-P2 was detected using UPLC-MS analysis, with an [M + H]$^{+}$ molecular ion at m/z 373.21. These findings provide valuable support for further research on the genetic engineering of putrebactin biosynthetic genes of Z-P2 and their potential applications.},
}
% Case report. The pages/article-number field was empty in the source export; locate via DOI.
% Percent signs in the abstract are escaped so the field is safe if a style typesets it.
@article{pmid38136976,
  author   = {Serag, M. and Plutino, M. and Charles, P. and Azulay, J. P. and Chaussenot, A. and Paquis-Flucklinger, V. and Ait-El-Mkadem Saadi, S. and Rouzier, C.},
  title    = {A Case Report of {SYNE1} Deficiency-Mimicking Mitochondrial Disease and the Value of Pangenomic Investigations},
  journal  = {Genes},
  year     = {2023},
  volume   = {14},
  number   = {12},
  doi      = {10.3390/genes14122154},
  pmid     = {38136976},
  issn     = {2073-4425},
  abstract = {Mitochondrial disorders are characterized by a huge clinical, biochemical, and genetic heterogeneity, which poses significant diagnostic challenges. Several studies report that more than 50\% of patients with suspected mitochondrial disease could have a non-mitochondrial disorder. Thus, only the identification of the causative pathogenic variant can confirm the diagnosis. Herein, we describe the diagnostic journey of a family suspected of having a mitochondrial disorder who were referred to our Genetics Department. The proband presented with the association of cerebellar ataxia, COX-negative fibers on muscle histology, and mtDNA deletions. Whole exome sequencing (WES), supplemented by a high-resolution array, comparative genomic hybridization (array-CGH), allowed us to identify two pathogenic variants in the non-mitochondrial SYNE1 gene. The proband and her affected sister were found to be compound heterozygous for a known nonsense variant (c.13258C>T, p.(Arg4420Ter)), and a large intragenic deletion that was predicted to result in a loss of function. To our knowledge, this is the first report of a large intragenic deletion of SYNE1 in patients with cerebellar ataxia (ARCA1). This report highlights the interest in a pangenomic approach to identify the genetic basis in heterogeneous neuromuscular patients with the possible cause of mitochondrial disease. Moreover, even rare copy number variations should be considered in patients with a phenotype suggestive of SYNE1 deficiency.},
}
@article {pmid38134602, year = {2023}, author = {Mumtaz, MN and Irfan, M and Siraj, S and Khan, A and Khan, H and Imran, M and Khan, IA and Khan, A}, title = {Whole-genome sequencing of extensively drug-resistant Salmonella enterica serovar Typhi clinical isolates from the Peshawar region of Pakistan.}, journal = {Journal of infection and public health}, volume = {17}, number = {2}, pages = {271-282}, doi = {10.1016/j.jiph.2023.12.002},